CommunityDetectionCodes
CommunityDetectionCodes copied to clipboard
Normalized Mutual Information
The extended NMI should lie between 0 and 1, but the NMI of the example in overlap_nmi.py is 2.60794966304. Could you explain why?
I have the same question. Does anybody know why?
The code is slightly wrong: math.log(2, num) should be math.log(num, 2), because math.log takes the value first and the base second.
If math.log(2, num) is changed to math.log(num, 2), the result of overlap_nmi.py becomes -0.2354300373718371, which is also incorrect.
Here is my code, it may work for you:
import math import numpy as np
def calc_overlap_nmi(num_vertices, result_comm_list, ground_truth_comm_list):
    """Return the overlapping NMI score of a detected cover against a reference.

    Convenience wrapper that builds an ``OverlapNMI`` from the three arguments
    and returns the value of its ``calculate_overlap_nmi()`` method.

    Args:
        num_vertices: Total number of vertices in the graph.
        result_comm_list: Detected communities, one collection of vertices
            per community.
        ground_truth_comm_list: Reference communities, in the same format.

    Returns:
        Whatever ``OverlapNMI.calculate_overlap_nmi()`` returns.
    """
    scorer = OverlapNMI(num_vertices, result_comm_list, ground_truth_comm_list)
    return scorer.calculate_overlap_nmi()
class OverlapNMI:
    """Normalized mutual information (NMI) between two overlapping covers.

    Each cover is a sequence of communities and each community is a
    collection of vertices; a vertex may belong to several communities.
    Every community is treated as a binary variable ("vertex is a member" /
    "vertex is not a member") and the score is::

        1 - 0.5 * (H(X|Y)_norm + H(Y|X)_norm)

    where ``H(X|Y)_norm`` is the mean over communities ``X_i`` of
    ``min_j H(X_i|Y_j) / H(X_i)``.

    Attributes:
        x_comm_list: The detected cover (``result_comm_list``).
        y_comm_list: The reference cover (``ground_truth_comm_list``).
        num_vertices: Total number of vertices in the graph.
        smoothing: Pseudo-count added to each of the four cells of the
            membership contingency table before normalizing.
    """

    def __init__(self, num_vertices, result_comm_list, ground_truth_comm_list,
                 smoothing=1):
        """Store the two covers and the graph size.

        Args:
            num_vertices: Total number of vertices in the graph.
            result_comm_list: Detected communities.
            ground_truth_comm_list: Reference communities.
            smoothing: Non-negative pseudo-count for the joint membership
                table. The default ``1`` keeps the historical behavior; ``0``
                uses the raw counts, in which case identical covers score
                exactly 1.

        Raises:
            ValueError: If ``smoothing`` is negative.
        """
        if smoothing < 0:
            raise ValueError("smoothing must be non-negative")
        self.x_comm_list = result_comm_list
        self.y_comm_list = ground_truth_comm_list
        self.num_vertices = num_vertices
        self.smoothing = smoothing

    @staticmethod
    def _h(prob):
        """Return ``-prob * log2(prob)``, or 0 when ``prob`` is not positive."""
        if prob > 0:
            return -1 * prob * math.log2(prob)
        return 0

    def _community_entropy(self, comm):
        """Return H(X_i): entropy of the membership indicator of ``comm``."""
        member_prob = float(len(comm)) / self.num_vertices
        return self._h(member_prob) + self._h(1 - member_prob)

    def _joint_entropy(self, comm_x, comm_y):
        """Return H(X_i, Y_j), or H(X_i) + H(Y_j) when the pair is rejected.

        The pair is rejected when h(P11) + h(P00) < h(P01) + h(P10); the
        caller then obtains H(X_i|Y_j) == H(X_i).
        """
        overlap = float(len(set(comm_x) & set(comm_y)))
        size_x = len(comm_x)
        size_y = len(comm_y)
        pseudo = self.smoothing
        total = self.num_vertices + 4 * pseudo

        prob11 = (overlap + pseudo) / total
        prob10 = (size_x - overlap + pseudo) / total
        prob01 = (size_y - overlap + pseudo) / total
        # Vertices in neither community: n - |X_i union Y_j|. Counting
        # n - |X_i intersect Y_j| here would make the four cells sum to more
        # than 1 whenever the communities differ.
        prob00 = (self.num_vertices - size_x - size_y + overlap + pseudo) / total

        h11, h10 = self._h(prob11), self._h(prob10)
        h01, h00 = self._h(prob01), self._h(prob00)
        if (h11 + h00) >= (h01 + h10):
            return h11 + h10 + h01 + h00
        return self._community_entropy(comm_x) + self._community_entropy(comm_y)

    def _normalized_conditional_entropy(self, cover_x, cover_y):
        """Return H(X|Y)_norm: mean over X_i of min_j H(X_i|Y_j) / H(X_i)."""
        accumulated = 0.0
        for comm_x in cover_x:
            # H(X_i|Y) is the smallest H(X_i, Y_j) - H(Y_j) over all Y_j.
            best = min(
                self._joint_entropy(comm_x, comm_y) - self._community_entropy(comm_y)
                for comm_y in cover_y
            )
            accumulated += best / self._community_entropy(comm_x)
        return accumulated / len(cover_x)

    def calculate_overlap_nmi(self):
        """Return the NMI between ``x_comm_list`` and ``y_comm_list``.

        Returns:
            ``0`` if either cover has no communities, otherwise
            ``1 - 0.5 * (H(X|Y)_norm + H(Y|X)_norm)``.

        Raises:
            ZeroDivisionError: If ``num_vertices`` is 0, or if any community
                has zero entropy (it is empty or its length equals
                ``num_vertices``).
        """
        cover_x = self.x_comm_list
        cover_y = self.y_comm_list
        if len(cover_x) == 0 or len(cover_y) == 0:
            return 0
        x_given_y = self._normalized_conditional_entropy(cover_x, cover_y)
        y_given_x = self._normalized_conditional_entropy(cover_y, cover_x)
        return 1 - 0.5 * (x_given_y + y_given_x)