CommunityDetectionCodes icon indicating copy to clipboard operation
CommunityDetectionCodes copied to clipboard

Normalized Mutual Information

Open zjming opened this issue 5 years ago • 4 comments

The extended NMI should lie between 0 and 1, but the NMI for the example in overlap_nmi.py is 2.60794966304. Could you explain this discrepancy?

zjming avatar Dec 07 '18 01:12 zjming

I have the same question. Does anybody know why?

Joey-Hu avatar Jul 21 '20 02:07 Joey-Hu

The code is slightly wrong: math.log(2, num) should be math.log(num, 2), because the signature is math.log(x, base).

matrixfang avatar Oct 04 '20 02:10 matrixfang

If math.log(2, num) is changed to math.log(num, 2), overlap_nmi.py returns -0.2354300373718371, which is also incorrect.

x3y1 avatar Nov 15 '20 14:11 x3y1

Here is my code, it may work for you:

import math

import numpy as np

def calc_overlap_nmi(num_vertices, result_comm_list, ground_truth_comm_list):
    """Convenience wrapper around ``OverlapNMI``.

    Builds an ``OverlapNMI`` from the three arguments (passed through
    unchanged, in the same order) and returns whatever its
    ``calculate_overlap_nmi()`` method returns.
    """
    scorer = OverlapNMI(num_vertices, result_comm_list, ground_truth_comm_list)
    return scorer.calculate_overlap_nmi()

class OverlapNMI:
    """Normalized mutual information between two overlapping covers.

    Each cover is a sequence of communities and each community is a
    collection of vertex ids.  Every community is treated as a binary
    variable ("vertex belongs / does not belong"); for each community of
    one cover the best-matching community of the other cover is the one
    that minimises the conditional entropy, and the final score is

        1 - 0.5 * (H(X|Y)_norm + H(Y|X)_norm)

    Parameters
    ----------
    num_vertices:
        Total number of vertices in the graph (the denominator of every
        membership probability).
    result_comm_list:
        Cover X, e.g. the detected communities.
    ground_truth_comm_list:
        Cover Y, e.g. the reference communities.
    smoothing:
        Pseudo-count added to each of the four cells of the joint
        membership table.  The default ``1.0`` keeps the historical
        "+1 per cell / +4 total" behaviour.  Pass ``0`` to use the raw
        frequencies; only then do identical covers score exactly 1.
        The single-community entropies are never smoothed.

    Raises
    ------
    ValueError
        If ``smoothing`` is negative.
    """

    def __init__(self, num_vertices, result_comm_list, ground_truth_comm_list,
                 smoothing=1.0):
        if smoothing < 0:
            raise ValueError("smoothing must be non-negative")
        self.x_comm_list = result_comm_list
        self.y_comm_list = ground_truth_comm_list
        self.num_vertices = num_vertices
        self.smoothing = smoothing

    @staticmethod
    def _h(prob):
        """Return the entropy term ``-p * log2(p)``, with ``h(p) = 0`` for ``p <= 0``."""
        return -prob * math.log2(prob) if prob > 0 else 0.0

    def _community_entropy(self, comm):
        """Return H(X_i): entropy of the membership indicator of one community."""
        p_member = float(len(comm)) / self.num_vertices
        return self._h(p_member) + self._h(1 - p_member)

    def _joint_entropy(self, comm_x, comm_y):
        """Return H(X_i, Y_j) for the membership indicators of two communities.

        When the "agreement" cells (in both / in neither) carry less
        entropy than the "disagreement" cells, the pair is treated as
        unrelated and ``H(X_i) + H(Y_j)`` is returned instead, so that the
        resulting conditional entropy equals ``H(X_i)``.
        """
        size_x = len(comm_x)
        size_y = len(comm_y)
        shared = float(len(set(comm_x) & set(comm_y)))
        pseudo = self.smoothing
        total = self.num_vertices + 4 * pseudo

        p11 = (shared + pseudo) / total
        p10 = (size_x - shared + pseudo) / total
        p01 = (size_y - shared + pseudo) / total
        # Vertices in neither community: n - |X ∪ Y|.  Using n - |X ∩ Y|
        # here would make the four probabilities sum to more than 1.
        p00 = (self.num_vertices - size_x - size_y + shared + pseudo) / total

        h11, h10, h01, h00 = (self._h(p) for p in (p11, p10, p01, p00))
        if h11 + h00 >= h01 + h10:
            return h11 + h10 + h01 + h00
        return self._community_entropy(comm_x) + self._community_entropy(comm_y)

    def _normalized_conditional_entropy(self, comm_x, cover_y):
        """Return ``min_j H(X_i|Y_j) / H(X_i)`` over the communities of ``cover_y``.

        A community with zero entropy (empty, or containing every vertex)
        carries no information, so the ratio is undefined; it is scored
        as 1.0 (the worst case) instead of raising ``ZeroDivisionError``.
        """
        h_x = self._community_entropy(comm_x)
        if h_x == 0:
            return 1.0
        best = min(
            self._joint_entropy(comm_x, comm_y) - self._community_entropy(comm_y)
            for comm_y in cover_y
        )
        return best / h_x

    def _cover_conditional_entropy(self, cover_x, cover_y):
        """Return H(X|Y)_norm: mean normalized conditional entropy over ``cover_x``."""
        per_community = (
            self._normalized_conditional_entropy(comm_x, cover_y)
            for comm_x in cover_x
        )
        return sum(per_community) / len(cover_x)

    def calculate_overlap_nmi(self):
        """Return the overlapping NMI of the two covers.

        Returns 0.0 when either cover contains no communities.
        """
        cover_x = self.x_comm_list
        cover_y = self.y_comm_list
        # len() rather than truthiness so array-like covers keep working.
        if len(cover_x) == 0 or len(cover_y) == 0:
            return 0.0
        x_given_y = self._cover_conditional_entropy(cover_x, cover_y)
        y_given_x = self._cover_conditional_entropy(cover_y, cover_x)
        return 1 - 0.5 * (x_given_y + y_given_x)

Joey-Hu avatar Nov 18 '20 08:11 Joey-Hu