LINE
LINE copied to clipboard
simple wrapper for this implemention for Keras 2.1.1
class LINE(object):
def __init__(self, graph_edge_num, graph_nodes_num,
dimension):
self.e = graph_edge_num
self.n = graph_nodes_num
self.steps_per_epoch = None
self.epoch_train_size = None
self.dimension = dimension
def _generate_batch_train(self, adj_list,
graph_nodes_num, graph_edge_num,
batch_size, negativeRatio,
negative_sampling):
# 使用 negative sampling 优化
table_size = 1e8
power = 0.75
sampling_table = None
data = np.ones((adj_list.shape[0]), dtype=np.int8)
mat = csr_matrix((data, (adj_list[:, 0], adj_list[:, 1])), shape=(graph_nodes_num, graph_nodes_num),
dtype=np.int8)
batch_size_ones = np.ones((batch_size), dtype=np.int8)
nb_train_sample = adj_list.shape[0]
index_array = np.arange(nb_train_sample)
nb_batch = int(np.ceil(nb_train_sample / float(batch_size)))
batches = [(i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) for i in range(0, nb_batch)]
if negative_sampling == "NON-UNIFORM":
print("Pre-procesing for non-uniform negative sampling!")
node_degree = np.zeros(graph_nodes_num)
for i in range(graph_edge_num):
node_degree[adj_list[i, 0]] += 1
node_degree[adj_list[i, 1]] += 1
norm = sum([math.pow(node_degree[i], power) for i in range(graph_nodes_num)])
sampling_table = np.zeros(int(table_size), dtype=np.uint32)
p = 0
i = 0
for j in range(graph_nodes_num):
p += float(math.pow(node_degree[j], power)) / norm
while i < table_size and float(i) / table_size < p:
sampling_table[i] = j
i += 1
while 1:
for batch_index, (batch_start, batch_end) in enumerate(batches):
pos_edge_list = index_array[batch_start:batch_end]
pos_left_nodes = adj_list[pos_edge_list, 0]
pos_right_nodes = adj_list[pos_edge_list, 1]
pos_relation_y = batch_size_ones[0:len(pos_edge_list)]
neg_left_nodes = np.zeros(len(pos_edge_list) * negativeRatio, dtype=np.int32)
neg_right_nodes = np.zeros(len(pos_edge_list) * negativeRatio, dtype=np.int32)
neg_relation_y = np.zeros(len(pos_edge_list) * negativeRatio, dtype=np.int8)
h = 0
for i in pos_left_nodes:
for k in range(negativeRatio):
rn = sampling_table[random.randint(0,
table_size - 1)] if negative_sampling == "NON-UNIFORM" else random.randint(
0, graph_nodes_num - 1)
while mat[i, rn] == 1 or i == rn:
rn = sampling_table[random.randint(0,
table_size - 1)] if negative_sampling == "NON-UNIFORM" else random.randint(
0, graph_nodes_num - 1)
neg_left_nodes[h] = i
neg_right_nodes[h] = rn
h += 1
left_nodes = np.concatenate((pos_left_nodes, neg_left_nodes), axis=0)
right_nodes = np.concatenate((pos_right_nodes, neg_right_nodes), axis=0)
relation_y = np.concatenate((pos_relation_y, neg_relation_y), axis=0)
yield ([left_nodes, right_nodes], [relation_y])
def _model(self, graph_nodes_num, dimension):
left_input = Input(shape=(1,))
right_input = Input(shape=(1,))
left_model = Sequential()
left_model.add(Embedding(input_dim=graph_nodes_num + 1, output_dim=dimension, input_length=1, mask_zero=False))
left_model.add(Reshape((dimension,)))
right_model = Sequential()
right_model.add(Embedding(input_dim=graph_nodes_num + 1, output_dim=dimension, input_length=1, mask_zero=False))
right_model.add(Reshape((dimension,)))
left_embed = left_model(left_input)
right_embed = left_model(right_input)
left_right_dot = dot(inputs=[left_embed, right_embed], axes=1, name="left_right_dot")
model = Model(inputs=[left_input, right_input], outputs=[left_right_dot])
embed_generator = Model(inputs=[left_input, right_input], outputs=[left_embed, right_embed])
return model, embed_generator
def _line_loss(self, y_true, y_pred):
coeff = y_true*2 - 1
return -K.mean(K.log(K.sigmoid(coeff*y_pred)))
def fit(self, adj_list, batch_size, negative_ratio, negative_sampling, epoch_num):
self.steps_per_epoch = int(self.e / batch_size)
self.epoch_train_size = (1 + negative_ratio) * self.e
# 产生训练样本
data = self._generate_batch_train(adj_list, self.n, self.e, batch_size, negative_ratio,
negative_sampling)
self.model, self.embed_generator = self._model(self.n, self.dimension)
# model.summary()
self.model.compile(optimizer='rmsprop', loss={'left_right_dot': self._line_loss})
self.model.fit_generator(data, steps_per_epoch=self.epoch_train_size / batch_size, epochs=epoch_num, verbose=1)
def predict(self, data):
# link prediction
return self.embed_generator.predict_on_batch(data)
usage
line = LINE(graph_edge_num, graph_nodes_num, dimension)
line.fit(adj_list, batch_size, negative_ratio, negative_sampling, epoch_num)
What are the pre-requisites for running this code?