1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
| import networkx as nx import pandas as pd import numpy as np import random from tqdm import tqdm from gensim.models import Word2Vec
# Load the tab-separated dataset and show its first rows as a sanity check.
df = pd.read_csv("dataset/seealsology-data.tsv", sep="\t")
print(df.head())
# Build an undirected graph from the edge table: one edge per row, going from
# the "source" column to the "target" column. edge_attr=True keeps every
# remaining column as an edge attribute.
G = nx.from_pandas_edgelist(
    df,
    source="source",
    target="target",
    edge_attr=True,
    create_using=nx.Graph(),
)

# Report the size of the resulting graph.
print("Number of nodes:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())
def random_walk(G, node, walk_length: int) -> list:
    """Simulate a random walk on ``G`` starting from ``node``.

    At every step the next node is drawn with ``random.choice`` from the
    neighbors of the current node.

    Args:
        G: NetworkX graph (any object exposing ``neighbors(node)`` works).
        node: Start node for the random walk.
        walk_length: Maximum number of nodes in the walk, start node included.

    Returns:
        The list of visited nodes, beginning with ``node``. It holds fewer
        than ``walk_length`` nodes when the walk reaches a node that has no
        neighbors; it always holds at least the start node, even when
        ``walk_length`` is less than 1.
    """
    walk = [node]
    current = node  # keep the caller's argument untouched; track position here
    for _ in range(walk_length - 1):
        # random.choice needs a sequence, so materialize the neighbor iterator.
        neighbors = list(G.neighbors(current))
        if not neighbors:
            # Dead end: nowhere to go, return the walk collected so far.
            break
        current = random.choice(neighbors)
        walk.append(current)
    return walk
# Build the walk corpus: 5 random walks per node, each at most 10 nodes long.
all_nodes = list(G.nodes())
random_walks = []

for n in tqdm(all_nodes):
    random_walks.extend(random_walk(G, n, 10) for _ in range(5))

print("Number of random walks:", len(random_walks))
# Configure a skip-gram (sg=1) Word2Vec model that is trained with negative
# sampling (hs=0, negative=10). Each random walk plays the role of a sentence
# and each node the role of a word.
model = Word2Vec(
    window=4,
    sg=1,
    hs=0,
    negative=10,
    alpha=0.03,  # initial learning rate
    min_alpha=0.0007,  # learning rate decays towards this value
    seed=14,
)

# Collect the vocabulary from the walks, then train on the same corpus.
model.build_vocab(random_walks, progress_per=2)
model.train(
    random_walks,
    total_examples=model.corpus_count,
    epochs=20,
    report_delay=1,
)

print(model)
|