with open(args.data_path) as f:
    log.info("Begin Load Graph")
    src = []
    dst = []
    # Each line holds one edge as "src dst"; both directions are appended
    # so the graph is stored as undirected.
    for idx, line in tqdm.tqdm(enumerate(f)):
        s, d = line.strip().split()
        src.append(s)
        dst.append(d)
        dst.append(s)
        src.append(d)

src = np.array(src, dtype="int64").reshape(-1, 1)
dst = np.array(dst, dtype="int64").reshape(-1, 1)
edges = np.hstack([src, dst])

log.info("Begin Build Index")
ret_dict.graph = pgl.graph.Graph(num_nodes=args.num_nodes, edges=edges)
ret_dict.graph.indegree()  # compute degrees now so the index is built eagerly
log.info("End Build Index")

if args.phase == "train":
    # only the worker loads the training samples
    data = load_pos_neg(args.data_path)
    feed_name_list = [var.name for var in ret_dict.feed_list]
    train_iter = reader.graph_reader(
        args.num_layers,
        ret_dict.graph_wrappers,
        batch_size=args.batch_size,
        data=data['train_data'],
        samples=args.samples,
        num_workers=args.sample_workers,
        feed_name_list=feed_name_list,
        use_pyreader=args.use_pyreader,
        graph=ret_dict.graph)
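# For context, a minimal self-contained sketch of the same pattern on a toy
# edge list (node ids, counts, and variable names here are illustrative only,
# assuming PGL 1.x as in the snippets above):
import numpy as np
import pgl

raw_edges = [(0, 1), (1, 2), (2, 3)]
toy_src, toy_dst = [], []
for s, d in raw_edges:
    # store each edge in both directions, as above
    toy_src.extend([s, d])
    toy_dst.extend([d, s])
toy_edges = np.hstack([
    np.array(toy_src, dtype="int64").reshape(-1, 1),
    np.array(toy_dst, dtype="int64").reshape(-1, 1),
])
toy_graph = pgl.graph.Graph(num_nodes=4, edges=toy_edges)
print(toy_graph.indegree())  # [1 2 2 1]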
def main(args):
    data = load_data(args.normalize, args.symmetry)
    log.info("preprocess finish")
    log.info("Train Examples: %s" % len(data["train_index"]))
    log.info("Val Examples: %s" % len(data["val_index"]))
    log.info("Test Examples: %s" % len(data["test_index"]))
    log.info("Num nodes %s" % data["graph"].num_nodes)
    log.info("Num edges %s" % data["graph"].num_edges)
    log.info("Average Degree %s" % np.mean(data["graph"].indegree()))

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()

    # Collect the per-layer neighbor-sampling fan-out; non-positive
    # values drop that layer's sampling.
    samples = []
    if args.samples_1 > 0:
        samples.append(args.samples_1)
    if args.samples_2 > 0:
        samples.append(args.samples_2)
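    # Example (illustrative values): samples_1=25 and samples_2=10 give
    # samples == [25, 10], i.e. 25 sampled neighbors at the first hop and
    # 10 at the second; samples_2=0 would drop the second hop entirely.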
optimization(args.lr, loss, train_steps, args.optimizer)

# init and run server or worker
if fleet.is_server():
    fleet.init_server(args.warm_start_from_dir)
    fleet.run_server()

if fleet.is_worker():
    log.info("start to init worker")
    fleet.init_worker()
    # only the worker loads the samples
    log.info("init worker done")
    exe = F.Executor(F.CPUPlace())
    exe.run(fleet.startup_program)
    log.info("Startup done")
if args.dataset is not None:
    if args.dataset == "BlogCatalog":
        graph = data_loader.BlogCatalogDataset().graph
    elif args.dataset == "ArXiv":
        graph = data_loader.ArXivDataset().graph
    else:
        raise ValueError(args.dataset + " dataset doesn't exist")
    log.info("Load built-in %s dataset done." % args.dataset)
elif args.walkpath_files is None or args.walkpath_files == "None":
    graph = build_graph(args.num_nodes, args.edge_path)
    log.info("Load graph from '%s' done." % args.edge_path)
else:
    # walk paths are pre-generated, so only a placeholder graph is needed
    graph = build_fake_graph(args.num_nodes)
    log.info("Load fake graph done.")
feed_dict = {
    # the first entries of this dict are truncated in the source;
    # 'u_i' (the other endpoint of each sampled edge) is the likely
    # missing key and is reconstructed here
    'u_i': u_i,
    'u_j': u_j,
    'label': label,
    'learning_rate': lr
}
ret_loss = exe.run(main_program,
                   feed=feed_dict,
                   fetch_list=[loss],
                   return_numpy=True)
if b % 500 == 0:
    log.info("Epoch %d | Step %d | Loss %f | lr: %f" %
             (epoch, b, ret_loss[0], lr))

# save parameters after every epoch
log.info("saving persistable parameters...")
cur_save_path = os.path.join(args.save_dir,
                             "model_epoch_%d" % (epoch + 1))
save_param(cur_save_path, ['shared_w'])
def walk(args):
    graph = build_graph(args.num_nodes, args.edge_path)
    num_sample_workers = args.num_sample_workers

    if args.train_files is None or args.train_files == "None":
        log.info("Walking from graph...")
        train_files = [None for _ in range(num_sample_workers)]
    else:
        log.info("Walking from train_data...")
        files = get_file_list(args.train_files)
        train_files = [[] for _ in range(num_sample_workers)]
        # distribute the input files round-robin across the sample workers
        for idx, f in enumerate(files):
            train_files[idx % num_sample_workers].append(f)
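        # A quick illustration of the round-robin sharding (file names made up):
        # files = ["part-0", "part-1", "part-2", "part-3", "part-4"] with
        # num_sample_workers = 2 yields
        # train_files == [["part-0", "part-2", "part-4"], ["part-1", "part-3"]]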
def walk_to_file(walk_gen, filename, max_num):
    """Write generated walks to `filename`, one tab-separated walk per line."""
    with open(filename, "w") as outf:
        num = 0
        for walks in walk_gen:
            for walk in walks:
                outf.write("%s\n" % "\t".join([str(i) for i in walk]))
                num += 1
                if num % 1000 == 0:
                    # the body is truncated here in the source; a progress
                    # log (and likely a max_num cutoff) followed
                    pass
# The train-reader call is truncated in the source; it is reconstructed here
# by symmetry with the test-reader call below (`dataset.train_index` is the
# assumed counterpart of `dataset.test_index`).
train_model['pyreader'].decorate_tensor_provider(
    node_classify_generator(
        dataset.graph,
        dataset.train_index,
        batch_size=args.batch_size,
        epoch=args.epochs))
test_model['pyreader'].decorate_tensor_provider(
    node_classify_generator(
        dataset.graph,
        dataset.test_index,
        batch_size=args.batch_size,
        epoch=1))
def existed_params(var):
    """Return True only for parameters that already have a checkpoint file."""
    if not isinstance(var, fluid.framework.Parameter):
        return False
    return os.path.exists(os.path.join(args.ckpt_path, var.name))

log.info('loading pretrained parameters from npy')
load_param(args.ckpt_path, ['shared_w'])
step = 0
prev_time = time.time()
train_model['pyreader'].start()
final_macro_f1 = 0.0
final_micro_f1 = 0.0

while True:
    try:
        train_loss_val, train_probs_val, train_labels_val, train_topk_val = exe.run(
            train_prog,
            fetch_list=[
                train_model['loss'], train_model['prob'],
                train_model['labels'], train_model['topk']
            ])  # the call is truncated in the source and closed minimally here
    except fluid.core.EOFException:
        # standard pyreader exhaustion pattern; reconstructed, since the
        # original loop body is cut off in the source
        train_model['pyreader'].reset()
        break
# Separate snippet: step budget and optimizer setup for distributed training.
train_steps = math.ceil(1. * args.num_nodes * args.epoch /
                        args.batch_size / num_devices / worker_num)
log.info("Train step: %s" % train_steps)

if args.optimizer == "sgd":
    # scale the lr by the approximate number of skip-gram pairs per batch
    args.lr *= args.batch_size * args.walk_len * args.win_size
optimization(args.lr, loss, train_steps, args.optimizer)
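# Worked example of the step count above (all numbers invented):
# num_nodes=10000, epoch=5, batch_size=128, num_devices=1, worker_num=4
# => train_steps = ceil(10000 * 5 / 128 / 1 / 4) = ceil(97.65625) = 98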
def main(args):
    """The main function for training the LINE model.
    """
    make_dir(args.save_dir)
    set_seed(args.seed)

    dataset = FlickrDataset(args.data_path)
    log.info('num nodes in graph: %d' % dataset.graph.num_nodes)
    log.info('num edges in graph: %d' % dataset.graph.num_edges)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    # build model here
    with fluid.program_guard(main_program, startup_program):
        loss, opt = build_model(args, dataset.graph)

    exe = fluid.Executor(place)
    exe.run(startup_program)  # initialize the parameters of the network

    batchrange = int(dataset.graph.num_edges / args.batch_size)
    T = batchrange * args.epochs  # total number of training steps
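    # Sanity check on the step budget (invented numbers): 1,000,000 edges with
    # batch_size=1024 gives batchrange = 976 batches per epoch, so epochs=10
    # yields T = 9760; T presumably drives a linearly decaying learning rate,
    # as in the original LINE paper.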