def main(args):
    """The main function for the node classification task.
    """
    set_seed(args.seed)
    log.info(args)
    dataset = FlickrDataset(args.data_path, train_percentage=args.percent)
    train_steps = (len(dataset.train_index) // args.batch_size) * args.epochs
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            train_model = node_classify_model(
                dataset.graph,
                dataset.num_groups,
                embed_dim=args.embed_dim,
                name='train')
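    # The fragment stops before the evaluation side is built. A sketch of the
    # usual Fluid continuation (assumed, not from the original source): build
    # the same model inside test_prog under fluid.unique_name.guard(), which
    # replays the same parameter names so the two programs share weights.
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            test_model = node_classify_model(
                dataset.graph,
                dataset.num_groups,
                embed_dim=args.embed_dim,
                name='test')
    test_prog = test_prog.clone(for_test=True)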
                # negative sampling (fragment): keep candidate pairs that are
                # not edges in either direction, until there are half as many
                # negatives as undirected edges
                if u != v and (u, v) not in bi_edges and (v, u) not in bi_edges:
                    self.neg_edges.append((u, v))
                    if len(self.neg_edges) == len(bi_edges) // 2:
                        break

        bi_edges = list(bi_edges)
        np.random.shuffle(bi_edges)
        # half of the edges are held out as positive examples ...
        self.pos_edges = bi_edges[:len(bi_edges) // 2]
        bi_edges = bi_edges[len(bi_edges) // 2:]
        # ... and the remaining half forms the graph, added in both directions
        all_edges = []
        for edge in bi_edges:
            u, v = edge
            all_edges.append((u, v))
            all_edges.append((v, u))
        self.graph = graph.Graph(num_nodes=num_nodes, edges=all_edges)
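# The enclosing sampling loop is truncated in the snippet above. A
# self-contained sketch of the usual pattern (the function name and the
# random-pair strategy are assumptions, not the original code):
import numpy as np

def sample_negative_edges(num_nodes, bi_edges, num_neg):
    """Randomly draw (u, v) pairs that are not edges in either direction."""
    edge_set = set(bi_edges)
    neg_edges = []
    while len(neg_edges) < num_neg:
        u = np.random.randint(0, num_nodes)
        v = np.random.randint(0, num_nodes)
        if u != v and (u, v) not in edge_set and (v, u) not in edge_set:
            neg_edges.append((u, v))
    return neg_edges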
with open(args.data_path) as f:
    log.info("Begin Load Graph")
    src = []
    dst = []
    for idx, line in tqdm.tqdm(enumerate(f)):
        s, d = line.strip().split()
        src.append(s)
        dst.append(d)
        # append the reverse direction as well so the graph is symmetric
        dst.append(s)
        src.append(d)

src = np.array(src, dtype="int64").reshape(-1, 1)
dst = np.array(dst, dtype="int64").reshape(-1, 1)
edges = np.hstack([src, dst])

log.info("Begin Build Index")
ret_dict.graph = pgl.graph.Graph(num_nodes=args.num_nodes, edges=edges)
ret_dict.graph.indegree()  # touching indegree forces the edge index to build now
log.info("End Build Index")

if args.phase == "train":
    # only the worker loads the training samples
    data = load_pos_neg(args.data_path)

feed_name_list = [var.name for var in ret_dict.feed_list]
train_iter = reader.graph_reader(
    args.num_layers,
    ret_dict.graph_wrappers,
    batch_size=args.batch_size,
    data=data['train_data'],
    samples=args.samples,
    num_workers=args.sample_workers,
    feed_name_list=feed_name_list,
    use_pyreader=args.use_pyreader,
    graph=ret_dict.graph)
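# For reference, a minimal self-contained example of the PGL 1.x Graph API the
# snippet relies on (toy numbers, not from the original source):
import numpy as np
import pgl

toy_edges = np.array([[0, 1], [1, 0], [1, 2], [2, 1]], dtype="int64")
toy_graph = pgl.graph.Graph(num_nodes=3, edges=toy_edges)
print(toy_graph.indegree())  # -> [1 2 1]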
def test(args):
    """Benchmark the batch generator: log average seconds per batch."""
    graph = build_graph(args.num_nodes, args.edge_path)
    gen_func = build_gen_func(args, graph)

    start = time.time()
    num = 10
    for idx, _ in enumerate(gen_func()):
        if idx % num == num - 1:
            # average wall-clock time per batch over the last `num` batches
            log.info("%s" % (1.0 * (time.time() - start) / num))
            start = time.time()
    train_label = data['y_train']  # not in the original snippet; inferred from y_val/y_test
    val_label = data['y_val']
    test_label = data['y_test']
    train_index = data['train_index']
    val_index = data['val_index']
    test_index = data['test_index']

    feature = data["feats"].astype("float32")
    if normalize:
        # fit the scaler on the training rows only, then transform everything
        scaler = StandardScaler()
        scaler.fit(feature[train_index])
        feature = scaler.transform(feature)
    log.info("Feature shape %s" % (repr(feature.shape)))

    graph = pgl.graph.Graph(
        num_nodes=feature.shape[0],
        edges=list(zip(src, dst)),
        node_feat={"index": np.arange(
            0, len(feature), dtype="int64")})

    return {
        "graph": graph,
        "train_index": train_index,
        "train_label": train_label,
        "val_label": val_label,
        "val_index": val_index,
        "test_index": test_index,
        "test_label": test_label,
        "feature": feature,
        "num_class": 41
    }
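# The normalization above follows the standard leakage-free pattern: scaling
# statistics come from the training rows only, then every row is transformed.
# A standalone illustration with toy data (names assumed):
import numpy as np
from sklearn.preprocessing import StandardScaler

feats = np.random.rand(10, 4).astype("float32")
train_idx = np.arange(6)  # pretend the first 6 rows are the training split

scaler = StandardScaler()
scaler.fit(feats[train_idx])       # mean/std from the train split only
feats = scaler.transform(feats)    # applied to train/val/test alike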
                node_feature[node_id][group_id] = 1  # multi-hot group membership

        with io.open(edge_path) as inf:
            for line in inf:
                u, v = line.strip('\n').split(',')
                u, v = int(u) - 1, int(v) - 1  # ids in the file are 1-based
                all_edges.append((u, v))
                if self.symmetry_edges:
                    all_edges.append((v, u))
        if self.self_loop:
            for i in range(num_nodes):
                all_edges.append((i, i))
        all_edges = list(set(all_edges))  # de-duplicate
        self.graph = graph.Graph(
            num_nodes=num_nodes,
            edges=all_edges,
            node_feat={"group_id": node_feature})

        # random 50/50 train/test split over the nodes
        perm = np.arange(0, num_nodes)
        np.random.shuffle(perm)
        train_num = int(num_nodes * 0.5)
        self.train_index = perm[:train_num]
        self.test_index = perm[train_num:]
        all_edges = []
        with open(cite, 'r') as f:
            for line in f:
                u, v = line.split()
                # map raw paper ids to contiguous vertex ids
                u = paper2vid[int(u)]
                v = paper2vid[int(v)]
                all_edges.append((u, v))
                if self.symmetry_edges:
                    all_edges.append((v, u))
        if self.self_loop:
            for i in range(num_nodes):
                all_edges.append((i, i))
        all_edges = list(set(all_edges))  # de-duplicate
        self.graph = graph.Graph(
            num_nodes=num_nodes,
            edges=all_edges,
            node_feat={"words": node_feature})

        # fixed split (140 train / 300 val / 1000 test); shuffling stays
        # disabled so the split is deterministic
        perm = np.arange(0, num_nodes)
        #np.random.shuffle(perm)
        self.train_index = perm[:140]
        self.val_index = perm[200:500]
        self.test_index = perm[500:1500]
        self.y = np.array(y, dtype="int64")
        self.num_classes = len(y_dict)
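# The y / y_dict pair above implies the usual label-encoding idiom: each
# distinct label string gets the next integer id. A standalone sketch (the
# label strings are illustrative, not taken from the dataset code shown here):
labels = ["Neural_Networks", "Rule_Learning", "Neural_Networks"]
y_dict = {}
y = []
for label in labels:
    if label not in y_dict:
        y_dict[label] = len(y_dict)
    y.append(y_dict[label])
# y == [0, 1, 0] and len(y_dict) == 2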
def main(args):
    data = load_data(args.normalize, args.symmetry)
    log.info("preprocess finish")
    log.info("Train Examples: %s" % len(data["train_index"]))
    log.info("Val Examples: %s" % len(data["val_index"]))
    log.info("Test Examples: %s" % len(data["test_index"]))
    log.info("Num nodes %s" % data["graph"].num_nodes)
    log.info("Num edges %s" % data["graph"].num_edges)
    log.info("Average Degree %s" % np.mean(data["graph"].indegree()))

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()

    samples = []
    if args.samples_1 > 0:
        samples.append(args.samples_1)
    if args.samples_2 > 0:
        samples.append(args.samples_2)
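# samples collects the per-hop neighbor fan-outs for GraphSAGE-style sampling
# (e.g. samples_1=25, samples_2=10 takes up to 25 first-hop and 10 second-hop
# neighbors per node). A toy illustration of the idea on a plain adjacency
# dict (this helper is hypothetical, not the PGL sampler):
import random

def fanout_sample(adj, seeds, fanouts):
    """Sample up to fanouts[i] neighbors per frontier node at hop i."""
    frontier = list(seeds)
    hops = []
    for fanout in fanouts:
        sampled = {}
        for u in frontier:
            neigh = adj.get(u, [])
            sampled[u] = random.sample(neigh, min(fanout, len(neigh)))
        hops.append(sampled)
        frontier = [v for vs in sampled.values() for v in vs]
    return hops

toy_adj = {0: [1, 2, 3], 1: [0, 2], 2: [0, 1], 3: [0]}
print(fanout_sample(toy_adj, seeds=[0], fanouts=[2, 1]))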