def main(args):
    """The main function for the node classification task."""
    set_seed(args.seed)
    log.info(args)
    dataset = FlickrDataset(args.data_path, train_percentage=args.percent)
    train_steps = (len(dataset.train_index) // args.batch_size) * args.epochs

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    startup_prog = fluid.Program()

    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            train_model = node_classify_model(
                dataset.graph,
                dataset.num_groups,
                embed_dim=args.embed_dim,
                name='train')
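    # --- Hedged sketch (not in the original snippet): the function is
    # truncated above. Assuming the test program mirrors the train program,
    # the usual PaddlePaddle 1.x wiring would continue roughly like this;
    # `node_classify_model` and the `*_prog` variables come from the snippet,
    # everything else is an assumption.
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            test_model = node_classify_model(
                dataset.graph,
                dataset.num_groups,
                embed_dim=args.embed_dim,
                name='test')
    test_prog = test_prog.clone(for_test=True)

    exe = fluid.Executor(place)
    exe.run(startup_prog)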

def test(args):
    """Measure the average time per generated batch."""
    graph = build_graph(args.num_nodes, args.edge_path)
    gen_func = build_gen_func(args, graph)

    start = time.time()
    num = 10  # report the mean batch time every `num` batches
    for idx, _ in enumerate(gen_func()):
        if idx % num == num - 1:
            log.info("%s" % (1.0 * (time.time() - start) / num))
            start = time.time()
feed_dict["node_index"] = np.array(test_index, dtype="int64")
feed_dict["node_label"] = np.array(test_label, dtype="int64")
test_loss, test_acc = exe.run(test_program,
feed=feed_dict,
fetch_list=[loss, acc],
return_numpy=True)
log.info("Accuracy: %f" % test_acc)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GCN')
    parser.add_argument(
        "--dataset", type=str, default="cora", help="dataset (cora, pubmed)")
    parser.add_argument(
        "--use_cuda", action='store_true', help="run on GPU via CUDA")
    args = parser.parse_args()
    log.info(args)
    main(args)
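# A typical invocation of the script above (the script name is assumed):
#
#     python train.py --dataset cora --use_cuda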

# Training-loop fragment. The enclosing epoch loop, `dur` and `t0` are
# restored from context; timing deliberately skips the first three
# warm-up epochs.
dur = []
for epoch in range(args.epoch):
    if epoch >= 3:
        t0 = time.time()

    train_loss = exe.run(train_program,
                         feed={},
                         fetch_list=[train_loss_t],
                         return_numpy=True)
    train_loss = train_loss[0]

    if epoch >= 3:
        time_per_epoch = 1.0 * (time.time() - t0)
        dur.append(time_per_epoch)

    val_loss, val_acc = exe.run(val_program,
                                feed={},
                                fetch_list=[val_loss_t, val_acc_t],
                                return_numpy=True)

    log.info("Epoch %d " % epoch + "(%.5lf sec) " % np.mean(dur)
             + "Train Loss: %f " % train_loss + "Val Loss: %f " % val_loss
             + "Val Acc: %f " % val_acc)

test_loss, test_acc = exe.run(test_program,
                              feed={},
                              fetch_list=[test_loss_t, test_acc_t],
                              return_numpy=True)
log.info("Accuracy: %f" % test_acc)

def main(args):
    data = load_data(args.normalize, args.symmetry)
    log.info("preprocessing finished")
    log.info("Train Examples: %s" % len(data["train_index"]))
    log.info("Val Examples: %s" % len(data["val_index"]))
    log.info("Test Examples: %s" % len(data["test_index"]))
    log.info("Num nodes %s" % data["graph"].num_nodes)
    log.info("Num edges %s" % data["graph"].num_edges)
    log.info("Average Degree %s" % np.mean(data["graph"].indegree()))

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()

    # Collect the per-layer neighbor sample sizes; a layer is only added
    # when its sample size is positive.
    samples = []
    if args.samples_1 > 0:
        samples.append(args.samples_1)
    if args.samples_2 > 0:
        samples.append(args.samples_2)

    with fluid.program_guard(train_program, startup_program):
        feature, feature_init = paddle_helper.constant(
            "feat",
            dtype=data['feature'].dtype,
            value=data['feature'],
            hide_batch_size=False)
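    # --- Hedged note (not in the original snippet): with PGL's
    # paddle_helper.constant, the second return value is an initializer
    # that is typically invoked once the startup program has run, e.g.:
    #
    #     exe = fluid.Executor(place)
    #     exe.run(startup_program)
    #     feature_init(place)  # copy the numpy feature data into the variable
    #
    # The exact call pattern is an assumption based on common PGL usage.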

def train(args):
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    # Default to 1 worker so the train_steps division below cannot divide
    # by zero when PADDLE_TRAINERS_NUM is unset.
    worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
    num_devices = int(os.getenv("CPU_NUM", "10"))

    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          args.is_sparse, args.is_distributed, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # init fleet
    init_role()

    train_steps = math.ceil(1. * args.num_nodes * args.epoch /
                            args.batch_size / num_devices / worker_num)
    log.info("Train step: %s" % train_steps)

# Multi-layer neighbor sampling (GraphSAGE-style). The enclosing function
# header and the two initializations below are inferred from the variables
# the fragment uses (`graph`, `samples`, `ignore_edges`, `num_layers`,
# `start`).
def graphsage_sample(graph, nodes, samples, ignore_edges=[]):
    start = time.time()
    num_layers = len(samples)

    start_nodes = nodes
    nodes = list(start_nodes)
    eids, edges = [], []
    nodes_set = set(nodes)
    layer_nodes, layer_eids, layer_edges = [], [], []
    ignore_edge_set = set([edge_hash(src, dst) for src, dst in ignore_edges])

    # Sample from the outermost layer inward.
    for layer_idx in reversed(range(num_layers)):
        if len(start_nodes) == 0:
            layer_nodes = [nodes] + layer_nodes
            layer_eids = [eids] + layer_eids
            layer_edges = [edges] + layer_edges
            continue
        batch_pred_nodes, batch_pred_eids = graph.sample_predecessor(
            start_nodes, samples[layer_idx], return_eids=True)
        log.debug("sample_predecessor time: %s" % (time.time() - start))
        start = time.time()

        last_nodes_set = nodes_set
        nodes, eids = copy.copy(nodes), copy.copy(eids)
        edges = copy.copy(edges)
        nodes_set, eids_set = set(nodes), set(eids)
        for srcs, dst, pred_eids in zip(batch_pred_nodes, start_nodes,
                                        batch_pred_eids):
            for src, eid in zip(srcs, pred_eids):
                # Skip edges the caller asked to ignore.
                if edge_hash(src, dst) in ignore_edge_set:
                    continue
                if eid not in eids_set:
                    eids.append(eid)
                    edges.append([src, dst])
                    eids_set.add(eid)
                if src not in nodes_set:
                    # Body restored from context: record newly reached
                    # nodes, mirroring the eid bookkeeping above.
                    nodes.append(src)
                    nodes_set.add(src)
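        # --- Hedged sketch (not in the fragment): after visiting all
        # start_nodes, the per-layer bookkeeping would typically prepend the
        # accumulated state and advance the frontier to the newly reached
        # nodes, along these lines:
        #
        #     layer_edges = [edges] + layer_edges
        #     start_nodes = list(nodes_set - last_nodes_set)
        #     layer_nodes = [nodes] + layer_nodes
        #     layer_eids = [eids] + layer_eids
        #
        # which would also explain why `last_nodes_set` is saved above.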
feed_dict["node_index"] = np.array(test_index, dtype="int64")
feed_dict["node_label"] = np.array(test_label, dtype="int64")
test_loss, test_acc = exe.run(test_program,
feed=feed_dict,
fetch_list=[loss, acc],
return_numpy=True)
log.info("Accuracy: %f" % test_acc)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GAT')
    parser.add_argument(
        "--dataset", type=str, default="cora", help="dataset (cora, pubmed)")
    parser.add_argument(
        "--use_cuda", action='store_true', help="run on GPU via CUDA")
    args = parser.parse_args()
    log.info(args)
    main(args)