How to use the cogdl.datasets.build_dataset function in cogdl

To help you get started, we’ve selected a few cogdl examples that show popular ways the build_dataset function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github THUDM / cogdl / scripts / parallel_train.py View on Github external
return mp.current_process().pid


if __name__ == "__main__":
    # Script entry point: parse the training options, pre-build every requested
    # dataset, enumerate the run variants, and size the worker pool.

    # Magic for making multiprocessing work for PyTorch
    # ("spawn" starts each worker in a fresh interpreter instead of forking).
    mp.set_start_method("spawn")

    # Two-pass parse: a first pass that tolerates unknown flags, then
    # parse_args_and_arch produces the final namespace.
    # NOTE(review): presumably the second pass adds model/architecture-specific
    # options -- confirm against options.parse_args_and_arch.
    parser = options.get_training_parser()
    args, _ = parser.parse_known_args()
    args = options.parse_args_and_arch(parser, args)

    # Make sure datasets are downloaded first
    # args.dataset holds an iterable of dataset names here; it is temporarily
    # rebound to one name at a time because build_dataset(args) is called once
    # per dataset, and the original value is restored afterwards.
    datasets = args.dataset
    for dataset in datasets:
        args.dataset = dataset
        _ = build_dataset(args)  # result discarded; only the call itself matters here
    args.dataset = datasets

    print(args)
    # Materialise the variants generated from the dataset, model and seed options.
    # NOTE(review): presumably one variant per (dataset, model, seed) combination --
    # verify against gen_variants.
    variants = list(
        gen_variants(dataset=args.dataset, model=args.model, seed=args.seed)
    )

    # One worker per listed device id, or a single worker when running on CPU.
    device_ids = args.device_id
    if args.cpu:
        num_workers = 1
    else:
        num_workers = len(device_ids)
    print("num_workers", num_workers)

    # Maps a key to a list of collected values; populated further down.
    results_dict = defaultdict(list)
    with mp.Pool(processes=num_workers) as pool:
github THUDM / cogdl / cogdl / tasks / node_classification_sample.py View on Github external
def __init__(self, args):
        """Set up the dataset, model and Adam optimizer for this task.

        Args:
            args: Option namespace. It is passed to the parent constructor,
                ``build_dataset`` and ``build_model``. This method also writes
                ``num_features`` and ``num_classes`` onto it, and reads
                ``patience``, ``max_epoch``, ``batch_size``, ``lr`` and
                ``weight_decay`` from it.
        """
        super(NodeClassification, self).__init__(args)

        dataset = build_dataset(args)
        # Only the first item of the dataset is used; it is moved with .cuda().
        self.data = dataset[0].cuda()

        # Copy the dataset sizes onto args before build_model(args) receives it.
        for size_field in ("num_features", "num_classes"):
            setattr(args, size_field, getattr(dataset, size_field))
        self.model = build_model(args).cuda()

        # Training-loop settings taken straight from the options.
        self.patience = args.patience
        self.max_epoch = args.max_epoch
        self.batch_size = args.batch_size

        trainable = self.model.parameters()
        self.optimizer = torch.optim.Adam(
            trainable, lr=args.lr, weight_decay=args.weight_decay
        )
github THUDM / cogdl / cogdl / tasks / unsupervised_node_classification.py View on Github external
def __init__(self, args):
        """Load the dataset, derive the label matrix, and build the model.

        Args:
            args: Option namespace passed to the parent constructor,
                ``build_dataset`` and ``build_model``; ``hidden_size`` and
                ``num_shuffle`` are read from it.
        """
        super(UnsupervisedNodeClassification, self).__init__(args)
        dataset = build_dataset(args)
        self.data = dataset[0]

        labels = self.data.y
        # The test looks only at the FIRST direct base class of the dataset's class.
        first_base = dataset.__class__.__bases__[0]
        if not issubclass(first_base, InMemoryDataset):
            # y is already two-dimensional: use it as the label matrix and take
            # (num_nodes, num_classes) from its shape.
            self.label_matrix = labels
            self.num_nodes, self.num_classes = labels.shape
        else:
            # y has one entry per node and is used as a column index, so each
            # row of the integer matrix gets a single 1.
            self.num_nodes = labels.shape[0]
            self.num_classes = dataset.num_classes
            one_hot = np.zeros((self.num_nodes, self.num_classes), dtype=int)
            one_hot[range(self.num_nodes), labels] = 1
            self.label_matrix = one_hot

        self.model = build_model(args)
        self.hidden_size = args.hidden_size
        self.num_shuffle = args.num_shuffle
        # Edges count as weighted whenever the data carries an edge_attr.
        self.is_weighted = self.data.edge_attr is not None
github THUDM / cogdl / cogdl / tasks / node_classification_cotraining.py View on Github external
def __init__(self, args):
        """Set up data, the co-training model and its Adam optimizer.

        Args:
            args: Option namespace. It is passed to the parent constructor,
                ``build_dataset`` and ``CotrainingModel``. This method also
                writes ``num_features`` and ``num_classes`` onto it, and reads
                ``patience``, ``max_epoch``, ``order``, ``lr`` and
                ``weight_decay`` from it.
        """
        super(NodeClassificationCotraining, self).__init__(args)

        dataset = build_dataset(args)
        # Only the first item of the dataset is used; it is moved with .cuda().
        self.data = dataset[0].cuda()

        # Copy the dataset sizes onto args before CotrainingModel(args) is built.
        for size_field in ("num_features", "num_classes"):
            setattr(args, size_field, getattr(dataset, size_field))
        self.model = CotrainingModel(args).cuda()

        self.patience = args.patience
        self.max_epoch = args.max_epoch
        self.order = args.order
        # Called only after self.data, self.model and self.order are in place.
        self._compute_A()

        trainable = self.model.parameters()
        self.optimizer = torch.optim.Adam(
            trainable, lr=args.lr, weight_decay=args.weight_decay
        )
github THUDM / cogdl / cogdl / tasks / node_classification.py View on Github external
def __init__(self, args):
        """Set up the dataset, model and Adam optimizer for this task.

        Args:
            args: Option namespace. It is passed to the parent constructor,
                ``build_dataset`` and ``build_model``. This method also writes
                ``num_features`` and ``num_classes`` onto it, and reads
                ``patience``, ``max_epoch``, ``lr`` and ``weight_decay`` from it.
        """
        super(NodeClassification, self).__init__(args)

        def _to_gpu(item):
            return item.cuda()

        dataset = build_dataset(args)
        self.data = dataset.data
        # NOTE(review): the return value of apply() is discarded, so this relies
        # on apply() updating self.data in place -- confirm.
        self.data.apply(_to_gpu)

        # Copy the dataset sizes onto args before build_model(args) receives it.
        for size_field in ("num_features", "num_classes"):
            setattr(args, size_field, getattr(dataset, size_field))
        self.model = build_model(args).cuda()

        self.patience = args.patience
        self.max_epoch = args.max_epoch

        trainable = self.model.parameters()
        self.optimizer = torch.optim.Adam(
            trainable, lr=args.lr, weight_decay=args.weight_decay
        )
github THUDM / cogdl / cogdl / tasks / multiplex_link_prediction.py View on Github external
def __init__(self, args):
        """Load the dataset, build the model and record the run settings.

        Args:
            args: Option namespace passed to the parent constructor,
                ``build_dataset`` and ``build_model``. ``num_features`` is
                written onto it only when the dataset exposes that attribute;
                ``patience``, ``max_epoch`` and ``eval_type`` are read from it.
        """
        super(MultiplexLinkPrediction, self).__init__(args)

        dataset = build_dataset(args)
        # Only the first item of the dataset is kept.
        self.data = dataset[0]

        # Not every dataset has num_features; leave args untouched when absent.
        absent = object()
        feature_count = getattr(dataset, "num_features", absent)
        if feature_count is not absent:
            args.num_features = feature_count

        self.model = build_model(args)
        self.patience = args.patience
        self.max_epoch = args.max_epoch
        self.eval_type = args.eval_type
github THUDM / cogdl / cogdl / tasks / link_prediction.py View on Github external
def __init__(self, args):
        super(LinkPrediction, self).__init__(args)

        dataset = build_dataset(args)
        data = dataset[0]
        self.data = data
        if hasattr(dataset, "num_features"):
            args.num_features = dataset.num_features
        model = build_model(args)
        self.model = model
        self.patience = args.patience
        self.max_epoch = args.max_epoch

        edge_list = self.data.edge_index.numpy()
        edge_list = list(zip(edge_list[0], edge_list[1]))
        self.train_data, self.valid_data, self.test_data = divide_data(
            edge_list, [0.85, 0.05, 0.10]
        )

        self.valid_data, self.test_data = gen_node_pairs(