How to use the chemprop.train.predict.predict function in chemprop

To help you get started, we’ve selected a few chemprop examples based on popular ways the predict function is used in public projects.

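Before the project snippets, here is a minimal sketch of the basic call pattern, assembled from the examples further down. Anything named here that does not appear in those snippets (the import paths, plus the save_dir, args, test_data, scaler, and logger variables) is an assumption standing in for objects your own training or prediction script would provide.

import os

# Import paths are assumptions based on this repository's usual layout.
from chemprop.train.predict import predict
from chemprop.utils import load_checkpoint

# Assumed to already exist: save_dir (checkpoint directory), args (parsed
# arguments), test_data (a MoleculeDataset), scaler (a StandardScaler fit on
# the training targets, or None), and logger (a logging.Logger, or None).
model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda, logger=logger)

# predict() runs the model over the dataset and returns a list of predictions,
# one entry per molecule in test_data.
test_preds = predict(
    model=model,
    data=test_data,
    args=args,
    scaler=scaler,
    logger=logger
)

The make_predictions.py example below repeats this call once per checkpoint in args.checkpoint_paths, summing the per-model outputs and dividing by the ensemble size to obtain the ensemble prediction.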

wengong-jin/chemprop · chemprop/train/run_training.py (view on GitHub)
        # Evaluate on test set using model with best validation score
        info(f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}')
        model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda, logger=logger)

        if args.split_test_by_overlap_dataset is not None:
            overlap_data = get_data(path=args.split_test_by_overlap_dataset, logger=logger)
            overlap_smiles = set(overlap_data.smiles())
            test_data_intersect, test_data_nonintersect = [], []
            for d in test_data.data:
                if d.smiles in overlap_smiles:
                    test_data_intersect.append(d)
                else:
                    test_data_nonintersect.append(d)
            test_data_intersect, test_data_nonintersect = MoleculeDataset(test_data_intersect), MoleculeDataset(test_data_nonintersect)
            for name, td in [('Intersect', test_data_intersect), ('Nonintersect', test_data_nonintersect)]:
                test_preds = predict(
                    model=model,
                    data=td,
                    args=args,
                    scaler=scaler,
                    logger=logger
                )
                test_scores = evaluate_predictions(
                    preds=test_preds,
                    targets=td.targets(),
                    metric_func=metric_func,
                    dataset_type=args.dataset_type,
                    args=args,
                    logger=logger
                )
                avg_test_score = np.nanmean(test_scores)
                info(f'Model {model_idx} test {args.metric} for {name} = {avg_test_score:.6f}')
wengong-jin/chemprop · chemprop/train/evaluate.py (view on GitHub)
             args: Namespace,
             scaler: StandardScaler = None,
             logger: logging.Logger = None) -> List[float]:
    """
    Evaluates an ensemble of models on a dataset.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param metric_func: Metric function which takes in a list of targets and a list of predictions.
    :param dataset_type: Dataset type.
    :param args: Arguments.
    :param scaler: A StandardScaler object fit on the training targets.
    :param logger: Logger.
    :return: A list with the score for each task based on `metric_func`.
    """
    preds = predict(
        model=model,
        data=data,
        args=args,
        scaler=scaler,
        bert_save_memory=True,
        logger=logger
    )

    if args.maml:
        preds, targets = preds  # in this case the targets are determined by the tasks sampled during prediction
    else:
        targets = data.targets()
        if args.dataset_type == 'bert_pretraining':
            # Only predict targets that are masked out
            targets['vocab'] = [target if mask == 0 else None for target, mask in zip(targets['vocab'], data.mask())]
wengong-jin/chemprop · chemprop/train/run_training.py (view on GitHub)
        if len(test_data) == 0:  # just get some garbage results without crashing; in this case we didn't care anyway
            test_preds, test_scores = sum_test_preds, [0 for _ in range(len(args.task_names))]
        else:
            test_preds = predict(
                model=model,
                data=test_data,
                args=args,
                scaler=scaler,
                logger=logger
            )
            test_scores = evaluate_predictions(
                preds=test_preds,
                targets=test_targets,
                metric_func=metric_func,
                dataset_type=args.dataset_type,
                args=args,
                logger=logger
            )

        if args.maml:
wengong-jin/chemprop · chemprop/train/make_predictions.py (view on GitHub)
    if args.compound_names:
        compound_names = test_data.compound_names()
    print(f'Test size = {len(test_data):,}')

    # Normalize features
    if train_args.features_scaling:
        test_data.normalize_features(features_scaler)

    # Predict with each model individually and sum predictions
    sum_preds = np.zeros((len(test_data), args.num_tasks))
    print(f'Predicting with an ensemble of {len(args.checkpoint_paths)} models')
    for checkpoint_path in tqdm(args.checkpoint_paths, total=len(args.checkpoint_paths)):
        # Load model
        model = load_checkpoint(checkpoint_path, cuda=args.cuda)
        model_preds = predict(
            model=model,
            data=test_data,
            args=args,
            scaler=scaler
        )
        sum_preds += np.array(model_preds)

    # Ensemble predictions
    avg_preds = sum_preds / args.ensemble_size
    avg_preds = avg_preds.tolist()

    # Save predictions
    assert len(test_data) == len(avg_preds)
    print(f'Saving predictions to {args.preds_path}')

    # Put Nones for invalid smiles