How to use the chemprop.train.evaluate.evaluate_predictions function in chemprop

To help you get started, we’ve selected a few chemprop examples based on popular ways evaluate_predictions is used in public projects.
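
Each example below follows the same pattern: build per-molecule, per-task lists of predictions and targets, then pass them to evaluate_predictions together with a metric function and the dataset type. Here is a minimal, self-contained sketch of that call, assuming the signature shown in the snippets on this page (the toy data and the rmse helper are only illustrative, and the exact interface can differ between chemprop versions):

import math
from sklearn.metrics import mean_squared_error
from chemprop.train.evaluate import evaluate_predictions

def rmse(targets, preds):
    # Metric function with the (targets, preds) argument order used in the examples below.
    return math.sqrt(mean_squared_error(targets, preds))

# Toy single-task regression data: one inner list per molecule, one value per task.
test_preds = [[0.12], [0.87], [0.43]]
test_targets = [[0.10], [0.90], [0.50]]

scores = evaluate_predictions(
    preds=test_preds,
    targets=test_targets,
    metric_func=rmse,
    dataset_type='regression',
)
print(scores)  # a list with one score per task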


github wengong-jin / chemprop / chemprop / random_forest.py
    if args.dataset_type == 'regression':
        model = RandomForestRegressor(n_estimators=args.num_trees, n_jobs=-1)
    elif args.dataset_type == 'classification':
        model = RandomForestClassifier(n_estimators=args.num_trees, n_jobs=-1)
    else:
        raise ValueError(f'dataset_type "{args.dataset_type}" not supported.')

    train_targets = train_data.targets()
    if train_data.num_tasks() == 1:
        train_targets = [targets[0] for targets in train_targets]

    model.fit(train_data.features(), train_targets)

    test_preds = model.predict(test_data.features())
    if train_data.num_tasks() == 1:
        test_preds = [[pred] for pred in test_preds]

    scores = evaluate_predictions(
        preds=test_preds,
        targets=test_data.targets(),
        metric_func=metric_func,
        dataset_type=args.dataset_type
    )

    return scores
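
Note the shape handling in this snippet: evaluate_predictions expects preds and targets as one list per molecule with one value per task, so single-task model output has to be wrapped before the call. An illustrative reshaping with made-up values:

raw_preds = [0.12, 0.87, 0.43]      # one prediction per molecule from a single-task model
raw_targets = [0.10, 0.90, 0.50]

# Wrap each scalar in a one-element list to get the (num_molecules, num_tasks) layout.
preds = [[p] for p in raw_preds]    # [[0.12], [0.87], [0.43]]
targets = [[t] for t in raw_targets]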
github wengong-jin / chemprop / chemprop / random_forest.py
        if args.dataset_type == 'regression':
            model = RandomForestRegressor(n_estimators=args.num_trees, n_jobs=-1)
        elif args.dataset_type == 'classification':
            model = RandomForestClassifier(class_weight=args.class_weight, n_estimators=args.num_trees, n_jobs=-1)
        else:
            raise ValueError(f'dataset_type "{args.dataset_type}" not supported.')

        model.fit(train_features, train_targets)

        test_preds = model.predict(test_features)

        test_preds = [[pred] for pred in test_preds]
        test_targets = [[target] for target in test_targets]

        score = evaluate_predictions(
            preds=test_preds,
            targets=test_targets,
            metric_func=metric_func,
            dataset_type=args.dataset_type
        )
        scores.append(score[0])

    return scores
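
For classification data the call pattern is the same, just with a probability-based metric function. A hedged sketch with scikit-learn's roc_auc_score and toy data, assuming (as the regression snippets suggest) that the metric is applied per task as metric_func(targets, preds):

from sklearn.metrics import roc_auc_score
from chemprop.train.evaluate import evaluate_predictions

# Toy single-task binary classification: predicted probabilities and 0/1 targets,
# wrapped as one list per molecule with one value per task.
test_preds = [[0.9], [0.2], [0.7], [0.4]]
test_targets = [[1], [0], [1], [0]]

scores = evaluate_predictions(
    preds=test_preds,
    targets=test_targets,
    metric_func=roc_auc_score,
    dataset_type='classification',
)
print(scores)  # one ROC-AUC value per task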
github wengong-jin / chemprop / chemprop / train / run_training.py
                    info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')
                    writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)

    # Evaluate ensemble on test set
    if args.dataset_type == 'bert_pretraining':
        avg_test_preds = {
            'features': (sum_test_preds['features'] / args.ensemble_size).tolist() if sum_test_preds['features'] is not None else None,
            'vocab': (sum_test_preds['vocab'] / args.ensemble_size).tolist()
        }
    else:
        avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()

    if len(test_data) == 0:  # just return some garbage when we didn't want test data
        ensemble_scores = test_scores
    else:
        ensemble_scores = evaluate_predictions(
            preds=avg_test_preds,
            targets=test_targets,
            metric_func=metric_func,
            dataset_type=args.dataset_type,
            args=args,
            logger=logger
        )

    # Average ensemble score
    if args.dataset_type == 'bert_pretraining':
        if ensemble_scores['features'] is not None:
            info(f'Ensemble test features rmse = {ensemble_scores["features"]:.6f}')
            writer.add_scalar('ensemble_test_features_rmse', ensemble_scores['features'], 0)
        ensemble_scores = [ensemble_scores['vocab']]

    avg_ensemble_test_score = np.nanmean(ensemble_scores)
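
The run_training.py snippet above sums per-model predictions during training, divides by the ensemble size, and scores the averaged predictions once for the whole ensemble. A condensed, self-contained sketch of that idea, with made-up ensemble predictions and targets and the same illustrative rmse helper as earlier:

import math
import numpy as np
from sklearn.metrics import mean_squared_error
from chemprop.train.evaluate import evaluate_predictions

def rmse(targets, preds):
    return math.sqrt(mean_squared_error(targets, preds))

# Made-up predictions from three ensemble members on four molecules, one task each.
ensemble_preds = [
    [[0.10], [0.80], [0.40], [0.55]],
    [[0.20], [0.70], [0.50], [0.45]],
    [[0.15], [0.75], [0.45], [0.50]],
]
sum_test_preds = np.zeros((4, 1))
for model_preds in ensemble_preds:
    sum_test_preds += np.array(model_preds)

avg_test_preds = (sum_test_preds / len(ensemble_preds)).tolist()  # mirrors sum_test_preds / args.ensemble_size
test_targets = [[0.12], [0.78], [0.47], [0.49]]

ensemble_scores = evaluate_predictions(
    preds=avg_test_preds,
    targets=test_targets,
    metric_func=rmse,
    dataset_type='regression',
)
avg_ensemble_test_score = np.nanmean(ensemble_scores)  # one number across tasks, as in the snippet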
github wengong-jin / chemprop / chemprop / train / run_training.py
            test_data_intersect, test_data_nonintersect = [], []
            for d in test_data.data:
                if d.smiles in overlap_smiles:
                    test_data_intersect.append(d)
                else:
                    test_data_nonintersect.append(d)
            test_data_intersect, test_data_nonintersect = MoleculeDataset(test_data_intersect), MoleculeDataset(test_data_nonintersect)
            for name, td in [('Intersect', test_data_intersect), ('Nonintersect', test_data_nonintersect)]:
                test_preds = predict(
                    model=model,
                    data=td,
                    args=args,
                    scaler=scaler,
                    logger=logger
                )
                test_scores = evaluate_predictions(
                    preds=test_preds,
                    targets=td.targets(),
                    metric_func=metric_func,
                    dataset_type=args.dataset_type,
                    args=args,
                    logger=logger
                )
                avg_test_score = np.nanmean(test_scores)
                info(f'Model {model_idx} test {args.metric} for {name} = {avg_test_score:.6f}')
        
github wengong-jin / chemprop / chemprop / train / run_training.py
        if len(test_data) == 0:  # just get some garbage results without crashing; in this case we didn't care anyway
            test_preds, test_scores = sum_test_preds, [0 for _ in range(len(args.task_names))]
        else:
            test_preds = predict(
                model=model,
                data=test_data,
                args=args,
                scaler=scaler,
                logger=logger
            )
            test_scores = evaluate_predictions(
                preds=test_preds,
                targets=test_targets,
                metric_func=metric_func,
                dataset_type=args.dataset_type,
                args=args,
                logger=logger
            )

        if args.maml:
            if sum_test_preds is None:
                sum_test_preds = np.zeros(np.array(test_preds).shape)

        if args.dataset_type == 'bert_pretraining':
            if test_preds['features'] is not None:
                sum_test_preds['features'] += np.array(test_preds['features'])
            sum_test_preds['vocab'] += np.array(test_preds['vocab'])