How to use the ludwig.utils.data_utils.save_json function in ludwig

To help you get started, weโ€™ve selected a few ludwig examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github uber / ludwig / ludwig / data / preprocessing.py View on Github external
train_set_metadata[DATA_TRAIN_HDF5_FP] = data_train_hdf5_fp

            if test_set is not None:
                data_test_hdf5_fp = replace_file_extension(data_test_csv,
                                                           'hdf5')
                data_utils.save_hdf5(
                    data_test_hdf5_fp,
                    test_set,
                    train_set_metadata
                )
                train_set_metadata[DATA_TRAIN_HDF5_FP] = data_train_hdf5_fp

            logger.info('Writing train set metadata with vocabulary')
            train_set_metadata_json_fp = replace_file_extension(data_train_csv,
                                                                'json')
            data_utils.save_json(
                train_set_metadata_json_fp,
                train_set_metadata,
            )

    return training_set, test_set, validation_set, train_set_metadata
github uber / ludwig / ludwig / train.py View on Github external
if validation_set is not None:
            logger.info('Validation set: {0}'.format(validation_set.size))
        if test_set is not None:
            logger.info('Test set: {0}'.format(test_set.size))

    # update model definition with metadata properties
    update_model_definition_with_metadata(
        model_definition,
        train_set_metadata
    )

    if is_on_master():
        if not skip_save_model:
            # save train set metadata
            os.makedirs(model_dir, exist_ok=True)
            save_json(
                os.path.join(
                    model_dir,
                    TRAIN_SET_METADATA_FILE_NAME
                ),
                train_set_metadata
            )

    # run the experiment
    model, result = train(
        training_set=training_set,
        validation_set=validation_set,
        test_set=test_set,
        model_definition=model_definition,
        save_path=model_dir,
        model_load_path=model_load_path,
        resume=model_resume_path is not None,
github uber / ludwig / ludwig / train.py View on Github external
description = get_experiment_description(
        model_definition,
        data_csv=data_csv,
        data_train_csv=data_train_csv,
        data_validation_csv=data_validation_csv,
        data_test_csv=data_test_csv,
        data_hdf5=data_hdf5,
        data_train_hdf5=data_train_hdf5,
        data_validation_hdf5=data_validation_hdf5,
        data_test_hdf5=data_test_hdf5,
        metadata_json=train_set_metadata_json,
        random_seed=random_seed
    )
    if is_on_master():
        if not skip_save_training_description:
            save_json(description_fn, description)
        # print description
        logger.info('Experiment name: {}'.format(experiment_name))
        logger.info('Model name: {}'.format(model_name))
        logger.info('Output path: {}'.format(experiment_dir_name))
        logger.info('\n')
        for key, value in description.items():
            logger.info('{}: {}'.format(key, pformat(value, indent=4)))
        logger.info('\n')

    # preprocess
    preprocessed_data = preprocess_for_training(
        model_definition,
        data_df=data_df,
        data_train_df=data_train_df,
        data_validation_df=data_validation_df,
        data_test_df=data_test_df,
github uber / ludwig / ludwig / predict.py View on Github external
def save_test_statistics(test_stats, experiment_dir_name):
    test_stats_fn = os.path.join(
        experiment_dir_name,
        'test_statistics.json'
    )
    save_json(test_stats_fn, test_stats)
github uber / ludwig / ludwig / data / preprocessing.py View on Github external
'initialization and training set shuffling'
    )

    args = parser.parse_args()

    data, train_set_metadata = build_dataset(
        args.dataset_csv,
        args.train_set_metadata_json,
        args.features,
        args.preprocessing_parameters,
        args.random_seed
    )

    # write train set metadata, dataset
    logger.info('Writing train set metadata with vocabulary')
    data_utils.save_json(args.output_metadata_json, train_set_metadata)
    logger.info('Writing dataset')
    data_utils.save_hdf5(args.output_dataset_h5, data, train_set_metadata)
github uber / ludwig / ludwig / train.py View on Github external
)

    train_trainset_stats, train_valisest_stats, train_testset_stats = result
    train_stats = {
        'train': train_trainset_stats,
        'validation': train_valisest_stats,
        'test': train_testset_stats
    }

    if should_close_session:
        model.close_session()

    # save training statistics
    if is_on_master():
        if not skip_save_training_statistics:
            save_json(training_stats_fn, train_stats)

    # grab the results of the model with highest validation test performance
    validation_field = model_definition['training']['validation_field']
    validation_measure = model_definition['training']['validation_measure']
    validation_field_result = train_valisest_stats[validation_field]

    best_function = get_best_function(validation_measure)
    # results of the model with highest validation test performance
    if is_on_master() and validation_set is not None:
        epoch_best_vali_measure, best_vali_measure = best_function(
            enumerate(validation_field_result[validation_measure]),
            key=lambda pair: pair[1]
        )
        logger.info(
            'Best validation model epoch: {0}'.format(
                epoch_best_vali_measure + 1)
github uber / ludwig / ludwig / models / model.py View on Github external
def save_hyperparameters(self, hyperparameters, save_path):
        # removing pretrained embeddings paths from hyperparameters
        # because the weights are already saved in the model, no need to reload
        # from their path when loading the model next time

        local_hyperparamters = copy.deepcopy(hyperparameters)
        for feature in (local_hyperparamters['input_features'] +
                        local_hyperparamters['output_features']):
            if 'pretrained_embeddings' in feature:
                feature['pretrained_embeddings'] = None
        save_json(save_path, hyperparameters, sort_keys=True, indent=4)