How to use the ludwig.data.dataset.Dataset class in ludwig

To help you get started, we've selected a few ludwig examples based on popular ways the library is used in public projects.
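Every excerpt below follows the same three-step pattern, so here it is distilled into a minimal sketch: encode the raw data with build_data, resolve each text feature to its word- or char-level representation with replace_text_feature_level, then wrap the result in a Dataset. The DataFrame, the metadata path, and the model definition are placeholders, and the import locations of the helpers are assumed from the file names in the excerpts (they may move between ludwig versions).

import pandas as pd

from ludwig.data.dataset import Dataset
from ludwig.data.preprocessing import build_data, replace_text_feature_level
from ludwig.utils.data_utils import load_json  # assumed helper location

# Placeholder inputs: raw data plus the metadata written during training.
data_df = pd.DataFrame({'text': ['hello world'], 'class': ['greeting']})
train_set_metadata = load_json('results/train_set_metadata.json')  # hypothetical path
model_definition = {
    'input_features': [{'name': 'text', 'type': 'text'}],
    'output_features': [{'name': 'class', 'type': 'category'}],
    'preprocessing': {}
}

features = (model_definition['input_features'] +
            model_definition['output_features'])

# 1. numpy-encode every feature using the training-time metadata
preprocessed_data = build_data(
    data_df,
    features,
    train_set_metadata,
    model_definition['preprocessing']
)

# 2. pick the word- or char-level encoding for each text feature
replace_text_feature_level(features, [preprocessed_data])

# 3. wrap the encoded data; the last argument is an optional HDF5 cache
#    path (None keeps everything in memory)
dataset = Dataset(
    preprocessed_data,
    model_definition['input_features'],
    model_definition['output_features'],
    None
)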

From uber/ludwig, ludwig/data/preprocessing.py: wrapping the training, validation, and test splits in Dataset objects at the end of preprocessing.
                    # ... (start of this call is cut off in the excerpt)
                    data_test_csv=test_fp,
                    train_set_metadata_json=train_set_metadata_json,
                    skip_save_processed_input=skip_save_processed_input,
                    preprocessing_params=preprocessing_params,
                    random_seed=random_seed
                )
    else:
        raise RuntimeError('Insufficient input parameters')

    replace_text_feature_level(
        model_definition['input_features'] +
        model_definition['output_features'],
        [training_set, validation_set, test_set]
    )

    training_dataset = Dataset(
        training_set,
        model_definition['input_features'],
        model_definition['output_features'],
        train_set_metadata.get(DATA_TRAIN_HDF5_FP)
    )

    validation_dataset = None
    if validation_set is not None:
        validation_dataset = Dataset(
            validation_set,
            model_definition['input_features'],
            model_definition['output_features'],
            train_set_metadata.get(DATA_TRAIN_HDF5_FP)
        )

    test_dataset = None
    if test_set is not None:
        test_dataset = Dataset(
            test_set,
            model_definition['input_features'],
            model_definition['output_features'],
            train_set_metadata.get(DATA_TRAIN_HDF5_FP)
        )

    return (
        training_dataset,
        validation_dataset,
        test_dataset,
        train_set_metadata
    )
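Two details worth noticing: the validation and test wrapping is guarded by None checks, since those splits are optional, and all three Dataset objects receive the same train_set_metadata.get(DATA_TRAIN_HDF5_FP) value, so every split points at the HDF5 cache written for the training data.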
From uber/ludwig, ludwig/api.py: preprocessing a DataFrame and wrapping it in a Dataset for online training.
        logger.debug('Preprocessing {} datapoints'.format(len(data_df)))
        features_to_load = (self.model_definition['input_features'] +
                            self.model_definition['output_features'])
        preprocessed_data = build_data(
            data_df,
            features_to_load,
            self.train_set_metadata,
            self.model_definition['preprocessing']
        )
        replace_text_feature_level(
            self.model_definition['input_features'] +
            self.model_definition['output_features'],
            [preprocessed_data]
        )
        dataset = Dataset(
            preprocessed_data,
            self.model_definition['input_features'],
            self.model_definition['output_features'],
            None  # no HDF5 cache path; online training keeps data in memory
        )

        logger.debug('Training batch')
        self.model.train_online(
            dataset,
            batch_size=batch_size,
            learning_rate=learning_rate,
            regularization_lambda=regularization_lambda,
            dropout_rate=dropout_rate,
            bucketing_field=bucketing_field,
            gpus=gpus,
            gpu_fraction=gpu_fraction)
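The wrapped Dataset goes straight into model.train_online together with the optimization parameters; nothing in the wrapper itself is training-specific, which is why the exact same construction reappears on the prediction path below.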
From uber/ludwig, ludwig/api.py: the same wrapping pattern on the prediction path.
            logger.warning(
                'Using in_memory = False is not supported for Ludwig API.'
            )

        preprocessed_data = build_data(
            data_df,
            features_to_load,
            self.train_set_metadata,
            self.model_definition['preprocessing']
        )
        replace_text_feature_level(
            features_to_load,
            [preprocessed_data]
        )
        dataset = Dataset(
            preprocessed_data,
            self.model_definition['input_features'],
            output_features,
            None
        )

        logger.debug('Predicting')
        predict_results = self.model.predict(
            dataset,
            batch_size,
            evaluate_performance=evaluate_performance,
            gpus=gpus,
            gpu_fraction=gpu_fraction,
            session=getattr(self.model, 'session', None)
        )
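Prediction mirrors the training-time preprocessing: build_data reuses self.train_set_metadata so that new data is encoded with the mappings collected during training, and the getattr guard reuses an already-open session when the model has one.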
From uber/ludwig, ludwig/data/preprocessing.py: building a single Dataset for prediction.
            else:  # if split == 'test':
                dataset = test
        else:
            dataset, train_set_metadata = build_dataset(
                data_csv,
                features,
                preprocessing_params,
                train_set_metadata=train_set_metadata
            )

    replace_text_feature_level(
        features,
        [dataset]
    )

    dataset = Dataset(
        dataset,
        model_definition['input_features'],
        output_features,
        train_set_metadata.get(DATA_TRAIN_HDF5_FP)
    )

    return dataset, train_set_metadata
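Driving this prediction path by hand looks roughly like the following, under the same assumptions as the sketch at the top; build_dataset's signature is taken from the excerpt above, and its import location is assumed from the file name.

from ludwig.data.preprocessing import build_dataset  # assumed location

# re-encode a raw CSV with the stored training metadata
dataset_dict, train_set_metadata = build_dataset(
    'new_data.csv',  # hypothetical file
    features,
    model_definition['preprocessing'],
    train_set_metadata=train_set_metadata
)
replace_text_feature_level(features, [dataset_dict])

dataset = Dataset(
    dataset_dict,
    model_definition['input_features'],
    model_definition['output_features'],
    None  # or train_set_metadata.get(DATA_TRAIN_HDF5_FP) when a cache exists
)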