How to use the ludwig.utils.data_utils.read_csv function in ludwig

To help you get started, we've selected a few ludwig examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github uber / ludwig / tests / integration_tests / test_visualization_api.py View on Github external
def obtain_df_splits(data_csv):
    """Split an input data CSV file into train, validation and test dataframes.

    :param data_csv: path to the input data CSV file.
    :return: (test_df, train_df, val_df) dataframe splits.
    """
    frame = read_csv(data_csv)
    # Per-row split assignment: 0 -> train, 1 -> validation, 2 -> test.
    split_arr = get_split(frame)
    train_part, test_part, val_part = split_dataset_tvt(frame, split_arr)
    # split_dataset_tvt returns python dictionaries, so each split is
    # wrapped in a DataFrame before being handed back.
    return (
        pd.DataFrame(test_part),
        pd.DataFrame(train_part),
        pd.DataFrame(val_part),
    )
github uber / ludwig / tests / integration_tests / test_experiment.py View on Github external
# NOTE(review): fragment — the enclosing test function's header is outside
# this view; the opening of the input-feature list (presumably an image
# feature's preprocessing dict) is truncated above.
'width': 8,
                'num_channels': 3,
                'num_processes': 5
            },
            fc_size=8,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='minmax')
    ]
    output_features = [binary_feature(), numerical_feature()]
    # Generate a synthetic CSV matching the feature definitions.
    rel_path = generate_data(
        input_features, output_features, csv_filename, num_examples=50
    )

    df1 = read_csv(rel_path)

    # Regenerate the data with a single image channel, then append it to the
    # three-channel data so the resulting CSV mixes channel counts.
    input_features[0]['preprocessing']['num_channels'] = 1
    rel_path = generate_data(
        input_features, output_features, csv_filename, num_examples=50
    )
    df2 = read_csv(rel_path)

    df = concatenate_df(df1, df2, None)
    df.to_csv(rel_path, index=False)

    # Here the user specifies the number of channels. Exception shouldn't be thrown
    run_experiment(input_features, output_features, data_csv=rel_path)

    del input_features[0]['preprocessing']['num_channels']

    # User now doesn't specify num channels. Should throw exception
github uber / ludwig / tests / integration_tests / test_server.py View on Github external
# Training with csv
# NOTE(review): fragment — the enclosing function's header is not visible here.
    model.train(
        data_csv=data_csv,
        skip_save_processed_input=True,
        skip_save_progress=True,
        skip_save_unprocessed_output=True
    )

    model.predict(data_csv=data_csv)

    # Remove results/intermediate data saved to disk
    shutil.rmtree(model.exp_dir_name, ignore_errors=True)

    # Training with dataframe
    # Exercise the same train/predict cycle with an in-memory dataframe
    # instead of a CSV path.
    data_df = read_csv(data_csv)
    model.train(
        data_df=data_df,
        skip_save_processed_input=True,
        skip_save_progress=True,
        skip_save_unprocessed_output=True
    )
    model.predict(data_df=data_df)
    return model
github uber / ludwig / ludwig / api.py View on Github external
def _read_data(data_csv, data_dict):
        """
        :param data_csv: path to the csv data
        :param data_dict: raw data
        :return: pandas dataframe with the data
        """
        if data_csv is not None:
            data_df = read_csv(data_csv)
        elif data_dict is not None:
            data_df = pd.DataFrame(data_dict)
        else:
            raise ValueError(
                'No input data specified. '
                'One of data_df, data_csv or data_dict must be provided'
            )

        return data_df
github uber / ludwig / ludwig / data / concatenate_datasets.py View on Github external
def concatenate_csv(train_csv, vali_csv, test_csv):
    """Load up to three CSV splits and concatenate them into one dataframe.

    :param train_csv: path to the training split CSV (required).
    :param vali_csv: path to the validation split CSV, or None.
    :param test_csv: path to the test split CSV, or None.
    :return: single dataframe concatenating all provided splits.
    """
    def _load_optional(path, message):
        # Emit the same log bracket as a direct load, but skip reading
        # when no path was given.
        logger.info(message)
        frame = read_csv(path) if path is not None else None
        logger.info('done')
        return frame

    logger.info('Loading training csv...')
    train_df = read_csv(train_csv)
    logger.info('done')

    vali_df = _load_optional(vali_csv, 'Loading validation csv..')
    test_df = _load_optional(test_csv, 'Loading test csv..')

    logger.info('Concatenating csvs..')
    concatenated_df = concatenate_df(train_df, vali_df, test_df)
    logger.info('done')

    return concatenated_df
github uber / ludwig / ludwig / data / concatenate_datasets.py View on Github external
def concatenate_csv(train_csv, vali_csv, test_csv):
    """Load up to three CSV splits and concatenate them into one dataframe.

    :param train_csv: path to the training split CSV (required).
    :param vali_csv: path to the validation split CSV, or None.
    :param test_csv: path to the test split CSV, or None.
    :return: single dataframe concatenating all provided splits.
    """
    frames = []
    # (path, log message, required) — the training split is always read;
    # validation and test are optional and yield None when absent.
    for path, message, required in (
        (train_csv, 'Loading training csv...', True),
        (vali_csv, 'Loading validation csv..', False),
        (test_csv, 'Loading test csv..', False),
    ):
        logger.info(message)
        if required:
            frames.append(read_csv(path))
        else:
            frames.append(read_csv(path) if path is not None else None)
        logger.info('done')
    train_df, vali_df, test_df = frames

    logger.info('Concatenating csvs..')
    concatenated_df = concatenate_df(train_df, vali_df, test_df)
    logger.info('done')

    return concatenated_df
github uber / ludwig / ludwig / data / preprocessing.py View on Github external
def build_dataset(
        dataset_csv,
        features,
        global_preprocessing_parameters,
        train_set_metadata=None,
        random_seed=default_random_seed,
        **kwargs
):
    """Read a CSV file and build the processed dataset from it.

    Thin wrapper over build_dataset_df that first loads the CSV into a
    dataframe.

    :param dataset_csv: path to the dataset CSV file.
    :param features: feature definitions for preprocessing.
    :param global_preprocessing_parameters: global preprocessing settings.
    :param train_set_metadata: previously computed metadata, or None.
    :param random_seed: seed used for any randomized preprocessing.
    :return: result of build_dataset_df on the loaded dataframe.
    """
    raw_df = read_csv(dataset_csv)
    # Remember the originating path on the frame itself so downstream
    # code can refer back to the source file.
    raw_df.csv = dataset_csv
    return build_dataset_df(
        raw_df,
        features,
        global_preprocessing_parameters,
        train_set_metadata,
        random_seed,
        **kwargs
    )