# Assumed imports for this snippet (Ludwig's data utilities; exact module
# paths may vary between versions):
import pandas as pd
from ludwig.data.preprocessing import get_split
from ludwig.utils.data_utils import read_csv, split_dataset_tvt


def obtain_df_splits(data_csv):
    """Split an input data CSV file into train, validation and test dataframes.

    :param data_csv: Input data CSV file.
    :return test_df, train_df, val_df: Test, train and validation dataframe
        splits
    """
data_df = read_csv(data_csv)
# Obtain data split array mapping data rows to split type
# 0-train, 1-validation, 2-test
data_split = get_split(data_df)
train_split, test_split, val_split = split_dataset_tvt(data_df, data_split)
    # Splits are Python dictionaries, not dataframes; they need to be converted.
test_df = pd.DataFrame(test_split)
train_df = pd.DataFrame(train_split)
val_df = pd.DataFrame(val_split)
return test_df, train_df, val_df
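
# Usage sketch for obtain_df_splits; 'dataset.csv' is a placeholder path,
# not from the original snippet.
test_df, train_df, val_df = obtain_df_splits('dataset.csv')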


# NOTE: the opening of this test was truncated in the source; the function
# header and the start of the feature list are reconstructed below (the
# image folder and helper arguments are assumptions based on Ludwig's
# integration-test utilities).
def test_image_resizing_num_channel_handling(csv_filename):
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')
    input_features = [
        image_feature(
            folder=image_dest_folder,
            preprocessing={
                'in_memory': True,
                'height': 8,
                'width': 8,
                'num_channels': 3,
                'num_processes': 5
            },
            fc_size=8,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='minmax')
    ]
output_features = [binary_feature(), numerical_feature()]
rel_path = generate_data(
input_features, output_features, csv_filename, num_examples=50
)
df1 = read_csv(rel_path)
input_features[0]['preprocessing']['num_channels'] = 1
rel_path = generate_data(
input_features, output_features, csv_filename, num_examples=50
)
df2 = read_csv(rel_path)
df = concatenate_df(df1, df2, None)
df.to_csv(rel_path, index=False)
    # Here the user specifies the number of channels. No exception should be thrown
run_experiment(input_features, output_features, data_csv=rel_path)
del input_features[0]['preprocessing']['num_channels']
    # User now doesn't specify num_channels. Should throw an exception
    # (assumption: a ValueError, per the comment above; the original call was
    # truncated here)
    with pytest.raises(ValueError):
        run_experiment(input_features, output_features, data_csv=rel_path)


# NOTE: this fragment begins mid-function; the header below is a
# reconstruction with a hypothetical name. `model` is assumed to be a
# ludwig.api.LudwigModel and `data_csv` a path to a CSV file.
def run_training_and_prediction(model, data_csv):
    # Training with csv
model.train(
data_csv=data_csv,
skip_save_processed_input=True,
skip_save_progress=True,
skip_save_unprocessed_output=True
)
model.predict(data_csv=data_csv)
# Remove results/intermediate data saved to disk
shutil.rmtree(model.exp_dir_name, ignore_errors=True)
# Training with dataframe
data_df = read_csv(data_csv)
model.train(
data_df=data_df,
skip_save_processed_input=True,
skip_save_progress=True,
skip_save_unprocessed_output=True
)
model.predict(data_df=data_df)
return model
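
# Usage sketch (assumption: Ludwig's Python API; the model definition below
# is a minimal placeholder, not taken from the original snippet):
from ludwig.api import LudwigModel

model_definition = {
    'input_features': [{'name': 'text', 'type': 'text'}],
    'output_features': [{'name': 'label', 'type': 'binary'}]
}
model = run_training_and_prediction(LudwigModel(model_definition), 'dataset.csv')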


def _read_data(data_csv, data_dict):
    """Load input data from a CSV file or from a raw data dictionary.

    :param data_csv: path to the CSV data file
    :param data_dict: raw data as a dictionary mapping column names to values
    :return: pandas dataframe with the data
    """
if data_csv is not None:
data_df = read_csv(data_csv)
elif data_dict is not None:
data_df = pd.DataFrame(data_dict)
else:
raise ValueError(
'No input data specified. '
'One of data_df, data_csv or data_dict must be provided'
)
return data_df
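
# Usage sketch for _read_data; the path and the dictionary are made-up examples:
df_from_csv = _read_data('dataset.csv', None)
df_from_dict = _read_data(None, {'a': [1, 2], 'b': [3, 4]})
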
def concatenate_csv(train_csv, vali_csv, test_csv):
logger.info('Loading training csv...')
train_df = read_csv(train_csv)
logger.info('done')
    logger.info('Loading validation csv...')
    vali_df = read_csv(vali_csv) if vali_csv is not None else None
    logger.info('done')
    logger.info('Loading test csv...')
    test_df = read_csv(test_csv) if test_csv is not None else None
    logger.info('done')
    logger.info('Concatenating csvs...')
concatenated_df = concatenate_df(train_df, vali_df, test_df)
logger.info('done')
return concatenated_df
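
# Usage sketch (hypothetical file names; validation and test CSVs may be None):
full_df = concatenate_csv('train.csv', 'validation.csv', None)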


def build_dataset(
        dataset_csv,
        features,
        global_preprocessing_parameters,
        train_set_metadata=None,
        random_seed=default_random_seed,
        **kwargs
):
    """Read a dataset CSV and delegate preprocessing to build_dataset_df."""
    dataset_df = read_csv(dataset_csv)
    # Attach the source path so downstream code can locate the original file
    dataset_df.csv = dataset_csv
return build_dataset_df(
dataset_df,
features,
global_preprocessing_parameters,
train_set_metadata,
random_seed,
**kwargs
)
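
# Usage sketch (assumptions: `features` follows Ludwig's model-definition
# format and the call returns (dataset, train_set_metadata); the file name
# and feature list are placeholders):
dataset, train_set_metadata = build_dataset(
    'dataset.csv',
    features=[{'name': 'text', 'type': 'text'}],
    global_preprocessing_parameters={}
)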