        data_test_csv=test_fp,
        train_set_metadata_json=train_set_metadata_json,
        skip_save_processed_input=skip_save_processed_input,
        preprocessing_params=preprocessing_params,
        random_seed=random_seed
    )
else:
    raise RuntimeError('Insufficient input parameters')
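# Replace text features with the representation at their configured 'level'
# (word or char) before wrapping each split into a Dataset object.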
replace_text_feature_level(
    model_definition['input_features'] +
    model_definition['output_features'],
    [training_set, validation_set, test_set]
)

training_dataset = Dataset(
    training_set,
    model_definition['input_features'],
    model_definition['output_features'],
    train_set_metadata.get(DATA_TRAIN_HDF5_FP)
)

validation_dataset = None
if validation_set is not None:
    validation_dataset = Dataset(
        validation_set,
        model_definition['input_features'],
        model_definition['output_features'],
        train_set_metadata.get(DATA_TRAIN_HDF5_FP)
    )

test_dataset = None
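# Online training path: preprocess an incoming DataFrame batch using the
# metadata collected at training time.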
logger.debug('Preprocessing {} datapoints'.format(len(data_df)))
features_to_load = (self.model_definition['input_features'] +
                    self.model_definition['output_features'])
preprocessed_data = build_data(
    data_df,
    features_to_load,
    self.train_set_metadata,
    self.model_definition['preprocessing']
)
replace_text_feature_level(
    self.model_definition['input_features'] +
    self.model_definition['output_features'],
    [preprocessed_data]
)
dataset = Dataset(
    preprocessed_data,
    self.model_definition['input_features'],
    self.model_definition['output_features'],
    None
)
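# Train on this single preprocessed batch; the data stays in memory, hence
# the None passed as the HDF5 path to Dataset above.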
logger.debug('Training batch')
self.model.train_online(
    dataset,
    batch_size=batch_size,
    learning_rate=learning_rate,
    regularization_lambda=regularization_lambda,
    dropout_rate=dropout_rate,
    bucketing_field=bucketing_field,
    gpus=gpus,
    gpu_fraction=gpu_fraction
)
    model_definition['output_features'],
    train_set_metadata.get(DATA_TRAIN_HDF5_FP)
)

validation_dataset = None
if validation_set is not None:
    validation_dataset = Dataset(
        validation_set,
        model_definition['input_features'],
        model_definition['output_features'],
        train_set_metadata.get(DATA_TRAIN_HDF5_FP)
    )

test_dataset = None
if test_set is not None:
    test_dataset = Dataset(
        test_set,
        model_definition['input_features'],
        model_definition['output_features'],
        train_set_metadata.get(DATA_TRAIN_HDF5_FP)
    )

return (
    training_dataset,
    validation_dataset,
    test_dataset,
    train_set_metadata
)
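# Prediction path: the Ludwig API only handles in-memory data here, so the
# DataFrame is preprocessed directly with the stored training metadata.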
logger.warning(
    'Using in_memory = False is not supported for Ludwig API.'
)
preprocessed_data = build_data(
    data_df,
    features_to_load,
    self.train_set_metadata,
    self.model_definition['preprocessing']
)
replace_text_feature_level(
    features_to_load,
    [preprocessed_data]
)
dataset = Dataset(
    preprocessed_data,
    self.model_definition['input_features'],
    output_features,
    None
)
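# Run the trained model over the wrapped dataset; evaluate_performance
# presumably also computes metrics when ground-truth outputs are available.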
logger.debug('Predicting')
predict_results = self.model.predict(
    dataset,
    batch_size,
    evaluate_performance=evaluate_performance,
    gpus=gpus,
    gpu_fraction=gpu_fraction,
    session=getattr(self.model, 'session', None)
)
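# Preprocessing for prediction: either reuse the requested split of an
# already-built dataset, or build the dataset from CSV with the training metadata.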
    else:  # if split == 'test':
        dataset = test
else:
    dataset, train_set_metadata = build_dataset(
        data_csv,
        features,
        preprocessing_params,
        train_set_metadata=train_set_metadata
    )

replace_text_feature_level(
    features,
    [dataset]
)
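# Wrap the preprocessed data with the feature definitions (and the cached
# training-HDF5 path, if any) for the prediction loop.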
dataset = Dataset(
    dataset,
    model_definition['input_features'],
    output_features,
    train_set_metadata.get(DATA_TRAIN_HDF5_FP)
)

return dataset, train_set_metadata