config = {
    'input_features': [
        {
            'name': 'x1',
            'type': 'numeric',
        },
        {
            'name': 'x2',
            'type': 'numeric',
        }
    ],
    'output_features': [
        {
            'name': 'y',
            'type': 'categorical',
        }
    ]
}
config = predictor_config_schema.validate(config)
n_points = 100
data = {'x1': [i for i in range(n_points)],
        'x2': [random.randint(i, i + 20) for i in range(n_points)]}
nums = [data['x1'][i] * data['x2'][i] for i in range(n_points)]
data['y'] = ['low' if i < 50 else 'high' for i in nums]
df = pd.DataFrame(data)
self.config = config
self.df = df
config = {
    'input_features': [
        {
            'name': 'x',
            'type': 'numeric'
        },
        {
            'name': 'y',
            'type': 'numeric'
        }
    ],
    'output_features': [
        {
            'name': 'z',
            'type': 'numeric'
        },
        {
            'name': 'z`',
            'type': 'categorical'
        }
    ]
}
config = predictor_config_schema.validate(config)
data = {'x': [i for i in range(10)], 'y': [random.randint(i, i + 20) for i in range(10)]}
nums = [data['x'][i] * data['y'][i] for i in range(10)]
data['z'] = [i + 0.5 for i in range(10)]
data['z`'] = ['low' if i < 50 else 'high' for i in nums]
data_frame = pandas.DataFrame(data)
ds = DataSource(data_frame, config)
ds.prepare_encoders()
mixer = NnMixer({}, config)
mixer.fit(ds, ds, stop_training_after_seconds=50)
predict_input_ds = DataSource(data_frame[['x', 'y']], config)
predict_input_ds.prepare_encoders()
logging.info(f'Boosting mixer can\'t be loaded due to error: {e} !')
print(f'Boosting mixer can\'t be loaded due to error: {e} !')
if load_from_path is not None:
    pickle_in = open(load_from_path, "rb")
    self_dict = dill.load(pickle_in)
    pickle_in.close()
    self.__dict__ = self_dict
    self.convert_to_device()
    return
if output is None and config is None:
    raise ValueError('You must give one argument to the Predictor constructor')

try:
    if config is not None and output is None:
        self.config = predictor_config_schema.validate(config)
except Exception:
    error = traceback.format_exc(1)
    raise ValueError('[BAD DEFINITION] argument has errors: {err}'.format(err=error))
# This flags whether we need to automatically generate a configuration
self._generate_config = output is not None or self.config is None
self._output_columns = output
self._input_columns = None
self.train_accuracy = None
self._mixer = None
self._helper_mixers = None
# If the number of unique values is less than 100, or less than
# 10% of the total number of rows, keep the column as categorical
unique = from_data[col_name].nunique()
if unique < 100 or unique < len(from_data[col_name]) / 10:
    return COLUMN_DATA_TYPES.CATEGORICAL
# Otherwise assume it's text
return COLUMN_DATA_TYPES.TEXT
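# Worked example of the heuristic above (illustrative numbers, not from the
# original source): a column with 80 unique values is always CATEGORICAL
# (80 < 100); 500 uniques in 10,000 rows is also CATEGORICAL (500 < 1,000);
# a free-text column with 9,500 uniques in 10,000 rows fails both checks
# and falls through to TEXT.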
# Generate the configuration and set the order of the input and output columns
if self._generate_config is True:
    self._input_columns = [col for col in from_data if col not in self._output_columns]
    self.config = {
        'input_features': [{'name': col, 'type': type_map(col)} for col in self._input_columns],
        'output_features': [{'name': col, 'type': type_map(col)} for col in self._output_columns]
    }
    self.config = predictor_config_schema.validate(self.config)
    logging.info('Automatically generated a configuration')
    logging.info(self.config)
else:
    self._output_columns = [col['name'] for col in self.config['output_features']]
    self._input_columns = [col['name'] for col in self.config['input_features']]
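# Illustrative example of a generated configuration (column names assumed from
# the sample frame earlier in this listing, not taken verbatim from the source):
# {'input_features': [{'name': 'x1', 'type': 'numeric'},
#                     {'name': 'x2', 'type': 'numeric'}],
#  'output_features': [{'name': 'y', 'type': 'categorical'}]}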
if stop_training_after_seconds is None:
    stop_training_after_seconds = round(from_data.shape[0] * from_data.shape[1] / 5)

if stop_model_building_after_seconds is None:
    stop_model_building_after_seconds = stop_training_after_seconds * 3
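# Illustrative arithmetic for the defaults above (frame size assumed for the
# example): a 1,000-row, 5-column frame yields stop_training_after_seconds =
# round(1000 * 5 / 5) = 1000, and stop_model_building_after_seconds = 3 * 1000 = 3000.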
from_data_ds = DataSource(from_data, self.config)

if test_data is not None:
    test_data_ds = DataSource(test_data, self.config)