else:
y_ = None
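# Try fitting on the features as given; if the estimator rejects the input format, retry below with a sparse/dense conversion.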
try:
log.info("Fitting model {}".format(self.model_name))
self.model.fit(x_features, y_)
except (TypeError, ValueError):
try:
if issparse(x_features):
log.info("Converting input for model {} to dense array".format(self.model_name))
self.model.fit(x_features.todense(), y_)
else:
log.info("Converting input for model {} to sparse array".format(self.model_name))
self.model.fit(csr_matrix(x_features), y_)
except Exception:
raise ConfigError("Cannot fit model {} on the given data".format(self.model_name))
return
"from the rest datasets.".format(config['dataset_reader']['data_path']))
iterator = get_iterator_from_config(config, data)
if isinstance(iterator, DataFittingIterator):
raise ConfigError("Instance of a class 'DataFittingIterator' is not supported.")
else:
if config.get('train', None):
if config['train']['test_best'] and len(iterator.data['test']) == 0:
raise ConfigError(
"The 'test' part of dataset is empty, but 'test_best' in train config is 'True'."
" Please check the dataset_iterator config.")
if (config['train']['validate_best'] or config['train'].get('val_every_n_epochs', False) > 0) and \
len(iterator.data['valid']) == 0:
raise ConfigError(
"The 'valid' part of dataset is empty, but 'valid_best' in train config is 'True'"
" or 'val_every_n_epochs' > 0. Please check the dataset_iterator config.")
else:
if len(iterator.data['test']) == 0:
raise ConfigError("The 'test' part of dataset is empty as a 'train' part of config file, "
"but default value of 'test_best' is 'True'. "
"Please check the dataset_iterator config.")
# get a tiny data from dataset
if len(iterator.data['train']) <= 100:
print("!!!!!!!!!!!!! WARNING !!!!!!!!!!!!! Length of 'train' part dataset <= 100. "
"Please check the dataset_iterator config")
tiny_train = copy(iterator.data['train'])
else:
tiny_train = copy(iterator.data['train'][:10])
iterator.train = tiny_train
'validate_best': True,
'test_best': True
}
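# Merge: explicit keys in train_config override the defaults above.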
train_config = dict(default_train_config, **train_config)
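# Pick the comparison direction for the target metric; 'best' starts at the worst possible value for that direction.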
if train_config['metric_optimization'] == 'maximize':
def improved(score, best):
return score > best
best = float('-inf')
elif train_config['metric_optimization'] == 'minimize':
def improved(score, best):
return score < best
best = float('inf')
else:
raise ConfigError('metric_optimization has to be one of {}'.format(['maximize', 'minimize']))
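# Training-loop state: batch counter, epoch counter, number of processed examples, save flag, and early-stopping patience.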
i = 0
epochs = 0
examples = 0
saved = False
patience = 0
log_on = train_config['log_every_n_batches'] > 0 or train_config['log_every_n_epochs'] > 0
train_y_true = []
train_y_predicted = []
start_time = time.time()
break_flag = False
try:
while True:
for x, y_true in iterator.batch_generator(train_config['batch_size']):
if log_on:
y_predicted = list(model(list(x)))
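# Record which splits (train/valid/test) the first dataset provides, so every later dataset can be checked for the same structure.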
dataset_composition_ = dict(train=False, valid=False, test=False)
data = read_data_by_config(config)
if i == 0:
for dtype in dataset_composition_.keys():
if len(data.get(dtype, [])) != 0:
dataset_composition_[dtype] = True
else:
for dtype in dataset_composition_.keys():
if len(data.get(dtype, [])) == 0 and dataset_composition_[dtype]:
raise ConfigError("The file structure in the {0} dataset differs "
"from the rest datasets.".format(config['dataset_reader']['data_path']))
self.root = expand_path(
self.exp_config['pipeline_search'].get('root',
'~/.deeppavlov/experiments'))
self.plot = self.exp_config['pipeline_search'].get('plot', False)
self.save_best = self.exp_config['pipeline_search'].get('save_best', False)
self.do_test = self.exp_config['pipeline_search'].get('do_test', False)
self.search_type = self.exp_config['pipeline_search'].get('search_type', 'random')
self.sample_num = self.exp_config['pipeline_search'].get('sample_num', 10)
self.target_metric = self.exp_config['pipeline_search'].get('target_metric')
self.multiprocessing = self.exp_config['pipeline_search'].get('multiprocessing', True)
self.max_num_workers = self.exp_config['pipeline_search'].get('max_num_workers')
cpu_num = cpu_count()
if self.max_num_workers:
if self.max_num_workers > cpu_num:
raise ConfigError("Parameter 'max_num_workers'={0}, "
"but amounts of cpu is {1}.".format(self.max_num_workers, cpu_num))
elif self.max_num_workers < 1:
raise ConfigError("The number of workers must be at least equal to one. "
"Please check 'max_num_workers' parameter in config.")
self.use_gpu = self.exp_config['pipeline_search'].get('use_all_gpus', False)
self.memory_fraction = self.exp_config['pipeline_search'].get('gpu_memory_fraction', 1.0)
self.max_num_workers = None
self.available_gpu = None
# create the observer
self.save_path = self.root / self.date / self.exp_name / 'checkpoints'
self.observer = ExperimentObserver(self.exp_name, self.root, self.info, self.date, self.plot)
# create the pipeline generator
self.pipeline_generator = PipeGen(self.exp_config, self.save_path, self.search_type, self.sample_num, False)
self.gen_len = self.pipeline_generator.length
Returns:
sparse or dense array of stacked data
"""
x_features = []
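# Stack each input's samples into a single matrix per feature, keeping scipy sparse blocks sparse.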
for i in range(len(x)):
if ((isinstance(x[i], (tuple, list, np.ndarray)) and len(x[i]))
or (issparse(x[i]) and x[i].shape[0])):
if issparse(x[i][0]):
x_features.append(vstack(list(x[i])))
elif isinstance(x[i][0], np.ndarray) or isinstance(x[i][0], list):
x_features.append(np.vstack(list(x[i])))
elif isinstance(x[i][0], str):
x_features.append(np.array(x[i]))
else:
raise ConfigError('This type of input vectors is not supported')
else:
raise ConfigError("Input vectors cannot be empty")
sparse = any(issparse(inp) for inp in x_features)
if sparse:
x_features = hstack(list(x_features))
else:
x_features = np.hstack(list(x_features))
return x_features
raise ConfigError("Instance of a class 'DataFittingIterator' is not supported.")
else:
if config.get('train', None):
if config['train']['test_best'] and len(iterator.data['test']) == 0:
raise ConfigError(
"The 'test' part of dataset is empty, but 'test_best' in train config is 'True'."
" Please check the dataset_iterator config.")
if (config['train']['validate_best'] or config['train'].get('val_every_n_epochs', False) > 0) and \
len(iterator.data['valid']) == 0:
raise ConfigError(
"The 'valid' part of dataset is empty, but 'valid_best' in train config is 'True'"
" or 'val_every_n_epochs' > 0. Please check the dataset_iterator config.")
else:
if len(iterator.data['test']) == 0:
raise ConfigError("The 'test' part of dataset is empty as a 'train' part of config file, "
"but default value of 'test_best' is 'True'. "
"Please check the dataset_iterator config.")
# get a tiny data from dataset
if len(iterator.data['train']) <= 100:
print("!!!!!!!!!!!!! WARNING !!!!!!!!!!!!! Length of 'train' part dataset <= 100. "
"Please check the dataset_iterator config")
tiny_train = copy(iterator.data['train'])
else:
tiny_train = copy(iterator.data['train'][:10])
iterator.train = tiny_train
if len(iterator.data['valid']) <= 20:
tiny_valid = copy(iterator.data['valid'])
else:
tiny_valid = copy(iterator.data['valid'][:5])
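# Build the iterator, validate the split configuration, and carve out tiny train/valid subsets for a quick sanity run.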
iterator = get_iterator_from_config(config, data)
if isinstance(iterator, DataFittingIterator):
raise ConfigError("Instance of a class 'DataFittingIterator' is not supported.")
else:
if config.get('train', None):
if config['train']['test_best'] and len(iterator.data['test']) == 0:
raise ConfigError("The 'test' part of dataset is empty, but 'test_best' in train config is 'True'."
" Please check the dataset_iterator config.")
if (config['train']['validate_best'] or config['train'].get('val_every_n_epochs', 0) > 0) and \
len(iterator.data['valid']) == 0:
raise ConfigError("The 'valid' part of dataset is empty, but 'validate_best' in train config is 'True'"
" or 'val_every_n_epochs' > 0. Please check the dataset_iterator config.")
else:
if len(iterator.data['test']) == 0:
raise ConfigError("The 'test' part of dataset is empty as a 'train' part of config file, "
"but default value of 'test_best' is 'True'. "
"Please check the dataset_iterator config.")
# get a tiny data from dataset
if len(iterator.data['train']) <= 100:
print("!!!!!!!!!!!!! WARNING !!!!!!!!!!!!! Length of 'train' part dataset <= 100. "
"Please check the dataset_iterator config")
tiny_train = copy(iterator.data['train'])
else:
tiny_train = copy(iterator.data['train'][:10])
iterator.train = tiny_train
if len(iterator.data['valid']) <= 20:
tiny_valid = copy(iterator.data['valid'])
else:
tiny_valid = copy(iterator.data['valid'][:5])
iterator.valid = tiny_valid