# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_feature_meta(column, preprocessing_parameters):
    """Compute preprocessing metadata for a timeseries column.

    Tokenizes every entry of ``column`` with the configured tokenizer and
    records the longest token sequence seen, capped at the configured
    ``timeseries_length_limit``.

    :param column: iterable of raw timeseries values to tokenize.
    :param preprocessing_parameters: dict providing the 'tokenizer' name
        and the 'timeseries_length_limit' cap.
    :return: dict with a single 'max_timeseries_length' entry.
    """
    tokenize = get_from_registry(
        preprocessing_parameters['tokenizer'],
        tokenizer_registry
    )()
    longest = 0
    for series in column:
        # Track the longest tokenized sequence observed so far.
        longest = max(longest, len(tokenize(series)))
    capped = min(
        preprocessing_parameters['timeseries_length_limit'],
        longest
    )
    return {'max_timeseries_length': capped}
def get_image_encoder(self, encoder_parameters):
    """Instantiate this feature's image encoder.

    Looks up the encoder class registered under ``self.encoder`` and
    constructs it with ``encoder_parameters`` as keyword arguments.
    """
    encoder_class = get_from_registry(self.encoder, image_encoder_registry)
    return encoder_class(**encoder_parameters)
def get_date_encoder(self, encoder_parameters):
    """Instantiate this feature's date encoder.

    Looks up the encoder class registered under ``self.encoder`` and
    constructs it with ``encoder_parameters`` as keyword arguments.
    """
    encoder_class = get_from_registry(self.encoder, date_encoder_registry)
    return encoder_class(**encoder_parameters)
def build_sequence_matrix(
        sequences,
        inverse_vocabulary,
        tokenizer_type,
        length_limit,
        padding_symbol,
        padding='right',
        unknown_symbol=UNKNOWN_SYMBOL,
        lowercase=True,
        tokenizer_vocab_file=None,
):
    # Convert each raw sequence into a vector of vocabulary ids using the
    # tokenizer registered under `tokenizer_type`.
    #
    # NOTE(review): as visible here the function only accumulates
    # `unit_vectors` and implicitly returns None; `length_limit`,
    # `padding_symbol` and `padding` are never used and `max_length` is set
    # but never updated. This strongly suggests the padding/matrix-assembly
    # tail of the function was lost in extraction — confirm against the
    # original file before relying on this block.
    tokenizer = get_from_registry(tokenizer_type, tokenizer_registry)(
        vocab_file=tokenizer_vocab_file
    )
    # Smallest integer dtype that can hold the largest vocabulary id.
    format_dtype = int_type(len(inverse_vocabulary) - 1)
    max_length = 0
    unit_vectors = []
    for sequence in sequences:
        # Tokenize and map tokens to ids; unknown tokens fall back to
        # `unknown_symbol`, and casing is normalized when `lowercase` is set.
        unit_indices_vector = _get_sequence_vector(
            sequence,
            tokenizer,
            format_dtype,
            inverse_vocabulary,
            lowercase=lowercase,
            unknown_symbol=unknown_symbol
        )
        unit_vectors.append(unit_indices_vector)
def build_feature_parameters(features):
    """Build per-feature parameter dicts, keyed by feature name.

    For each feature spec, dispatches to the parameter builder registered
    for the feature's type and stores the result under the feature's name.

    :param features: iterable of feature dicts with 'type' and 'name' keys.
    :return: dict mapping feature name -> built parameters.
    """
    parameters_by_name = {}
    for feature in features:
        builder = get_from_registry(
            feature['type'],
            parameters_builders_registry
        )
        parameters_by_name[feature['name']] = builder(feature)
    return parameters_by_name
def get_sequence_decoder(self, decoder_parameters):
    """Instantiate this feature's sequence decoder.

    Looks up the decoder class registered under ``self.decoder`` and
    constructs it with ``decoder_parameters`` as keyword arguments.
    """
    decoder_class = get_from_registry(self.decoder, sequence_decoder_registry)
    return decoder_class(**decoder_parameters)
def build_matrix(
        timeseries,
        tokenizer_name,
        length_limit,
        padding_value,
        padding='right'
):
    # Tokenize each timeseries string into a float32 vector and track the
    # longest vector seen.
    #
    # NOTE(review): this block is syntactically truncated — the
    # logger.debug(...) call below is cut off mid-argument-list (no closing
    # parentheses), and the padding/matrix-assembly logic implied by
    # `length_limit`, `padding_value` and `padding` is missing. Recover the
    # rest of the function from the original file; as-is this does not parse.
    tokenizer = get_from_registry(
        tokenizer_name,
        tokenizer_registry
    )()
    max_length = 0
    ts_vectors = []
    for ts in timeseries:
        ts_vector = np.array(tokenizer(ts)).astype(np.float32)
        ts_vectors.append(ts_vector)
        if len(ts_vector) > max_length:
            max_length = len(ts_vector)
    # Presumably the observed max length is then clamped/expanded against
    # `length_limit` — TODO confirm against the full source.
    if max_length < length_limit:
        logger.debug(
            'max length of {0}: {1} < limit: {2}'.format(
                tokenizer_name,
                max_length,
def get_sequence_vector(sequence, tokenizer_type, unit_to_id, lowercase=True):
    """Convert a raw sequence into a vector of unit ids.

    Instantiates the tokenizer registered under ``tokenizer_type``, picks
    the smallest integer dtype able to hold the largest id in
    ``unit_to_id``, and delegates the actual conversion.

    :param sequence: raw input sequence to tokenize.
    :param tokenizer_type: registry key of the tokenizer to use.
    :param unit_to_id: mapping from token unit to integer id.
    :param lowercase: whether to lowercase before tokenizing.
    """
    tokenizer = get_from_registry(tokenizer_type, tokenizer_registry)()
    dtype = int_type(len(unit_to_id) - 1)
    return _get_sequence_vector(
        sequence,
        tokenizer,
        dtype,
        unit_to_id,
        lowercase=lowercase
    )
def calculate_overall_stats(test_stats, output_features, dataset,
                            train_set_metadata):
    """Compute overall statistics for every output feature.

    Dispatches each output feature to the feature class registered for its
    type; each class updates ``test_stats`` via its own
    ``calculate_overall_stats`` (called for side effects, nothing returned).
    """
    for output_feature in output_features:
        feature_class = get_from_registry(
            output_feature['type'],
            output_type_registry
        )
        feature_class.calculate_overall_stats(
            test_stats, output_feature, dataset, train_set_metadata
        )
def generate_datapoint(features):
    """Generate one synthetic datapoint: one value per feature spec.

    Features explicitly flagged with ``'cycle': True`` (and whose type has
    a registered cycler) are produced by the cycler; all others fall back
    to the generator registered for the feature's type.

    :param features: iterable of feature spec dicts with a 'type' key.
    :return: list of generated values, in feature order.
    """
    datapoint = []
    for feature in features:
        # `.get('cycle') is True` matches the original's key-presence +
        # identity check (a missing key yields None, never True).
        use_cycler = (
            feature.get('cycle') is True
            and feature['type'] in cyclers_registry
        )
        if use_cycler:
            value = cyclers_registry[feature['type']](feature)
        else:
            generator = get_from_registry(
                feature['type'],
                generators_registry
            )
            value = generator(feature)
        datapoint.append(value)
    return datapoint