# Imports relied on by the functions below. The original module header is not
# part of this snippet, so the exact import paths are assumed here.
from Bio import SeqIO
from keras.utils import to_categorical


def write_original_and_corrected_sequence(data, model, threads=1, name=''):
    """Extract and write original and corrected fasta sequences

    :param data: data array
    :param model: keras model
    :param threads: int num. workers
    :param name: str prefix for the output fasta file names
    """
    # Batch size is recoverable from input_shape only because the model was
    # built with an explicit batch dimension (e.g. batch_input_shape).
    batch_size, window_size, _ = model.input_shape
    _, steps = trim_to_step_multiple(data, batch_size, window_size)
    data_generator = serve_data_batch(data, batch_size, window_size)
    predictions = model.predict_generator(data_generator, steps,
                                          workers=threads)
    corrected = model_output_to_fasta(predictions, name)
    SeqIO.write(corrected, '_'.join([name, 'corrected.fa']), 'fasta')
    original = get_original_sequence(data, window_size, name)
    SeqIO.write(original, '_'.join([name, 'original.fa']), 'fasta')
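
# Example usage (illustrative sketch): `features` and `trained_model` are
# hypothetical names standing in for the preprocessed feature array and a
# fitted keras model, neither of which is shown in this snippet. The call
# would write sample1_original.fa and sample1_corrected.fa to the working
# directory.
#
#     write_original_and_corrected_sequence(features, trained_model,
#                                           threads=4, name='sample1')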
"""Split data into train, validate and test sets.
:param data: entire feature array
:param label: entire label sequence
:param lims: indices separating data into train, validate and test subsets
:param batch_size: int batch size
:param window_size: int (odd) window of positions flanking predicted base
:returns: tuple (X_train, y_train, train_steps, X_validate, y_validate,
validate_steps, X_test, y_test, test_steps)
- a data set, encoded label and exact number of steps
required to consume the data in multipes of batch_size
- labels are converted to one-hot encoding with `to_categorical`
"""
params = (batch_size, window_size)
X_train, train_steps = trim_to_step_multiple(data[lims[0]:lims[1]], *params)
y_train, _ = trim_to_step_multiple(
to_categorical(label[lims[0]:lims[1]], num_classes=6), *params)
X_validate, validate_steps = trim_to_step_multiple(data[lims[1]:lims[2]], *params)
y_validate, _ = trim_to_step_multiple(
to_categorical(label[lims[1]:lims[2]], num_classes=6), *params)
X_test, test_steps = trim_to_step_multiple(data[lims[2]:lims[3]], *params)
y_test, _ = trim_to_step_multiple(
to_categorical(label[lims[2]:lims[3]], num_classes=6), *params)
return (X_train, y_train, train_steps,
X_validate, y_validate, validate_steps,
X_test, y_test, test_steps)
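
# Example usage (illustrative sketch): `features` and `labels` are hypothetical
# names for the full feature array and integer label sequence, and the index
# values in `lims` are placeholders; lims[0]..lims[3] bound the train, validate
# and test subsets respectively.
#
#     lims = [0, 800_000, 900_000, 1_000_000]
#     (X_train, y_train, train_steps,
#      X_validate, y_validate, validate_steps,
#      X_test, y_test, test_steps) = split_data(features, labels, lims,
#                                               batch_size=100, window_size=7)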