Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_n_output_without_augmentation(self):
# Runs each flow on each text five times, requesting n=3 outputs per call,
# and compares every output with the original text.
# NOTE(review): this snippet is truncated -- the body of the final `if` and
# any assertion that follows it are not visible here.
#
# The only input consists solely of the letter 'A' and a single space.
# NOTE(review): presumably chosen so the augmenters below leave it
# unchanged -- confirm against OcrAug / RandomCharAug.
texts = [
'AAAAAAAAAAA AAAAAAAAAAAAAA'
]
flows = [
# Two OCR augmenters chained in a Sequential flow.
naf.Sequential([
nac.OcrAug(),
nac.OcrAug()
]),
# Two random-character augmenters in a Sometimes flow with a very small
# pipeline_p.
# NOTE(review): presumably this makes the flow almost never apply its
# augmenters -- confirm against naf.Sometimes.
naf.Sometimes([
nac.RandomCharAug(),
nac.RandomCharAug()
], pipeline_p=0.00001)
]
for flow in flows:
for text in texts:
# Repeat the same call five times.
for _ in range(5):
augmented_texts = flow.augment(text, n=3)
# Flag starts False; the code that would update it lies past the visible
# end of this block.
all_not_equal = False
for augmented_text in augmented_texts:
if augmented_text != text:
def test_n_output_audio(self):
    """Audio flows asked for n=3 outputs must return several changed signals.

    Three flows are exercised: a Sequential, a Sometimes (pipeline_p=0.9),
    and a Sequential nesting one of each. Every flow chains CropAug and
    LoudnessAug. For each flow the test asserts that more than one output
    is returned and that no output is element-wise equal to the input audio.
    """
    audio, sampling_rate = AudioLoader.load_audio(self.sample_wav_file)

    def crop_then_loudness():
        # Fresh augmenter instances on every call, as in the literal listing.
        return [
            naa.CropAug(sampling_rate=sampling_rate),
            naa.LoudnessAug(),
        ]

    sequential_flow = naf.Sequential(crop_then_loudness())
    sometimes_flow = naf.Sometimes(crop_then_loudness(), pipeline_p=0.9)
    nested_flow = naf.Sequential([
        naf.Sequential(crop_then_loudness()),
        naf.Sometimes(crop_then_loudness(), pipeline_p=0.9),
    ])

    for pipeline in (sequential_flow, sometimes_flow, nested_flow):
        outputs = pipeline.augment(audio, n=3)
        self.assertGreater(len(outputs), 1)
        for output in outputs:
            unchanged = np.array_equal(audio, output)
            self.assertFalse(unchanged)
def test_multiple_actions(self):
# Builds two nested flows that mix several character-level actions and
# iterates over them for each sample text.
# NOTE(review): this snippet is truncated -- the body of the retry loop and
# the final assertion are not visible here.
texts = [
'The quick brown fox jumps over the lazy dog',
'Zology raku123456 fasdasd asd4123414 1234584'
]
flows = [
# Sequential outer flow: a Sometimes (insert/delete) followed by a named
# Sequential that substitutes characters.
naf.Sequential([
naf.Sometimes([nac.RandomCharAug(action="insert"),
nac.RandomCharAug(action="delete")],
pipeline_p=0.9),
naf.Sequential([
nac.RandomCharAug(action="substitute", aug_char_min=1, aug_char_p=0.6, aug_word_p=0.6)
], name='Sub_Seq')
]),
# Sometimes outer flow: an inner Sometimes (insert/delete, no explicit
# pipeline_p) and a Sequential of OCR, keyboard and substitute augmenters.
naf.Sometimes([
naf.Sometimes([nac.RandomCharAug(action="insert"),
nac.RandomCharAug(action="delete")]),
naf.Sequential([nac.OcrAug(), nac.KeyboardAug(aug_char_min=1),
nac.RandomCharAug(action="substitute", aug_char_min=1, aug_char_p=0.6, aug_word_p=0.6)])
], pipeline_p=0.9)
]
# Because the flows are probabilistic, a single run may perform no
# augmentation at all, so each (flow, text) pair is retried up to 5 times.
for flow in flows:
for text in texts:
# Flag starts False for every (flow, text) pair; the code that would set
# it is in the truncated loop body.
at_least_one_not_equal = False
for _ in range(5):
def test_dry_run(self):
    """An empty Sequential nested in a Sequential yields no output for []."""
    nested_empty_flow = naf.Sequential([naf.Sequential()])
    output_count = len(nested_empty_flow.augment([]))
    self.assertEqual(0, output_count)
def test_n_output_spectrogram(self):
# Loads a 128-band mel spectrogram from the sample wav file and builds
# flows of frequency/time masking augmenters.
# NOTE(review): this snippet is truncated -- the `flows` list is never
# closed and the code that runs the flows is not visible here.
mel_spectrogram = AudioLoader.load_mel_spectrogram(self.sample_wav_file, n_mels=128)
# Visible flows: a plain Sequential, a Sometimes (pipeline_p=0.9), and the
# start of a Sequential that nests one of each. All use mask_factor=80.
flows = [
naf.Sequential([
nas.FrequencyMaskingAug(mask_factor=80),
nas.TimeMaskingAug(mask_factor=80)
]),
naf.Sometimes([
nas.FrequencyMaskingAug(mask_factor=80),
nas.TimeMaskingAug(mask_factor=80)
], pipeline_p=0.9),
naf.Sequential([
naf.Sequential([
nas.FrequencyMaskingAug(mask_factor=80),
nas.TimeMaskingAug(mask_factor=80)
]),
naf.Sometimes([
nas.FrequencyMaskingAug(mask_factor=80),
nas.TimeMaskingAug(mask_factor=80)
], pipeline_p=0.9)
# NOTE(review): fragment of a larger test -- the enclosing `def` and the
# opening of the list these flows belong to are not visible here.
#
# First flow: a Sequential combining OCR and word2vec word-embedding
# augmenters, a Sequential with a random-character augmenter, and an XLNet
# contextual word-embedding augmenter.
naf.Sequential([
naf.Sequential([
nac.OcrAug(),
# The model path is built by plain string concatenation, so MODEL_DIR must
# already end with a path separator; os.environ[...] raises KeyError when
# the variable is unset.
naw.WordEmbsAug(
model_type='word2vec',
model_path=os.environ["MODEL_DIR"] + 'GoogleNews-vectors-negative300.bin')
]),
naf.Sequential([
nac.RandomCharAug(),
]),
naw.ContextualWordEmbsAug(
model_path='xlnet-base-cased', action="substitute",
skip_unknown_word=True, temperature=0.7, device='cpu')
]),
# Second flow: a Sometimes wrapping a character-level Sequential, an inner
# Sometimes around the word2vec augmenter, and the XLNet augmenter. Both
# pipeline_p values are close to 1.
naf.Sometimes([
naf.Sequential([
nac.OcrAug(),
nac.RandomCharAug(),
]),
naf.Sometimes([
naw.WordEmbsAug(model_type='word2vec',
model_path=os.environ["MODEL_DIR"] + 'GoogleNews-vectors-negative300.bin')
], pipeline_p=0.999),
naw.ContextualWordEmbsAug(
model_path='xlnet-base-cased', action="substitute",
skip_unknown_word=True, temperature=0.7, device='cpu')
], pipeline_p=0.9999)
]
# Run every flow with num_thread set to 1 and then to 3.
# NOTE(review): `text` and `n` are not defined in the visible lines;
# presumably they are set earlier in the enclosing test -- confirm. No check
# on `augmented_data` is visible in this fragment.
for num_thread in [1, 3]:
for flow in flows:
augmented_data = flow.augment(text, n=n, num_thread=num_thread)
# Body of a textual n-output test (the enclosing `def` is not visible in
# this snippet). Each flow is run on each text with n=3; more than one
# output must come back and every output must differ from the input text.
texts = [
    'The quick brown fox jumps over the lazy dog',
    'Zology raku123456 fasdasd asd4123414 1234584',
    'AAAAAAAAAAA AAAAAAAAAAAAAA',
]

# Flows are built one by one, in the same order as they are listed below.
insert_then_word_flow = naf.Sequential([
    nac.RandomCharAug(action="insert"),
    naw.RandomWordAug(),
])
insert_or_delete_flow = naf.Sometimes(
    [
        nac.RandomCharAug(action="insert"),
        nac.RandomCharAug(action="delete"),
    ],
    pipeline_p=0.9,
)
nested_flow = naf.Sequential([
    naf.Sequential([
        nac.RandomCharAug(action="insert"),
        naw.RandomWordAug(),
    ]),
    naf.Sometimes(
        [
            nac.RandomCharAug(action="insert"),
            nac.RandomCharAug(action="delete"),
        ],
        pipeline_p=0.9,
    ),
])
flows = [insert_then_word_flow, insert_or_delete_flow, nested_flow]

for flow in flows:
    for text in texts:
        augmented_texts = flow.augment(text, n=3)
        output_count = len(augmented_texts)
        self.assertGreater(output_count, 1)
        for augmented_text in augmented_texts:
            self.assertNotEqual(augmented_text, text)