Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_empty_input_for_swap(self):
    """Check swap augmentation of blank and missing input.

    A whitespace-only string is expected to come back as ``''`` and a
    ``None`` input is expected to come back as ``None``.
    """
    texts = [' ']
    aug = naw.RandomWordAug(action="swap")
    for text in texts:
        augmented_text = aug.augment(text)
        self.assertEqual('', augmented_text)

    # Guard against the loop above passing vacuously on an empty fixture.
    self.assertEqual(1, len(texts))

    tokens = [None]
    aug = naw.RandomWordAug(action="swap")
    for t in tokens:
        augmented_text = aug.augment(t)
        # assertIsNone is the idiomatic identity check for None and gives
        # a clearer failure message than assertEqual(None, ...).
        self.assertIsNone(augmented_text)

    # Same vacuous-pass guard for the None fixture.
    self.assertEqual(1, len(tokens))
def test_substitute_with_target_word(self):
    """Substitution must place one of the supplied target words in the text."""
    candidates = ['$', '#', '^^^']
    sentences = ['The quick brown fox jumps over the lazy dog']
    aug = naw.RandomWordAug(action='substitute', target_words=candidates)

    for sentence in sentences:
        result = aug.augment(sentence)
        # At least one target word has to show up in the augmented output.
        found = any(word in result for word in candidates)
        self.assertTrue(found)
        self.assertNotEqual(sentence, result)
def test_delete(self):
    """A default-configured RandomWordAug must alter the sentence."""
    # NOTE(review): no action is passed, so this exercises the augmenter's
    # default action -- presumably delete, given the test name; confirm.
    aug = naw.RandomWordAug()
    sentences = ['The quick brown fox jumps over the lazy dog']

    for sentence in sentences:
        self.assertNotEqual(sentence, aug.augment(sentence))
def test_multiple_actions(self):
    """Every sequential pipeline must change every sample sentence."""
    samples = [
        'The quick brown fox jumps over the lazy dog',
        'Zology raku123456 fasdasd asd4123414 1234584',
    ]

    # Pipeline 1: character insertion followed by a default word augmenter.
    insert_then_word = naf.Sequential([
        nac.RandomCharAug(action=Action.INSERT),
        naw.RandomWordAug(),
    ])
    # Pipeline 2: three character-level augmenters chained together.
    char_only = naf.Sequential([
        nac.OcrAug(),
        nac.KeyboardAug(aug_char_min=1),
        nac.RandomCharAug(
            action=Action.SUBSTITUTE, aug_char_min=1,
            aug_char_p=0.6, aug_word_p=0.6),
    ])
    pipelines = [insert_then_word, char_only]

    for pipeline in pipelines:
        for sample in samples:
            augmented = pipeline.augment(sample)
            self.assertNotEqual(sample, augmented)
            self.assertLess(0, len(sample))
        # Non-empty checks keep the loops from passing vacuously.
        self.assertLess(0, len(samples))
    self.assertLess(0, len(pipelines))
def test_empty_input_for_delete(self):
    """Check delete augmentation of a whitespace-only string.

    Runs against a plain delete augmenter and one configured with
    stopwords; either ``''`` or ``' '`` is accepted as the result.
    """
    text = ' '
    # NOTE(review): the original carried a bare "# None" comment here --
    # presumably a reminder that None input is not covered; confirm.
    augs = [
        naw.RandomWordAug(action="delete"),
        naw.RandomWordAug(action="delete", stopwords=['a', 'an', 'the'])
    ]

    for aug in augs:
        augmented_text = aug.augment(text)
        # FIXME: standardize return
        # assertIn accepts the same two values as the former ==/or check
        # but reports the offending value when the assertion fails.
        self.assertIn(augmented_text, ('', ' '))
def test_substitute_without_target_word(self):
    """Substituting without target words must leave '_' in the output."""
    aug = naw.RandomWordAug(action='substitute')
    sentences = ['The quick brown fox jumps over the lazy dog']

    for sentence in sentences:
        result = aug.augment(sentence)
        # NOTE(review): '_' is presumably the augmenter's default
        # replacement token when no target words are given; confirm.
        self.assertIn('_', result)
        self.assertNotEqual(sentence, result)
def test_multi_thread(self):
    """Each augmenter must return ``n`` results for every thread count.

    Calls ``augment`` with ``num_thread`` set to 1 and to 3 on a random
    word augmenter, a word2vec embedding augmenter and an XLNet contextual
    augmenter.

    Raises:
        KeyError: if the ``MODEL_DIR`` environment variable is not set.
    """
    text = 'The quick brown fox jumps over the lazy dog.'
    n = 3
    # os.path.join yields a valid path whether or not MODEL_DIR ends with
    # a path separator; plain string concatenation required one.
    word2vec_path = os.path.join(
        os.environ["MODEL_DIR"], 'GoogleNews-vectors-negative300.bin')
    augs = [
        naw.RandomWordAug(),
        naw.WordEmbsAug(model_type='word2vec', model_path=word2vec_path),
        naw.ContextualWordEmbsAug(
            model_path='xlnet-base-cased', action="substitute",
            skip_unknown_word=True, temperature=0.7, device='cpu')
    ]

    for num_thread in [1, 3]:
        for aug in augs:
            augmented_data = aug.augment(text, n=n, num_thread=num_thread)
            self.assertEqual(len(augmented_data), n)