Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_flat_map(self):
    """Check that ``reducers.FlatMap`` flattens the mapped items of ``self.data``.

    Each item of ``self.data`` is mapped to a three-element list and the
    reducer output is compared with the flattened expectation.  The output
    is consumed twice: once before and once after the ``_done`` flag is
    checked, so re-iteration is covered as well.
    """
    reduced = reducers.FlatMap(lambda x: [x] * 3)(self.data)
    expected = [x for item in self.data for x in [item] * 3]

    # Materialise the whole output instead of zipping it with ``expected``:
    # ``zip`` stops at the shorter input, so a reducer yielding too few (or
    # no) items would otherwise pass without a single failed assertion.
    # A comprehension is used so that only plain iteration is exercised.
    first_pass = [x for x in reduced]
    self.assertEqual(first_pass, expected)

    # A complete pass is expected to mark the reducer as finished.
    self.assertTrue(reduced._done)

    # Iterating again must produce the same sequence
    # (presumably served from an internal cache -- confirm in FlatMap).
    second_pass = [x for x in reduced]
    self.assertEqual(second_pass, expected)
def build_vocab(dataset, cache='vocab.pkl'):
    """Build a token vocabulary from ``dataset``, or load it from ``cache``.

    If ``cache`` exists it is unpickled and returned as-is.  Otherwise the
    tokens of every sample are counted, the vocabulary is ordered by
    descending frequency (``Counter.most_common``), and the result is
    pickled to ``cache`` before being returned.

    Args:
        dataset: Iterable of samples indexable by ``'question'`` and
            ``'context'``.  Both values must be iterables of tokens and
            ``'context'`` must be hashable, because it is used to detect
            contexts that were already counted.
        cache: Path of the pickle file used to store/load the vocabulary.

    Returns:
        A ``(token_to_index, words)`` pair: ``words`` is a tuple of tokens
        from most to least frequent and ``token_to_index`` maps each token
        to its position in ``words``.  Both are empty for an empty dataset.
    """
    if osp.isfile(cache):
        # NOTE: unpickling can execute arbitrary code; ``cache`` must only
        # ever point at a file written by this function (trusted input).
        with open(cache, 'rb') as f:
            token_to_index, words = pickle.load(f)
        return token_to_index, words

    seen = set()

    def extract_tokens(x):
        """Return the question tokens, plus the context tokens on first sight."""
        tokens = x['question']
        context = x['context']
        if context not in seen:
            # Count each distinct context only once across the dataset.
            seen.add(context)
            # Copy before appending: ``tokens += context`` would extend a
            # list-valued x['question'] in place and corrupt the sample.
            tokens = list(tokens)
            tokens.extend(context)
        return tokens

    counter = Counter(FlatMap(extract_tokens)(dataset))
    # A generator (rather than ``zip(*...)`` unpacking) also copes with an
    # empty counter, which would otherwise raise ValueError.
    words = tuple(word for word, _ in counter.most_common())
    token_to_index = {word: index for index, word in enumerate(words)}
    with open(cache, 'wb') as f:
        pickle.dump((token_to_index, words), f)
    return token_to_index, words