Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
async def test__H_mongo(self):
    """
    Check that _H outputs keys of a consistent bytes length given
    the same concatenated hash value size.

    A weighted MinHash is inserted under the key "m" into an index that
    uses ``self._storage_config_mongo``, and the length of every key
    stored in every hash table is compared against the first one.
    """
    mg = WeightedMinHashGenerator(100, sample_size=128)
    # The loop index itself is unused; every round just draws a fresh
    # random input vector and repeats the check.
    for _ in range(2, mg.sample_size + 1, 16):
        m = mg.minhash(np.random.randint(1, 99999999, 100))
        async with AsyncMinHashLSH(storage_config=self._storage_config_mongo,
                                   num_perm=128) as lsh:
            await lsh.insert("m", m)
            # Fetch the key listing of every hash table concurrently.
            key_lists = await asyncio.gather(
                *(ht.keys() for ht in lsh.hashtables))
            # Measure each individual key. Taking len() of a whole key
            # listing would only count how many keys a table holds, which
            # is not what this test is meant to verify.
            sizes = [len(H) for keys in key_lists for H in keys]
            self.assertTrue(all(sizes[0] == s for s in sizes))
async def test_insertion_session_mongo(self):
    """
    Insert many keys through an insertion session and verify lookups.

    The key set is 4-character strings cut from 10000 random lowercase
    letters plus a handful of fixed keys. After the session, every hash
    table must be non-empty and contain 'aahhb' and 'kld', the index must
    report both keys via ``has_key``, and each key stored for 'aahh' in
    ``lsh.keys`` must lead back to 'aahh' in the matching hash table.
    """
    def chunk(it, size):
        # Yield successive tuples of up to `size` items until `it` is
        # exhausted.
        source = iter(it)
        while True:
            piece = tuple(islice(source, size))
            if not piece:
                return
            yield piece

    fixed_keys = ('aahhb', 'aahh', 'aahhc', 'aac', 'kld', 'bhg', 'kkd', 'yow', 'ppi', 'eer')
    letters = (random.choice(string.ascii_lowercase) for _ in range(10000))
    random_keys = (''.join(group) for group in chunk(letters, 4))
    seq = frozenset(chain(random_keys, fixed_keys))

    # One MinHash per key, updated with the key's characters one at a time.
    data = []
    for word in seq:
        sketch = MinHash(16)
        for char in word:
            sketch.update(char.encode('utf-8'))
        data.append((word, sketch))

    async with AsyncMinHashLSH(storage_config=self._storage_config_mongo,
                               threshold=0.5, num_perm=16) as lsh:
        async with lsh.insertion_session(batch_size=1000) as session:
            pending = [
                session.insert(word, sketch, check_duplication=False)
                for word, sketch in data
            ]
            await asyncio.gather(*pending)

        # The checks below run after the insertion session has exited.
        for table in lsh.hashtables:
            self.assertTrue(await table.size() >= 1)
            stored = []
            for bucket_key in await table.keys():
                stored.extend(await table.get(bucket_key))
            self.assertTrue('aahhb' in stored)
            self.assertTrue('kld' in stored)

        self.assertTrue(await lsh.has_key('aahhb'))
        self.assertTrue(await lsh.has_key('kld'))

        stored_keys = await lsh.keys.get('aahh')
        for index, bucket_key in enumerate(stored_keys):
            self.assertTrue('aahh' in await lsh.hashtables[index].get(bucket_key))
async def test_init_mongo(self):
    """
    Check the initial state and parameter selection of the index.

    An index created with threshold 0.8 must report itself empty. A
    second index created with the same threshold and weights=(0.2, 0.8)
    must end up with a larger ``b`` and a smaller ``r`` than the first.
    """
    # NOTE(review): another definition named test_init_mongo with
    # equivalent logic appears later in this file; if both sit in the same
    # class body, the later definition replaces this one.
    async with AsyncMinHashLSH(storage_config=self._storage_config_mongo,
                               threshold=0.8) as plain_lsh:
        self.assertTrue(await plain_lsh.is_empty())
        plain_params = (plain_lsh.b, plain_lsh.r)

    async with AsyncMinHashLSH(storage_config=self._storage_config_mongo,
                               threshold=0.8,
                               weights=(0.2, 0.8)) as weighted_lsh:
        weighted_params = (weighted_lsh.b, weighted_lsh.r)

    plain_b, plain_r = plain_params
    weighted_b, weighted_r = weighted_params
    self.assertTrue(plain_b < weighted_b)
    self.assertTrue(plain_r > weighted_r)
async def test_init_mongo(self):
    """
    Verify that a new index is empty and that weights alter b and r.

    Two indexes are opened with threshold 0.8, the second one also with
    weights=(0.2, 0.8). The first must be empty; the second must have a
    larger ``b`` and a smaller ``r`` than the first.
    """
    # NOTE(review): a definition named test_init_mongo with equivalent
    # logic also appears earlier in this file; if both sit in the same
    # class body, only this later definition is kept.
    shared_kwargs = dict(storage_config=self._storage_config_mongo,
                         threshold=0.8)

    async with AsyncMinHashLSH(**shared_kwargs) as lsh:
        self.assertTrue(await lsh.is_empty())
        b1 = lsh.b
        r1 = lsh.r

    async with AsyncMinHashLSH(weights=(0.2, 0.8), **shared_kwargs) as lsh:
        b2 = lsh.b
        r2 = lsh.r

    self.assertTrue(b1 < b2)
    self.assertTrue(r1 > r2)