Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def eg2():
    """Print the estimated and the exact cardinality of a two-set union.

    Two HyperLogLog sketches are filled from the module-level ``data1`` and
    ``data2`` collections, merged with ``HyperLogLog.union``, and the merged
    sketch's count is printed next to the size of the exact set union.
    """
    sketch_a = HyperLogLog()
    sketch_b = HyperLogLog()
    # Each item is UTF-8 encoded before being fed to its sketch
    # (presumably the items are str -- confirm against the data definitions).
    for sketch, items in ((sketch_a, data1), (sketch_b, data2)):
        for item in items:
            sketch.update(item.encode('utf8'))

    merged = HyperLogLog.union(sketch_a, sketch_b)
    print("Estimated union cardinality is", merged.count())

    # Ground truth computed with ordinary Python sets.
    exact_union = set(data1) | set(data2)
    print("Actual union cardinality is", len(exact_union))
def _hyperloglog_jaccard(h1, h2):
    """Estimate the Jaccard similarity of two sketches.

    The intersection size is derived by inclusion-exclusion from the two
    individual counts and the count of their ``HyperLogLog.union``; the
    result is that intersection divided by the union count.

    Returns 1.0 when the union count is zero. The value is returned
    as computed, with no clamping applied.
    """
    size_a = h1.count()
    size_b = h2.count()
    merged = HyperLogLog.union(h1, h2)
    union_size = merged.count()
    if union_size != 0.0:
        # |A n B| = |A| + |B| - |A u B|
        return (size_a + size_b - union_size) / union_size
    return 1.0
def _hyperloglog_inclusion(h1, h2):
    """Estimate what fraction of ``h1`` is also counted in ``h2``.

    The intersection size is derived by inclusion-exclusion from the two
    individual counts and the count of their ``HyperLogLog.union``, then
    divided by the count of ``h1``.

    Returns 1.0 when the count of ``h1`` is zero; in that case ``h2`` is
    never queried and no union is built.
    """
    size_a = h1.count()
    if size_a == 0.0:
        return 1.0
    size_b = h2.count()
    union_size = HyperLogLog.union(h1, h2).count()
    # |A n B| = |A| + |B| - |A u B|
    return (size_a + size_b - union_size) / size_a