How to use the datasketch.WeightedMinHashGenerator class in datasketch

To help you get started, we’ve selected a few datasketch examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ekzhu / datasketch / benchmark / weighted_minhash_benchmark.py View on Github external
def run_perf(dim, num_rep, sample_size):
    """Benchmark how long a single weighted MinHash takes to generate.

    Builds one ``WeightedMinHashGenerator`` for ``dim``-dimensional input,
    draws ``num_rep`` random vectors with ``np.random.uniform(0, dim, ...)``,
    and times one ``minhash`` call per vector.

    Args:
        dim: Length of each input vector; also used as the upper bound of
            the uniform distribution the values are drawn from.
        num_rep: Number of random vectors to hash.
        sample_size: Forwarded to the generator as ``sample_size``.

    Returns:
        The mean duration of a ``minhash`` call, in milliseconds.
    """
    wmg = WeightedMinHashGenerator(dim, sample_size=sample_size)
    logging.info("WeightedMinHash using %d samples", sample_size)
    data = np.random.uniform(0, dim, (num_rep, dim))
    durs = []
    for row in data:
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # high-resolution clock meant for timing short intervals.
        start = time.perf_counter()
        wmg.minhash(row)
        durs.append((time.perf_counter() - start) * 1000)
    ave = np.mean(durs)
    logging.info("Generated %d minhashes, average time %.4f ms", num_rep, ave)
    return ave
github cangyeone / GeophysicsResearch / WaveReconize / fgpoint / figure.py View on Github external
def __init__(self, staName, force=False):
        """Set the fingerprinting parameters, build the hasher, and call ``GetSta``.

        Args:
            staName: Forwarded unchanged to ``self.GetSta``.
            force: Forwarded to ``self.GetSta`` as its ``force`` keyword.
        """
        # NOTE(review): "wl"/"fq" presumably stand for wavelet/frequency -
        # confirm against the rest of the class.
        # Window sizes.
        self.wlWinN = 300
        self.fqWinN = 300

        # Lag steps.
        self.wlLagN = 5
        self.fqLagN = 5

        # Resampled sizes; their product is the length of the hashed vector.
        self.fqRspN = 32
        self.wlRspN = 32
        self.vectLen = self.fqRspN * self.wlRspN

        self.selmax = 90
        self.wl_x_level = 3

        # Fixed seed so the generator is built identically on every run.
        self.wmg = WeightedMinHashGenerator(
            self.vectLen, sample_size=2, seed=12
        )

        self.sta = False
        # Product of the two lag steps, divided by 100.
        self.sphs = self.fqLagN * self.wlLagN / 100

        # Called last, after every attribute above has been assigned.
        self.GetSta(staName, force=force)
        #tempdata=self.GetData("D:/Weiyuan/templates/2015318092941.s15.z")
github cangyeone / GeophysicsResearch / WaveReconize / fgpoint / plot.py View on Github external
for ix in range(len(self.wlData[0])):
                if(self.wlData[iy,ix]==1):
                    tsc.append('a')
                elif(self.wlData[iy,ix]==-1):
                    tsc.append('c')
                else:
                    tsc.append('d')
            schar.append(tsc)
      
        for cr in schar: 
            hh=simhash.simhash(''.join(cr),hashbits=hashbit)
            self.hash.append(hh.hash)

        """ 
        #"""
        wmg = WeightedMinHashGenerator(len(self.wlData[0]),sample_size=2, seed=12)
        for tr in self.wlData:
            try:
                wm = wmg.minhash(tr) # wm1 is of the type WeightedMinHash
                vl=np.transpose(wm.hashvalues)
                vl=vl[0]
                self.hash.append(vl.tolist())
            except:
                print(tr)
        #""" 
github cangyeone / GeophysicsResearch / WaveReconize / fgpoint / figure_better.py View on Github external
def __init__(self, staName, force=False):
        """Set the fingerprinting parameters, build the hasher, and call ``GetSta``.

        Args:
            staName: Forwarded unchanged to ``self.GetSta``.
            force: Forwarded to ``self.GetSta`` as its ``force`` keyword.
        """
        # NOTE(review): "wl"/"fq" presumably stand for wavelet/frequency -
        # confirm against the rest of the class.
        # Window sizes.
        self.wlWinN = 300
        self.fqWinN = 300

        # Lag steps.
        self.wlLagN = 10
        self.fqLagN = 10

        # Resampled sizes; their product is the length of the hashed vector.
        self.fqRspN = 32
        self.wlRspN = 32
        self.vectLen = self.fqRspN * self.wlRspN

        self.selmax = 30
        self.wl_x_level = 3

        # Fixed seed so the generator is built identically on every run.
        self.wmg = WeightedMinHashGenerator(
            self.vectLen, sample_size=2, seed=12
        )

        self.sta = False
        # Product of the two lag steps, divided by 100.
        self.sphs = self.fqLagN * self.wlLagN / 100

        # Called last, after every attribute above has been assigned.
        self.GetSta(staName, force=force)
        #tempdata=self.GetData("D:/Weiyuan/templates/2015318092941.s15.z")
github cangyeone / GeophysicsResearch / WaveReconize / fgpoint / figurenew.py View on Github external
def __init__(self, staName, force=False):
        """Set the fingerprinting parameters, build the hasher, and call ``GetSta``.

        Args:
            staName: Forwarded unchanged to ``self.GetSta``.
            force: Forwarded to ``self.GetSta`` as its ``force`` keyword.
        """
        # NOTE(review): "wl"/"fq" presumably stand for wavelet/frequency -
        # confirm against the rest of the class.
        # Window sizes.
        self.wlWinN = 300
        self.fqWinN = 300

        # Lag steps.
        self.wlLagN = 5
        self.fqLagN = 5

        # Resampled sizes; their product is the length of the hashed vector.
        self.fqRspN = 32
        self.wlRspN = 32
        self.vectLen = self.fqRspN * self.wlRspN

        self.selmax = 90
        self.wl_x_level = 3

        # Fixed seed so the generator is built identically on every run.
        self.wmg = WeightedMinHashGenerator(
            self.vectLen, sample_size=2, seed=12
        )

        self.sta = False
        # Product of the two lag steps, divided by 100.
        self.sphs = self.fqLagN * self.wlLagN / 100

        # Called last, after every attribute above has been assigned.
        self.GetSta(staName, force=force)
        #tempdata=self.GetData("D:/Weiyuan/templates/2015318092941.s15.z")
github ekzhu / datasketch / benchmark / weighted_minhash_benchmark.py View on Github external
def run_acc(dim, num_rep, sample_size):
    """Measure the mean absolute error of the weighted MinHash estimate.

    Draws two independent ``(num_rep, dim)`` arrays with
    ``np.random.uniform(0, dim, ...)``. For each pair of rows it compares
    the estimate from ``WeightedMinHash.jaccard`` with the value returned
    by the ``jaccard`` helper, which is defined outside this block.

    Args:
        dim: Length of each input vector; also used as the upper bound of
            the uniform distribution the values are drawn from.
        num_rep: Number of vector pairs to compare.
        sample_size: Forwarded to the generator as ``sample_size``.

    Returns:
        The mean of ``abs(jaccard(a, b) - estimate)`` over all pairs.
    """
    logging.info("WeightedMinHash using %d samples" % sample_size)
    generator = WeightedMinHashGenerator(dim, sample_size=sample_size)

    shape = (num_rep, dim)
    left = np.random.uniform(0, dim, shape)
    right = np.random.uniform(0, dim, shape)

    abs_errors = []
    for vec_a, vec_b in zip(left, right):
        sketch_a = generator.minhash(vec_a)
        sketch_b = generator.minhash(vec_b)
        estimated = sketch_a.jaccard(sketch_b)
        reference = jaccard(vec_a, vec_b)
        abs_errors.append(abs(reference - estimated))

    mean_error = np.mean(abs_errors)
    logging.info("%d runs, mean error %.4f" % (num_rep, mean_error))
    return mean_error
github cangyeone / GeophysicsResearch / WaveFingerPoint / wave_figner_point.py View on Github external
for ix in range(len(self.wlData[0])):
                if(self.wlData[iy,ix]==1):
                    tsc.append('a')
                elif(self.wlData[iy,ix]==-1):
                    tsc.append('c')
                else:
                    tsc.append('d')
            schar.append(tsc)
      
        for cr in schar: 
            hh=simhash.simhash(''.join(cr),hashbits=hashbit)
            self.hash.append(hh.hash)

        """ 
        #"""
        wmg = WeightedMinHashGenerator(len(self.wlData[0]),sample_size=2, seed=12)
        for tr in self.wlData:
            
            wm = wmg.minhash(tr) # wm1 is of the type WeightedMinHash
            vl=np.transpose(wm.hashvalues)
            vl=vl[0]
            self.hash.append(vl.tolist())
        #""" 
github cangyeone / GeophysicsResearch / WaveReconize / fgpoint / gethash.py View on Github external
if(self.wlData[iy,ix]==1):
                    tsc.append('a')
                elif(self.wlData[iy,ix]==-1):
                    tsc.append('c')
                else:
                    tsc.append('d')
            schar.append(tsc)
      
        for cr in schar: 
            hh=simhash.simhash(''.join(cr),hashbits=hashbit)
            self.hash.append(hh.hash)

        """ 
        #"""
        self.itr_hash=[]
        wmg = WeightedMinHashGenerator(len(self.wlData[0]),sample_size=4, seed=12)
        for tr in self.wlData:
            try:
                #print(np.abs(np.sum(tr)))
                #print(len(self.wlData[0]))
                if(np.abs(np.sum(tr))==len(self.wlData[0]) or np.abs(np.sum(tr))==0):
                    self.itr_hash.append([0,0,0,0])
                    continue
                wm = wmg.minhash(tr) # wm1 is of the type WeightedMinHash
                vl=np.transpose(wm.hashvalues)
                vl=vl[0]
                self.itr_hash.append(vl.tolist())
            except:
                self.itr_hash.append([0,0,0,0])
                #print(tr)
github cangyeone / GeophysicsResearch / WaveFingerPoint / wave_figner_point_map.py View on Github external
for ix in range(len(self.wlData[0])):
                if(self.wlData[iy,ix]==1):
                    tsc.append('a')
                elif(self.wlData[iy,ix]==-1):
                    tsc.append('c')
                else:
                    tsc.append('d')
            schar.append(tsc)
      
        for cr in schar: 
            hh=simhash.simhash(''.join(cr),hashbits=hashbit)
            self.hash.append(hh.hash)

        """ 
        #"""
        wmg = WeightedMinHashGenerator(len(self.wlData[0]),sample_size=2, seed=12)
        for tr in self.wlData:
            
            wm = wmg.minhash(tr) # wm1 is of the type WeightedMinHash
            vl=np.transpose(wm.hashvalues)
            vl=vl[0]
            self.hash.append(vl.tolist())
        #"""