How to use the datasketch.storage._random_name function in datasketch

To help you get started, we’ve selected a few datasketch examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ekzhu / datasketch / datasketch / experimental / aio / storage.py View on Github external
def __init__(self, config, name=None):
            """Create the Motor client and select the collection for this storage.

            :param config: storage configuration mapping. ``config['type']`` is
                asserted to be ``'aiomongo'``; ``config['mongo']`` is passed
                through ``self._parse_config`` to obtain connection settings.
            :param name: optional storage name. When falsy, the result of
                ``_random_name(11)`` decoded as UTF-8 is used instead.
            """
            assert config['type'] == 'aiomongo', 'Storage type <{}> not supported'.format(config['type'])
            self._config = config
            self._mongo_param = self._parse_config(config['mongo'])

            self._name = name or _random_name(11).decode('utf-8')
            # The collection is always named after the storage, prefixed "lsh_".
            self._collection_name = 'lsh_' + self._name

            settings = self.mongo_param
            database = settings.get('db', 'db_0')

            # Pick the connection-string template: replica set first, then
            # credentials, then a plain host/port pair.
            if 'replica_set' in settings:
                template = 'mongodb://{replica_set_nodes}/?replicaSet={replica_set}'
            elif any(key in settings for key in ('username', 'password')):
                template = 'mongodb://{username}:{password}@{host}:{port}'
            else:
                template = 'mongodb://{host}:{port}'
            dsn = template.format(**settings)

            # Extra keyword arguments are forwarded untouched to the client.
            client_kwargs = settings.get('args', {})

            self._batch_size = 1000
            self._mongo_client = motor.motor_asyncio.AsyncIOMotorClient(dsn, **client_kwargs)
            self._collection = self._mongo_client[database][self._collection_name]
            self._initialized = True
github ekzhu / datasketch / datasketch / lshensemble.py View on Github external
raise ValueError("Too few permutation functions")
        if num_part < 1:
            raise ValueError("num_part must be at least 1")
        if m < 2 or m > num_perm:
            raise ValueError("m must be in the range of [2, num_perm]")
        if any(w < 0.0 or w > 1.0 for w in weights):
            raise ValueError("Weight must be in [0.0, 1.0]")
        if sum(weights) != 1.0:
            raise ValueError("Weights must sum to 1.0")
        self.threshold = threshold
        self.h = num_perm
        self.m = m
        rs = self._init_optimal_params(weights)
        # Initialize multiple LSH indexes for each partition
        storage_config = {'type': 'dict'} if not storage_config else storage_config
        basename = storage_config.get('basename', _random_name(11))
        self.indexes = [
            dict((r, MinHashLSH(
                num_perm=self.h,
                params=(int(self.h/r), r),
                storage_config=self._get_storage_config(
                    basename, storage_config, partition, r),
                prepickle=prepickle)) for r in rs)
            for partition in range(0, num_part)]
        self.lowers = [None for _ in self.indexes]
        self.uppers = [None for _ in self.indexes]
github ekzhu / datasketch / datasketch / experimental / aio / lsh.py View on Github external
def __init__(self, threshold: float=0.9, num_perm: int=128, weights: Tuple[float, float] =(0.5, 0.5),
                 params: Tuple[int, int]=None, storage_config: Dict=None):
        """Store the LSH settings and validate them.

        :param threshold: value that must lie in ``[0.0, 1.0]``.
        :param num_perm: number of permutation functions; must be at least 2.
        :param weights: weights, each in ``[0.0, 1.0]``, that must sum to
            exactly ``1.0``.
        :param params: stored as given; not validated in this part of the
            constructor.
        :param storage_config: storage configuration mapping. When ``None``, an
            ``'aiomongo'`` configuration pointing at ``localhost:27017`` is
            used. A ``'basename'`` entry is added when missing.
        :raises ValueError: if ``threshold``, ``num_perm`` or ``weights`` fail
            the checks above.
        """
        if storage_config is None:
            storage_config = {'type': 'aiomongo', 'mongo': {'host': 'localhost', 'port': 27017}}

        # Work on a shallow copy so the basename is not written into the
        # caller's mapping.
        self._storage_config = storage_config.copy()
        self._basename = self._storage_config.setdefault('basename', _random_name(11))

        self._batch_size = 10000
        self._threshold = threshold
        self._num_perm = num_perm
        self._weights = weights
        self._params = params

        # Validation runs after the attributes are stored.
        if threshold < 0.0 or threshold > 1.0:
            raise ValueError("threshold must be in [0.0, 1.0]")
        if num_perm < 2:
            raise ValueError("Too few permutation functions")
        for weight in weights:
            if weight < 0.0 or weight > 1.0:
                raise ValueError("Weight must be in [0.0, 1.0]")
        if sum(weights) != 1.0:
            raise ValueError("Weights must sum to 1.0")

        self.h = num_perm
github ekzhu / datasketch / datasketch / lsh.py View on Github external
raise ValueError("Weight must be in [0.0, 1.0]")
        if sum(weights) != 1.0:
            raise ValueError("Weights must sum to 1.0")
        self.h = num_perm
        if params is not None:
            self.b, self.r = params
            if self.b * self.r > num_perm:
                raise ValueError("The product of b and r must be less than num_perm")
        else:
            false_positive_weight, false_negative_weight = weights
            self.b, self.r = _optimal_param(threshold, num_perm,
                    false_positive_weight, false_negative_weight)

        self.prepickle = storage_config['type'] == 'redis' if prepickle is None else prepickle

        basename = storage_config.get('basename', _random_name(11))
        self.hashtables = [
            unordered_storage(storage_config, name=b''.join([basename, b'_bucket_', struct.pack('>H', i)]))
            for i in range(self.b)]
        self.hashranges = [(i*self.r, (i+1)*self.r) for i in range(self.b)]
        self.keys = ordered_storage(storage_config, name=b''.join([basename, b'_keys']))