How to use the lmdb.open function in lmdb

To help you get started, we’ve selected a few lmdb examples based on popular ways the library is used in public projects.
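
All of the snippets below share the same core pattern: lmdb.open returns an Environment, writes go through a write transaction, and reads go through a read-only one. As a minimal sketch (the path here is just a placeholder):

import lmdb

# Open (and create, if missing) an environment; map_size caps the total database size.
env = lmdb.open('/tmp/example_lmdb', map_size=10 * 1024 * 1024)

# Writes go through a write transaction; keys and values must be bytes in Python 3.
with env.begin(write=True) as txn:
    txn.put(b'greeting', b'hello lmdb')

# env.begin() without write=True opens a read-only transaction.
with env.begin() as txn:
    print(txn.get(b'greeting'))  # b'hello lmdb'

env.close()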


github nigroup / nideep / nideep / iow / test_read_lmdb.py
])

        img_data_str = ['\x08\x03\x10\x04\x18\x02"\x18\x01\x04\x07\n\r\x10\x13\x16\x02\x05\x08\x0b\x0e\x11\x14\x17\x03\x06\t\x0c\x0f\x12\x15\x18(\x01',
                        '\x08\x03\x10\x02\x18\x01"\x06\x10\x16\x11\x17\x12\x18(\x00']

        # write fake data to lmdb
        self.path_lmdb_num_ord = os.path.join(self.dir_tmp, 'imgs_num_ord_lmdb')
        db = lmdb.open(self.path_lmdb_num_ord, map_size=int(1e12))
        with db.begin(write=True) as in_txn:

            for idx, data_str in enumerate(img_data_str):
                in_txn.put('{:0>10d}'.format(idx), data_str)
        db.close()

        self.path_lmdb_rand_ord = os.path.join(self.dir_tmp, 'imgs_rand_ord_lmdb')
        db = lmdb.open(self.path_lmdb_rand_ord, map_size=int(1e12))
        with db.begin(write=True) as in_txn:

            for data_str in img_data_str:
                in_txn.put('{:0>10d}'.format(np.random.randint(10, 1000)), data_str)
        db.close()

        self.path_lmdb_non_num = os.path.join(self.dir_tmp, 'imgs_non_num_lmdb')
        db = lmdb.open(self.path_lmdb_non_num, map_size=int(1e12))
        with db.begin(write=True) as in_txn:

            for data_str in img_data_str:
                in_txn.put('key' + data_str, data_str)
        db.close()

        assert_not_equal(self.path_lmdb_num_ord, self.path_lmdb_rand_ord)
        assert_not_equal(self.path_lmdb_num_ord, self.path_lmdb_non_num)
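
Two details in this example recur throughout the rest of the page. map_size=int(1e12) reserves a roughly one-terabyte ceiling for the memory map; the py-lmdb default is only about 10 MB, and writes past the limit raise lmdb.MapFullError, so oversizing it up front is common. Note also that the snippet passes str keys and values, which only works under Python 2; under Python 3 py-lmdb requires bytes, so the same write loop would encode them first, roughly like this sketch (path and payloads are placeholders):

import lmdb

img_data = [b'first-record', b'second-record']  # placeholder payloads

db = lmdb.open('/tmp/imgs_num_ord_lmdb', map_size=int(1e12))
with db.begin(write=True) as in_txn:
    for idx, data in enumerate(img_data):
        # Zero-padded keys keep cursor iteration in insertion order.
        in_txn.put('{:0>10d}'.format(idx).encode('ascii'), data)
db.close()
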
github clovaai / deep-text-recognition-benchmark / create_lmdb_dataset.py
def createDataset(inputPath, gtFile, outputPath, checkValid=True):
    """
    Create LMDB dataset for training and evaluation.
    ARGS:
        inputPath  : input folder path where imagePath starts
        outputPath : LMDB output path
        gtFile     : list of image path and label
        checkValid : if true, check the validity of every image
    """
    os.makedirs(outputPath, exist_ok=True)
    env = lmdb.open(outputPath, map_size=1099511627776)
    cache = {}
    cnt = 1

    with open(gtFile, 'r', encoding='utf-8') as data:
        datalist = data.readlines()

    nSamples = len(datalist)
    for i in range(nSamples):
        imagePath, label = datalist[i].strip('\n').split('\t')
        imagePath = os.path.join(inputPath, imagePath)

        # # only use alphanumeric data
        # if re.search('[^a-zA-Z0-9]', label):
        #     continue

        if not os.path.exists(imagePath):
github SummaLabs / DLS / app / backend / core / models / batcher_image2d.py
def __init__(self, parPathDB=None, parSizeBatch=-1, scaleFactor=-1.):
        if parPathDB is None:
            #FIXME: check this point, LMDBBatcher is not initialized correctly
            return
        try:
            self.cfg     = DatasetImage2dInfo(parPathDB)
            self.cfg.loadDBInfo(isBuildSearchIndex=False)
            tpathTrainDB = self.cfg.pathDbTrain
            tpathValDB   = self.cfg.pathDbVal
            self.dbTrain = lmdb.open(tpathTrainDB, readonly=True)
            self.dbVal   = lmdb.open(tpathValDB,   readonly=True)
            with self.dbTrain.begin() as txnTrain, self.dbVal.begin() as txnVal:
                self.lbl    = self.cfg.labels
                self.numLbl = len(self.lbl)
                self.numTrain = self.dbTrain.stat()['entries']
                self.numVal   = self.dbVal.stat()['entries']
                with txnTrain.cursor() as cursTrain, txnVal.cursor() as cursVal:
                    self.keysTrain = np.array([key for key, _ in cursTrain])
                    self.keysVal   = np.array([key for key, _ in cursVal])
                    timg,_ = ImageTransformer2D.decodeLmdbItem2NNSampple(txnTrain.get(self.keysTrain[0]))
                    self.shapeImg = timg.shape
                if parSizeBatch > 1:
                    self.sizeBatch = parSizeBatch
                if scaleFactor > 0:
                    self.scaleFactor = scaleFactor
                self.loadMeanProto()
        except lmdb.Error as err:
github luoyetx / Joint-Face-Detection-and-Alignment / scripts / benchmark_lmdb_access.py
def main(args):
  net_type = args.net
  if net_type == 'p':
    net = 'pnet'
  elif net_type == 'r':
    net = 'rnet'
  else:
    assert net_type == 'o'
    net = 'onet'
  logger = get_logger()
  db = lmdb.open('data/%s_nonface_train'%net)
  with db.begin() as txn:
    size = int(txn.get('size'))
    logger.info('random read')
    for i in np.random.permutation(size):
      face_key = '%08d_data'%i
      offset_key = '%08d_offset'%i
      txn.get(face_key)
      txn.get(offset_key)
    logger.info('done')
    logger.info('sequential read')
    for i in range(size):
      face_key = '%08d_data'%i
      offset_key = '%08d_offset'%i
      txn.get(face_key)
      txn.get(offset_key)
    logger.info('done')
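
LMDB serves reads directly from a read-only memory map backed by the OS page cache, which is what makes comparing random and sequential access interesting here. A Python 3 flavor of the sequential half of the benchmark, assuming a database laid out with the same 'size' and '%08d_data' keys as in the snippet:

import time
import lmdb

db = lmdb.open('data/pnet_nonface_train', readonly=True)
with db.begin() as txn:
    size = int(txn.get(b'size'))
    start = time.time()
    for i in range(size):
        txn.get(b'%08d_data' % i)
    print('sequential read of %d records: %.3fs' % (size, time.time() - start))
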
github ArdalanM / nlp-benchmarks / src / cnn / main.py
if not all_exist:
        print("Creating datasets")
        tr_sentences = [txt for txt,lab in tqdm(dataset.load_train_data(), desc="counting train samples")]
        te_sentences = [txt for txt,lab in tqdm(dataset.load_test_data(), desc="counting test samples")]
            
        n_tr_samples = len(tr_sentences)
        n_te_samples = len(te_sentences)
        del tr_sentences
        del te_sentences

        print("[{}/{}] train/test samples".format(n_tr_samples, n_te_samples))

        ###################
        # transform train #
        ###################
        with lmdb.open(tr_path, map_size=1099511627776) as env:
            with env.begin(write=True) as txn:
                for i, (sentence, label) in enumerate(tqdm(dataset.load_train_data(), desc="transform train...", total= n_tr_samples)):

                    xtxt = vectorizer.transform([sentence])[0]
                    lab = label

                    txt_key = 'txt-%09d' % i
                    lab_key = 'lab-%09d' % i
                    
                    txn.put(lab_key.encode(), np.array([lab]).tobytes())
                    txn.put(txt_key.encode(), np.array(xtxt).tobytes())

                txn.put('nsamples'.encode(), np.array([i+1]).tobytes())

        ##################
        # transform test #
github rszeto / click-here-cnn / view_estimation_correspondences / eval_scripts / visualize_predictions.py
def getCorrespLmdbData(lmdbs_root, N):
    # Define LMDBs
    image_lmdb = lmdb.open(os.path.join(lmdbs_root, 'image_lmdb'), readonly=True)
    keypoint_loc_lmdb = lmdb.open(os.path.join(lmdbs_root, 'keypoint_loc_lmdb'), readonly=True)
    keypoint_class_lmdb = lmdb.open(os.path.join(lmdbs_root, 'keypoint_class_lmdb'), readonly=True)
    viewpoint_label_lmdb = lmdb.open(os.path.join(lmdbs_root, 'viewpoint_label_lmdb'), readonly=True)
    images_dict = utils.getFirstNLmdbImgs(image_lmdb, N)
    keypoint_loc_dict = utils.getFirstNLmdbImgs(keypoint_loc_lmdb, N)
    keypoint_class_dict = utils.getFirstNLmdbVecs(keypoint_class_lmdb, N)
    viewpoint_label_dict = utils.getFirstNLmdbVecs(viewpoint_label_lmdb, N)
    return images_dict.keys(), images_dict, keypoint_loc_dict, keypoint_class_dict, viewpoint_label_dict
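
Passing readonly=True, as the four lmdb.open calls above do, makes the environment reject write transactions and is the safe default when a database only needs to be read. The getFirstNLmdb* helpers are project-specific utilities; a generic stand-in that pulls the first N key/value pairs with a read-only cursor might look like this sketch (the path is a placeholder):

import lmdb

def get_first_n(env, n):
    # Iterate the environment in key order and stop after n records.
    items = {}
    with env.begin() as txn:
        with txn.cursor() as curs:
            for key, value in curs:
                if len(items) >= n:
                    break
                items[key] = value
    return items

env = lmdb.open('image_lmdb', readonly=True)  # placeholder path
print(len(get_first_n(env, 10)))
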
github CZ-NIC / knot / scripts / pykeymgr.py
def list_zones(dirname):
	print "dirname:", dirname
	env = lmdb.open(dirname, max_dbs=2, map_size=500*1024*1024)
	db_zones = env.open_db("zones_db", dupsort=True)
	zonedict = dict()
	with lmdb.Transaction(env, db_zones, write=False) as txn_zones:
		for k, v in txn_zones.cursor():
			dn = dname2str(k)
			ki = v.rstrip("\x00")
			try:
				zonedict[dn].insert(0, ki)
			except KeyError:
				zonedict[dn] = [ ki ]
	for zone in zonedict.keys():
		print zone, zonedict[zone]
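
This snippet (Python 2, hence the print statements) shows the named-database side of lmdb.open: max_dbs must be raised above its default of 0 before env.open_db can address sub-databases, and dupsort=True lets a single key hold several values. A Python 3 sketch of the same idea, with placeholder names:

import lmdb

env = lmdb.open('/tmp/keymgr_lmdb', max_dbs=2, map_size=500 * 1024 * 1024)
db_zones = env.open_db(b'zones_db', dupsort=True)  # dupsort: duplicate keys allowed

with env.begin(db=db_zones, write=True) as txn:
    txn.put(b'example.com.', b'key-id-1')
    txn.put(b'example.com.', b'key-id-2')  # second value under the same key

with env.begin(db=db_zones) as txn:
    for key, value in txn.cursor():
        print(key, value)
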
github WeBankFinTech / eggroll / computing / storage_adapters.py
def __init__(self, options):
        with LmdbAdapter.env_lock:
            LOGGER.info("lmdb adapter init")
            super().__init__(options)
            self.path = options["path"]
            create_if_missing = options.get("create_if_missing", "True") == "True"  # bool("False") would be True, so compare the string
            if self.path not in LmdbAdapter.env_dict:
                if create_if_missing:
                    os.makedirs(self.path, exist_ok=True)
                LOGGER.info("path not in dict db path:{}".format(self.path))
                self.env = lmdb.open(self.path, create=create_if_missing, max_dbs=128, sync=False, map_size=LMDB_MAP_SIZE, writemap=True)
                self.sub_db = self.env.open_db(DEFAULT_DB)
                self.txn = self.env.begin(db=self.sub_db, write=True)
                LmdbAdapter.count_dict[self.path] = 0
                LmdbAdapter.env_dict[self.path] = self.env
                LmdbAdapter.sub_env_dict[self.path] = self.sub_db
                LmdbAdapter.txn_dict[self.path] = self.txn
            else:
                LOGGER.info("path in dict:{}".format(self.path))
                self.env = LmdbAdapter.env_dict[self.path]
                self.sub_db = LmdbAdapter.sub_env_dict[self.path]
                self.txn = LmdbAdapter.txn_dict[self.path]
            self.cursor = self.txn.cursor()
            LmdbAdapter.count_dict[self.path] = LmdbAdapter.count_dict[self.path] + 1
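
The sync=False and writemap=True flags in this adapter trade durability for write throughput: commits skip the flush to disk, so a crash can lose recently committed transactions, and writes go through a writable memory map. A minimal sketch of that bulk-load configuration, flushing explicitly at the end (path and sizes are placeholders):

import lmdb

# Durability is relaxed for speed; flush before relying on the data.
env = lmdb.open('/tmp/bulk_lmdb', map_size=1 << 30, sync=False, writemap=True)
with env.begin(write=True) as txn:
    for i in range(1000):
        txn.put(b'%08d' % i, b'value')
env.sync(True)  # force the flush that sync=False skipped
env.close()
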
github zzzDavid / ICDAR-2019-SROIE / Task 1 - Text Localization / CTPN Method / create_dataset.py
def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkValid=True):
    """
    Create LMDB dataset for CRNN training.

    ARGS:
        outputPath    : LMDB output path
        imagePathList : list of image path
        labelList     : list of corresponding groundtruth texts
        lexiconList   : (optional) list of lexicon lists
        checkValid    : if true, check the validity of every image
    """
    assert(len(imagePathList) == len(labelList))
    nSamples = len(imagePathList)
    env = lmdb.open(outputPath, map_size=8589934592) # map_size caps the database size, in bytes (8 GiB here)
    cache = {}
    cnt = 1
    for i in list(range(nSamples)):
        imagePath = imagePathList[i]
        label = labelList[i]
        if not os.path.exists(imagePath):
            print('%s does not exist' % imagePath)
            continue
        with open(imagePath, 'rb') as f:
            imageBin = f.read()
        if checkValid:
            if not checkImageIsValid(imageBin):
                print('%s is not a valid image' % imagePath)
                continue

        imageKey = 'image-%09d' % cnt