How to use the tensorpack.dataflow.dataset function in tensorpack

To help you get started, we’ve selected a few tensorpack examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tensorpack / tensorpack / examples / ImageNetModels / imagenet_utils.py View on Github external
Returns: A DataFlow which produces BGR images and labels.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/tutorial/efficient-dataflow.html
    """
    assert name in ['train', 'val', 'test']
    isTrain = name == 'train'
    assert datadir is not None
    if augmentors is None:
        augmentors = fbresnet_augmentor(isTrain)
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading

    if isTrain:
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:
            logger.warn("DataFlow may become the bottleneck when too few processes are used.")
        ds = MultiProcessRunnerZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, cls
        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
github tensorpack / tensorpack / examples / ResNet / svhn-resnet.py View on Github external
def get_data(train_or_test):
    isTrain = train_or_test == 'train'
    pp_mean = dataset.SVHNDigit.get_per_pixel_mean()
    if isTrain:
        d1 = dataset.SVHNDigit('train')
        d2 = dataset.SVHNDigit('extra')
        ds = RandomMixData([d1, d2])
    else:
        ds = dataset.SVHNDigit('test')

    if isTrain:
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.Brightness(10),
            imgaug.Contrast((0.8, 1.2)),
            imgaug.GaussianDeform(  # this is slow. without it, can only reach 1.9% error
                [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
                (40, 40), 0.2, 3),
            imgaug.RandomCrop((32, 32)),
github tensorpack / tensorpack / examples / basics / mnist-tflayers.py View on Github external
def get_data():
    train = BatchData(dataset.Mnist('train'), 128)
    test = BatchData(dataset.Mnist('test'), 256, remainder=True)
    return train, test
github microsoft / petridishnn / petridish / data / cifar.py View on Github external
def get_cifar_augmented_data(
        subset, options, do_multiprocess=True, do_validation=False, shuffle=None):
    isTrain = subset == 'train' and do_multiprocess
    shuffle = shuffle if shuffle is not None else isTrain
    if options.num_classes == 10 and options.ds_name == 'cifar10':
        ds = dataset.Cifar10(subset, shuffle=shuffle, do_validation=do_validation)
        cutout_length = 16
        n_holes=1
    elif options.num_classes == 100 and options.ds_name == 'cifar100':
        ds = dataset.Cifar100(subset, shuffle=shuffle, do_validation=do_validation)
        cutout_length = 8
        n_holes=1
    else:
        raise ValueError('Number of classes must be set to 10(default) or 100 for CIFAR')
    logger.info('{} set has n_samples: {}'.format(subset, len(ds.data)))
    pp_mean = ds.get_per_pixel_mean()
    if isTrain:
        logger.info('Will do cut-out with length={} n_holes={}'.format(
            cutout_length, n_holes
        ))
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.RandomCrop((32, 32)),
            imgaug.Flip(horiz=True),
            imgaug.MapImage(lambda x: (x - pp_mean)/128.0),
            Cutout(length=cutout_length, n_holes=n_holes),
github tensorpack / tensorpack / examples / ResNet / cifar10-preact18-mixup.py View on Github external
def get_data(train_or_test, isMixup, alpha):
    isTrain = train_or_test == 'train'
    ds = dataset.Cifar10(train_or_test)
    if isTrain:
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.RandomCrop((32, 32)),
            imgaug.Flip(horiz=True),
        ]
        ds = AugmentImageComponent(ds, augmentors)

    batch = BATCH_SIZE
    ds = BatchData(ds, batch, remainder=not isTrain)

    def f(dp):
        images, labels = dp
        one_hot_labels = np.eye(CLASS_NUM)[labels]  # one hot coding
        if not isTrain or not isMixup:
            return [images, one_hot_labels]
github diux-dev / cluster / yuxin_numpy / mnist-convnet-prenumpy.py View on Github external
def get_data():
    train = BatchData(dataset.Mnist('train'), 10000)
    test = BatchData(dataset.Mnist('test'), 256, remainder=True)
    return train, test
github tensorpack / tensorpack / examples / basics / mnist-tfslim.py View on Github external
def get_data():
    train = BatchData(dataset.Mnist('train'), 128)
    test = BatchData(dataset.Mnist('test'), 256, remainder=True)
    return train, test
github tensorpack / tensorpack / examples / basics / mnist-tfslim.py View on Github external
def get_data():
    train = BatchData(dataset.Mnist('train'), 128)
    test = BatchData(dataset.Mnist('test'), 256, remainder=True)
    return train, test
github wanggrun / Kalman-Normalization / KalmanNorm / cifar-bn-largebatch.py View on Github external
def get_data(train_or_test):
    isTrain = train_or_test == 'train'
    ds = dataset.Cifar10(train_or_test)
    pp_mean = ds.get_per_pixel_mean()
    if isTrain:
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.RandomCrop((32, 32)),
            imgaug.Flip(horiz=True),
            imgaug.MapImage(lambda x: x - pp_mean),
        ]
    else:
        augmentors = [
            imgaug.MapImage(lambda x: x - pp_mean)
        ]
    ds = AugmentImageComponent(ds, augmentors)
    ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
    if isTrain:
        ds = PrefetchData(ds, 3, 2)
github tensorpack / tensorpack / examples / basics / mnist-convnet.py View on Github external
def get_data():
    # We don't need any fancy data loading for this simple example.
    # See dataflow tutorial at https://tensorpack.readthedocs.io/tutorial/dataflow.html
    train = BatchData(dataset.Mnist('train'), 128)
    test = BatchData(dataset.Mnist('test'), 256, remainder=True)

    train = PrintData(train)

    return train, test