Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Returns: A DataFlow which produces BGR images and labels.
See explanations in the tutorial:
http://tensorpack.readthedocs.io/tutorial/efficient-dataflow.html
"""
assert name in ['train', 'val', 'test']
isTrain = name == 'train'
assert datadir is not None
if augmentors is None:
augmentors = fbresnet_augmentor(isTrain)
assert isinstance(augmentors, list)
if parallel is None:
parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading
if isTrain:
ds = dataset.ILSVRC12(datadir, name, shuffle=True)
ds = AugmentImageComponent(ds, augmentors, copy=False)
if parallel < 16:
logger.warn("DataFlow may become the bottleneck when too few processes are used.")
ds = MultiProcessRunnerZMQ(ds, parallel)
ds = BatchData(ds, batch_size, remainder=False)
else:
ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
aug = imgaug.AugmentorList(augmentors)
def mapf(dp):
fname, cls = dp
im = cv2.imread(fname, cv2.IMREAD_COLOR)
im = aug.augment(im)
return im, cls
ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
ds = BatchData(ds, batch_size, remainder=True)
# Build the SVHN digit dataflow for the split named by `train_or_test`.
# NOTE(review): this snippet is cut off inside the `augmentors` list below, and
# its indentation has been lost; the rest of the function is not visible here.
def get_data(train_or_test):
# Only the exact string 'train' selects the training path.
isTrain = train_or_test == 'train'
# Per-pixel mean is fetched here; its use is not visible in this fragment.
pp_mean = dataset.SVHNDigit.get_per_pixel_mean()
if isTrain:
# Training mixes the 'train' and 'extra' SVHN splits into one dataflow.
d1 = dataset.SVHNDigit('train')
d2 = dataset.SVHNDigit('extra')
ds = RandomMixData([d1, d2])
else:
# Any non-'train' value loads the 'test' split.
ds = dataset.SVHNDigit('test')
if isTrain:
# Training-time augmentation: paste into a 40x40 canvas, jitter brightness
# and contrast, apply a Gaussian deformation, then crop back to 32x32.
augmentors = [
imgaug.CenterPaste((40, 40)),
imgaug.Brightness(10),
imgaug.Contrast((0.8, 1.2)),
imgaug.GaussianDeform( # this is slow. without it, can only reach 1.9% error
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40, 40), 0.2, 3),
imgaug.RandomCrop((32, 32)),
def get_data():
    """Create the batched MNIST dataflows used for training and evaluation.

    Returns:
        tuple: ``(train, test)`` -- the 'train' split wrapped in ``BatchData``
        with batch size 128, and the 'test' split wrapped in ``BatchData``
        with batch size 256 and ``remainder=True``.
    """
    train_batches = BatchData(dataset.Mnist('train'), 128)
    # The test split is built second, matching the original construction order.
    return train_batches, BatchData(dataset.Mnist('test'), 256, remainder=True)
# Build an augmented CIFAR-10 / CIFAR-100 dataflow for `subset`.
# NOTE(review): this snippet is cut off inside the `augmentors` list below, and
# its indentation has been lost; the rest of the function is not visible here.
def get_cifar_augmented_data(
subset, options, do_multiprocess=True, do_validation=False, shuffle=None):
# Training-mode handling applies only when subset is 'train' AND
# do_multiprocess is truthy.
isTrain = subset == 'train' and do_multiprocess
# An explicit `shuffle` wins; otherwise shuffle exactly when isTrain is set.
shuffle = shuffle if shuffle is not None else isTrain
# Dataset choice requires num_classes and ds_name to agree.
if options.num_classes == 10 and options.ds_name == 'cifar10':
ds = dataset.Cifar10(subset, shuffle=shuffle, do_validation=do_validation)
cutout_length = 16
n_holes=1
elif options.num_classes == 100 and options.ds_name == 'cifar100':
ds = dataset.Cifar100(subset, shuffle=shuffle, do_validation=do_validation)
cutout_length = 8
n_holes=1
else:
# NOTE(review): the message mentions only the class count, but this branch
# is also reached when ds_name does not match num_classes.
raise ValueError('Number of classes must be set to 10(default) or 100 for CIFAR')
logger.info('{} set has n_samples: {}'.format(subset, len(ds.data)))
pp_mean = ds.get_per_pixel_mean()
if isTrain:
logger.info('Will do cut-out with length={} n_holes={}'.format(
cutout_length, n_holes
))
# Training augmentation: paste into 40x40, random 32x32 crop, horizontal
# flip, subtract the per-pixel mean and divide by 128.0, then Cutout.
augmentors = [
imgaug.CenterPaste((40, 40)),
imgaug.RandomCrop((32, 32)),
imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: (x - pp_mean)/128.0),
Cutout(length=cutout_length, n_holes=n_holes),
# Build the batched CIFAR-10 dataflow, with a nested mapper that one-hot encodes labels.
# NOTE(review): this snippet is cut off inside the nested `f`, and its indentation
# has been lost. `alpha` is not used in the visible lines; presumably the missing
# mixup branch uses it -- confirm against the full file.
def get_data(train_or_test, isMixup, alpha):
# Only the exact string 'train' selects the training path.
isTrain = train_or_test == 'train'
ds = dataset.Cifar10(train_or_test)
if isTrain:
# Training augmentation: paste into 40x40, random 32x32 crop, horizontal flip.
augmentors = [
imgaug.CenterPaste((40, 40)),
imgaug.RandomCrop((32, 32)),
imgaug.Flip(horiz=True),
]
# NOTE(review): `augmentors` is only assigned in the training branch; whether the
# next line sits inside that branch cannot be told from this unindented text.
ds = AugmentImageComponent(ds, augmentors)
batch = BATCH_SIZE
# remainder is True for every non-training split.
ds = BatchData(ds, batch, remainder=not isTrain)
# Per-batch mapper: unpacks (images, labels) and one-hot encodes the labels.
def f(dp):
images, labels = dp
# Indexing the CLASS_NUM x CLASS_NUM identity matrix by label yields one-hot rows.
one_hot_labels = np.eye(CLASS_NUM)[labels] # one hot coding
# Outside training, or with mixup disabled, the batch is returned unmixed.
if not isTrain or not isMixup:
return [images, one_hot_labels]
def get_data():
    """Return MNIST dataflows with a very large training batch.

    Returns:
        tuple: ``(train, test)`` -- the 'train' split batched 10000 at a time
        and the 'test' split batched 256 at a time with ``remainder=True``.
    """
    train_flow = BatchData(dataset.Mnist('train'), 10000)
    test_flow = BatchData(dataset.Mnist('test'), 256, remainder=True)
    flows = (train_flow, test_flow)
    return flows
def get_data():
    """Assemble the MNIST train/test dataflow pair.

    Returns:
        tuple: ``(train, test)`` where train is ``BatchData`` over the 'train'
        split (batch size 128) and test is ``BatchData`` over the 'test'
        split (batch size 256, ``remainder=True``).
    """
    mnist_train = dataset.Mnist('train')
    batched_train = BatchData(mnist_train, 128)
    mnist_test = dataset.Mnist('test')
    batched_test = BatchData(mnist_test, 256, remainder=True)
    return batched_train, batched_test
def get_data():
    """Provide batched MNIST data for training and testing.

    Returns:
        tuple: a 2-tuple ``(train, test)``; training batches hold 128
        datapoints, test batches hold 256 and are built with
        ``remainder=True``.
    """
    return (
        BatchData(dataset.Mnist('train'), 128),
        BatchData(dataset.Mnist('test'), 256, remainder=True),
    )
def get_data(train_or_test):
    """Build the augmented, batched CIFAR-10 dataflow for one split.

    Args:
        train_or_test: split name passed to ``dataset.Cifar10``; the exact
            string ``'train'`` enables training-time augmentation and
            prefetching.

    Returns:
        The final dataflow: mean-subtracted images (plus paste/crop/flip
        augmentation when training), batched by ``BATCH_SIZE``, and wrapped
        in ``PrefetchData`` when training.
    """
    isTrain = train_or_test == 'train'
    ds = dataset.Cifar10(train_or_test)
    pp_mean = ds.get_per_pixel_mean()

    # Both splits subtract the per-pixel mean, so build that augmentor once.
    subtract_mean = imgaug.MapImage(lambda x: x - pp_mean)
    if isTrain:
        augmentors = [
            imgaug.CenterPaste((40, 40)),
            imgaug.RandomCrop((32, 32)),
            imgaug.Flip(horiz=True),
            subtract_mean,
        ]
    else:
        augmentors = [subtract_mean]

    ds = AugmentImageComponent(ds, augmentors)
    # remainder is True for every non-training split.
    ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
    if isTrain:
        ds = PrefetchData(ds, 3, 2)
    # The original built the dataflow but never returned it, so callers got None.
    return ds
def get_data():
    """Create the MNIST dataflows for this example, with a printing train flow.

    Returns:
        tuple: ``(train, test)`` -- train is the 'train' split batched by 128
        and wrapped in ``PrintData``; test is the 'test' split batched by 256
        with ``remainder=True``.
    """
    # Nothing elaborate is needed for data loading in this simple example.
    # Dataflow tutorial: https://tensorpack.readthedocs.io/tutorial/dataflow.html
    batched_train = BatchData(dataset.Mnist('train'), 128)
    batched_test = BatchData(dataset.Mnist('test'), 256, remainder=True)
    # PrintData is applied only after both flows exist, as in the original order.
    return PrintData(batched_train), batched_test