# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is the tail of an image-reader factory function — the
# enclosing `def` (which binds `train`, `map_file`, `mean_file`, `image_width`,
# `image_height`, `num_channels`, `num_classes`, `total_number_of_samples`) is
# not visible here, and the original indentation appears to have been stripped.
# TODO: confirm structure against the upstream file.
transforms = []
# Random-side cropping is a training-only data augmentation step.
if train:
transforms += [
xforms.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
]
# Applied for both train and eval: resize to the network's input size, then
# subtract the precomputed mean image.
transforms += [
xforms.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
xforms.mean(mean_file)
]
# Deserializer
# Reader is randomized only when training; `max_samples` caps the total number
# of samples drawn across all sweeps.
return C.io.MinibatchSource(
C.io.ImageDeserializer(
map_file,
C.io.StreamDefs(features=C.io.StreamDef(field='image', transforms=transforms), # 1st col in mapfile referred to as 'image'
labels=C.io.StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize=train,
max_samples=total_number_of_samples,
multithreaded_deserializer=True)
def create_reader(path, is_training, input_dim, label_dim):
    """Build a CTF-backed MinibatchSource with dense 'features'/'labels' streams.

    Training readers are randomized and repeat the data indefinitely;
    evaluation readers sweep the file exactly once, in order.
    """
    streams = StreamDefs(
        features=StreamDef(field='features', shape=input_dim, is_sparse=False),
        labels=StreamDef(field='labels', shape=label_dim, is_sparse=False),
    )
    sweeps = INFINITELY_REPEAT if is_training else 1
    return MinibatchSource(CTFDeserializer(path, streams),
                           randomize=is_training, max_sweeps=sweeps)
# NOTE(review): tail of a Fast-R-CNN-style reader factory — the enclosing `def`
# (binding `data_path`, `data_set`, `map_file`, `roi_file`, `img_width`,
# `img_height`, `n_rois`, `n_classes`, `randomize`) and helpers such as
# `readTable` are outside this view; indentation appears stripped. Confirm upstream.
label_file = join(data_path, data_set + '.roilabels.txt')
# Fail fast if any of the three required input files is missing.
if not os.path.exists(map_file) or not os.path.exists(roi_file) or not os.path.exists(label_file):
raise RuntimeError("File '%s', '%s' or '%s' does not exist. " % (map_file, roi_file, label_file))
# read images
nrImages = len(readTable(map_file))
# Pad-scale keeps the aspect ratio, filling borders with gray (pixel value 114).
transforms = [scale(width=img_width, height=img_height, channels=3,
scale_mode="pad", pad_value=114, interpolations='linear')]
image_source = ImageDeserializer(map_file, StreamDefs(features = StreamDef(field='image', transforms=transforms)))
# read rois and labels
# Each ROI is 4 coordinates; labels are one-hot per ROI (n_classes * n_rois).
rois_dim = 4 * n_rois
label_dim = n_classes * n_rois
roi_source = CTFDeserializer(roi_file, StreamDefs(
rois = StreamDef(field='rois', shape=rois_dim, is_sparse=False)))
label_source = CTFDeserializer(label_file, StreamDefs(
roiLabels = StreamDef(field='roiLabels', shape=label_dim, is_sparse=False)))
# define a composite reader
# sys.maxsize effectively means "no sample cap"; epoch control is left to the caller.
mb = MinibatchSource([image_source, roi_source, label_source], max_samples=sys.maxsize, randomize=randomize)
return (mb, nrImages)
def create_reader(path, is_training, input_dim, output_dim):
    """Create a CTF-format MinibatchSource.

    Exposes a dense 'labels' stream of dimension `output_dim` and a dense
    'features' stream of dimension `input_dim`. When training, samples are
    randomized and the data repeats forever; otherwise one ordered sweep.
    """
    deserializer = C.io.CTFDeserializer(
        path,
        C.io.StreamDefs(
            labels=C.io.StreamDef(field='labels', shape=output_dim, is_sparse=False),
            features=C.io.StreamDef(field='features', shape=input_dim, is_sparse=False),
        ),
    )
    sweeps = C.io.INFINITELY_REPEAT if is_training else 1
    return C.io.MinibatchSource(deserializer, randomize=is_training, max_sweeps=sweeps)
# NOTE(review): tail of an image-reader factory — the enclosing `def` (binding
# `is_training`, `map_file`, `total_number_of_samples`) and the IMAGE_WIDTH /
# IMAGE_HEIGHT / NUM_CHANNELS / NUM_CLASSES constants live outside this view;
# indentation appears stripped. Confirm upstream.
transforms = []
if is_training:
# Training pipeline: random-area crop + color jitter for augmentation,
# then scale to the network input size.
transforms += [
C.io.transforms.crop(crop_type='randomarea', area_ratio=(0.05, 1.0), aspect_ratio=(0.75, 1.0), jitter_type='uniratio'), # train uses jitter
C.io.transforms.scale(width=IMAGE_WIDTH, height=IMAGE_HEIGHT, channels=NUM_CHANNELS, interpolations='linear'),
C.io.transforms.color(brightness_radius=0.125, contrast_radius=0.5, saturation_radius=0.5)
]
else:
# Evaluation pipeline: deterministic center crop, then scale.
transforms += [
C.io.transforms.crop(crop_type='center', side_ratio=0.875), # test has no jitter
C.io.transforms.scale(width=IMAGE_WIDTH, height=IMAGE_HEIGHT, channels=NUM_CHANNELS, interpolations='linear')
]
# deserializer
# Randomized only for training; `max_samples` caps the total samples drawn.
return C.io.MinibatchSource(
C.io.ImageDeserializer(map_file, C.io.StreamDefs(
features=C.io.StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels=C.io.StreamDef(field='label', shape=NUM_CLASSES))), # and second as 'label'
randomize=is_training,
max_samples=total_number_of_samples,
multithreaded_deserializer=True)
def create_reader(path, randomize, input_vocab_dim, label_vocab_dim, size=INFINITELY_REPEAT):
    """Sequence reader over a CTF file with sparse one-hot streams.

    'S0' becomes the 'features' stream (vocab size `input_vocab_dim`) and
    'S1' the 'labels' stream (vocab size `label_vocab_dim`). `size` is the
    epoch size in samples; by default the data repeats indefinitely.
    """
    deserializer = CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True),
    ))
    return MinibatchSource(deserializer, randomize=randomize, epoch_size=size)
def create_reader(path, is_training):
    """Build the CTF MinibatchSource for the slot-tagging task.

    Exposes three sparse streams: 'query' (field S0), 'intent_unused' (S1)
    and 'slot_labels' (S2). Dimensions come from the module-level
    `vocab_size`, `num_intents` and `num_labels`.
    """
    streams = StreamDefs(
        query=StreamDef(field='S0', shape=vocab_size, is_sparse=True),
        # BUGBUG: unused, and should infer dim
        intent_unused=StreamDef(field='S1', shape=num_intents, is_sparse=True),
        slot_labels=StreamDef(field='S2', shape=num_labels, is_sparse=True),
    )
    epoch = INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP
    return MinibatchSource(CTFDeserializer(path, streams),
                           randomize=is_training, epoch_size=epoch)
# NOTE(review): tail of yet another image-reader factory — the enclosing `def`
# (binding `train`, `map_file`, `mean_file`, image dims, `num_classes`) is not
# visible and indentation appears stripped. Confirm upstream.
transforms = []
# Random-side crop is a training-only augmentation.
if train:
transforms += [
xforms.crop(crop_type='randomside', side_ratio=0.8)
]
# Always: scale to the network input size, then subtract the mean image.
transforms += [
xforms.scale(
width=image_width,
height=image_height,
channels=num_channels,
interpolations='linear'
),
xforms.mean(mean_file)
]
# NOTE(review): unlike the other readers in this file, no `randomize` or
# `max_samples` is passed here, so MinibatchSource defaults apply —
# presumably randomized, unbounded sweeps; verify that this is intended
# for the evaluation (`train=False`) case as well.
return C.io.MinibatchSource(C.io.ImageDeserializer(map_file, C.io.StreamDefs(
features=C.io.StreamDef(field='image', transforms=transforms),
labels=C.io.StreamDef(field='label', shape=num_classes)
)))
def create_mb_source(features_file, labels_file, label_mapping_file, total_number_of_samples):
    """Create an HTK/MLF MinibatchSource for acoustic-model training.

    Args:
        features_file: SCP file listing the HTK feature archives.
        labels_file: MLF file with the frame-level labels.
        label_mapping_file: text file mapping label names to class indices.
        total_number_of_samples: stop after this many samples have been drawn.

    Returns:
        A MinibatchSource combining the feature and label deserializers,
        with truncation enabled for BPTT.

    Raises:
        RuntimeError: if any of the three input files does not exist.

    Fix: the third parameter was previously misspelled `label_mapping_filem`
    while the body referenced `label_mapping_file`, so every call raised
    NameError; the parameter name now matches its uses.

    NOTE(review): `feature_dim`, `context` and `num_classes` are assumed to be
    module-level constants defined elsewhere in the file — confirm upstream.
    """
    # Fail fast with a clear message before constructing any deserializer.
    for file_name in [features_file, labels_file, label_mapping_file]:
        if not os.path.exists(file_name):
            raise RuntimeError("File '%s' does not exist. Please check that datadir argument is set correctly." % (file_name))

    # Feature stream: HTK features with a symmetric temporal context window.
    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features=StreamDef(shape=feature_dim, context=(context, context), scp=features_file)))
    # Label stream: frame labels from the MLF, mapped through label_mapping_file.
    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))
    # Enabling BPTT with truncated_length > 0
    return MinibatchSource([fd, ld], truncation_length=250, max_samples=total_number_of_samples)
def create_mb_source(map_file, image_width, image_height, num_channels, num_classes, randomize=True):
    """Image MinibatchSource: linearly scaled 'image' features plus a 'label' stream."""
    scaling = [xforms.scale(width=image_width, height=image_height,
                            channels=num_channels, interpolations='linear')]
    deserializer = ImageDeserializer(map_file, StreamDefs(
        features=StreamDef(field='image', transforms=scaling),
        labels=StreamDef(field='label', shape=num_classes)))
    return MinibatchSource(deserializer, randomize=randomize)