if self.out_c is None:
    if channel % (self.group_size * self.group_size) != 0:
        raise ValueError(
            'input channel must be divisible by group_size * group_size: '
            '{} % {} != 0'
            .format(channel, self.group_size * self.group_size))
    out_c = channel // (self.group_size * self.group_size)
else:
    if channel != self.out_c * self.group_size * self.group_size:
        raise ValueError(
            'input channel must be equal to '
            'outsize[0] * group_size * group_size: {} != {}'
            .format(channel,
                    self.out_c * self.group_size * self.group_size))
    out_c = self.out_c
n_roi = bottom_rois.shape[0]
# Allocate the pooled output and its argmax buffer directly on the GPU.
top_data = cuda.cupy.empty(
    (n_roi, out_c, self.out_h, self.out_w), dtype=np.float32)
self.argmax_data = cuda.cupy.empty(top_data.shape, np.int32)
cuda.elementwise(
    '''
    raw T bottom_data, raw T bottom_rois,
    raw int32 bottom_roi_indices,
    T spatial_scale, int32 channels,
    int32 height, int32 width,
    int32 pooled_dim, int32 pooled_height, int32 pooled_width,
    int32 group_size
    ''',
    'T top_data, int32 argmax_data',
    '''
    // pos in output filter
    int ph = (i / pooled_width) % pooled_height;
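The call above builds a custom CUDA kernel through chainer.backends.cuda.elementwise, a memoized wrapper around cupy.ElementwiseKernel. Below is a minimal, self-contained sketch of the same API, assuming CuPy and a CUDA device are available; the squaring body and the name square_kernel are illustrative and not taken from the snippet above.

import numpy as np
from chainer.backends import cuda

x = cuda.cupy.arange(8, dtype=np.float32)
# arguments: input params, output params, per-element CUDA C body, kernel name
square = cuda.elementwise('T x', 'T y', 'y = x * x;', 'square_kernel')
y = square(x)  # the output array is allocated automatically
print(cuda.to_cpu(y))  # [ 0.  1.  4.  9. 16. 25. 36. 49.]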
def test_backward_gpu(self):
    self.check_backward(
        cuda.to_gpu(self.x), self.axis, cuda.to_gpu(self.gy))

def test_forward_gpu(self):
    self.check_forward(cuda.to_gpu(self.x), cuda.to_gpu(self.W),
                       cuda.to_gpu(self.b))
def to_cpu(array):
    if args.gpu_device >= 0:
        return cuda.to_cpu(array)
    return array
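A minimal round-trip sketch of the transfer helpers this function wraps. Note that cuda.to_cpu already returns NumPy arrays unchanged, so the gpu_device guard above mainly documents intent; the example assumes a CUDA device is present whenever cuda.available is True.

import numpy as np
from chainer.backends import cuda

x = np.arange(4, dtype=np.float32)
if cuda.available:
    x_gpu = cuda.to_gpu(x)     # numpy.ndarray -> cupy.ndarray on the current device
    x = cuda.to_cpu(x_gpu)     # cupy.ndarray -> numpy.ndarray
print(type(x))  # <class 'numpy.ndarray'> either way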
exs = sequence_embed(self.embed_x, xs)
h, c, _ = self.encoder(None, None, exs)
ys = self.xp.full(batch, EOS, numpy.int32)
result = []
for i in range(max_length):
    eys = self.embed_y(ys)
    eys = F.split_axis(eys, batch, 0)
    h, c, ys = self.decoder(h, c, eys)
    cys = F.concat(ys, axis=0)
    wy = self.W(cys)
    # Greedy decoding: pick the most probable token for each sequence.
    ys = self.xp.argmax(wy.array, axis=1).astype(numpy.int32)
    result.append(ys)
# Using `xp.concatenate(...)` instead of `xp.stack(result)` here to
# support NumPy 1.9.
result = cuda.to_cpu(
    self.xp.concatenate([self.xp.expand_dims(x, 0) for x in result]).T)

# Remove EOS tags
outs = []
for y in result:
    inds = numpy.argwhere(y == EOS)
    if len(inds) > 0:
        y = y[:inds[0, 0]]
    outs.append(y)
return outs
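A small NumPy-only sketch of the workaround mentioned in the comment above: concatenating the arrays after expanding a leading axis reproduces what xp.stack does, while np.stack itself only exists from NumPy 1.10 onward. The sample arrays are illustrative.

import numpy as np

result = [np.array([1, 2, 3]), np.array([4, 5, 6])]
a = np.concatenate([np.expand_dims(x, 0) for x in result]).T
b = np.stack(result).T  # equivalent, but requires NumPy >= 1.10
print(np.array_equal(a, b))  # True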
segms = F.sigmoid(
    self.mask_head(hs, mask_rois, mask_roi_indices)).data
# Put the order of proposals back to the one used by bbox head.
segms = segms[order]
segms = _flat_to_list(
    segms, mask_roi_indices_before_reordering, len(imgs))
segms = [segm if segm is not None else
         self.xp.zeros(
             (0, self.mask_head.segm_size, self.mask_head.segm_size),
             dtype=np.float32)
         for segm in segms]
segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
bboxes = [chainer.backends.cuda.to_cpu(bbox / scale)
          for bbox, scale in zip(rescaled_bboxes, scales)]
labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
# Currently MaskHead only supports numpy inputs
masks = self.mask_head.decode(segms, bboxes, labels, sizes)
scores = [cuda.to_cpu(score) for score in scores]
return masks, labels, scores
def can_use_cudnn(self, xp):
    # TODO(bkvogel): Check for float16 support again in next cuDNN version.
    # cuDNN v5 batch normalization does not seem to support float16.
    return (xp is cuda.cupy and
            chainer.should_use_cudnn('>=auto', 5000) and
            self.cudnn_dim_ok and
            self.cudnn_dtype_ok)
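The version check above relies on chainer.should_use_cudnn, which also honours the global use_cudnn configuration. A minimal sketch of that interaction; the second printed value depends on whether cuDNN is actually installed and recent enough.

import chainer

with chainer.using_config('use_cudnn', 'never'):
    print(chainer.should_use_cudnn('>=auto'))        # always False: disabled by config
with chainer.using_config('use_cudnn', 'auto'):
    # True only if cuDNN is available and its version is at least 5000
    print(chainer.should_use_cudnn('>=auto', 5000))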
                    help='model type ("skipgram", "cbow")')
parser.add_argument('--negative-size', default=5, type=int,
                    help='number of negative samples')
parser.add_argument('--out-type', '-o', choices=['hsm', 'ns', 'original'],
                    default='hsm',
                    help='output model type ("hsm": hierarchical softmax, '
                         '"ns": negative sampling, "original": '
                         'no approximation)')
parser.add_argument('--out', default='result',
                    help='Directory to output the result')
parser.add_argument('--test', dest='test', action='store_true')
parser.set_defaults(test=False)
args = parser.parse_args()

if args.gpu >= 0:
    chainer.backends.cuda.get_device_from_id(args.gpu).use()
    cuda.check_cuda_available()

print('GPU: {}'.format(args.gpu))
print('# unit: {}'.format(args.unit))
print('Window: {}'.format(args.window))
print('Minibatch-size: {}'.format(args.batchsize))
print('# epoch: {}'.format(args.epoch))
print('Training model: {}'.format(args.model))
print('Output type: {}'.format(args.out_type))
print('')

if args.gpu >= 0:
    cuda.get_device_from_id(args.gpu).use()

# Load the dataset
train, val, _ = chainer.datasets.get_ptb_words()
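A minimal sketch of what get_ptb_words returns, assuming the Penn Treebank data can be downloaded on first use: each split is a flat int32 array of word IDs, and the word-to-ID mapping is available through a companion function.

import chainer

train, val, test = chainer.datasets.get_ptb_words()
vocab = chainer.datasets.get_ptb_words_vocabulary()  # dict: word -> id
print(train.dtype, train.shape)  # int32, roughly 930k training tokens
print(len(vocab))                # about 10k distinct words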
def _coo_matmul_gradsp_gpu(A, B, C_row, C_col, dtype):
    # A.shape: ((nb,) _m, _k)
    # B.shape: ((nb,) _k, _n)
    # C_row/col.shape: ((nb,) ldnz)
    _m, _k = A.shape[-2:]
    _n = B.shape[-1]
    ldnz = C_row.shape[-1]
    if A.ndim == 2:
        nb = 1
        C_data = cuda.cupy.zeros((ldnz), dtype=dtype)
    else:
        nb = A.shape[0]
        C_data = cuda.cupy.zeros((nb, ldnz), dtype=dtype)

    # Launch one GPU thread per (batch, stored non-zero) slot of the output.
    nthreads = nb * ldnz
    _cupy_coo_matmul_gradsp()(nb, _m, _n, _k, ldnz, A, B, C_row, C_col, C_data,
                              size=nthreads)
    return C_data
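This helper sits behind Chainer's public sparse routines. A minimal CPU-side sketch of that public API (chainer.utils.to_coo and F.sparse_matmul), which the GPU kernels above accelerate when the inputs live on the device; the small operands are illustrative.

import numpy as np
import chainer
import chainer.functions as F

a = np.array([[0, 2, 0],
              [1, 0, 3]], dtype=np.float32)       # sparse operand, shape (m, k)
b = np.arange(6, dtype=np.float32).reshape(3, 2)  # dense operand, shape (k, n)
sp_a = chainer.utils.to_coo(a)                    # COO representation of `a`
y = F.sparse_matmul(sp_a, b)
print(np.allclose(y.array, a.dot(b)))             # True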
def to_cpu(array):
    if isinstance(array, cp.ndarray):
        return cuda.to_cpu(array)
    return array