Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _batched_unpack_params(params_data, buffer, dtype, stream=None):
    """Launch the batched-unpack kernel for ``params_data`` from ``buffer``.

    The kernel returned by ``_cupy_batched_unpack_params()`` receives the
    pointer of ``buffer`` plus the pointer, dtype and cumulative-size fields
    read from ``params_data``.

    Args:
        params_data: Object exposing ``n_params``, ``n_elems``, ``dptr``,
            ``dtype`` and ``size_csum``.
        buffer: Object whose ``memory.ptr`` is handed to the kernel.
        dtype: Element type of ``buffer``; translated with
            ``_communication_utility._get_nccl_type_id``.
        stream: Stream used as the launch context.  When ``None`` the
            current CuPy stream is used.
    """
    param_count = params_data.n_params
    elem_count = params_data.n_elems
    device_ptrs = params_data.dptr
    param_dtypes = params_data.dtype
    size_prefix_sums = params_data.size_csum
    buffer_type_id = _communication_utility._get_nccl_type_id(dtype)

    threads_per_block = 128
    # Ceiling division of the element count by the block width.
    block_count = (elem_count + threads_per_block - 1) // threads_per_block

    launch_stream = cp.cuda.get_current_stream() if stream is None else stream
    with launch_stream:
        kernel = _cupy_batched_unpack_params()
        kernel(
            (block_count,),
            (threads_per_block,),
            (
                buffer.memory.ptr,
                buffer_type_id,
                elem_count,
                device_ptrs,
                param_dtypes,
                size_prefix_sums,
                param_count,
            ),
        )
def test_multi_gpu(self):
    """Run ``scan`` on a fresh zero array under device 0, then device 1."""
    for device_id in (0, 1):
        with cuda.Device(device_id):
            zeros = cupy.zeros((10,))
            cupy.core.core.scan(zeros)
from __future__ import division
from builtins import object
import pickle
import pytest
# Skip this whole module when CuPy cannot be imported or when probing
# device 0 raises a CUDA runtime error.
try:
    import cupy as cp

    try:
        # The attribute access is the probe; its value is not used.
        cp.cuda.Device(0).compute_capability
    except cp.cuda.runtime.CUDARuntimeError:
        pytest.skip("GPU device inaccessible", allow_module_level=True)
except ImportError:
    pytest.skip("cupy not installed", allow_module_level=True)
from sporco.cupy.admm import cbpdn
import sporco.cupy.linalg as sl
from sporco.cupy.util import list2array
class TestSet01(object):
    """Test case whose setup hook seeds CuPy's random number generator."""

    def setup_method(self, method):
        """Seed ``cp.random`` with a fixed value; ``method`` is unused."""
        fixed_seed = 12345
        cp.random.seed(fixed_seed)
def optInit(self):
    """Run ``_preInit`` and load every variable file into ``self.CUPYmemmap``.

    For each name in ``self.optVarList`` the file
    ``self.fileName + name + '.cpy.npy'`` is loaded with ``cupy.load`` using
    ``mmap_mode='r+'``; the results are stored, in order, in
    ``self.CUPYmemmap``.

    When ``self.CPUPinn`` compares equal to ``True`` the global CuPy
    allocator is switched to ``my_pinned_allocator`` for the duration of the
    call and reset with ``cupy.cuda.set_allocator(None)`` afterwards.  The
    reset happens in a ``finally`` clause so that a failure in ``_preInit``
    or ``cupy.load`` does not leave the process-wide allocator replaced.
    """
    # The flag is read once so the allocator is restored exactly when it was
    # replaced.  The explicit comparison is kept on purpose: non-bool flag
    # values must behave as they did before.
    use_pinned = self.CPUPinn == True  # noqa: E712
    if use_pinned:
        cupy.cuda.set_allocator(my_pinned_allocator)
    try:
        self._preInit()
        self.CUPYmemmap = [
            cupy.load(self.fileName+optVar+'.cpy.npy', mmap_mode='r+')
            for optVar in self.optVarList
        ]
    finally:
        if use_pinned:
            cupy.cuda.set_allocator(None)
loader.save_stats(stats_directory)
# Batch-size tuning: run trial forward/backward passes and lower a bucket's
# batch size when the pass raises a CUDA runtime error.
# NOTE(review): indentation was lost in this excerpt, so the nesting of the
# statements below (in particular which block `break` belongs to) cannot be
# confirmed from here - check against the original file.
print("Searching for the best batch size ...")
batch_iter_train = loader.get_training_batch_iterator(batchsizes_train, augmentation=augmentation, gpu=using_gpu)
# Up to 30 search rounds.
for _ in range(30):
for x_batch, x_length_batch, t_batch, t_length_batch, bigram_batch, bucket_id in batch_iter_train:
try:
with chainer.using_config("train", True):
y_batch = model(x_batch)
loss = gram_ctc(y_batch, t_batch, bigram_batch, ID_BLANK, x_length_batch, t_length_batch)
# With joint training, the plain CTC loss is added to the Gram-CTC loss.
if args.joint_training:
loss += F.connectionist_temporal_classification(y_batch, t_batch, ID_BLANK, x_length_batch, t_length_batch)
loss.backward()
except Exception as e:
# Only a CUDA runtime error changes the batch size: it is lowered by 16
# but never below 4.
# NOTE(review): every other Exception is caught here as well and no re-raise
# is visible in this excerpt.
if isinstance(e, cupy.cuda.runtime.CUDARuntimeError):
batchsizes_train[bucket_id] = max(batchsizes_train[bucket_id] - 16, 4)
print("new batchsize {} for bucket {}".format(batchsizes_train[bucket_id], bucket_id + 1))
break
# Dev batch sizes are three times the training batch sizes.
batchsizes_dev = [size * 3 for size in batchsizes_train]
# Training
printb("[Training]")
epochs = Iteration(args.epochs)
report = Report(log_filename)
for epoch in epochs:
sum_loss = 0
# Parameter update
# A new training iterator is requested each epoch with the current batch sizes.
batch_iter_train = loader.get_training_batch_iterator(batchsizes_train, augmentation=augmentation, gpu=using_gpu)
total_iterations_train = batch_iter_train.get_total_iterations()
def synchronize(cls):
    """Call ``synchronize()`` on CuPy's null stream.

    ``cupy`` is imported inside the function, so it is only required when
    this method is actually called.
    """
    import cupy

    null_stream = cupy.cuda.Stream.null
    null_stream.synchronize()
def set_stream(self, stream=None):
    """Attach ``stream`` to this object's cuRAND generator.

    Args:
        stream: Object with a ``ptr`` attribute.  When ``None``, a new
            ``cuda.Stream()`` is created and used instead.
    """
    target_stream = cuda.Stream() if stream is None else stream
    # NOTE(review): a stream created above is referenced only by this local
    # name - confirm it stays valid for as long as the generator uses it.
    curand.setStream(self._generator, target_stream.ptr)
def main():
    """Prompt for a wave file and network weights, run the generator, save.

    Steps: read the wave path from stdin and load it with ``waver.load``;
    build a ``Unet`` and move it to GPU 0; read the weights path from stdin
    and load it into the network; run the network on the batch built from
    the wave with chainer's ``train`` config set to ``False``; copy input and
    output to host memory; write the input batch to ``'a.wav'``.
    """
    wave_path = input("wave path...")
    bps, wave = waver.load(wave_path)

    generator_ab = Unet()
    cp.cuda.Device(0).use()
    generator_ab.to_gpu()

    weights_path = input("net path...")
    chainer.serializers.load_npz(weights_path, generator_ab)

    with chainer.using_config('train', False):
        examples = load_comp(wave, 32)
        source_var = chainer.Variable(convert.concat_examples(examples, 0))
        converted_var = generator_ab(source_var)

    source_host = cp.asnumpy(source_var.data)
    # The converted batch is copied to the host as well, although nothing in
    # this function consumes it afterwards.
    converted_host = cp.asnumpy(converted_var.data)
    save_comp('a.wav', bps, source_host, side, pow_scale, fft_resca)
def __eq__(self, other):
    """Return ``True`` exactly when ``other`` is a ``DummyDeviceType``."""
    is_dummy_device = isinstance(other, DummyDeviceType)
    return is_dummy_device
def __ne__(self, other):
    """Return the boolean negation of ``self == other``."""
    are_equal = self == other
    return not are_equal
# Module-level instance of DummyDeviceType.
DummyDevice = DummyDeviceType()


# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
if available:
    # Create a memory pool and install its ``malloc`` as the allocator.
    memory_pool = cuda.MemoryPool()
    cuda.set_allocator(memory_pool.malloc)

    # Same arrangement for the pinned-memory allocator.
    pinned_memory_pool = cuda.PinnedMemoryPool()
    cuda.set_pinned_memory_allocator(pinned_memory_pool.malloc)
# Tuple of integer types: six's integer types plus ``numpy.integer``.
_integer_types = six.integer_types + (numpy.integer,)

if six.PY2:
    # On Python 2, also include ``newint`` from the ``future`` package when
    # that package can be imported; otherwise leave the tuple unchanged.
    try:
        from future.types.newint import newint as _newint
    except ImportError:
        pass
    else:
        _integer_types += (_newint,)
# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
from __future__ import division
import numpy as np
import os
import math
import cmath
# Element type of every constant below, looked up on NumPy by the name in
# QUPY_DTYPE (default: complex128).
dtype = getattr(np, os.getenv('QUPY_DTYPE', 'complex128'))

# Device id from QUPY_GPU; a negative value (the default, -1) selects NumPy.
device = int(os.getenv('QUPY_GPU', -1))

if device < 0:
    xp = np
else:
    import cupy
    cupy.cuda.Device(device).use()
    xp = cupy

_SQRT2 = math.sqrt(2)

# 2x2 matrix constants, all created with the selected array module and dtype.
I = xp.array([[1, 0],
              [0, 1]], dtype=dtype)
X = xp.array([[0, 1],
              [1, 0]], dtype=dtype)
Y = xp.array([[0, -1j],
              [1j, 0]], dtype=dtype)
Z = xp.array([[1, 0],
              [0, -1]], dtype=dtype)
H = xp.array([[1, 1],
              [1, -1]], dtype=dtype) / _SQRT2
S = xp.array([[1, 0],
              [0, 1j]], dtype=dtype)
T = xp.array([[1, 0],
              [0, (1 + 1j) / _SQRT2]], dtype=dtype)
Sdag = xp.array([[1, 0],
                 [0, -1j]], dtype=dtype)
Tdag = xp.array([[1, 0],
                 [0, (1 - 1j) / _SQRT2]], dtype=dtype)
sqrt_not = xp.array([[1 + 1j, 1 - 1j],
                     [1 - 1j, 1 + 1j]], dtype=dtype) / 2