Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Excerpt from an initializer whose `def` line is outside this view (the
# original indentation is not visible here). It picks the FFT backend and
# allocates the matching buffers.
self.use_cuda = use_cuda
# Fall back to the CPU when the GPU was requested but CUDA is unavailable
if (self.use_cuda is True) and (cuda_installed is False) :
self.use_cuda = False
print('** Cuda not available for Fourier transform.')
print('** Performing the Fourier transform on the CPU.')
# Check whether to use MKL
# (taken directly from the `mkl_installed` flag, not from an argument)
self.use_mkl = mkl_installed
# Initialize the object for calculation on the GPU
if self.use_cuda:
# Initialize the dimension of the grid and blocks
self.dim_grid, self.dim_block = cuda_tpb_bpg_2d( Nz, Nr)
# Initialize 1d buffer for cufft
# (two flat device arrays of Nz*Nr complex128 values: input and output)
self.buffer1d_in = cuda.device_array(
(Nz*Nr,), dtype=np.complex128)
self.buffer1d_out = cuda.device_array(
(Nz*Nr,), dtype=np.complex128)
# Initialize the cuda libraries object
# (the plan is created with shape (Nz,) and batch=Nr)
self.fft = cufft.FFTPlan( shape=(Nz,), itype=np.complex128,
otype=np.complex128, batch=Nr )
self.blas = cublas.Blas() # For normalization of the iFFT
self.inv_Nz = 1./Nz # For normalization of the iFFT
# Initialize the object for calculation on the CPU
else:
# For MKL FFT
if self.use_mkl:
# Initialize the MKL plan with dummy array
# (zero-filled Nz x Nr complex128 array; its use continues past this excerpt)
spect_buffer = np.zeros( (Nz, Nr), dtype=np.complex128 )
def get_gpu_message():
    """
    Build a one-line description of the GPU that is currently selected.

    When more than one MPI rank is running, the message also contains
    the rank and the name of the node it runs on.

    Returns :
    ---------
    A string (starting with a newline) describing the selected GPU.
    """
    device = cuda.gpus.current
    # The device name may be a bytestring; fall back to the raw
    # attribute when it has no `decode` method
    try:
        device_name = device.name.decode()
    except AttributeError:
        device_name = device.name

    # Single-process run: no rank/node information is needed
    if MPI.COMM_WORLD.size <= 1:
        return "\nFBPIC selected a %s GPU with id %s" % (
            device_name, device.id)

    # Multi-process run: identify the rank and the node as well
    mpi_rank = MPI.COMM_WORLD.rank
    host = MPI.Get_processor_name()
    return "\nMPI rank %d selected a %s GPU with id %s on node %s" % (
        mpi_rank, device_name, device.id, host)
# Excerpt from an initializer whose `def` line is outside this view (the
# original indentation is not visible here). It resets the recording
# positions and allocates the single-slice buffer.
# Positions where the fields are to be registered
# (Change at every iteration)
self.current_z_lab = 0
self.current_z_boost = 0
# Buffered field slice and corresponding array index in z
# (both lists start empty)
self.buffered_slices = []
self.buffer_z_indices = []
# Allocate a buffer for only one slice (avoids having to
# reallocate arrays when running on the GPU)
# NOTE(review): the leading 10 is presumably the number of quantities
# stored per slice - confirm against the code that fills this buffer.
data_shape = (10, 2*fld.Nm-1, fld.Nr)
# NumPy array when `fld.use_cuda` is False, numba device array otherwise;
# no dtype is passed, so each call uses its own default.
if fld.use_cuda is False:
self.slice_array = np.empty( data_shape )
else:
self.slice_array = cuda.device_array( data_shape )
def print_gpu_meminfo(gpu):
    """
    Print the free and total memory reported for a GPU.

    The two values returned by `get_memory_info` are scaled by 1e-6
    and printed as Mbytes.

    Parameters :
    ------------
    gpu : object
        A numba cuda gpu context object. It is entered as a context
        manager while the memory information is queried and printed.
    """
    with gpu:
        info = cuda.current_context().get_memory_info()
        free_mb = info[0]*1e-6
        total_mb = info[1]*1e-6
        # Single literal (no backslash continuation inside the string),
        # so the printed text does not depend on the source indentation
        template = "GPU: %s, free: %s Mbytes, total: %s Mbytes "
        print(template % (gpu, free_mb, total_mb))
'rho:add' : alloc_cpu( ( Nm, 2*ng, Nr), dtype=np.complex128)}
# Excerpt from an initializer whose `def` line is outside this view (the
# original indentation is not visible here).
# Receive buffers allocated with `alloc_cpu`, keyed by '<field>:<method>'.
# The ':replace' entries have `ng` cells along the second axis, while the
# ':add' entries have `2*ng`. First axis: n_fld*Nm for E and B, 3*Nm for J,
# Nm for rho. All buffers are complex128.
# NOTE(review): the `_r` suffix presumably means "right neighbor" - confirm.
self.recv_r = {
'E:replace': alloc_cpu( (n_fld*Nm, ng, Nr), dtype=np.complex128),
'B:replace': alloc_cpu( (n_fld*Nm, ng, Nr), dtype=np.complex128),
'J:add' : alloc_cpu( ( 3*Nm, 2*ng, Nr), dtype=np.complex128),
'rho:add' : alloc_cpu( ( Nm, 2*ng, Nr), dtype=np.complex128)}
# Allocate buffers on the GPU, for the different exchange types
# (one device copy per host buffer, stored under the same key in a
# `d_`-prefixed dictionary; `send_l`, `send_r` and `recv_l` are defined
# before this excerpt)
if cuda_installed:
self.d_send_l = { key: cuda.to_device(value) for key, value in \
self.send_l.items() }
self.d_send_r = { key: cuda.to_device(value) for key, value in \
self.send_r.items() }
self.d_recv_l = { key: cuda.to_device(value) for key, value in \
self.recv_l.items() }
self.d_recv_r = { key: cuda.to_device(value) for key, value in \
self.recv_r.items() }
n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
# Assign the stay_buffer to the initial particle data array
# and fill the sending buffers (if needed for MPI)
setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)
# Excerpt from a function whose `def` line is outside this view (the
# original indentation is not visible here). It merges the received
# integer-valued particle attributes with the existing ones on the GPU.
# Build list of integer quantities to copy
# (each entry is an (owner object, attribute name) pair; an entry is only
# added when the corresponding tracker / ionizer exists)
attr_list = []
if species.tracker is not None:
attr_list.append( (species.tracker,'id') )
if species.ionizer is not None:
attr_list.append( (species.ionizer,'ionization_level') )
# Loop through the integer quantities
# (i_attr also indexes the matching rows of uint_recv_left / uint_recv_right)
for i_attr in range( len(attr_list) ):
# Copy the proper buffers to the GPU
left_buffer = cuda.to_device( uint_recv_left[i_attr] )
right_buffer = cuda.to_device( uint_recv_right[i_attr] )
# Initialize the new particle array
# (device array of new_Ntot uint64 values)
particle_array = cuda.device_array( (new_Ntot,), dtype=np.uint64)
# Merge the arrays on the GPU
# The last argument of the three calls below is 0, n_left and
# n_left+old_Ntot - presumably the destination offset, which would give the
# layout [received left | existing | received right]; confirm against
# `copy_particles`. Each copy is skipped when its particle count is zero.
stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
if n_left != 0:
copy_particles[n_left_grid, n_left_block](
n_left, left_buffer, 0, particle_array, 0 )
if old_Ntot != 0:
copy_particles[n_old_grid, n_old_block](
old_Ntot, stay_buffer, 0, particle_array, n_left )
if n_right != 0:
copy_particles[n_right_grid, n_right_block](
n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
# Store the merged array back on its owner object, replacing the
# attribute that `stay_buffer` was read from
setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)
# Excerpt from an initializer whose `def` line is outside this view (the
# original indentation is not visible here).
# Initialize a buffer handler object, for MPI communications
# (only when `self.size` is larger than 1)
# NOTE(review): `self.use_pml` is read here but is assigned further down in
# this excerpt - confirm that it is already set before this point.
if self.size > 1:
Nr_with_damp = self.get_Nr( with_damp=True )
self.mpi_buffers = BufferHandler( self.n_guard, Nr_with_damp, Nm,
self.left_proc, self.right_proc, self.use_pml )
# Create damping arrays for the damping cells at the left
# and right of the box in the case of "open" boundaries.
# (a side gets a damping array only when its neighbor proc is None; both
# sides call `generate_damp_array` with the same arguments, and a device
# copy is made when cuda is installed)
if (self.nz_damp+self.n_inject) > 0:
if self.left_proc is None:
# Create the damping arrays for left proc
self.left_damp = self.generate_damp_array(
self.n_guard, self.nz_damp, self.n_inject )
if cuda_installed:
self.d_left_damp = cuda.to_device( self.left_damp )
if self.right_proc is None:
# Create the damping arrays for right proc
self.right_damp = self.generate_damp_array(
self.n_guard, self.nz_damp, self.n_inject )
if cuda_installed:
self.d_right_damp = cuda.to_device( self.right_damp )
# Create damping object for the PML
# (the PML is enabled exactly when the 'r' boundary is "open")
self.use_pml = (boundaries['r'] == "open")
if self.use_pml:
self.pml_damper = PMLDamper( self.nr_damp, cdt_over_dr )
np.asfortranarray( self.invM, dtype=np.float64 ) )
# Excerpt from an initializer whose `def` line is outside this view (the
# original indentation is not visible here).
# Initialize buffer arrays to store the complex Nz x Nr grid
# as a real 2Nz x Nr grid, before performing the matrix product
# (This is because a matrix product of reals is faster than a matrix
# product of complexes, and the real-complex conversion is negligible.)
if not self.use_cuda:
# Initialize real buffer arrays on the CPU
# (two independent copies of a zero-filled float64 array, default order)
zero_array = np.zeros((2*Nz, Nr), dtype=np.float64 )
self.array_in = zero_array.copy()
self.array_out = zero_array.copy()
else:
# Initialize real buffer arrays on the GPU
# The cuBlas API requires that these arrays be in Fortran order
# (the same zero-filled host array is transferred twice, once per buffer)
zero_array = np.zeros((2*Nz, Nr), dtype=np.float64, order='F')
self.d_in = cuda.to_device( zero_array )
self.d_out = cuda.to_device( zero_array )
# Initialize the cuBLAS object used for the matrix product
self.blas = cublas.Blas()
# Initialize the threads per block and block per grid
self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)