if left_proc is not None:
    left_buffer.copy_to_host( float_send_left[i_attr] )
if right_proc is not None:
    right_buffer.copy_to_host( float_send_right[i_attr] )
# Integer quantities:
if n_int > 0:
attr_list = []
if species.tracker is not None:
attr_list.append( (species.tracker,'id') )
if species.ionizer is not None:
attr_list.append( (species.ionizer,'ionization_level') )
for i_attr in range(n_int):
# Initialize 3 buffer arrays on the GPU (need to be initialized
# inside the loop, as `copy_to_host` invalidates these arrays)
left_buffer = cuda.device_array((N_send_l,), dtype=np.uint64)
right_buffer = cuda.device_array((N_send_r,), dtype=np.uint64)
stay_buffer = cuda.device_array((new_Ntot,), dtype=np.uint64)
# Split the particle array into the 3 buffers on the GPU
particle_array = getattr( attr_list[i_attr][0], attr_list[i_attr][1] )
split_particles_to_buffers[dim_grid_1d, dim_block_1d]( particle_array,
left_buffer, stay_buffer, right_buffer, i_min, i_max)
# Assign the stay_buffer to the initial particle data array
# and fill the sending buffers (if needed for MPI)
setattr( attr_list[i_attr][0], attr_list[i_attr][1], stay_buffer)
if left_proc is not None:
left_buffer.copy_to_host( uint_send_left[i_attr] )
if right_proc is not None:
right_buffer.copy_to_host( uint_send_right[i_attr] )
# Change the new total number of particles
species.Ntot = new_Ntot
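# --- Illustrative sketch (an assumption inferred from the call site above,
# not the library's actual kernel): `split_particles_to_buffers` is launched
# with (particle_array, left_buffer, stay_buffer, right_buffer, i_min, i_max),
# which suggests a one-thread-per-particle kernel that routes each element
# into the left / stay / right buffer according to its index.
from numba import cuda

@cuda.jit
def split_particles_to_buffers_sketch( particle_array, left_buffer,
                                       stay_buffer, right_buffer, i_min, i_max ):
    i = cuda.grid(1)
    if i < particle_array.shape[0]:
        if i < i_min:
            # Particles below i_min are sent to the left neighbor
            left_buffer[i] = particle_array[i]
        elif i < i_max:
            # Particles between i_min and i_max stay in the local domain
            stay_buffer[i - i_min] = particle_array[i]
        else:
            # Particles above i_max are sent to the right neighbor
            right_buffer[i - i_max] = particle_array[i]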
setattr( species, attr, new_array )
# Copy the tracking id, if needed
if species.tracker is not None:
old_array = species.tracker.id
new_array = allocate_empty( new_Ntot, use_cuda, dtype=np.uint64 )
if data_on_gpu:
copy_particle_data_cuda[ ptcl_grid_1d, ptcl_block_1d ](
old_Ntot, old_array, new_array )
else:
copy_particle_data_numba( old_Ntot, old_array, new_array )
species.tracker.id = new_array
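# --- Illustrative sketch, assuming the signature implied by the calls above
# (old_Ntot, old_array, new_array): copy the existing particles into the
# first old_Ntot slots of the newly allocated array. These are hypothetical
# stand-ins, not the library's implementations.
from numba import njit, cuda

@njit
def copy_particle_data_numba_sketch( old_Ntot, old_array, new_array ):
    # Sequential copy on the CPU
    for ip in range( old_Ntot ):
        new_array[ip] = old_array[ip]

@cuda.jit
def copy_particle_data_cuda_sketch( old_Ntot, old_array, new_array ):
    # One thread per particle on the GPU
    ip = cuda.grid(1)
    if ip < old_Ntot:
        new_array[ip] = old_array[ip]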
# Allocate the auxiliary arrays for GPU
if use_cuda:
species.cell_idx = cuda.device_array((new_Ntot,), dtype=np.int32)
species.sorted_idx = cuda.device_array((new_Ntot,), dtype=np.intp)
species.sorting_buffer = cuda.device_array((new_Ntot,), dtype=np.float64)
if species.n_integer_quantities > 0:
species.int_sorting_buffer = \
cuda.device_array( (new_Ntot,), dtype=np.uint64 )
# Modify the total number of particles
species.Ntot = new_Ntot
# Copy the buffers to an enlarged array
if species.use_cuda:
add_buffers_gpu( species, float_recv_left, float_recv_right,
uint_recv_left, uint_recv_right )
else:
add_buffers_cpu( species, float_recv_left, float_recv_right,
uint_recv_left, uint_recv_right )
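# --- Illustrative sketch of a CPU-side `add_buffers` routine, assuming that
# float_recv_left/right are 2D arrays with one row per float quantity (as the
# float_send_left[i_attr] indexing above suggests). The attribute list is an
# assumption; integer quantities (tracker id, ionization level) would be
# concatenated from uint_recv_left/right in the same way.
import numpy as np

def add_buffers_cpu_sketch( species, float_recv_left, float_recv_right,
                            uint_recv_left, uint_recv_right ):
    float_attrs = ['x', 'y', 'z', 'ux', 'uy', 'uz', 'w', 'inv_gamma']
    for i_attr, name in enumerate( float_attrs ):
        stay = getattr( species, name )
        # Received particles are appended to the left and right of the
        # particles that stayed in the local domain
        new_array = np.hstack( (float_recv_left[i_attr], stay,
                                float_recv_right[i_attr]) )
        setattr( species, name, new_array )
    # Adapt the total number of particles
    species.Ntot = ( species.Ntot + float_recv_left.shape[1]
                     + float_recv_right.shape[1] )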
# Reallocate the particles auxiliary arrays. This needs to be done,
# as the total number of particles in this domain has changed.
if species.use_cuda:
shape = (species.Ntot,)
# Reallocate empty field-on-particle arrays on the GPU
species.Ex = cuda.device_array( shape, dtype=np.float64 )
species.Ey = cuda.device_array( shape, dtype=np.float64 )
species.Ez = cuda.device_array( shape, dtype=np.float64 )
species.Bx = cuda.device_array( shape, dtype=np.float64 )
species.By = cuda.device_array( shape, dtype=np.float64 )
species.Bz = cuda.device_array( shape, dtype=np.float64 )
# Reallocate empty auxiliary sorting arrays on the GPU
species.cell_idx = cuda.device_array( shape, dtype=np.int32 )
species.sorted_idx = cuda.device_array( shape, dtype=np.int32 )
species.sorting_buffer = cuda.device_array( shape, dtype=np.float64 )
if species.n_integer_quantities > 0:
species.int_sorting_buffer = \
cuda.device_array( shape, dtype=np.uint64 )
else:
# Reallocate empty field-on-particle arrays on the CPU
species.Ex = np.empty(species.Ntot, dtype=np.float64)
species.Ey = np.empty(species.Ntot, dtype=np.float64)
species.Ez = np.empty(species.Ntot, dtype=np.float64)
species.Bx = np.empty(species.Ntot, dtype=np.float64)
species.By = np.empty(species.Ntot, dtype=np.float64)
species.Bz = np.empty(species.Ntot, dtype=np.float64)
# Build list of integer quantities to copy
attr_list = []
if species.tracker is not None:
attr_list.append( (species.tracker,'id') )
if species.ionizer is not None:
attr_list.append( (species.ionizer,'ionization_level') )
# Loop through the integer quantities
for i_attr in range( len(attr_list) ):
# Copy the proper buffers to the GPU
left_buffer = cuda.to_device( uint_recv_left[i_attr] )
right_buffer = cuda.to_device( uint_recv_right[i_attr] )
# Initialize the new particle array
particle_array = cuda.device_array( (new_Ntot,), dtype=np.uint64)
# Merge the arrays on the GPU
stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
if n_left != 0:
copy_particles[n_left_grid, n_left_block](
n_left, left_buffer, 0, particle_array, 0 )
if old_Ntot != 0:
copy_particles[n_old_grid, n_old_block](
old_Ntot, stay_buffer, 0, particle_array, n_left )
if n_right != 0:
copy_particles[n_right_grid, n_right_block](
n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
# Assign the stay_buffer to the initial particle data array
# and fill the sending buffers (if needed for MPI)
setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)
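# --- Illustrative sketch of the copy kernel implied by the calls above
# (N, source, source_start, target, target_start): copy N contiguous elements
# of `source` into `target` at the given offset. Signature and behavior are
# inferred from the call sites, not taken from the library.
from numba import cuda

@cuda.jit
def copy_particles_sketch( N, source, source_start, target, target_start ):
    i = cuda.grid(1)
    if i < N:
        target[target_start + i] = source[source_start + i]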
# Adapt the total number of particles
species.Ntot = new_Ntot
# Fall back to the CPU if CUDA is not available
if (self.use_cuda is True) and (cuda_installed is False):
    self.use_cuda = False
if self.use_cuda:
# Initialize the dimension of the grid and blocks
self.dim_grid, self.dim_block = cuda_tpb_bpg_2d( Nz, Nr, 1, 32 )
# Initialize the DHT (local implementation, see hankel.py)
self.dht0 = DHT( m, m, Nr, Nz, rmax, use_cuda=self.use_cuda )
self.dhtp = DHT(m+1, m, Nr, Nz, rmax, use_cuda=self.use_cuda )
self.dhtm = DHT(m-1, m, Nr, Nz, rmax, use_cuda=self.use_cuda )
# Initialize the FFT
self.fft = FFT( Nr, Nz, use_cuda=self.use_cuda )
# Initialize the spectral buffers
if self.use_cuda:
self.spect_buffer_r = cuda.device_array(
(Nz, Nr), dtype=np.complex128)
self.spect_buffer_t = cuda.device_array(
(Nz, Nr), dtype=np.complex128)
else:
# Initialize the spectral buffers
self.spect_buffer_r = np.zeros( (Nz, Nr), dtype=np.complex128 )
self.spect_buffer_t = np.zeros( (Nz, Nr), dtype=np.complex128 )
# Different names for same object (for economy of memory)
self.spect_buffer_p = self.spect_buffer_r
self.spect_buffer_m = self.spect_buffer_t
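# --- Note on the aliasing above: spect_buffer_p / spect_buffer_m are simply
# additional names bound to the same arrays as spect_buffer_r / spect_buffer_t,
# so no extra memory is allocated and writes through one name are seen through
# the other. A minimal stand-alone demonstration (CPU case):
import numpy as np
buf_r = np.zeros( (4, 4), dtype=np.complex128 )
buf_p = buf_r                       # same object, not a copy
buf_p[0, 0] = 1. + 2.j
assert buf_r[0, 0] == 1. + 2.j      # the write is visible through both names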
-------
particle_data : A dictionary of 1D float arrays (that are on the CPU)
A dictionary that contains the particle data of
the simulation (with normalized weights), including optional
integer arrays (e.g. "id", "charge")
"""
# Call kernel that extracts particles from GPU
dim_grid_1d, dim_block_1d = cuda_tpb_bpg_1d(N_area)
# - General particle quantities
part_data = cuda.device_array( (8, N_area), dtype=np.float64 )
extract_particles_from_gpu[dim_grid_1d, dim_block_1d]( pref_sum_curr,
species.x, species.y, species.z, species.ux, species.uy, species.uz,
species.w, species.inv_gamma, part_data )
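# --- Illustrative sketch of the extraction kernel launched above, assuming
# one thread per extracted particle: each thread copies the 8 float
# quantities of particle (pref_sum_curr + i) into column i of the
# (8, N_area) output array. Inferred from the call site, not the library code.
from numba import cuda

@cuda.jit
def extract_particles_from_gpu_sketch( part_idx_start, x, y, z, ux, uy, uz,
                                       w, inv_gamma, selected ):
    i = cuda.grid(1)
    if i < selected.shape[1]:
        ptcl_idx = part_idx_start + i
        selected[0, i] = x[ptcl_idx]
        selected[1, i] = y[ptcl_idx]
        selected[2, i] = z[ptcl_idx]
        selected[3, i] = ux[ptcl_idx]
        selected[4, i] = uy[ptcl_idx]
        selected[5, i] = uz[ptcl_idx]
        selected[6, i] = w[ptcl_idx]
        selected[7, i] = inv_gamma[ptcl_idx]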
# - Optional particle arrays
if species.tracker is not None:
selected_particle_id = cuda.device_array( (N_area,), dtype=np.uint64 )
extract_array_from_gpu[dim_grid_1d, dim_block_1d](
pref_sum_curr, species.tracker.id, selected_particle_id )
if species.ionizer is not None:
selected_particle_charge = cuda.device_array( (N_area,), dtype=np.uint64 )
extract_array_from_gpu[dim_grid_1d, dim_block_1d]( pref_sum_curr,
species.ionizer.ionization_level, selected_particle_charge )
selected_particle_weight = cuda.device_array( (N_area,), dtype=np.float64 )
extract_array_from_gpu[dim_grid_1d, dim_block_1d]( pref_sum_curr,
species.ionizer.w_times_level, selected_particle_weight )
# Copy GPU arrays to the host
part_data = part_data.copy_to_host()
particle_data = { 'x':part_data[0], 'y':part_data[1], 'z':part_data[2],
'ux':part_data[3], 'uy':part_data[4], 'uz':part_data[5],
'w':part_data[6], 'inv_gamma':part_data[7] }
if species.tracker is not None:
    particle_data['id'] = selected_particle_id.copy_to_host()
# Register boolean that records whether field array should
# be rearranged whenever sorting particles
# (gets modified during the main PIC loop, on GPU)
self.keep_fields_sorted = False
# Allocate arrays and register variables when using CUDA
if self.use_cuda:
if grid_shape is None:
raise ValueError("A `grid_shape` is needed when running "
"on the GPU.\nPlease provide it when initializing particles.")
# Register grid shape
self.grid_shape = grid_shape
# Allocate arrays for the particles sorting when using CUDA
# Most required arrays always stay on GPU
Nz, Nr = grid_shape
self.cell_idx = cuda.device_array( Ntot, dtype=np.int32)
self.sorted_idx = cuda.device_array( Ntot, dtype=np.intp)
self.prefix_sum = cuda.device_array( Nz*(Nr+1), dtype=np.int32 )
# sorting buffers are initialized on CPU like other particle arrays
# (because they are swapped with these arrays during sorting)
self.sorting_buffer = np.empty( Ntot, dtype=np.float64)
# Register integer that records the shift in the indices,
# induced by the moving window
self.prefix_sum_shift = 0
# Register boolean that records if the particles are sorted or not
self.sorted = False
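# --- Illustrative sketch (an assumption, not the library's kernels) of how
# the sorting arrays registered above are typically used: cell_idx holds the
# cell index of each particle, sorted_idx holds the particle ordering after
# the sort, and the sorting buffer receives the reordered data before being
# swapped with the original attribute array.
from numba import cuda

@cuda.jit
def reorder_by_sorted_idx_sketch( sorted_idx, src, dst ):
    i = cuda.grid(1)
    if i < src.shape[0]:
        # Gather: particle i of the sorted array comes from src[sorted_idx[i]]
        dst[i] = src[ sorted_idx[i] ]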
# Define optimal number of CUDA threads per block for deposition
# and gathering kernels (determined empirically)
if particle_shape == "cubic":
self.deposit_tpb = 32
self.gather_tpb = 256
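# --- Illustrative sketch of how a threads-per-block setting such as
# gather_tpb is typically turned into a CUDA launch configuration
# (one thread per particle is assumed; the helper name is hypothetical).
def launch_config_1d_sketch( N, threads_per_block ):
    # Number of blocks needed to cover N elements
    blocks_per_grid = int( (N + threads_per_block - 1) // threads_per_block )
    return blocks_per_grid, threads_per_block

# e.g. gather_kernel[ launch_config_1d_sketch( species.Ntot, self.gather_tpb ) ]( ... )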