How to use the fbpic.utils.cuda.cuda function in fbpic

To help you get started, we've selected a few fbpic examples that show popular ways fbpic.utils.cuda.cuda is used in public projects.

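The examples below all follow the same basic pattern: import the cuda object (Numba's CUDA namespace, as the snippets below use it) from fbpic.utils.cuda together with the cuda_installed flag, and guard the GPU code path with that flag. A minimal sketch of this pattern, with a helper name and shapes invented for illustration, looks like this:

import numpy as np
from fbpic.utils.cuda import cuda, cuda_installed

def allocate_buffer(Nz, Nr, use_cuda=True):
    """Allocate a complex buffer on the GPU if available, else on the CPU."""
    if use_cuda and cuda_installed:
        # cuda.device_array allocates uninitialized memory on the GPU
        return cuda.device_array((Nz, Nr), dtype=np.complex128)
    return np.empty((Nz, Nr), dtype=np.complex128)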
github fbpic / fbpic / fbpic/fields/spectral_transform/fourier.py (View on GitHub)
        self.use_cuda = use_cuda
        if (self.use_cuda is True) and (cuda_installed is False) :
            self.use_cuda = False
            print('** Cuda not available for Fourier transform.')
            print('** Performing the Fourier transform on the CPU.')

        # Check whether to use MKL
        self.use_mkl = mkl_installed

        # Initialize the object for calculation on the GPU
        if self.use_cuda:
            # Initialize the dimension of the grid and blocks
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d( Nz, Nr)

            # Initialize 1d buffer for cufft
            self.buffer1d_in = cuda.device_array(
                (Nz*Nr,), dtype=np.complex128)
            self.buffer1d_out = cuda.device_array(
                (Nz*Nr,), dtype=np.complex128)
            # Initialize the cuda libraries object
            self.fft = cufft.FFTPlan( shape=(Nz,), itype=np.complex128,
                                      otype=np.complex128, batch=Nr )
            self.blas = cublas.Blas()   # For normalization of the iFFT
            self.inv_Nz = 1./Nz         # For normalization of the iFFT

        # Initialize the object for calculation on the CPU
        else:

            # For MKL FFT
            if self.use_mkl:
                # Initialize the MKL plan with dummy array
                spect_buffer = np.zeros( (Nz, Nr), dtype=np.complex128 )
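As a standalone illustration of the cuda.device_array calls above, the following sketch allocates flat complex buffers on the GPU and round-trips a host array through one of them. It only uses Numba's CUDA array API, assumes a CUDA-capable GPU, and the shapes are made up for the example.

import numpy as np
from numba import cuda

Nz, Nr = 256, 64

# Flat 1D device buffers, like the cufft input/output buffers above
buffer1d_in = cuda.device_array((Nz * Nr,), dtype=np.complex128)
buffer1d_out = cuda.device_array((Nz * Nr,), dtype=np.complex128)

# Round-trip a host array through the input buffer
host_data = np.arange(Nz * Nr, dtype=np.complex128)
buffer1d_in.copy_to_device(host_data)
spect_2d = buffer1d_in.copy_to_host().reshape(Nz, Nr)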
github fbpic / fbpic / fbpic/utils/printing.py (View on GitHub)
def get_gpu_message():
    """
    Returns a string with information about the currently selected GPU.
    """
    gpu = cuda.gpus.current
    # Convert bytestring to actual string
    try:
        gpu_name = gpu.name.decode()
    except AttributeError:
        gpu_name = gpu.name
    # Print the GPU that is being used
    if MPI.COMM_WORLD.size > 1:
        rank = MPI.COMM_WORLD.rank
        node = MPI.Get_processor_name()
        message = "\nMPI rank %d selected a %s GPU with id %s on node %s" %(
            rank, gpu_name, gpu.id, node)
    else:
        message = "\nFBPIC selected a %s GPU with id %s" %( gpu_name, gpu.id )
    return(message)
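A reduced version of the same device query, without the MPI branch, is sketched below. cuda.gpus.current and the gpu.id / gpu.name attributes come from Numba's device list, and the bytestring handling mirrors the snippet above; the explicit select_device(0) call is an assumption made so the example stands on its own.

from numba import cuda

cuda.select_device(0)      # create a context on the first GPU
gpu = cuda.gpus.current    # the device attached to that context
# gpu.name is a bytestring in some Numba versions
name = gpu.name.decode() if isinstance(gpu.name, bytes) else gpu.name
print("Selected a %s GPU with id %s" % (name, gpu.id))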
github fbpic / fbpic / fbpic/openpmd_diag/boosted_field_diag.py (View on GitHub)
        # Positions where the fields are to be registered
        # (Change at every iteration)
        self.current_z_lab = 0
        self.current_z_boost = 0

        # Buffered field slice and corresponding array index in z
        self.buffered_slices = []
        self.buffer_z_indices = []

        # Allocate a buffer for only one slice (avoids having to
        # reallocate arrays when running on the GPU)
        data_shape = (10, 2*fld.Nm-1, fld.Nr)
        if fld.use_cuda is False:
            self.slice_array = np.empty( data_shape )
        else:
            self.slice_array = cuda.device_array( data_shape )
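The branch above is a recurring pattern in these examples: allocate one reusable buffer, either as a NumPy array or as a device array, depending on the use_cuda flag. A self-contained sketch of that pattern, with a helper name invented for illustration:

import numpy as np
from numba import cuda

def allocate_slice_buffer(Nm, Nr, use_cuda):
    """Allocate a (10, 2*Nm-1, Nr) slice buffer on the CPU or the GPU."""
    data_shape = (10, 2 * Nm - 1, Nr)
    if not use_cuda:
        return np.empty(data_shape)
    # Allocating once on the GPU avoids re-allocating at every iteration
    return cuda.device_array(data_shape)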
github fbpic / fbpic / fbpic/utils/printing.py (View on GitHub)
def print_gpu_meminfo(gpu):
    """
    Prints memory information about the GPU.

    Parameters :
    ------------
    gpu : object
        A numba cuda gpu context object.
    """
    with gpu:
        meminfo = cuda.current_context().get_memory_info()
        print("GPU: %s, free: %s Mbytes, total: %s Mbytes \
              " % (gpu, meminfo[0]*1e-6, meminfo[1]*1e-6))
github fbpic / fbpic / fbpic/boundaries/field_buffer_handling.py (View on GitHub)
            'rho:add'  : alloc_cpu( (      Nm, 2*ng, Nr), dtype=np.complex128)}
        self.recv_r = {
            'E:replace': alloc_cpu( (n_fld*Nm,   ng, Nr), dtype=np.complex128),
            'B:replace': alloc_cpu( (n_fld*Nm,   ng, Nr), dtype=np.complex128),
            'J:add'    : alloc_cpu( (    3*Nm, 2*ng, Nr), dtype=np.complex128),
            'rho:add'  : alloc_cpu( (      Nm, 2*ng, Nr), dtype=np.complex128)}

        # Allocate buffers on the GPU, for the different exchange types
        if cuda_installed:
            self.d_send_l = { key: cuda.to_device(value) for key, value in \
                                self.send_l.items() }
            self.d_send_r = { key: cuda.to_device(value) for key, value in \
                                self.send_r.items() }
            self.d_recv_l = { key: cuda.to_device(value) for key, value in \
                                self.recv_l.items() }
            self.d_recv_r = { key: cuda.to_device(value) for key, value in \
                                self.recv_r.items() }
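The dictionary comprehensions above simply mirror each host buffer with a GPU copy. A hedged, self-contained version of that transfer pattern, with buffer names and shapes made up for the example:

import numpy as np
from numba import cuda

# Host-side send buffers, keyed by exchange type (shapes are illustrative)
send_l = {
    'E:replace': np.zeros((6, 4, 32), dtype=np.complex128),
    'J:add':     np.zeros((3, 8, 32), dtype=np.complex128),
}

# Mirror every host buffer with a device buffer of the same shape and dtype
d_send_l = {key: cuda.to_device(value) for key, value in send_l.items()}

# After a GPU kernel fills d_send_l, copy the data back before the exchange
for key, d_buf in d_send_l.items():
    d_buf.copy_to_host(send_l[key])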
github fbpic / fbpic / fbpic/boundaries/particle_buffer_handling.py (View on GitHub)
                n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
        # Assign the stay_buffer to the initial particle data array
        # and fill the sending buffers (if needed for MPI)
        setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)

    # Build list of integer quantities to copy
    attr_list = []
    if species.tracker is not None:
        attr_list.append( (species.tracker,'id') )
    if species.ionizer is not None:
        attr_list.append( (species.ionizer,'ionization_level') )
    # Loop through the integer quantities
    for i_attr in range( len(attr_list) ):
        # Copy the proper buffers to the GPU
        left_buffer = cuda.to_device( uint_recv_left[i_attr] )
        right_buffer = cuda.to_device( uint_recv_right[i_attr] )
        # Initialize the new particle array
        particle_array = cuda.device_array( (new_Ntot,), dtype=np.uint64)
        # Merge the arrays on the GPU
        stay_buffer = getattr( attr_list[i_attr][0], attr_list[i_attr][1])
        if n_left != 0:
            copy_particles[n_left_grid, n_left_block](
                n_left, left_buffer, 0, particle_array, 0 )
        if old_Ntot != 0:
            copy_particles[n_old_grid, n_old_block](
                old_Ntot, stay_buffer, 0, particle_array, n_left )
        if n_right != 0:
            copy_particles[n_right_grid, n_right_block](
                n_right, right_buffer, 0, particle_array, n_left+old_Ntot )
        # Assign the stay_buffer to the initial particle data array
        # and fill the sending buffers (if needed for MPI)
        setattr(attr_list[i_attr][0], attr_list[i_attr][1], particle_array)
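copy_particles is an fbpic CUDA kernel. To illustrate the same merge pattern in isolation, the sketch below concatenates a left buffer, the existing particles and a right buffer into one new device array, using a small custom @cuda.jit kernel and an explicit launch configuration; all names and sizes here are invented for the example.

import numpy as np
from numba import cuda

@cuda.jit
def copy_with_offset(n, src, src_start, dest, dest_start):
    """Copy n elements of src (from src_start) into dest (at dest_start)."""
    i = cuda.grid(1)
    if i < n:
        dest[dest_start + i] = src[src_start + i]

def merge_on_gpu(left, stay, right):
    """Concatenate three device arrays into a new device array."""
    n_left, n_stay, n_right = left.shape[0], stay.shape[0], right.shape[0]
    merged = cuda.device_array((n_left + n_stay + n_right,), dtype=stay.dtype)
    tpb = 128
    for src, n, offset in [(left, n_left, 0),
                           (stay, n_stay, n_left),
                           (right, n_right, n_left + n_stay)]:
        if n != 0:
            bpg = (n + tpb - 1) // tpb
            copy_with_offset[bpg, tpb](n, src, 0, merged, offset)
    return merged

# Example usage with small uint64 buffers
left = cuda.to_device(np.arange(3, dtype=np.uint64))
stay = cuda.to_device(np.arange(5, dtype=np.uint64))
right = cuda.to_device(np.arange(2, dtype=np.uint64))
print(merge_on_gpu(left, stay, right).copy_to_host())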
github fbpic / fbpic / fbpic/boundaries/boundary_communicator.py (View on GitHub)
        # Initialize a buffer handler object, for MPI communications
        if self.size > 1:
            Nr_with_damp = self.get_Nr( with_damp=True )
            self.mpi_buffers = BufferHandler( self.n_guard, Nr_with_damp, Nm,
                               self.left_proc, self.right_proc, self.use_pml )

        # Create damping arrays for the damping cells at the left
        # and right of the box in the case of "open" boundaries.
        if (self.nz_damp+self.n_inject) > 0:
            if self.left_proc is None:
                # Create the damping arrays for left proc
                self.left_damp = self.generate_damp_array(
                    self.n_guard, self.nz_damp, self.n_inject )
                if cuda_installed:
                    self.d_left_damp = cuda.to_device( self.left_damp )
            if self.right_proc is None:
                # Create the damping arrays for right proc
                self.right_damp = self.generate_damp_array(
                    self.n_guard, self.nz_damp, self.n_inject )
                if cuda_installed:
                    self.d_right_damp = cuda.to_device( self.right_damp )

        # Create damping object for the PML
        self.use_pml = (boundaries['r'] == "open")
        if self.use_pml:
            self.pml_damper = PMLDamper( self.nr_damp, cdt_over_dr )
github fbpic / fbpic / fbpic/fields/spectral_transform/hankel.py (View on GitHub)
                np.asfortranarray( self.invM, dtype=np.float64 ) )

        # Initialize buffer arrays to store the complex Nz x Nr grid
        # as a real 2Nz x Nr grid, before performing the matrix product
        # (This is because a matrix product of reals is faster than a matrix
        # product of complexs, and the real-complex conversion is negligible.)
        if not self.use_cuda:
            # Initialize real buffer arrays on the CPU
            zero_array = np.zeros((2*Nz, Nr), dtype=np.float64 )
            self.array_in = zero_array.copy()
            self.array_out = zero_array.copy()
        else:
            # Initialize real buffer arrays on the GPU
            # The cuBlas API requires that these arrays be in Fortran order
            zero_array = np.zeros((2*Nz, Nr), dtype=np.float64, order='F')
            self.d_in = cuda.to_device( zero_array )
            self.d_out = cuda.to_device( zero_array )
            # Initialize a cuda stream (required by cublas)
            self.blas = cublas.Blas()
            # Initialize the threads per block and block per grid
            self.dim_grid, self.dim_block = cuda_tpb_bpg_2d(Nz, Nr)
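cuda_tpb_bpg_2d is the fbpic helper used above to pick a (blocks-per-grid, threads-per-block) pair for a 2D grid. The sketch below computes an equivalent launch configuration by hand and uses it on Fortran-ordered device buffers, as the cuBLAS-based matrix product above requires; the (8, 8) block size and the array shape are assumptions for the example.

import numpy as np
from numba import cuda

Nz, Nr = 256, 64

# Fortran-ordered real buffers, matching the cuBLAS layout requirement
zero_array = np.zeros((2 * Nz, Nr), dtype=np.float64, order='F')
d_in = cuda.to_device(zero_array)
d_out = cuda.to_device(zero_array)

# Hand-rolled equivalent of a 2D threads-per-block / blocks-per-grid helper
dim_block = (8, 8)
dim_grid = ((2 * Nz + dim_block[0] - 1) // dim_block[0],
            (Nr + dim_block[1] - 1) // dim_block[1])

@cuda.jit
def scale_2d(arr, factor):
    """Multiply every element of a 2D array by a scalar factor."""
    iz, ir = cuda.grid(2)
    if iz < arr.shape[0] and ir < arr.shape[1]:
        arr[iz, ir] *= factor

scale_2d[dim_grid, dim_block](d_out, 0.5)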