|
TECA
The Toolkit for Extreme Climate Analysis
|
Go to the documentation of this file.
1 #ifndef teca_cuda_util_h
2 #define teca_cuda_util_h
6 #include "teca_config.h"
14 #include <cuda_runtime.h>
40 std::vector<int> &local_dev);
78 return threadIdx.x + blockDim.x*(blockIdx.x + blockIdx.y * gridDim.x
79 + blockIdx.z * gridDim.x * gridDim.y);
92 i = threadIdx.x + blockDim.x * blockIdx.x;
95 k0 = stride * blockIdx.y;
103 return index < max_index;
117 int *block_grid_max,
int &warp_size,
118 int &max_warps_per_block);
138 int warps_per_block, dim3 &block_grid,
int &n_blocks,
161 int warps_per_block,
int warp_size,
int *block_grid_max,
162 dim3 &block_grid,
int &n_blocks, dim3 &thread_grid);
187 size_t stride,
int warps_per_block, dim3 &block_grid,
int &n_blocks_xy,
188 int &n_blocks_z, dim3 &thread_grid);
215 int warps_per_block,
int warp_size,
int *block_grid_max, dim3 &block_grid,
216 int &n_blocks_xy,
int &n_blocks_z, dim3 &thread_grid);
A collection of utility classes and functions for integrating with CUDA.
Definition: teca_cuda_util.h:18
TECA_EXPORT int set_device(int device_id)
Set the CUDA device. Returns non-zero on error.
TECA_EXPORT int get_local_cuda_devices(MPI_Comm comm, int &ranks_per_device, std::vector< int > &local_dev)
TECA_EXPORT int partition_thread_blocks_slab(int device_id, size_t nxy, size_t nz, size_t stride, int warps_per_block, dim3 &block_grid, int &n_blocks_xy, int &n_blocks_z, dim3 &thread_grid)
TECA_EXPORT int partition_thread_blocks(int device_id, size_t array_size, int warps_per_block, dim3 &block_grid, int &n_blocks, dim3 &thread_grid)
__device__ void thread_id_to_array_index_slab(unsigned long &i, unsigned long &k0, unsigned long stride)
Definition: teca_cuda_util.h:88
TECA_EXPORT int synchronize()
Stop and wait for previously launched kernels to complete.
__device__ unsigned long thread_id_to_array_index()
Definition: teca_cuda_util.h:76
TECA_EXPORT int get_launch_props(int device_id, int *block_grid_max, int &warp_size, int &max_warps_per_block)
__device__ int index_is_valid(unsigned long index, unsigned long max_index)
Bounds-check the flat index: the result is non-zero when index is less than max_index.
Definition: teca_cuda_util.h:101
p_teca_error_handler error_handler TECA_EXPORT
The global error handler instance.