![]() |
Realm
A distributed, event-based tasking library
|
#include <cuda_internal.h>
Classes | |
| struct | CudaIpcMapping |
| struct | GPUFuncInfo |
| struct | GPUReductionOpEntry |
Public Member Functions | |
| GPU (CudaModule *_module, GPUInfo *_info, GPUWorker *worker, CUcontext _context) | |
| ~GPU (void) | |
| void | push_context (void) |
| void | pop_context (void) |
| GPUAllocation & | add_allocation (GPUAllocation &&alloc) |
| void | create_processor (RuntimeImpl *runtime, size_t stack_size) |
| void | create_fb_memory (RuntimeImpl *runtime, size_t size, size_t ib_size) |
| void | create_dynamic_fb_memory (RuntimeImpl *runtime, size_t max_size) |
| void | create_dma_channels (Realm::RuntimeImpl *r) |
| bool | can_access_peer (const GPU *peer) const |
| GPUStream * | find_stream (CUstream stream) const |
| REALM_INTERNAL_API_EXTERNAL_LINKAGE GPUStream * | get_null_task_stream (void) const |
| GPUStream * | get_next_task_stream (bool create=false) |
| GPUStream * | get_next_d2d_stream () |
| void | launch_batch_affine_fill_kernel (void *fill_info, size_t dim, size_t elemSize, size_t volume, GPUStream *stream) |
| void | launch_batch_affine_kernel (void *copy_info, size_t dim, size_t elemSize, size_t volume, bool multified_optimized, GPUStream *stream) |
| void | launch_transpose_kernel (MemcpyTransposeInfo< size_t > &copy_info, size_t elemSize, GPUStream *stream) |
| void | launch_indirect_copy_kernel (void *copy_info, size_t dim, size_t addr_size, size_t field_size, size_t volume, GPUStream *stream) |
| bool | is_accessible_host_mem (const MemoryImpl *mem) const |
| bool | is_accessible_gpu_mem (const MemoryImpl *mem) const |
| bool | register_reduction (ReductionOpID redop_id, CUfunction apply_excl, CUfunction apply_nonexcl, CUfunction fold_excl, CUfunction fold_nonexcl, CUfunction apply_excl_advanced, CUfunction apply_nonexcl_advanced, CUfunction fold_excl_advanced, CUfunction fold_nonexcl_advanced, CUfunction apply_excl_transpose, CUfunction apply_nonexcl_transpose, CUfunction fold_excl_transpose, CUfunction fold_nonexcl_transpose) |
| const CudaIpcMapping * | find_ipc_mapping (Memory mem) const |
Static Public Attributes | |
| static const size_t | CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES = 5 |
Protected Member Functions | |
| CUmodule | load_cuda_module (const void *data) |
| Realm::Cuda::GPU::GPU | ( | CudaModule * | _module, |
| GPUInfo * | _info, | ||
| GPUWorker * | worker, | ||
| CUcontext | _context | ||
| ) |
| Realm::Cuda::GPU::~GPU | ( | void | ) |
| GPUAllocation & Realm::Cuda::GPU::add_allocation | ( | GPUAllocation && | alloc | ) |
| bool Realm::Cuda::GPU::can_access_peer | ( | const GPU * | peer | ) | const |
| void Realm::Cuda::GPU::create_dma_channels | ( | Realm::RuntimeImpl * | r | ) |
| void Realm::Cuda::GPU::create_dynamic_fb_memory | ( | RuntimeImpl * | runtime, |
| size_t | max_size | ||
| ) |
| void Realm::Cuda::GPU::create_fb_memory | ( | RuntimeImpl * | runtime, |
| size_t | size, | ||
| size_t | ib_size | ||
| ) |
| void Realm::Cuda::GPU::create_processor | ( | RuntimeImpl * | runtime, |
| size_t | stack_size | ||
| ) |
| const CudaIpcMapping * Realm::Cuda::GPU::find_ipc_mapping | ( | Memory | mem | ) | const |
| GPUStream * Realm::Cuda::GPU::find_stream | ( | CUstream | stream | ) | const |
| GPUStream * Realm::Cuda::GPU::get_next_d2d_stream | ( | ) |
| GPUStream * Realm::Cuda::GPU::get_next_task_stream | ( | bool | create = false | ) |
| REALM_INTERNAL_API_EXTERNAL_LINKAGE GPUStream * Realm::Cuda::GPU::get_null_task_stream | ( | void | ) | const |
| bool Realm::Cuda::GPU::is_accessible_gpu_mem | ( | const MemoryImpl * | mem | ) | const |
| bool Realm::Cuda::GPU::is_accessible_host_mem | ( | const MemoryImpl * | mem | ) | const |
| void Realm::Cuda::GPU::launch_batch_affine_fill_kernel | ( | void * | fill_info, |
| size_t | dim, | ||
| size_t | elemSize, | ||
| size_t | volume, | ||
| GPUStream * | stream | ||
| ) |
| void Realm::Cuda::GPU::launch_batch_affine_kernel | ( | void * | copy_info, |
| size_t | dim, | ||
| size_t | elemSize, | ||
| size_t | volume, | ||
| bool | multified_optimized, | ||
| GPUStream * | stream | ||
| ) |
| void Realm::Cuda::GPU::launch_indirect_copy_kernel | ( | void * | copy_info, |
| size_t | dim, | ||
| size_t | addr_size, | ||
| size_t | field_size, | ||
| size_t | volume, | ||
| GPUStream * | stream | ||
| ) |
| void Realm::Cuda::GPU::launch_transpose_kernel | ( | MemcpyTransposeInfo< size_t > & | copy_info, |
| size_t | elemSize, | ||
| GPUStream * | stream | ||
| ) |
|
protected |
| void Realm::Cuda::GPU::pop_context | ( | void | ) |
| void Realm::Cuda::GPU::push_context | ( | void | ) |
| bool Realm::Cuda::GPU::register_reduction | ( | ReductionOpID | redop_id, |
| CUfunction | apply_excl, | ||
| CUfunction | apply_nonexcl, | ||
| CUfunction | fold_excl, | ||
| CUfunction | fold_nonexcl, | ||
| CUfunction | apply_excl_advanced, | ||
| CUfunction | apply_nonexcl_advanced, | ||
| CUfunction | fold_excl_advanced, | ||
| CUfunction | fold_nonexcl_advanced, | ||
| CUfunction | apply_excl_transpose, | ||
| CUfunction | apply_nonexcl_transpose, | ||
| CUfunction | fold_excl_transpose, | ||
| CUfunction | fold_nonexcl_transpose | ||
| ) |
| Mutex Realm::Cuda::GPU::alloc_mutex |
| std::map<CUdeviceptr, GPUAllocation> Realm::Cuda::GPU::allocations |
| GPUFuncInfo Realm::Cuda::GPU::batch_affine_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES] |
| GPUFuncInfo Realm::Cuda::GPU::batch_fill_affine_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES] |
| CUcontext Realm::Cuda::GPU::context = nullptr |
| ContextSynchronizer Realm::Cuda::GPU::ctxsync |
|
static |
| std::vector<CudaIpcMapping> Realm::Cuda::GPU::cudaipc_mappings |
| size_t Realm::Cuda::GPU::cupti_activity_refcount = 0 |
| CUmodule Realm::Cuda::GPU::device_module = nullptr |
| GPUStream* Realm::Cuda::GPU::device_to_device_stream = nullptr |
| std::vector<GPUStream *> Realm::Cuda::GPU::device_to_device_streams |
| GPUStream* Realm::Cuda::GPU::device_to_host_stream = nullptr |
| GPUEventPool Realm::Cuda::GPU::event_pool |
| GPUDynamicFBMemory* Realm::Cuda::GPU::fb_dmem = nullptr |
| GPUFBIBMemory* Realm::Cuda::GPU::fb_ibmem = nullptr |
| CUdeviceptr Realm::Cuda::GPU::fb_ibmem_base = 0 |
| GPUFBMemory* Realm::Cuda::GPU::fbmem = nullptr |
| CUdeviceptr Realm::Cuda::GPU::fbmem_base = 0 |
| GPUFuncInfo Realm::Cuda::GPU::fill_affine_large_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES] |
| std::unordered_map<ReductionOpID, GPUReductionOpEntry> Realm::Cuda::GPU::gpu_reduction_table |
| int Realm::Cuda::GPU::greatest_stream_priority |
| GPUStream* Realm::Cuda::GPU::host_to_device_stream = nullptr |
| GPUFuncInfo Realm::Cuda::GPU::indirect_copy_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES] |
| GPUInfo* Realm::Cuda::GPU::info = nullptr |
| int Realm::Cuda::GPU::least_stream_priority |
| std::set<Memory> Realm::Cuda::GPU::managed_mems |
| GPUFuncInfo Realm::Cuda::GPU::multi_batch_affine_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES] |
| std::set<Memory> Realm::Cuda::GPU::peer_fbs |
| std::vector<GPUStream *> Realm::Cuda::GPU::peer_to_peer_streams |
| std::set<Memory> Realm::Cuda::GPU::pinned_sysmems |
| GPUProcessor* Realm::Cuda::GPU::proc = nullptr |
| std::vector<GPUStream *> Realm::Cuda::GPU::task_streams |
| GPUFuncInfo Realm::Cuda::GPU::transpose_kernels[CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES] |
| GPUWorker* Realm::Cuda::GPU::worker = nullptr |