#include <cuda_internal.h>
|
| | GPU (CudaModule *_module, GPUInfo *_info, GPUWorker *worker, CUcontext _context) |
| |
| | ~GPU (void) |
| |
| void | push_context (void) |
| |
| void | pop_context (void) |
| |
| GPUAllocation & | add_allocation (GPUAllocation &&alloc) |
| |
| void | create_processor (RuntimeImpl *runtime, size_t stack_size) |
| |
| void | create_fb_memory (RuntimeImpl *runtime, size_t size, size_t ib_size) |
| |
| void | create_dynamic_fb_memory (RuntimeImpl *runtime, size_t max_size) |
| |
| void | create_dma_channels (Realm::RuntimeImpl *r) |
| |
| bool | can_access_peer (const GPU *peer) const |
| |
| GPUStream * | find_stream (CUstream stream) const |
| |
| REALM_INTERNAL_API_EXTERNAL_LINKAGE GPUStream * | get_null_task_stream (void) const |
| |
| GPUStream * | get_next_task_stream (bool create=false) |
| |
| GPUStream * | get_next_d2d_stream () |
| |
| void | launch_batch_affine_fill_kernel (void *fill_info, size_t dim, size_t elemSize, size_t volume, GPUStream *stream) |
| |
| void | launch_batch_affine_kernel (void *copy_info, size_t dim, size_t elemSize, size_t volume, GPUStream *stream) |
| |
| void | launch_transpose_kernel (MemcpyTransposeInfo< size_t > &copy_info, size_t elemSize, GPUStream *stream) |
| |
| void | launch_indirect_copy_kernel (void *copy_info, size_t dim, size_t addr_size, size_t field_size, size_t volume, GPUStream *stream) |
| |
| bool | is_accessible_host_mem (const MemoryImpl *mem) const |
| |
| bool | is_accessible_gpu_mem (const MemoryImpl *mem) const |
| |
| bool | register_reduction (ReductionOpID redop_id, CUfunction apply_excl, CUfunction apply_nonexcl, CUfunction fold_excl, CUfunction fold_nonexcl) |
| |
| const CudaIpcMapping * | find_ipc_mapping (Memory mem) const |
| |
◆ GPU()
◆ ~GPU()
Realm::Cuda::GPU::~GPU ( void )
◆ add_allocation()
◆ can_access_peer()
bool Realm::Cuda::GPU::can_access_peer ( const GPU * peer ) const
◆ create_dma_channels()
◆ create_dynamic_fb_memory()
void Realm::Cuda::GPU::create_dynamic_fb_memory ( RuntimeImpl * runtime, size_t max_size )
◆ create_fb_memory()
void Realm::Cuda::GPU::create_fb_memory ( RuntimeImpl * runtime, size_t size, size_t ib_size )
◆ create_processor()
void Realm::Cuda::GPU::create_processor ( RuntimeImpl * runtime, size_t stack_size )
◆ find_ipc_mapping()
◆ find_stream()
GPUStream * Realm::Cuda::GPU::find_stream ( CUstream stream ) const
◆ get_next_d2d_stream()
GPUStream * Realm::Cuda::GPU::get_next_d2d_stream ( )
◆ get_next_task_stream()
GPUStream * Realm::Cuda::GPU::get_next_task_stream ( bool create = false )
◆ get_null_task_stream()
◆ is_accessible_gpu_mem()
bool Realm::Cuda::GPU::is_accessible_gpu_mem ( const MemoryImpl * mem ) const
◆ is_accessible_host_mem()
bool Realm::Cuda::GPU::is_accessible_host_mem ( const MemoryImpl * mem ) const
◆ launch_batch_affine_fill_kernel()
void Realm::Cuda::GPU::launch_batch_affine_fill_kernel ( void * fill_info, size_t dim, size_t elemSize, size_t volume, GPUStream * stream )
◆ launch_batch_affine_kernel()
void Realm::Cuda::GPU::launch_batch_affine_kernel ( void * copy_info, size_t dim, size_t elemSize, size_t volume, GPUStream * stream )
◆ launch_indirect_copy_kernel()
void Realm::Cuda::GPU::launch_indirect_copy_kernel ( void * copy_info, size_t dim, size_t addr_size, size_t field_size, size_t volume, GPUStream * stream )
◆ launch_transpose_kernel()
◆ load_cuda_module()
CUmodule Realm::Cuda::GPU::load_cuda_module ( const void * data ) [protected]
◆ pop_context()
void Realm::Cuda::GPU::pop_context ( void )
◆ push_context()
void Realm::Cuda::GPU::push_context ( void )
◆ register_reduction()
bool Realm::Cuda::GPU::register_reduction ( ReductionOpID redop_id, CUfunction apply_excl, CUfunction apply_nonexcl, CUfunction fold_excl, CUfunction fold_nonexcl )
◆ alloc_mutex
| Mutex Realm::Cuda::GPU::alloc_mutex |
◆ allocations
| std::map<CUdeviceptr, GPUAllocation> Realm::Cuda::GPU::allocations |
◆ batch_affine_kernels
◆ batch_fill_affine_kernels
◆ context
| CUcontext Realm::Cuda::GPU::context = nullptr |
◆ ctxsync
◆ CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES
| const size_t Realm::Cuda::GPU::CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES = 5 |
|
static |
◆ cudaipc_mappings
◆ cudaipc_streams
◆ cupti_activity_refcount
| size_t Realm::Cuda::GPU::cupti_activity_refcount = 0 |
◆ device_module
| CUmodule Realm::Cuda::GPU::device_module = nullptr |
◆ device_to_device_stream
| GPUStream* Realm::Cuda::GPU::device_to_device_stream = nullptr |
◆ device_to_device_streams
| std::vector<GPUStream *> Realm::Cuda::GPU::device_to_device_streams |
◆ device_to_host_stream
| GPUStream* Realm::Cuda::GPU::device_to_host_stream = nullptr |
◆ event_pool
◆ fb_dmem
◆ fb_ibmem
◆ fb_ibmem_base
| CUdeviceptr Realm::Cuda::GPU::fb_ibmem_base = 0 |
◆ fbmem
◆ fbmem_base
| CUdeviceptr Realm::Cuda::GPU::fbmem_base = 0 |
◆ fill_affine_large_kernels
◆ gpu_reduction_table
◆ greatest_stream_priority
| int Realm::Cuda::GPU::greatest_stream_priority |
◆ host_to_device_stream
| GPUStream* Realm::Cuda::GPU::host_to_device_stream = nullptr |
◆ indirect_copy_kernels
◆ info
| GPUInfo* Realm::Cuda::GPU::info = nullptr |
◆ least_stream_priority
| int Realm::Cuda::GPU::least_stream_priority |
◆ managed_mems
| std::set<Memory> Realm::Cuda::GPU::managed_mems |
◆ next_d2d_stream
| atomic<unsigned> Realm::Cuda::GPU::next_d2d_stream = atomic<unsigned>(0) |
◆ next_task_stream
| atomic<unsigned> Realm::Cuda::GPU::next_task_stream = atomic<unsigned>(0) |
◆ peer_fbs
| std::set<Memory> Realm::Cuda::GPU::peer_fbs |
◆ peer_to_peer_streams
| std::vector<GPUStream *> Realm::Cuda::GPU::peer_to_peer_streams |
◆ pinned_sysmems
| std::set<Memory> Realm::Cuda::GPU::pinned_sysmems |
◆ proc
◆ task_streams
| std::vector<GPUStream *> Realm::Cuda::GPU::task_streams |
◆ transpose_kernels
◆ worker
| GPUWorker* Realm::Cuda::GPU::worker = nullptr |
The documentation for this class was generated from the following file: