Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
Realm::Cuda::GPU Class Reference
CudaModule * module = nullptr

#include <cuda_internal.h>

Collaboration diagram for Realm::Cuda::GPU:

Classes

struct  CudaIpcMapping
 
struct  GPUFuncInfo
 
struct  GPUReductionOpEntry
 

Public Member Functions

 GPU (CudaModule *_module, GPUInfo *_info, GPUWorker *worker, CUcontext _context)
 
 ~GPU (void)
 
void push_context (void)
 
void pop_context (void)
 
GPUAllocation & add_allocation (GPUAllocation &&alloc)
 
void create_processor (RuntimeImpl *runtime, size_t stack_size)
 
void create_fb_memory (RuntimeImpl *runtime, size_t size, size_t ib_size)
 
void create_dynamic_fb_memory (RuntimeImpl *runtime, size_t max_size)
 
void create_dma_channels (Realm::RuntimeImpl *r)
 
bool can_access_peer (const GPU *peer) const
 
GPUStream * find_stream (CUstream stream) const
 
REALM_INTERNAL_API_EXTERNAL_LINKAGE GPUStream * get_null_task_stream (void) const
 
GPUStream * get_next_task_stream (bool create=false)
 
GPUStream * get_next_d2d_stream ()
 
void launch_batch_affine_fill_kernel (void *fill_info, size_t dim, size_t elemSize, size_t volume, GPUStream *stream)
 
void launch_batch_affine_kernel (void *copy_info, size_t dim, size_t elemSize, size_t volume, GPUStream *stream)
 
void launch_transpose_kernel (MemcpyTransposeInfo< size_t > &copy_info, size_t elemSize, GPUStream *stream)
 
void launch_indirect_copy_kernel (void *copy_info, size_t dim, size_t addr_size, size_t field_size, size_t volume, GPUStream *stream)
 
bool is_accessible_host_mem (const MemoryImpl *mem) const
 
bool is_accessible_gpu_mem (const MemoryImpl *mem) const
 
bool register_reduction (ReductionOpID redop_id, CUfunction apply_excl, CUfunction apply_nonexcl, CUfunction fold_excl, CUfunction fold_nonexcl)
 
const CudaIpcMapping * find_ipc_mapping (Memory mem) const
 

Public Attributes

ContextSynchronizer ctxsync
 
GPUInfo * info = nullptr
 
GPUWorker * worker = nullptr
 
GPUProcessor * proc = nullptr
 
std::map< CUdeviceptr, GPUAllocation > allocations
 
GPUFBMemory * fbmem = nullptr
 
GPUDynamicFBMemory * fb_dmem = nullptr
 
GPUFBIBMemory * fb_ibmem = nullptr
 
CUcontext context = nullptr
 
CUmodule device_module = nullptr
 
GPUFuncInfo indirect_copy_kernels [REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
GPUFuncInfo batch_affine_kernels [REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
GPUFuncInfo batch_fill_affine_kernels [REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
GPUFuncInfo fill_affine_large_kernels [REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
GPUFuncInfo transpose_kernels [CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
CUdeviceptr fbmem_base = 0
 
CUdeviceptr fb_ibmem_base = 0
 
std::set< Memory > pinned_sysmems
 
std::set< Memory > managed_mems
 
std::set< Memory > peer_fbs
 
GPUStream * host_to_device_stream = nullptr
 
GPUStream * device_to_host_stream = nullptr
 
GPUStream * device_to_device_stream = nullptr
 
std::vector< GPUStream * > device_to_device_streams
 
std::vector< GPUStream * > peer_to_peer_streams
 
std::vector< GPUStream * > task_streams
 
atomic< unsigned > next_task_stream = atomic<unsigned>(0)
 
atomic< unsigned > next_d2d_stream = atomic<unsigned>(0)
 
size_t cupti_activity_refcount = 0
 
GPUEventPool event_pool
 
int least_stream_priority
 
int greatest_stream_priority
 
std::vector< CudaIpcMapping > cudaipc_mappings
 
std::map< NodeID, GPUStream * > cudaipc_streams
 
Mutex alloc_mutex
 
std::unordered_map< ReductionOpID, GPUReductionOpEntry > gpu_reduction_table
 

Static Public Attributes

static const size_t CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES = 5
 

Protected Member Functions

CUmodule load_cuda_module (const void *data)
 

Constructor & Destructor Documentation

◆ GPU()

Realm::Cuda::GPU::GPU ( CudaModule *  _module,
GPUInfo *  _info,
GPUWorker *  worker,
CUcontext  _context 
)

◆ ~GPU()

Realm::Cuda::GPU::~GPU ( void  )

Member Function Documentation

◆ add_allocation()

GPUAllocation & Realm::Cuda::GPU::add_allocation ( GPUAllocation &&  alloc)

◆ can_access_peer()

bool Realm::Cuda::GPU::can_access_peer ( const GPU *  peer) const

◆ create_dma_channels()

void Realm::Cuda::GPU::create_dma_channels ( Realm::RuntimeImpl *  r)

◆ create_dynamic_fb_memory()

void Realm::Cuda::GPU::create_dynamic_fb_memory ( RuntimeImpl *  runtime,
size_t  max_size 
)

◆ create_fb_memory()

void Realm::Cuda::GPU::create_fb_memory ( RuntimeImpl *  runtime,
size_t  size,
size_t  ib_size 
)

◆ create_processor()

void Realm::Cuda::GPU::create_processor ( RuntimeImpl *  runtime,
size_t  stack_size 
)

◆ find_ipc_mapping()

const CudaIpcMapping * Realm::Cuda::GPU::find_ipc_mapping ( Memory  mem) const

◆ find_stream()

GPUStream * Realm::Cuda::GPU::find_stream ( CUstream  stream) const

◆ get_next_d2d_stream()

GPUStream * Realm::Cuda::GPU::get_next_d2d_stream ( )

◆ get_next_task_stream()

GPUStream * Realm::Cuda::GPU::get_next_task_stream ( bool  create = false)

◆ get_null_task_stream()

REALM_INTERNAL_API_EXTERNAL_LINKAGE GPUStream * Realm::Cuda::GPU::get_null_task_stream ( void  ) const

◆ is_accessible_gpu_mem()

bool Realm::Cuda::GPU::is_accessible_gpu_mem ( const MemoryImpl *  mem) const

◆ is_accessible_host_mem()

bool Realm::Cuda::GPU::is_accessible_host_mem ( const MemoryImpl *  mem) const

◆ launch_batch_affine_fill_kernel()

void Realm::Cuda::GPU::launch_batch_affine_fill_kernel ( void *  fill_info,
size_t  dim,
size_t  elemSize,
size_t  volume,
GPUStream *  stream 
)

◆ launch_batch_affine_kernel()

void Realm::Cuda::GPU::launch_batch_affine_kernel ( void *  copy_info,
size_t  dim,
size_t  elemSize,
size_t  volume,
GPUStream *  stream 
)

◆ launch_indirect_copy_kernel()

void Realm::Cuda::GPU::launch_indirect_copy_kernel ( void *  copy_info,
size_t  dim,
size_t  addr_size,
size_t  field_size,
size_t  volume,
GPUStream *  stream 
)

◆ launch_transpose_kernel()

void Realm::Cuda::GPU::launch_transpose_kernel ( MemcpyTransposeInfo< size_t > &  copy_info,
size_t  elemSize,
GPUStream *  stream 
)

◆ load_cuda_module()

CUmodule Realm::Cuda::GPU::load_cuda_module ( const void *  data)
protected

◆ pop_context()

void Realm::Cuda::GPU::pop_context ( void  )

◆ push_context()

void Realm::Cuda::GPU::push_context ( void  )

◆ register_reduction()

bool Realm::Cuda::GPU::register_reduction ( ReductionOpID  redop_id,
CUfunction  apply_excl,
CUfunction  apply_nonexcl,
CUfunction  fold_excl,
CUfunction  fold_nonexcl 
)

Member Data Documentation

◆ alloc_mutex

Mutex Realm::Cuda::GPU::alloc_mutex

◆ allocations

std::map<CUdeviceptr, GPUAllocation> Realm::Cuda::GPU::allocations

◆ batch_affine_kernels

GPUFuncInfo Realm::Cuda::GPU::batch_affine_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ batch_fill_affine_kernels

GPUFuncInfo Realm::Cuda::GPU::batch_fill_affine_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ context

CUcontext Realm::Cuda::GPU::context = nullptr

◆ ctxsync

ContextSynchronizer Realm::Cuda::GPU::ctxsync

◆ CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES

const size_t Realm::Cuda::GPU::CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES = 5
static

◆ cudaipc_mappings

std::vector<CudaIpcMapping> Realm::Cuda::GPU::cudaipc_mappings

◆ cudaipc_streams

std::map<NodeID, GPUStream *> Realm::Cuda::GPU::cudaipc_streams

◆ cupti_activity_refcount

size_t Realm::Cuda::GPU::cupti_activity_refcount = 0

◆ device_module

CUmodule Realm::Cuda::GPU::device_module = nullptr

◆ device_to_device_stream

GPUStream* Realm::Cuda::GPU::device_to_device_stream = nullptr

◆ device_to_device_streams

std::vector<GPUStream *> Realm::Cuda::GPU::device_to_device_streams

◆ device_to_host_stream

GPUStream* Realm::Cuda::GPU::device_to_host_stream = nullptr

◆ event_pool

GPUEventPool Realm::Cuda::GPU::event_pool

◆ fb_dmem

GPUDynamicFBMemory* Realm::Cuda::GPU::fb_dmem = nullptr

◆ fb_ibmem

GPUFBIBMemory* Realm::Cuda::GPU::fb_ibmem = nullptr

◆ fb_ibmem_base

CUdeviceptr Realm::Cuda::GPU::fb_ibmem_base = 0

◆ fbmem

GPUFBMemory* Realm::Cuda::GPU::fbmem = nullptr

◆ fbmem_base

CUdeviceptr Realm::Cuda::GPU::fbmem_base = 0

◆ fill_affine_large_kernels

GPUFuncInfo Realm::Cuda::GPU::fill_affine_large_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ gpu_reduction_table

std::unordered_map<ReductionOpID, GPUReductionOpEntry> Realm::Cuda::GPU::gpu_reduction_table

◆ greatest_stream_priority

int Realm::Cuda::GPU::greatest_stream_priority

◆ host_to_device_stream

GPUStream* Realm::Cuda::GPU::host_to_device_stream = nullptr

◆ indirect_copy_kernels

GPUFuncInfo Realm::Cuda::GPU::indirect_copy_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ info

GPUInfo* Realm::Cuda::GPU::info = nullptr

◆ least_stream_priority

int Realm::Cuda::GPU::least_stream_priority

◆ managed_mems

std::set<Memory> Realm::Cuda::GPU::managed_mems

◆ next_d2d_stream

atomic<unsigned> Realm::Cuda::GPU::next_d2d_stream = atomic<unsigned>(0)

◆ next_task_stream

atomic<unsigned> Realm::Cuda::GPU::next_task_stream = atomic<unsigned>(0)

◆ peer_fbs

std::set<Memory> Realm::Cuda::GPU::peer_fbs

◆ peer_to_peer_streams

std::vector<GPUStream *> Realm::Cuda::GPU::peer_to_peer_streams

◆ pinned_sysmems

std::set<Memory> Realm::Cuda::GPU::pinned_sysmems

◆ proc

GPUProcessor* Realm::Cuda::GPU::proc = nullptr

◆ task_streams

std::vector<GPUStream *> Realm::Cuda::GPU::task_streams

◆ transpose_kernels

GPUFuncInfo Realm::Cuda::GPU::transpose_kernels[CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ worker

GPUWorker* Realm::Cuda::GPU::worker = nullptr

The documentation for this class was generated from the following file: