Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
Realm::Cuda::GPU Class Reference
CudaModule * module = nullptr

#include <cuda_internal.h>

Collaboration diagram for Realm::Cuda::GPU:

Classes

struct  CudaIpcMapping
 
struct  GPUFuncInfo
 
struct  GPUReductionOpEntry
 

Public Member Functions

 GPU (CudaModule *_module, GPUInfo *_info, GPUWorker *worker, CUcontext _context)
 
 ~GPU (void)
 
void push_context (void)
 
void pop_context (void)
 
GPUAllocation & add_allocation (GPUAllocation &&alloc)
 
void create_processor (RuntimeImpl *runtime, size_t stack_size)
 
void create_fb_memory (RuntimeImpl *runtime, size_t size, size_t ib_size)
 
void create_dynamic_fb_memory (RuntimeImpl *runtime, size_t max_size)
 
void create_dma_channels (Realm::RuntimeImpl *r)
 
bool can_access_peer (const GPU *peer) const
 
GPUStream * find_stream (CUstream stream) const
 
REALM_INTERNAL_API_EXTERNAL_LINKAGE GPUStream * get_null_task_stream (void) const
 
GPUStream * get_next_task_stream (bool create=false)
 
GPUStream * get_next_d2d_stream ()
 
void launch_batch_affine_fill_kernel (void *fill_info, size_t dim, size_t elemSize, size_t volume, GPUStream *stream)
 
void launch_batch_affine_kernel (void *copy_info, size_t dim, size_t elemSize, size_t volume, GPUStream *stream)
 
void launch_transpose_kernel (MemcpyTransposeInfo< size_t > &copy_info, size_t elemSize, GPUStream *stream)
 
void launch_indirect_copy_kernel (void *copy_info, size_t dim, size_t addr_size, size_t field_size, size_t volume, GPUStream *stream)
 
bool is_accessible_host_mem (const MemoryImpl *mem) const
 
bool is_accessible_gpu_mem (const MemoryImpl *mem) const
 
bool register_reduction (ReductionOpID redop_id, CUfunction apply_excl, CUfunction apply_nonexcl, CUfunction fold_excl, CUfunction fold_nonexcl)
 
const CudaIpcMapping * find_ipc_mapping (Memory mem) const
 

Public Attributes

ContextSynchronizer ctxsync
 
GPUInfo * info = nullptr
 
GPUWorker * worker = nullptr
 
GPUProcessor * proc = nullptr
 
std::map< CUdeviceptr, GPUAllocation > allocations
 
GPUFBMemory * fbmem = nullptr
 
GPUDynamicFBMemory * fb_dmem = nullptr
 
GPUFBIBMemory * fb_ibmem = nullptr
 
CUcontext context = nullptr
 
CUmodule device_module = nullptr
 
GPUFuncInfo indirect_copy_kernels [REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
GPUFuncInfo batch_affine_kernels [REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
GPUFuncInfo batch_fill_affine_kernels [REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
GPUFuncInfo fill_affine_large_kernels [REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
GPUFuncInfo transpose_kernels [CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]
 
CUdeviceptr fbmem_base = 0
 
CUdeviceptr fb_ibmem_base = 0
 
std::set< Memory > pinned_sysmems
 
std::set< Memory > managed_mems
 
std::set< Memory > peer_fbs
 
GPUStream * host_to_device_stream = nullptr
 
GPUStream * device_to_host_stream = nullptr
 
GPUStream * device_to_device_stream = nullptr
 
std::vector< GPUStream * > device_to_device_streams
 
std::vector< GPUStream * > peer_to_peer_streams
 
std::vector< GPUStream * > task_streams
 
atomic< unsigned > next_task_stream = atomic<unsigned>(0)
 
atomic< unsigned > next_d2d_stream = atomic<unsigned>(0)
 
size_t cupti_activity_refcount = 0
 
GPUEventPool event_pool
 
int least_stream_priority
 
int greatest_stream_priority
 
std::vector< CudaIpcMapping > cudaipc_mappings
 
std::map< NodeID, GPUStream * > cudaipc_streams
 
Mutex alloc_mutex
 
std::unordered_map< ReductionOpID, GPUReductionOpEntry > gpu_reduction_table
 

Static Public Attributes

static const size_t CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES = 5
 

Protected Member Functions

CUmodule load_cuda_module (const void *data)
 

Constructor & Destructor Documentation

◆ GPU()

Realm::Cuda::GPU::GPU ( CudaModule *  _module,
GPUInfo *  _info,
GPUWorker *  worker,
CUcontext  _context 
)

◆ ~GPU()

Realm::Cuda::GPU::~GPU ( void  )

Member Function Documentation

◆ add_allocation()

GPUAllocation & Realm::Cuda::GPU::add_allocation ( GPUAllocation &&  alloc)

◆ can_access_peer()

bool Realm::Cuda::GPU::can_access_peer ( const GPU *  peer) const

◆ create_dma_channels()

void Realm::Cuda::GPU::create_dma_channels ( Realm::RuntimeImpl *  r)

◆ create_dynamic_fb_memory()

void Realm::Cuda::GPU::create_dynamic_fb_memory ( RuntimeImpl *  runtime,
size_t  max_size 
)

◆ create_fb_memory()

void Realm::Cuda::GPU::create_fb_memory ( RuntimeImpl *  runtime,
size_t  size,
size_t  ib_size 
)

◆ create_processor()

void Realm::Cuda::GPU::create_processor ( RuntimeImpl *  runtime,
size_t  stack_size 
)

◆ find_ipc_mapping()

const CudaIpcMapping * Realm::Cuda::GPU::find_ipc_mapping ( Memory  mem) const

◆ find_stream()

GPUStream * Realm::Cuda::GPU::find_stream ( CUstream  stream) const

◆ get_next_d2d_stream()

GPUStream * Realm::Cuda::GPU::get_next_d2d_stream ( )

◆ get_next_task_stream()

GPUStream * Realm::Cuda::GPU::get_next_task_stream ( bool  create = false)

◆ get_null_task_stream()

REALM_INTERNAL_API_EXTERNAL_LINKAGE GPUStream * Realm::Cuda::GPU::get_null_task_stream ( void  ) const

◆ is_accessible_gpu_mem()

bool Realm::Cuda::GPU::is_accessible_gpu_mem ( const MemoryImpl *  mem) const

◆ is_accessible_host_mem()

bool Realm::Cuda::GPU::is_accessible_host_mem ( const MemoryImpl *  mem) const

◆ launch_batch_affine_fill_kernel()

void Realm::Cuda::GPU::launch_batch_affine_fill_kernel ( void *  fill_info,
size_t  dim,
size_t  elemSize,
size_t  volume,
GPUStream *  stream 
)

◆ launch_batch_affine_kernel()

void Realm::Cuda::GPU::launch_batch_affine_kernel ( void *  copy_info,
size_t  dim,
size_t  elemSize,
size_t  volume,
GPUStream *  stream 
)

◆ launch_indirect_copy_kernel()

void Realm::Cuda::GPU::launch_indirect_copy_kernel ( void *  copy_info,
size_t  dim,
size_t  addr_size,
size_t  field_size,
size_t  volume,
GPUStream *  stream 
)

◆ launch_transpose_kernel()

void Realm::Cuda::GPU::launch_transpose_kernel ( MemcpyTransposeInfo< size_t > &  copy_info,
size_t  elemSize,
GPUStream *  stream 
)

◆ load_cuda_module()

CUmodule Realm::Cuda::GPU::load_cuda_module ( const void *  data)
protected

◆ pop_context()

void Realm::Cuda::GPU::pop_context ( void  )

◆ push_context()

void Realm::Cuda::GPU::push_context ( void  )

◆ register_reduction()

bool Realm::Cuda::GPU::register_reduction ( ReductionOpID  redop_id,
CUfunction  apply_excl,
CUfunction  apply_nonexcl,
CUfunction  fold_excl,
CUfunction  fold_nonexcl 
)

Member Data Documentation

◆ alloc_mutex

Mutex Realm::Cuda::GPU::alloc_mutex

◆ allocations

std::map<CUdeviceptr, GPUAllocation> Realm::Cuda::GPU::allocations

◆ batch_affine_kernels

GPUFuncInfo Realm::Cuda::GPU::batch_affine_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ batch_fill_affine_kernels

GPUFuncInfo Realm::Cuda::GPU::batch_fill_affine_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ context

CUcontext Realm::Cuda::GPU::context = nullptr

◆ ctxsync

ContextSynchronizer Realm::Cuda::GPU::ctxsync

◆ CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES

const size_t Realm::Cuda::GPU::CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES = 5
static

◆ cudaipc_mappings

std::vector<CudaIpcMapping> Realm::Cuda::GPU::cudaipc_mappings

◆ cudaipc_streams

std::map<NodeID, GPUStream *> Realm::Cuda::GPU::cudaipc_streams

◆ cupti_activity_refcount

size_t Realm::Cuda::GPU::cupti_activity_refcount = 0

◆ device_module

CUmodule Realm::Cuda::GPU::device_module = nullptr

◆ device_to_device_stream

GPUStream* Realm::Cuda::GPU::device_to_device_stream = nullptr

◆ device_to_device_streams

std::vector<GPUStream *> Realm::Cuda::GPU::device_to_device_streams

◆ device_to_host_stream

GPUStream* Realm::Cuda::GPU::device_to_host_stream = nullptr

◆ event_pool

GPUEventPool Realm::Cuda::GPU::event_pool

◆ fb_dmem

GPUDynamicFBMemory* Realm::Cuda::GPU::fb_dmem = nullptr

◆ fb_ibmem

GPUFBIBMemory* Realm::Cuda::GPU::fb_ibmem = nullptr

◆ fb_ibmem_base

CUdeviceptr Realm::Cuda::GPU::fb_ibmem_base = 0

◆ fbmem

GPUFBMemory* Realm::Cuda::GPU::fbmem = nullptr

◆ fbmem_base

CUdeviceptr Realm::Cuda::GPU::fbmem_base = 0

◆ fill_affine_large_kernels

GPUFuncInfo Realm::Cuda::GPU::fill_affine_large_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ gpu_reduction_table

std::unordered_map<ReductionOpID, GPUReductionOpEntry> Realm::Cuda::GPU::gpu_reduction_table

◆ greatest_stream_priority

int Realm::Cuda::GPU::greatest_stream_priority

◆ host_to_device_stream

GPUStream* Realm::Cuda::GPU::host_to_device_stream = nullptr

◆ indirect_copy_kernels

GPUFuncInfo Realm::Cuda::GPU::indirect_copy_kernels[REALM_MAX_DIM][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES][CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ info

GPUInfo* Realm::Cuda::GPU::info = nullptr

◆ least_stream_priority

int Realm::Cuda::GPU::least_stream_priority

◆ managed_mems

std::set<Memory> Realm::Cuda::GPU::managed_mems

◆ next_d2d_stream

atomic<unsigned> Realm::Cuda::GPU::next_d2d_stream = atomic<unsigned>(0)

◆ next_task_stream

atomic<unsigned> Realm::Cuda::GPU::next_task_stream = atomic<unsigned>(0)

◆ peer_fbs

std::set<Memory> Realm::Cuda::GPU::peer_fbs

◆ peer_to_peer_streams

std::vector<GPUStream *> Realm::Cuda::GPU::peer_to_peer_streams

◆ pinned_sysmems

std::set<Memory> Realm::Cuda::GPU::pinned_sysmems

◆ proc

GPUProcessor* Realm::Cuda::GPU::proc = nullptr

◆ task_streams

std::vector<GPUStream *> Realm::Cuda::GPU::task_streams

◆ transpose_kernels

GPUFuncInfo Realm::Cuda::GPU::transpose_kernels[CUDA_MEMCPY_KERNEL_MAX2_LOG2_BYTES]

◆ worker

GPUWorker* Realm::Cuda::GPU::worker = nullptr

The documentation for this class was generated from the following file: