Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
cuda_internal.h File Reference
module nullptr
#include "realm/cuda/cuda_module.h"
#include <memory>
#include <unordered_map>
#include <cuda.h>
#include <nvml.h>
#include <cupti.h>
#include <vector_types.h>
#include "realm/operation.h"
#include "realm/threads.h"
#include "realm/circ_queue.h"
#include "realm/indexspace.h"
#include "realm/proc_impl.h"
#include "realm/mem_impl.h"
#include "realm/bgwork.h"
#include "realm/transfer/channel.h"
#include "realm/transfer/ib_memory.h"
#include "realm/cuda/cuda_memcpy.h"
Include dependency graph for cuda_internal.h:

Go to the source code of this file.

Classes

struct  Realm::Cuda::GPUInfo
 
class  Realm::Cuda::GPUContextManager
 
class  Realm::Cuda::GPUCompletionNotification
 
class  Realm::Cuda::GPUWorkFence
 
class  Realm::Cuda::GPUWorkStart
 
class  Realm::Cuda::GPUStream
 
struct  Realm::Cuda::GPUStream::PendingEvent
 
class  Realm::Cuda::GPUWorker
 
class  Realm::Cuda::GPUEventPool
 
class  Realm::Cuda::ContextSynchronizer
 
class  Realm::Cuda::GPU
 
struct  Realm::Cuda::GPU::GPUFuncInfo
 
struct  Realm::Cuda::GPU::CudaIpcMapping
 
struct  Realm::Cuda::GPU::GPUReductionOpEntry
 
class  Realm::Cuda::AutoGPUContext
 
class  Realm::Cuda::GPUProcessor
 
struct  Realm::Cuda::GPUProcessor::GPUTaskTableEntry
 
class  Realm::Cuda::CudaDeviceMemoryInfo
 
class  Realm::Cuda::GPUFBMemory
 
class  Realm::Cuda::GPUDynamicFBMemory
 
class  Realm::Cuda::GPUZCMemory
 
class  Realm::Cuda::GPUFBIBMemory
 
class  Realm::Cuda::GPUCompletionEvent
 
class  Realm::Cuda::GPURequest
 
class  Realm::Cuda::GPUIndirectTransferCompletion
 
class  Realm::Cuda::GPUTransferCompletion
 
class  Realm::Cuda::MemSpecificCudaArray
 
class  Realm::Cuda::AddressInfoCudaArray
 
class  Realm::Cuda::GPUXferDes
 
class  Realm::Cuda::GPUIndirectXferDes
 
class  Realm::Cuda::GPUIndirectChannel
 
class  Realm::Cuda::GPUIndirectRemoteChannelInfo
 
class  Realm::Cuda::GPUIndirectRemoteChannel
 
class  Realm::Cuda::GPUChannel
 
class  Realm::Cuda::GPUfillXferDes
 
class  Realm::Cuda::GPUfillChannel
 
class  Realm::Cuda::GPUreduceXferDes
 
class  Realm::Cuda::GPUreduceChannel
 
class  Realm::Cuda::GPUreduceRemoteChannelInfo
 
class  Realm::Cuda::GPUreduceRemoteChannel
 
struct  Realm::Cuda::CudaIpcImportRequest
 
class  Realm::Cuda::GPUReplHeapListener
 
class  Realm::Cuda::GPUAllocation
 Class for managing the lifetime of a given GPU allocation. Because instances of this class own an underlying resource, they are not copyable: they must either be transferred with std::move (which invalidates the original variable) or be passed by reference. More...
 

Namespaces

namespace  Realm
 
namespace  Realm::Cuda
 

Macros

#define CUDA_ENABLE_DEPRECATED   1
 
#define CHECK_CUDART(cmd)
 
#define REPORT_CU_ERROR(level, cmd, ret)
 
#define CHECK_CU(cmd)
 
#define REPORT_NVML_ERROR(level, cmd, ret)
 
#define CHECK_NVML(cmd)
 
#define IS_DEFAULT_STREAM(stream)
 
#define REPORT_CUPTI_ERROR(level, cmd, ret)
 
#define CHECK_CUPTI(cmd)
 
#define CU_GET_PROC_ADDRESS_DEFAULT   0
 
#define CUDA_DRIVER_HAS_FNPTR(name)   ((name##_fnptr) != nullptr)
 
#define CUDA_DRIVER_FNPTR(name)   (assert(name##_fnptr != nullptr), name##_fnptr)
 
#define CUDA_VERSION_MIN   11080
 
#define CUDA_VERSION_COMPAT   ((CUDA_VERSION / 1000) * 1000)
 
#define CUDA_DRIVER_APIS(__op__)
 
#define DECL_FNPTR_EXTERN(name, ver)   extern decltype(&name) name##_fnptr;
 
#define NVML_FNPTR(name)   (name##_fnptr)
 
#define NVML_11_APIS(__op__)
 
#define NVML_12_APIS(__op__)
 
#define NVML_APIS(__op__)
 
#define DECL_FNPTR_EXTERN(name)   extern decltype(&name) name##_fnptr;
 
#define CUPTI_APIS(__op__)
 
#define DECL_FNPTR_EXTERN(name)   extern decltype(&name) name##_fnptr;
 
#define CUPTI_HAS_FNPTR(name)   (name##_fnptr != nullptr)
 
#define CUPTI_FNPTR(name)   (assert(name##_fnptr != nullptr), name##_fnptr)
 

Typedefs

typedef enum Realm::Cuda::nvmlIntNvLinkDeviceType_enum Realm::Cuda::nvmlIntNvLinkDeviceType_t
 

Enumerations

enum  Realm::Cuda::GPUMemcpyKind { Realm::Cuda::GPU_MEMCPY_HOST_TO_DEVICE , Realm::Cuda::GPU_MEMCPY_DEVICE_TO_HOST , Realm::Cuda::GPU_MEMCPY_DEVICE_TO_DEVICE , Realm::Cuda::GPU_MEMCPY_PEER_TO_PEER }
 
enum  Realm::Cuda::nvmlIntNvLinkDeviceType_enum { Realm::Cuda::NVML_NVLINK_DEVICE_TYPE_GPU = 0x00 , Realm::Cuda::NVML_NVLINK_DEVICE_TYPE_IBMNPU = 0x01 , Realm::Cuda::NVML_NVLINK_DEVICE_TYPE_SWITCH = 0x02 , Realm::Cuda::NVML_NVLINK_DEVICE_TYPE_UNKNOWN = 0xFF }
 

Functions

CUresult Realm::Cuda::cuGetProcAddress (const char *, void **, int, int)
 
CUresult Realm::Cuda::cuCtxRecordEvent (CUcontext hctx, CUevent event)
 
 Realm::Cuda::CUDA_DRIVER_APIS (DECL_FNPTR_EXTERN)
 
nvmlReturn_t Realm::Cuda::nvmlDeviceGetNvLinkRemoteDeviceType (nvmlDevice_t device, unsigned int link, nvmlIntNvLinkDeviceType_t *pNvLinkDeviceType)
 

Variables

CudaModule * Realm::Cuda::cuda_module_singleton
 

Macro Definition Documentation

◆ CHECK_CU

#define CHECK_CU (   cmd)
Value:
do { \
CUresult ret = (cmd); \
if(ret != CUDA_SUCCESS) { \
REPORT_CU_ERROR(Logger::LEVEL_ERROR, #cmd, ret); \
abort(); \
} \
} while(0)

◆ CHECK_CUDART

#define CHECK_CUDART (   cmd)
Value:
do { \
int ret = (int)(cmd); \
if(ret != 0) { \
fprintf(stderr, "CUDART: %s = %d\n", #cmd, ret); \
assert(0); \
exit(1); \
} \
} while(0)

◆ CHECK_CUPTI

#define CHECK_CUPTI (   cmd)
Value:
do { \
CUptiResult ret = (cmd); \
if(ret != CUPTI_SUCCESS) { \
REPORT_CUPTI_ERROR(Logger::LEVEL_ERROR, #cmd, ret); \
abort(); \
} \
} while(0)

◆ CHECK_NVML

#define CHECK_NVML (   cmd)
Value:
do { \
nvmlReturn_t ret = (cmd); \
if(ret != NVML_SUCCESS) { \
REPORT_NVML_ERROR(Logger::LEVEL_ERROR, #cmd, ret); \
abort(); \
} \
} while(0)

◆ CU_GET_PROC_ADDRESS_DEFAULT

#define CU_GET_PROC_ADDRESS_DEFAULT   0

◆ CUDA_DRIVER_APIS

#define CUDA_DRIVER_APIS (   __op__)

◆ CUDA_DRIVER_FNPTR

#define CUDA_DRIVER_FNPTR (   name)    (assert(name##_fnptr != nullptr), name##_fnptr)

◆ CUDA_DRIVER_HAS_FNPTR

#define CUDA_DRIVER_HAS_FNPTR (   name)    ((name##_fnptr) != nullptr)

◆ CUDA_ENABLE_DEPRECATED

#define CUDA_ENABLE_DEPRECATED   1

◆ CUDA_VERSION_COMPAT

#define CUDA_VERSION_COMPAT   ((CUDA_VERSION / 1000) * 1000)

◆ CUDA_VERSION_MIN

#define CUDA_VERSION_MIN   11080

◆ CUPTI_APIS

#define CUPTI_APIS (   __op__)
Value:
__op__(cuptiActivityRegisterCallbacks); \
__op__(cuptiActivityEnable); \
__op__(cuptiActivityDisable); \
__op__(cuptiActivityEnableContext); \
__op__(cuptiActivityDisableContext); \
__op__(cuptiActivityFlushAll); \
__op__(cuptiActivityGetNextRecord); \
__op__(cuptiActivityRegisterTimestampCallback); \
__op__(cuptiActivityPushExternalCorrelationId); \
__op__(cuptiActivityPopExternalCorrelationId);

◆ CUPTI_FNPTR

#define CUPTI_FNPTR (   name)    (assert(name##_fnptr != nullptr), name##_fnptr)

◆ CUPTI_HAS_FNPTR

#define CUPTI_HAS_FNPTR (   name)    (name##_fnptr != nullptr)

◆ DECL_FNPTR_EXTERN [1/3]

#define DECL_FNPTR_EXTERN (   name)    extern decltype(&name) name##_fnptr;

◆ DECL_FNPTR_EXTERN [2/3]

#define DECL_FNPTR_EXTERN (   name)    extern decltype(&name) name##_fnptr;

◆ DECL_FNPTR_EXTERN [3/3]

#define DECL_FNPTR_EXTERN (   name,
  ver 
)    extern decltype(&name) name##_fnptr;

◆ IS_DEFAULT_STREAM

#define IS_DEFAULT_STREAM (   stream)
Value:
(((stream) == 0) || ((stream) == CU_STREAM_LEGACY) || \
((stream) == CU_STREAM_PER_THREAD))

◆ NVML_11_APIS

#define NVML_11_APIS (   __op__)

◆ NVML_12_APIS

#define NVML_12_APIS (   __op__)

◆ NVML_APIS

#define NVML_APIS (   __op__)
Value:
__op__(nvmlInit); \
__op__(nvmlDeviceGetHandleByUUID); \
__op__(nvmlDeviceGetMaxPcieLinkWidth); \
__op__(nvmlDeviceGetMaxPcieLinkGeneration); \
__op__(nvmlDeviceGetNvLinkState); \
__op__(nvmlDeviceGetNvLinkVersion); \
__op__(nvmlDeviceGetNvLinkRemotePciInfo); \
__op__(nvmlDeviceGetNvLinkRemoteDeviceType); \
__op__(nvmlDeviceGetDeviceHandleFromMigDeviceHandle); \
__op__(nvmlDeviceGetFieldValues); \
NVML_11_APIS(__op__); \
NVML_12_APIS(__op__);

◆ NVML_FNPTR

#define NVML_FNPTR (   name)    (name##_fnptr)

◆ REPORT_CU_ERROR

#define REPORT_CU_ERROR (   level,
  cmd,
  ret 
)
Value:
do { \
log_gpu.newmsg(level) << __FILE__ << '(' << __LINE__ << "):" << cmd << " = " << ret; \
} while(0)

◆ REPORT_CUPTI_ERROR

#define REPORT_CUPTI_ERROR (   level,
  cmd,
  ret 
)
Value:
do { \
log_gpu.newmsg(level) << __FILE__ << '(' << __LINE__ << "):" << cmd << " = " << ret; \
} while(0)

◆ REPORT_NVML_ERROR

#define REPORT_NVML_ERROR (   level,
  cmd,
  ret 
)
Value:
do { \
log_gpu.newmsg(level) << __FILE__ << '(' << __LINE__ << "):" << cmd << " = " << ret; \
} while(0)