#include "realm/cuda/cuda_module.h"
#include <memory>
#include <unordered_map>
#include <cuda.h>
#include <nvml.h>
#include <cupti.h>
#include <vector_types.h>
#include "realm/operation.h"
#include "realm/threads.h"
#include "realm/circ_queue.h"
#include "realm/indexspace.h"
#include "realm/proc_impl.h"
#include "realm/mem_impl.h"
#include "realm/bgwork.h"
#include "realm/transfer/channel.h"
#include "realm/transfer/ib_memory.h"
#include "realm/cuda/cuda_memcpy.h"

Include dependency graph for cuda_internal.h:

Classes
struct	Realm::Cuda::GPUInfo

class	Realm::Cuda::GPUContextManager

class	Realm::Cuda::GPUCompletionNotification

class	Realm::Cuda::GPUWorkFence

class	Realm::Cuda::GPUWorkStart

class	Realm::Cuda::GPUStream

struct	Realm::Cuda::GPUStream::PendingEvent

class	Realm::Cuda::GPUWorker

class	Realm::Cuda::GPUEventPool

class	Realm::Cuda::ContextSynchronizer

class	Realm::Cuda::GPU

struct	Realm::Cuda::GPU::GPUFuncInfo

struct	Realm::Cuda::GPU::CudaIpcMapping

struct	Realm::Cuda::GPU::GPUReductionOpEntry

class	Realm::Cuda::AutoGPUContext

class	Realm::Cuda::GPUProcessor

struct	Realm::Cuda::GPUProcessor::GPUTaskTableEntry

class	Realm::Cuda::CudaDeviceMemoryInfo

class	Realm::Cuda::GPUFBMemory

class	Realm::Cuda::GPUDynamicFBMemory

class	Realm::Cuda::GPUZCMemory

class	Realm::Cuda::GPUFBIBMemory

class	Realm::Cuda::GPUCompletionEvent

class	Realm::Cuda::GPURequest

class	Realm::Cuda::GPUIndirectTransferCompletion

class	Realm::Cuda::GPUTransferCompletion

class	Realm::Cuda::MemSpecificCudaArray

class	Realm::Cuda::AddressInfoCudaArray

class	Realm::Cuda::GPUXferDes

class	Realm::Cuda::GPUIndirectXferDes

class	Realm::Cuda::GPUIndirectChannel

class	Realm::Cuda::GPUIndirectRemoteChannelInfo

class	Realm::Cuda::GPUIndirectRemoteChannel

class	Realm::Cuda::GPUChannel

class	Realm::Cuda::GPURemoteChannelInfo

class	Realm::Cuda::GPURemoteChannel

class	Realm::Cuda::GPUfillXferDes

class	Realm::Cuda::GPUfillChannel

struct	Realm::Cuda::KernelVariantDesc

class	Realm::Cuda::GPUreduceXferDes

class	Realm::Cuda::GPUreduceChannel

class	Realm::Cuda::GPUreduceRemoteChannelInfo

class	Realm::Cuda::GPUreduceRemoteChannel

struct	Realm::Cuda::CudaIpcImportRequest

class	Realm::Cuda::GPUReplHeapListener

class	Realm::Cuda::GPUAllocation
	Class for managing the lifetime of a given GPU allocation. Because instances of this class own an underlying resource, they are not copyable; they must either be std::move'd (thus invalidating the original variable) or accessed through references. More...

Namespaces
namespace	Realm

namespace	Realm::Cuda

Macros
#define	CUDA_ENABLE_DEPRECATED 1

#define	CHECK_CUDART(cmd)

#define	REPORT_CU_ERROR(level, cmd, ret)

#define	CHECK_CU(cmd)

#define	REPORT_NVML_ERROR(level, cmd, ret)

#define	CHECK_NVML(cmd)

#define	IS_DEFAULT_STREAM(stream)

#define	REPORT_CUPTI_ERROR(level, cmd, ret)

#define	CHECK_CUPTI(cmd)

#define	CU_GET_PROC_ADDRESS_DEFAULT 0

#define	CUDA_DRIVER_HAS_FNPTR(name) ((name##_fnptr) != nullptr)

#define	CUDA_DRIVER_FNPTR(name) (assert(name##_fnptr != nullptr), name##_fnptr)

#define	CUDA_VERSION_MIN 11080

#define	CUDA_VERSION_COMPAT ((CUDA_VERSION / 1000) * 1000)

#define	CUDA_DRIVER_APIS(__op__)

#define	DECL_FNPTR_EXTERN(name, ver) extern decltype(&name) name##_fnptr;

#define	NVML_FNPTR(name) (name##_fnptr)

#define	NVML_11_APIS(__op__)

#define	NVML_12_APIS(__op__)

#define	NVML_APIS(__op__)

#define	DECL_FNPTR_EXTERN(name) extern decltype(&name) name##_fnptr;

#define	CUPTI_APIS(__op__)

#define	DECL_FNPTR_EXTERN(name) extern decltype(&name) name##_fnptr;

#define	CUPTI_HAS_FNPTR(name) (name##_fnptr != nullptr)

#define	CUPTI_FNPTR(name) (assert(name##_fnptr != nullptr), name##_fnptr)

Typedefs
typedef enum Realm::Cuda::nvmlIntNvLinkDeviceType_enum	Realm::Cuda::nvmlIntNvLinkDeviceType_t

Enumerations
enum	Realm::Cuda::GPUMemcpyKind { Realm::Cuda::GPU_MEMCPY_HOST_TO_DEVICE , Realm::Cuda::GPU_MEMCPY_DEVICE_TO_HOST , Realm::Cuda::GPU_MEMCPY_DEVICE_TO_DEVICE , Realm::Cuda::GPU_MEMCPY_PEER_TO_PEER }

enum	Realm::Cuda::nvmlIntNvLinkDeviceType_enum { Realm::Cuda::NVML_NVLINK_DEVICE_TYPE_GPU = 0x00 , Realm::Cuda::NVML_NVLINK_DEVICE_TYPE_IBMNPU = 0x01 , Realm::Cuda::NVML_NVLINK_DEVICE_TYPE_SWITCH = 0x02 , Realm::Cuda::NVML_NVLINK_DEVICE_TYPE_UNKNOWN = 0xFF }

Functions
CUresult	Realm::Cuda::cuGetProcAddress (const char *, void **, int, int)

CUresult	Realm::Cuda::cuCtxRecordEvent (CUcontext hctx, CUevent event)

	Realm::Cuda::CUDA_DRIVER_APIS (DECL_FNPTR_EXTERN)

nvmlReturn_t	Realm::Cuda::nvmlDeviceGetNvLinkRemoteDeviceType (nvmlDevice_t device, unsigned int link, nvmlIntNvLinkDeviceType_t *pNvLinkDeviceType)

Variables
CudaModule *	Realm::Cuda::cuda_module_singleton

Macro Definition Documentation

◆ CHECK_CU

#define CHECK_CU ( cmd )

Value:

  do {                                                                                   \
    CUresult ret = (cmd);                                                                \
    if(ret != CUDA_SUCCESS) {                                                            \
      REPORT_CU_ERROR(Logger::LEVEL_ERROR, #cmd, ret);                                   \
      abort();                                                                           \
    }                                                                                    \
  } while(0)

◆ CHECK_CUDART

#define CHECK_CUDART ( cmd )

Value:

  do {                                                                                   \
    int ret = (int)(cmd);                                                                \
    if(ret != 0) {                                                                       \
      fprintf(stderr, "CUDART: %s = %d\n", #cmd, ret);                                   \
      assert(0);                                                                         \
      exit(1);                                                                           \
    }                                                                                    \
  } while(0)

◆ CHECK_CUPTI

#define CHECK_CUPTI ( cmd )

Value:

  do {                                                                                   \
    CUptiResult ret = (cmd);                                                             \
    if(ret != CUPTI_SUCCESS) {                                                           \
      REPORT_CUPTI_ERROR(Logger::LEVEL_ERROR, #cmd, ret);                                \
      abort();                                                                           \
    }                                                                                    \
  } while(0)

◆ CHECK_NVML

#define CHECK_NVML ( cmd )

Value:

  do {                                                                                   \
    nvmlReturn_t ret = (cmd);                                                            \
    if(ret != NVML_SUCCESS) {                                                            \
      REPORT_NVML_ERROR(Logger::LEVEL_ERROR, #cmd, ret);                                 \
      abort();                                                                           \
    }                                                                                    \
  } while(0)

◆ CU_GET_PROC_ADDRESS_DEFAULT

#define CU_GET_PROC_ADDRESS_DEFAULT 0

◆ CUDA_DRIVER_APIS

#define CUDA_DRIVER_APIS ( __op__ )

◆ CUDA_DRIVER_FNPTR

#define CUDA_DRIVER_FNPTR ( name ) (assert(name##_fnptr != nullptr), name##_fnptr)

◆ CUDA_DRIVER_HAS_FNPTR

#define CUDA_DRIVER_HAS_FNPTR ( name ) ((name##_fnptr) != nullptr)

◆ CUDA_ENABLE_DEPRECATED

#define CUDA_ENABLE_DEPRECATED 1

◆ CUDA_VERSION_COMPAT

#define CUDA_VERSION_COMPAT ((CUDA_VERSION / 1000) * 1000)

◆ CUDA_VERSION_MIN

#define CUDA_VERSION_MIN 11080

◆ CUPTI_APIS

#define CUPTI_APIS ( __op__ )

Value:

  __op__(cuptiActivityRegisterCallbacks);                                                \
  __op__(cuptiActivityEnable);                                                           \
  __op__(cuptiActivityDisable);                                                          \
  __op__(cuptiActivityEnableContext);                                                    \
  __op__(cuptiActivityDisableContext);                                                   \
  __op__(cuptiActivityFlushAll);                                                         \
  __op__(cuptiActivityGetNextRecord);                                                    \
  __op__(cuptiActivityRegisterTimestampCallback);                                        \
  __op__(cuptiActivityPushExternalCorrelationId);                                        \
  __op__(cuptiActivityPopExternalCorrelationId);                                         \
  __op__(cuptiSubscribe);                                                                \
  __op__(cuptiEnableCallback);                                                           \
  __op__(cuptiUnsubscribe);

◆ CUPTI_FNPTR

#define CUPTI_FNPTR ( name ) (assert(name##_fnptr != nullptr), name##_fnptr)

◆ CUPTI_HAS_FNPTR

#define CUPTI_HAS_FNPTR ( name ) (name##_fnptr != nullptr)

◆ DECL_FNPTR_EXTERN [1/3]

#define DECL_FNPTR_EXTERN ( name ) extern decltype(&name) name##_fnptr;

◆ DECL_FNPTR_EXTERN [2/3]

#define DECL_FNPTR_EXTERN ( name ) extern decltype(&name) name##_fnptr;

◆ DECL_FNPTR_EXTERN [3/3]

#define DECL_FNPTR_EXTERN	(	name,
		ver
	)	extern decltype(&name) name##_fnptr;

◆ IS_DEFAULT_STREAM

#define IS_DEFAULT_STREAM ( stream )

Value:

(((stream) == 0) || ((stream) == CU_STREAM_LEGACY) || \
 ((stream) == CU_STREAM_PER_THREAD))

◆ NVML_11_APIS

#define NVML_11_APIS ( __op__ )

◆ NVML_12_APIS

#define NVML_12_APIS ( __op__ )

◆ NVML_APIS

#define NVML_APIS ( __op__ )

Value:

  __op__(nvmlInit);                                                                      \
  __op__(nvmlDeviceGetHandleByUUID);                                                     \
  __op__(nvmlDeviceGetMaxPcieLinkWidth);                                                 \
  __op__(nvmlDeviceGetMaxPcieLinkGeneration);                                            \
  __op__(nvmlDeviceGetNvLinkState);                                                      \
  __op__(nvmlDeviceGetNvLinkVersion);                                                    \
  __op__(nvmlDeviceGetNvLinkRemotePciInfo);                                              \
  __op__(nvmlDeviceGetNvLinkRemoteDeviceType);                                           \
  __op__(nvmlDeviceGetDeviceHandleFromMigDeviceHandle);                                  \
  __op__(nvmlDeviceGetFieldValues);                                                      \
  NVML_11_APIS(__op__);                                                                  \
  NVML_12_APIS(__op__);

◆ NVML_FNPTR

#define NVML_FNPTR ( name ) (name##_fnptr)

◆ REPORT_CU_ERROR

#define REPORT_CU_ERROR	(	level,
		cmd,
		ret
	)

Value:

  do {                                                                                   \
    log_gpu.newmsg(level) << __FILE__ << '(' << __LINE__ << "):" << cmd << " = " << ret; \
  } while(0)

◆ REPORT_CUPTI_ERROR

#define REPORT_CUPTI_ERROR	(	level,
		cmd,
		ret
	)

Value:

  do {                                                                                   \
    log_gpu.newmsg(level) << __FILE__ << '(' << __LINE__ << "):" << cmd << " = " << ret; \
  } while(0)

◆ REPORT_NVML_ERROR

#define REPORT_NVML_ERROR	(	level,
		cmd,
		ret
	)

Value:

  do {                                                                                   \
    log_gpu.newmsg(level) << __FILE__ << '(' << __LINE__ << "):" << cmd << " = " << ret; \
  } while(0)

Classes

Namespaces

Macros

Typedefs

Enumerations

Functions

Variables

Macro Definition Documentation

◆ CHECK_CU

◆ CHECK_CUDART

◆ CHECK_CUPTI

◆ CHECK_NVML

◆ CU_GET_PROC_ADDRESS_DEFAULT

◆ CUDA_DRIVER_APIS

◆ CUDA_DRIVER_FNPTR

◆ CUDA_DRIVER_HAS_FNPTR

◆ CUDA_ENABLE_DEPRECATED

◆ CUDA_VERSION_COMPAT

◆ CUDA_VERSION_MIN

◆ CUPTI_APIS

◆ CUPTI_FNPTR

◆ CUPTI_HAS_FNPTR

◆ DECL_FNPTR_EXTERN [1/3]

◆ DECL_FNPTR_EXTERN [2/3]

◆ DECL_FNPTR_EXTERN [3/3]

◆ IS_DEFAULT_STREAM

◆ NVML_11_APIS

◆ NVML_12_APIS

◆ NVML_APIS

◆ NVML_FNPTR

◆ REPORT_CU_ERROR

◆ REPORT_CUPTI_ERROR

◆ REPORT_NVML_ERROR