18#ifndef REALM_HIP_INTERNAL_H
19#define REALM_HIP_INTERNAL_H
23#include <hip/hip_runtime.h>
35#define CHECK_CUDART(cmd) \
37 hipError_t ret = (cmd); \
38 if(ret != hipSuccess) { \
39 fprintf(stderr, "HIP: %s = %d (%s)\n", #cmd, ret, hipGetErrorString(ret)); \
45#define REPORT_HIP_ERROR(cmd, ret) \
47 const char *name, *str; \
48 name = hipGetErrorName(ret); \
49 str = hipGetErrorString(ret); \
50 fprintf(stderr, "HIP: %s = %d (%s): %s\n", cmd, ret, name, str); \
54#define CHECK_HIP(cmd) \
56 hipError_t ret = (cmd); \
57 if(ret != hipSuccess) \
58 REPORT_HIP_ERROR(#cmd, ret); \
66#ifdef REALM_USE_HIP_HIJACK
67 :
public hipDeviceProp_t
74#ifndef REALM_USE_HIP_HIJACK
96 class GPUDynamicFBMemory;
139 virtual void print(std::ostream &os)
const;
159 virtual void print(std::ostream &os)
const;
330#ifdef REALM_USE_HIP_HIJACK
331 void register_fat_binary(
const FatBin *data);
335 hipFunction_t lookup_function(
const void *func);
336 char *lookup_variable(
const void *var);
408#ifdef REALM_USE_HIP_HIJACK
409 std::map<const FatBin *, hipModule_t> device_modules;
410 std::map<const void *, hipFunction_t> device_functions;
411 std::map<const void *, char *> device_variables;
446#ifdef REALM_USE_HIP_HIJACK
448 void push_call_configuration(dim3 grid_dim, dim3 block_dim,
size_t shared_size,
450 void pop_call_configuration(dim3 *grid_dim, dim3 *block_dim,
size_t *shared_size,
458#ifdef REALM_USE_HIP_HIJACK
459 void event_create(hipEvent_t *event,
int flags);
460 void event_destroy(hipEvent_t event);
461 void event_record(hipEvent_t event, hipStream_t stream);
462 void event_synchronize(hipEvent_t event);
463 void event_elapsed_time(
float *ms, hipEvent_t start, hipEvent_t end);
465 void configure_call(dim3 grid_dim, dim3 block_dim,
size_t shared_memory,
467 void setup_argument(
const void *arg,
size_t size,
size_t offset);
468 void launch(
const void *func);
469 void launch_kernel(
const void *func, dim3 grid_dim, dim3 block_dim,
void **args,
470 size_t shared_memory, hipStream_t stream);
473 void gpu_memcpy(
void *dst,
const void *
src,
size_t size, hipMemcpyKind kind);
476#ifdef REALM_USE_HIP_HIJACK
477 void gpu_memcpy_to_symbol(
const void *dst,
const void *
src,
size_t size,
478 size_t offset, hipMemcpyKind kind);
479 void gpu_memcpy_to_symbol_async(
const void *dst,
const void *
src,
size_t size,
480 size_t offset, hipMemcpyKind kind,
482 void gpu_memcpy_from_symbol(
void *dst,
const void *
src,
size_t size,
size_t offset,
484 void gpu_memcpy_from_symbol_async(
void *dst,
const void *
src,
size_t size,
485 size_t offset, hipMemcpyKind kind,
504 CallConfig(dim3 _grid, dim3 _block,
size_t _shared, hipStream_t _stream);
555 size_t &inst_offset);
580 bool need_alloc_result,
596 size_t &inst_offset);
615 void *_cpu_base,
size_t _size,
MemoryKind _kind,
628 size_t &inst_offset);
675 size_t _read_size,
int _write_port_idx,
size_t _write_offset,
693 XferDesID _guid,
const std::vector<XferDesPortInfo> &inputs_info,
694 const std::vector<XferDesPortInfo> &outputs_info,
int _priority);
701 std::vector<GPU *> src_gpus, dst_gpus;
702 std::vector<bool> dst_is_ipc;
716 const std::vector<XferDesPortInfo> &inputs_info,
717 const std::vector<XferDesPortInfo> &outputs_info,
719 const void *fill_data,
size_t fill_size,
734 XferDesID _guid,
const std::vector<XferDesPortInfo> &inputs_info,
735 const std::vector<XferDesPortInfo> &outputs_info,
int _priority,
736 const void *_fill_data,
size_t _fill_size,
size_t _fill_total);
755 const std::vector<XferDesPortInfo> &inputs_info,
756 const std::vector<XferDesPortInfo> &outputs_info,
758 const void *fill_data,
size_t fill_size,
774 XferDesID _guid,
const std::vector<XferDesPortInfo> &inputs_info,
775 const std::vector<XferDesPortInfo> &outputs_info,
int _priority,
785#if defined(REALM_USE_HIP_HIJACK)
806 const std::vector<XferDesPortInfo> &inputs_info,
807 const std::vector<XferDesPortInfo> &outputs_info,
809 const void *fill_data,
size_t fill_size,
823 const std::vector<Channel::SupportedPath> &_paths);
827 template <
typename S>
830 template <
typename S>
853 const void *data,
size_t datalen);
860 const void *data,
size_t datalen);
866 const void *data,
size_t datalen);
Definition bytearray.h:30
Definition bytearray.h:53
Definition circ_queue.h:35
Definition codedesc.h:249
Definition instance.h:405
Definition hip_internal.h:416
AutoGPUContext(GPU *_gpu)
GPU * gpu
Definition hip_internal.h:423
AutoGPUContext(GPU &_gpu)
Definition hip_internal.h:289
Mutex::CondVar condvar
Definition hip_internal.h:307
bool shutdown_flag
Definition hip_internal.h:308
int max_threads
Definition hip_internal.h:305
GPU * gpu
Definition hip_internal.h:302
int total_threads
Definition hip_internal.h:310
ContextSynchronizer(GPU *_gpu, int _device_id, CoreReservationSet &crs, int _max_threads)
GPUWorkFence::FenceList fences
Definition hip_internal.h:309
int device_id
Definition hip_internal.h:304
CoreReservation * core_rsrv
Definition hip_internal.h:312
std::vector< Thread * > worker_threads
Definition hip_internal.h:311
int sleeping_threads
Definition hip_internal.h:310
int syncing_threads
Definition hip_internal.h:310
void add_fence(GPUWorkFence *fence)
Mutex mutex
Definition hip_internal.h:306
Definition hip_internal.h:705
long submit(Request **requests, long nr)
GPUChannel(GPU *_src_gpu, XferDesKind _kind, BackgroundWorkManager *bgwork)
static const bool is_ordered
Definition hip_internal.h:712
virtual XferDes * create_xfer_des(uintptr_t dma_op, NodeID launch_node, XferDesID guid, const std::vector< XferDesPortInfo > &inputs_info, const std::vector< XferDesPortInfo > &outputs_info, int priority, XferDesRedopInfo redop_info, const void *fill_data, size_t fill_size, size_t fill_total)
Definition hip_internal.h:656
void request_completed(void)
GPURequest * req
Definition hip_internal.h:660
Definition hip_internal.h:106
virtual ~GPUCompletionNotification(void)
Definition hip_internal.h:108
virtual void request_completed(void)=0
Definition hip_internal.h:570
GPU * gpu
Definition hip_internal.h:606
virtual void unregister_external_resource(RegionInstanceImpl *inst)
std::map< RegionInstance, std::pair< void *, size_t > > alloc_bases
Definition hip_internal.h:609
size_t cur_size
Definition hip_internal.h:608
Mutex mutex
Definition hip_internal.h:607
virtual AllocationResult allocate_storage_immediate(RegionInstanceImpl *inst, bool need_alloc_result, bool poisoned, TimeLimit work_until)
virtual void get_bytes(off_t offset, void *dst, size_t size)
virtual ~GPUDynamicFBMemory(void)
virtual ExternalInstanceResource * generate_resource_info(RegionInstanceImpl *inst, const IndexSpaceGeneric *subspace, span< const FieldID > fields, bool read_only)
virtual void put_bytes(off_t offset, const void *src, size_t size)
virtual void * get_direct_ptr(off_t offset, size_t size)
virtual bool attempt_register_external_resource(RegionInstanceImpl *inst, size_t &inst_offset)
GPUDynamicFBMemory(RuntimeImpl *_runtime_impl, Memory _me, GPU *_gpu, size_t _max_size)
virtual void release_storage_immediate(RegionInstanceImpl *inst, bool poisoned, TimeLimit work_until)
Definition hip_internal.h:266
int external_count
Definition hip_internal.h:281
void init_pool(int init_size=0)
int batch_size
Definition hip_internal.h:281
int current_size
Definition hip_internal.h:281
GPUEventPool(int _batch_size=256)
int total_size
Definition hip_internal.h:281
hipEvent_t get_event(bool external=false)
Mutex mutex
Definition hip_internal.h:280
std::vector< hipEvent_t > available_events
Definition hip_internal.h:282
void return_event(hipEvent_t e, bool external=false)
Definition hip_internal.h:643
char * base
Definition hip_internal.h:650
GPU * gpu
Definition hip_internal.h:649
GPUFBIBMemory(RuntimeImpl *_runtime_impl, Memory _me, GPU *_gpu, char *_base, size_t _size)
NetworkSegment local_segment
Definition hip_internal.h:651
Definition hip_internal.h:539
virtual void unregister_external_resource(RegionInstanceImpl *inst)
virtual void put_bytes(off_t offset, const void *src, size_t size)
virtual ExternalInstanceResource * generate_resource_info(RegionInstanceImpl *inst, const IndexSpaceGeneric *subspace, span< const FieldID > fields, bool read_only)
virtual void get_bytes(off_t offset, void *dst, size_t size)
NetworkSegment local_segment
Definition hip_internal.h:567
virtual bool attempt_register_external_resource(RegionInstanceImpl *inst, size_t &inst_offset)
virtual void * get_direct_ptr(off_t offset, size_t size)
char * base
Definition hip_internal.h:566
virtual ~GPUFBMemory(void)
GPU * gpu
Definition hip_internal.h:565
GPUFBMemory(RuntimeImpl *_runtime_impl, Memory _me, GPU *_gpu, char *_base, size_t _size)
Definition hip_internal.h:113
GPUPreemptionWaiter(GPU *gpu)
virtual ~GPUPreemptionWaiter(void)
Definition hip_internal.h:116
virtual void request_completed(void)
Definition hip_internal.h:427
ContextSynchronizer ctxsync
Definition hip_internal.h:510
void gpu_memcpy_async(void *dst, const void *src, size_t size, hipMemcpyKind kind, hipStream_t stream)
void gpu_memset(void *dst, int value, size_t count)
std::vector< CallConfig > launch_configs
Definition hip_internal.h:506
virtual void shutdown(void)
void gpu_memset_async(void *dst, int value, size_t count, hipStream_t stream)
virtual bool register_task(Processor::TaskFuncID func_id, CodeDescriptor &codedesc, const ByteArrayRef &user_data)
GPUProcessor(RuntimeImpl *runtime_impl, GPU *_gpu, Processor _me, Realm::CoreReservationSet &crs, size_t _stack_size)
void gpu_memcpy(void *dst, const void *src, size_t size, hipMemcpyKind kind)
std::vector< CallConfig > call_configs
Definition hip_internal.h:508
static GPUProcessor * get_current_gpu_proc(void)
std::vector< char > kernel_args
Definition hip_internal.h:507
virtual ~GPUProcessor(void)
void stream_synchronize(hipStream_t stream)
virtual void execute_task(Processor::TaskFuncID func_id, const ByteArrayRef &task_args)
bool block_on_synchronize
Definition hip_internal.h:509
void stream_wait_on_event(hipStream_t stream, hipEvent_t event)
GPU * gpu
Definition hip_internal.h:493
void device_synchronize(void)
std::map< Processor::TaskFuncID, GPUTaskTableEntry > gpu_task_table
Definition hip_internal.h:523
Realm::CoreReservation * core_rsrv
Definition hip_internal.h:513
Definition hip_internal.h:869
virtual void chunk_destroyed(void *base, size_t bytes)
virtual void chunk_created(void *base, size_t bytes)
GPUReplHeapListener(HipModule *_module)
Definition hip_internal.h:663
void * dst_base
Definition hip_internal.h:666
const void * src_base
Definition hip_internal.h:665
GPUCompletionEvent event
Definition hip_internal.h:669
GPU * dst_gpu
Definition hip_internal.h:668
Definition hip_internal.h:171
void add_fence(GPUWorkFence *fence)
void add_start_event(GPUWorkStart *start)
GPUWorker * worker
Definition hip_internal.h:205
std::deque< PendingEvent > pending_events
Definition hip_internal.h:220
void add_event(hipEvent_t event, GPUWorkFence *fence, GPUCompletionNotification *notification=NULL, GPUWorkStart *start=NULL)
bool has_work(void) const
GPU * gpu
Definition hip_internal.h:204
void add_notification(GPUCompletionNotification *notification)
bool reap_events(TimeLimit work_until)
hipStream_t stream
Definition hip_internal.h:207
Mutex mutex
Definition hip_internal.h:209
void wait_on_streams(const std::set< GPUStream * > &other_streams)
GPUStream(GPU *_gpu, GPUWorker *_worker, int rel_priority=0)
REALM_INTERNAL_API_EXTERNAL_LINKAGE hipStream_t get_stream(void) const
bool ok_to_submit_copy(size_t bytes, XferDes *xd)
GPU * get_gpu(void) const
Definition hip_internal.h:672
int write_port_idx
Definition hip_internal.h:684
size_t read_offset
Definition hip_internal.h:683
size_t write_offset
Definition hip_internal.h:685
XferDes * xd
Definition hip_internal.h:681
virtual void request_completed(void)
int read_port_idx
Definition hip_internal.h:682
size_t write_size
Definition hip_internal.h:685
size_t read_size
Definition hip_internal.h:683
GPUTransferCompletion(XferDes *_xd, int _read_port_idx, size_t _read_offset, size_t _read_size, int _write_port_idx, size_t _write_offset, size_t _write_size)
Definition hip_internal.h:129
virtual void request_cancellation(void)
virtual void print(std::ostream &os) const
static void cuda_callback(hipStream_t stream, hipError_t res, void *data)
IntrusiveListLink< GPUWorkFence > fence_list_link
Definition hip_internal.h:141
GPUWorkFence(Realm::Operation *op)
virtual void mark_finished(bool successful)
IntrusiveList< GPUWorkFence, REALM_PMTA_USE(GPUWorkFence, fence_list_link), DummyLock > FenceList
Definition hip_internal.h:145
void enqueue_on_stream(GPUStream *stream)
REALM_PMTA_DEFN(GPUWorkFence, IntrusiveListLink< GPUWorkFence >, fence_list_link)
Definition hip_internal.h:151
void mark_gpu_work_start()
virtual void request_cancellation(void)
Definition hip_internal.h:155
void enqueue_on_stream(GPUStream *stream)
static void cuda_start_callback(hipStream_t stream, hipError_t res, void *data)
GPUWorkStart(Realm::Operation *op)
virtual void print(std::ostream &os) const
Definition hip_internal.h:227
Realm::CoreReservation * core_rsrv
Definition hip_internal.h:258
CircularQueue< GPUStream *, 16 > ActiveStreamQueue
Definition hip_internal.h:254
atomic< bool > worker_shutdown_requested
Definition hip_internal.h:261
bool thread_sleeping
Definition hip_internal.h:260
void start_background_thread(Realm::CoreReservationSet &crs, size_t stack_size)
ActiveStreamQueue active_streams
Definition hip_internal.h:255
bool do_work(TimeLimit work_until)
Mutex::CondVar condvar
Definition hip_internal.h:252
Mutex lock
Definition hip_internal.h:251
void shutdown_background_thread(void)
Realm::Thread * worker_thread
Definition hip_internal.h:259
void add_stream(GPUStream *s)
bool process_streams(bool sleep_on_empty)
Definition hip_internal.h:690
GPUXferDes(uintptr_t _dma_op, Channel *_channel, NodeID _launch_node, XferDesID _guid, const std::vector< XferDesPortInfo > &inputs_info, const std::vector< XferDesPortInfo > &outputs_info, int _priority)
bool progress_xd(GPUChannel *channel, TimeLimit work_until)
long get_requests(Request **requests, long nr)
Definition hip_internal.h:612
virtual bool attempt_register_external_resource(RegionInstanceImpl *inst, size_t &inst_offset)
char * cpu_base
Definition hip_internal.h:639
virtual void put_bytes(off_t offset, const void *src, size_t size)
char * gpu_base
Definition hip_internal.h:638
GPUZCMemory(RuntimeImpl *_runtime_impl, Memory _me, char *_gpu_base, void *_cpu_base, size_t _size, MemoryKind _kind, Memory::Kind _lowlevel_kind)
NetworkSegment local_segment
Definition hip_internal.h:640
virtual ExternalInstanceResource * generate_resource_info(RegionInstanceImpl *inst, const IndexSpaceGeneric *subspace, span< const FieldID > fields, bool read_only)
virtual void * get_direct_ptr(off_t offset, size_t size)
virtual void unregister_external_resource(RegionInstanceImpl *inst)
virtual ~GPUZCMemory(void)
virtual void get_bytes(off_t offset, void *dst, size_t size)
Definition hip_internal.h:322
int device_id
Definition hip_internal.h:366
GPUStream * find_stream(hipStream_t stream) const
std::vector< HipIpcMapping > hipipc_mappings
Definition hip_internal.h:403
GPUDynamicFBMemory * fb_dmem
Definition hip_internal.h:362
GPUProcessor * proc
Definition hip_internal.h:360
char * fb_ibmem_base
Definition hip_internal.h:370
std::set< Memory > pinned_sysmems
Definition hip_internal.h:373
hipModule_t load_hip_module(const void *data)
const HipIpcMapping * find_ipc_mapping(Memory mem) const
atomic< unsigned > next_d2d_stream
Definition hip_internal.h:389
GPUInfo * info
Definition hip_internal.h:358
GPUStream * device_to_device_stream
Definition hip_internal.h:384
int greatest_stream_priority
Definition hip_internal.h:395
atomic< unsigned > next_task_stream
Definition hip_internal.h:388
void create_dma_channels(Realm::RuntimeImpl *r)
char * fbmem_base
Definition hip_internal.h:368
std::vector< GPUStream * > peer_to_peer_streams
Definition hip_internal.h:386
std::set< Memory > peer_fbs
Definition hip_internal.h:379
int least_stream_priority
Definition hip_internal.h:395
void create_fb_memory(RuntimeImpl *runtime, size_t size, size_t ib_size)
GPUWorker * worker
Definition hip_internal.h:359
REALM_INTERNAL_API_EXTERNAL_LINKAGE GPUStream * get_null_task_stream(void) const
std::vector< GPUStream * > device_to_device_streams
Definition hip_internal.h:385
GPUFBMemory * fbmem
Definition hip_internal.h:361
bool can_access_peer(GPU *peer)
std::vector< GPUStream * > task_streams
Definition hip_internal.h:387
GPUEventPool event_pool
Definition hip_internal.h:391
GPU(HipModule *_module, GPUInfo *_info, GPUWorker *worker, int _device_id)
GPUStream * device_to_host_stream
Definition hip_internal.h:383
GPUStream * host_to_device_stream
Definition hip_internal.h:382
void create_dynamic_fb_memory(RuntimeImpl *runtime, size_t max_size)
GPUStream * get_next_d2d_stream()
void create_processor(RuntimeImpl *runtime, size_t stack_size)
std::map< NodeID, GPUStream * > hipipc_streams
Definition hip_internal.h:404
GPUFBIBMemory * fb_ibmem
Definition hip_internal.h:363
GPUStream * get_next_task_stream(bool create=false)
std::set< Memory > managed_mems
Definition hip_internal.h:376
Definition hip_internal.h:746
GPU * gpu
Definition hip_internal.h:766
virtual XferDes * create_xfer_des(uintptr_t dma_op, NodeID launch_node, XferDesID guid, const std::vector< XferDesPortInfo > &inputs_info, const std::vector< XferDesPortInfo > &outputs_info, int priority, XferDesRedopInfo redop_info, const void *fill_data, size_t fill_size, size_t fill_total)
long submit(Request **requests, long nr)
GPUfillChannel(GPU *_gpu, BackgroundWorkManager *bgwork)
static const bool is_ordered
Definition hip_internal.h:751
Definition hip_internal.h:731
GPUfillXferDes(uintptr_t _dma_op, Channel *_channel, NodeID _launch_node, XferDesID _guid, const std::vector< XferDesPortInfo > &inputs_info, const std::vector< XferDesPortInfo > &outputs_info, int _priority, const void *_fill_data, size_t _fill_size, size_t _fill_total)
bool progress_xd(GPUfillChannel *channel, TimeLimit work_until)
size_t reduced_fill_size
Definition hip_internal.h:743
long get_requests(Request **requests, long nr)
Definition hip_internal.h:793
long submit(Request **requests, long nr)
virtual XferDes * create_xfer_des(uintptr_t dma_op, NodeID launch_node, XferDesID guid, const std::vector< XferDesPortInfo > &inputs_info, const std::vector< XferDesPortInfo > &outputs_info, int priority, XferDesRedopInfo redop_info, const void *fill_data, size_t fill_size, size_t fill_total)
GPUreduceChannel(GPU *_gpu, BackgroundWorkManager *bgwork)
static const bool is_ordered
Definition hip_internal.h:798
virtual RemoteChannelInfo * construct_remote_info() const
GPU * gpu
Definition hip_internal.h:817
virtual bool supports_redop(ReductionOpID redop_id) const
Definition hip_internal.h:820
static Serialization::PolymorphicSerdezSubclass< RemoteChannelInfo, GPUreduceRemoteChannelInfo > serdez_subclass
Definition hip_internal.h:836
static RemoteChannelInfo * deserialize_new(S &deserializer)
virtual RemoteChannel * create_remote_channel()
GPUreduceRemoteChannelInfo(NodeID _owner, XferDesKind _kind, uintptr_t _remote_ptr, const std::vector< Channel::SupportedPath > &_paths)
bool serialize(S &serializer) const
Definition hip_internal.h:839
Definition hip_internal.h:771
bool progress_xd(GPUreduceChannel *channel, TimeLimit work_until)
const void * kernel_host_proxy
Definition hip_internal.h:788
long get_requests(Request **requests, long nr)
GPUreduceXferDes(uintptr_t _dma_op, Channel *_channel, NodeID _launch_node, XferDesID _guid, const std::vector< XferDesPortInfo > &inputs_info, const std::vector< XferDesPortInfo > &outputs_info, int _priority, XferDesRedopInfo _redop_info)
XferDesRedopInfo redop_info
Definition hip_internal.h:783
GPUStream * stream
Definition hip_internal.h:790
const ReductionOpUntyped * redop
Definition hip_internal.h:784
Definition hip_internal.h:531
HipDeviceMemoryInfo(int _device_id)
GPU * gpu
Definition hip_internal.h:536
int device_id
Definition hip_internal.h:535
Definition hip_module.h:142
Definition ib_memory.h:30
Definition indexspace.h:1115
Definition mem_impl.h:344
Definition proc_impl.h:141
MemoryKind
Definition mem_impl.h:53
size_t size
Definition mem_impl.h:195
AllocationResult
Definition mem_impl.h:89
Kind
Definition memory.h:59
Definition operation.h:75
Operation * op
Definition operation.h:87
Definition operation.h:32
Definition processor.h:37
::realm_task_func_id_t TaskFuncID
Definition processor.h:58
Definition inst_impl.h:54
Definition repl_heap.h:50
Definition runtime_impl.h:264
Definition serialize.h:363
Definition channel.h:1014
Channel * channel
Definition channel.h:343
#define REALM_INTERNAL_API_EXTERNAL_LINKAGE
Definition compiler_support.h:218
#define REALM_PMTA_USE(structtype, name)
Definition lists.h:42
GPUMemcpyKind
Definition hip_internal.h:84
@ GPU_MEMCPY_HOST_TO_DEVICE
Definition hip_internal.h:85
@ GPU_MEMCPY_PEER_TO_PEER
Definition hip_internal.h:88
@ GPU_MEMCPY_DEVICE_TO_HOST
Definition hip_internal.h:86
@ GPU_MEMCPY_DEVICE_TO_DEVICE
Definition hip_internal.h:87
HipModule * hip_module_singleton
Definition activemsg.h:38
int NodeID
Definition nodeset.h:40
XferDesKind
Definition channel.h:85
unsigned long long XferDesID
Definition channel.h:57
::realm_reduction_op_id_t ReductionOpID
Definition event.h:38
Definition hip_hijack.h:39
Definition hip_internal.h:69
size_t totalGlobalMem
Definition hip_internal.h:78
int major
Definition hip_internal.h:77
int index
Definition hip_internal.h:70
std::set< hipDevice_t > peers
Definition hip_internal.h:80
static const size_t MAX_NAME_LEN
Definition hip_internal.h:73
char name[MAX_NAME_LEN]
Definition hip_internal.h:75
int minor
Definition hip_internal.h:77
hipDevice_t device
Definition hip_internal.h:71
Definition hip_internal.h:502
CallConfig(dim3 _grid, dim3 _block, size_t _shared, hipStream_t _stream)
hipStream_t stream
Definition hip_internal.h:503
Definition hip_internal.h:515
Hip::StreamAwareTaskFuncPtr stream_aware_fnptr
Definition hip_internal.h:517
ByteArray user_data
Definition hip_internal.h:518
Processor::TaskFuncPtr fnptr
Definition hip_internal.h:516
Definition hip_internal.h:496
size_t shared
Definition hip_internal.h:499
dim3 grid
Definition hip_internal.h:497
dim3 block
Definition hip_internal.h:498
LaunchConfig(dim3 _grid, dim3 _block, size_t _shared)
Definition hip_internal.h:211
GPUCompletionNotification * notification
Definition hip_internal.h:215
GPUWorkFence * fence
Definition hip_internal.h:213
GPUWorkStart * start
Definition hip_internal.h:214
hipEvent_t event
Definition hip_internal.h:212
Definition hip_internal.h:397
NodeID owner
Definition hip_internal.h:398
uintptr_t local_base
Definition hip_internal.h:400
Memory mem
Definition hip_internal.h:399
uintptr_t address_offset
Definition hip_internal.h:401
Definition hip_internal.h:863
static void handle_message(NodeID sender, const HipIpcRelease &args, const void *data, size_t datalen)
Definition hip_internal.h:847
static void handle_message(NodeID sender, const HipIpcRequest &args, const void *data, size_t datalen)
Definition hip_internal.h:856
unsigned count
Definition hip_internal.h:857
static void handle_message(NodeID sender, const HipIpcResponse &args, const void *data, size_t datalen)
Definition hip_hijack.h:46
Definition hip_hijack.h:55
NodeID src
Definition ucp_internal.h:1