Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
Realm::Cuda::GPUreduceXferDes Class Reference
module nullptr

#include <cuda_internal.h>

Inheritance diagram for Realm::Cuda::GPUreduceXferDes:
Collaboration diagram for Realm::Cuda::GPUreduceXferDes:

Public Member Functions

 GPUreduceXferDes (uintptr_t _dma_op, Channel *_channel, NodeID _launch_node, XferDesID _guid, const std::vector< XferDesPortInfo > &inputs_info, const std::vector< XferDesPortInfo > &outputs_info, int _priority, XferDesRedopInfo _redop_info)
 
long get_requests (Request **requests, long nr)
 
bool progress_xd (GPUreduceChannel *channel, TimeLimit work_until)
 
bool fast_reduction_kernel_mode (GPUreduceChannel *channel, const size_t max_bytes, XferPort *in_port, XferPort *out_port, const size_t in_span_start, const size_t out_span_start)
 
void setup_redop_kernel (GPUreduceChannel *channel, void *params, const size_t in_span_start, const size_t out_span_start, const size_t in_elem_size, const size_t out_elem_size, const size_t elems, const bool has_transpose)
 
void record_redop_advanced_kernel (GPU *gpu)
 
KernelVariantDesc describe_kernel_variant (GPU *cpu, bool is_advanced)
 
bool resolve_kernel_slot (GPU *gpu, void *host_proxy, CUfunction &kernel_out, CUfunction GPU::GPUReductionOpEntry::*cache_field)
 
- Public Member Functions inherited from Realm::XferDes
 REALM_ALIGNED_TYPE_CONST (AlignedStorage, UnalignedStorage, 16)
 
 REALM_PMTA_DEFN (XferDes, IntrusivePriorityListLink< XferDes >, xd_link)
 
 REALM_PMTA_DEFN (XferDes, int, priority)
 
 XferDes (uintptr_t _dma_op, Channel *_channel, NodeID _launch_node, XferDesID _guid, const std::vector< XferDesPortInfo > &inputs_info, const std::vector< XferDesPortInfo > &outputs_info, int _priority, const void *_fill_data, size_t fill_size)
 
void add_reference (void)
 
void remove_reference (void)
 
void add_update_pre_bytes_total_received (void)
 
virtual Event request_metadata ()
 
virtual void notify_request_read_done (Request *req)
 
virtual void notify_request_write_done (Request *req)
 
virtual void flush ()
 
long default_get_requests (Request **requests, long nr, unsigned flags=0)
 
void default_notify_request_read_done (Request *req)
 
void default_notify_request_write_done (Request *req)
 
virtual void update_bytes_read (int port_idx, size_t offset, size_t size)
 
virtual void update_bytes_write (int port_idx, size_t offset, size_t size)
 
void update_pre_bytes_write (int port_idx, size_t offset, size_t size)
 
void update_pre_bytes_total (int port_idx, size_t pre_bytes_total)
 
void update_next_bytes_read (int port_idx, size_t offset, size_t size)
 
void begin_completion ()
 
void mark_completed ()
 
unsigned current_progress (void)
 
bool check_for_progress (unsigned last_counter)
 
void update_progress (void)
 
virtual bool request_available ()
 
virtual Request * dequeue_request ()
 
virtual void enqueue_request (Request *req)
 
size_t update_control_info (ReadSequenceCache *rseqcache)
 
size_t get_addresses (size_t min_xfer_size, ReadSequenceCache *rseqcache)
 
size_t get_addresses (size_t min_xfer_size, ReadSequenceCache *rseqcache, const InstanceLayoutPieceBase *&in_nonaffine, const InstanceLayoutPieceBase *&out_nonaffine)
 
bool record_address_consumption (size_t total_read_bytes, size_t total_write_bytes)
 
void replicate_fill_data (size_t new_size)
 

Protected Attributes

XferDesRedopInfo redop_info
 
const ReductionOpUntyped * redop
 
CUfunction kernel
 
CUfunction kernel_advanced
 
CUfunction kernel_transpose
 
const void * kernel_host_proxy
 
const void * kernel_host_proxy_advanced
 
const void * kernel_host_proxy_transpose
 
GPUStream * stream
 
std::vector< GPU * > src_gpus
 
std::vector< bool > src_is_ipc
 
- Protected Attributes inherited from Realm::XferDes
Mutex available_req_mutex
 
std::queue< Request * > available_reqs
 

Additional Inherited Members

- Public Types inherited from Realm::XferDes
enum  { XFERDES_NO_GUID = 0 }
 
typedef IntrusivePriorityList< XferDes, int, REALM_PMTA_USE(XferDes, xd_link), REALM_PMTA_USE(XferDes, priority), DummyLock > XferDesList
 
typedef SequenceCache<&XferDes::update_bytes_read > ReadSequenceCache
 
typedef SequenceCache<&XferDes::update_bytes_write > WriteSequenceCache
 
- Public Attributes inherited from Realm::XferDes
uintptr_t dma_op
 
XferDesQueue * xferDes_queue
 
NodeID launch_node
 
atomic< bool > iteration_completed
 
atomic< int64_t > bytes_write_pending
 
atomic< bool > transfer_completed
 
uint64_t current_in_port_mask
 
uint64_t current_out_port_mask
 
uint64_t current_in_port_remain
 
uint64_t current_out_port_remain
 
std::vector< XferPort > input_ports
 
std::vector< XferPort > output_ports
 
ControlPortState input_control
 
ControlPortState output_control
 
uint64_t max_req_size
 
int priority
 
XferDesID guid
 
XferDesKind kind
 
Channel * channel
 
void * fill_data
 
size_t fill_size
 
size_t orig_fill_size
 
AlignedStorage inline_fill_storage
 
Mutex xd_lock
 
Mutex update_read_lock
 
Mutex update_write_lock
 
atomic< unsigned > progress_counter
 
atomic< unsigned > reference_count
 
unsigned nb_update_pre_bytes_total_calls_expected
 
atomic< unsigned > nb_update_pre_bytes_total_calls_received
 
IntrusivePriorityListLink< XferDes > xd_link
 
DeferredXDEnqueue deferred_enqueue
 
- Static Public Attributes inherited from Realm::XferDes
static const size_t ALIGNED_FILL_STORAGE_SIZE = 32
 
- Protected Member Functions inherited from Realm::XferDes
virtual ~XferDes ()
 

Constructor & Destructor Documentation

◆ GPUreduceXferDes()

Realm::Cuda::GPUreduceXferDes::GPUreduceXferDes ( uintptr_t  _dma_op,
Channel * _channel,
NodeID  _launch_node,
XferDesID  _guid,
const std::vector< XferDesPortInfo > &  inputs_info,
const std::vector< XferDesPortInfo > &  outputs_info,
int  _priority,
XferDesRedopInfo  _redop_info 
)

Member Function Documentation

◆ describe_kernel_variant()

KernelVariantDesc Realm::Cuda::GPUreduceXferDes::describe_kernel_variant ( GPU * cpu,
bool  is_advanced 
)

◆ fast_reduction_kernel_mode()

bool Realm::Cuda::GPUreduceXferDes::fast_reduction_kernel_mode ( GPUreduceChannel * channel,
const size_t  max_bytes,
XferPort * in_port,
XferPort * out_port,
const size_t  in_span_start,
const size_t  out_span_start 
)

◆ get_requests()

long Realm::Cuda::GPUreduceXferDes::get_requests ( Request **  requests,
long  nr 
)
virtual

Implements Realm::XferDes.

◆ progress_xd()

bool Realm::Cuda::GPUreduceXferDes::progress_xd ( GPUreduceChannel * channel,
TimeLimit  work_until 
)

◆ record_redop_advanced_kernel()

void Realm::Cuda::GPUreduceXferDes::record_redop_advanced_kernel ( GPU * gpu)

◆ resolve_kernel_slot()

bool Realm::Cuda::GPUreduceXferDes::resolve_kernel_slot ( GPU * gpu,
void *  host_proxy,
CUfunction &  kernel_out,
CUfunction GPU::GPUReductionOpEntry::*  cache_field 
)

◆ setup_redop_kernel()

void Realm::Cuda::GPUreduceXferDes::setup_redop_kernel ( GPUreduceChannel * channel,
void *  params,
const size_t  in_span_start,
const size_t  out_span_start,
const size_t  in_elem_size,
const size_t  out_elem_size,
const size_t  elems,
const bool  has_transpose 
)

Member Data Documentation

◆ kernel

CUfunction Realm::Cuda::GPUreduceXferDes::kernel
protected

◆ kernel_advanced

CUfunction Realm::Cuda::GPUreduceXferDes::kernel_advanced
protected

◆ kernel_host_proxy

const void* Realm::Cuda::GPUreduceXferDes::kernel_host_proxy
protected

◆ kernel_host_proxy_advanced

const void* Realm::Cuda::GPUreduceXferDes::kernel_host_proxy_advanced
protected

◆ kernel_host_proxy_transpose

const void* Realm::Cuda::GPUreduceXferDes::kernel_host_proxy_transpose
protected

◆ kernel_transpose

CUfunction Realm::Cuda::GPUreduceXferDes::kernel_transpose
protected

◆ redop

const ReductionOpUntyped* Realm::Cuda::GPUreduceXferDes::redop
protected

◆ redop_info

XferDesRedopInfo Realm::Cuda::GPUreduceXferDes::redop_info
protected

◆ src_gpus

std::vector<GPU *> Realm::Cuda::GPUreduceXferDes::src_gpus
protected

◆ src_is_ipc

std::vector<bool> Realm::Cuda::GPUreduceXferDes::src_is_ipc
protected

◆ stream

GPUStream* Realm::Cuda::GPUreduceXferDes::stream
protected

The documentation for this class was generated from the following file: