Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
cuda_access.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 Stanford University, NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18// CUDA-specific instance layouts and accessors
19
20#ifndef REALM_CUDA_ACCESS_H
21#define REALM_CUDA_ACCESS_H
22
23#include "realm/inst_layout.h"
24
25// CUDA driver/runtime opaque structs for arrays (convertible with an explicit cast)
26struct CUarray_st;
27struct cudaArray;
28
29namespace Realm {
30
31 namespace PieceLayoutTypes {
32 static const LayoutType CudaArrayLayoutType = 3; // NOTE(review): presumably the layout-type tag for CudaArrayLayoutPiece<N,T> - confirm against inst_layout.h
33 };
34
35 // CUDA arrays are fundamentally limited to 3D
37 int offset[3];
38 };
39
40 template <int N, typename T>
42 public CudaArrayPieceInfo {
43 public:
45
46 template <typename S>
48
49 virtual InstanceLayoutPiece<N, T> *clone(void) const;
50
51 virtual size_t calculate_offset(const Point<N, T> &p) const;
52
53 virtual void relocate(size_t base_offset);
54
55 virtual void print(std::ostream &os) const;
56
57 virtual size_t lookup_inst_size() const;
59 unsigned next_delta) const;
60
64
65 template <typename S>
66 bool serialize(S &serializer) const;
67 };
68
69 namespace PieceLookup {
70
71 namespace Opcodes {
72 static const Opcode OP_CUDA_ARRAY_PIECE = 4; // this is a CudaArrayPiece<N,T>
73 }
74
75 static const unsigned ALLOW_CUDA_ARRAY_PIECE = 1U << Opcodes::OP_CUDA_ARRAY_PIECE; // single-bit mask: bit 4 (0x10)
76
77 template <int N, typename T>
78 struct CudaArrayPiece : public Instruction {
79 // data is: { delta[23:0], opcode[7:0] }
80 // the top 24 bits of data are the jump delta
81 CudaArrayPiece(unsigned next_delta);
82
84 unsigned delta() const; // NOTE(review): the member index on this page lists this as REALM_CUDA_HD; listing line 83 is absent here
85
86 CUarray_st *array; // opaque CUDA driver array handle (forward-declared at the top of this header)
87 int offset[3];
88
90 const Instruction *next() const; // NOTE(review): the member index on this page lists this as REALM_CUDA_HD; listing line 89 is absent here
91 };
92
93 }; // namespace PieceLookup
94
96 public:
97 ExternalCudaMemoryResource(int _cuda_device_id, uintptr_t _base,
98 size_t _size_in_bytes, bool _read_only);
99 ExternalCudaMemoryResource(int _cuda_device_id, void *_base, size_t _size_in_bytes);
100 ExternalCudaMemoryResource(int _cuda_device_id, const void *_base,
101 size_t _size_in_bytes);
102
103 virtual bool satisfies(const InstanceLayoutGeneric &layout) const;
104
105 // returns the suggested memory in which this resource should be created
107
108 virtual ExternalInstanceResource *clone(void) const;
109
110 template <typename S>
111 bool serialize(S &serializer) const;
112
113 template <typename S>
115
116 protected:
118
122
123 virtual void print(std::ostream &os) const;
124
125 public:
127 uintptr_t base;
130 };
131
133 public:
134 ExternalCudaArrayResource(int _cuda_device_id, CUarray_st *_array);
135 ExternalCudaArrayResource(int _cuda_device_id, cudaArray *_array);
136
137 virtual bool satisfies(const InstanceLayoutGeneric &layout) const;
138
139 // returns the suggested memory in which this resource should be created
141
142 virtual ExternalInstanceResource *clone(void) const;
143
144 template <typename S>
145 bool serialize(S &serializer) const;
146
147 template <typename S>
149
150 protected:
152
156
157 virtual void print(std::ostream &os) const;
158
159 public:
161 CUarray_st *array;
162 };
163
165 public:
166 ExternalCudaPinnedHostResource(uintptr_t _base, size_t _size_in_bytes,
167 bool _read_only);
168 ExternalCudaPinnedHostResource(void *_base, size_t _size_in_bytes);
169 ExternalCudaPinnedHostResource(const void *_base, size_t _size_in_bytes);
170
171 // returns the suggested memory in which this resource should be created
173
174 virtual ExternalInstanceResource *clone(void) const;
175
176 template <typename S>
177 bool serialize(S &serializer) const;
178
179 template <typename S>
181
182 protected:
184
188
189 virtual void print(std::ostream &os) const;
190 };
191
192}; // namespace Realm
193
194#include "realm/cuda/cuda_access.inl"
195
196#endif // ifndef REALM_CUDA_ACCESS_H
Definition cuda_access.h:42
virtual PieceLookup::Instruction * create_lookup_inst(void *ptr, unsigned next_delta) const
virtual size_t calculate_offset(const Point< N, T > &p) const
static Serialization::PolymorphicSerdezSubclass< InstanceLayoutPiece< N, T >, CudaArrayLayoutPiece< N, T > > serdez_subclass
Definition cuda_access.h:63
bool serialize(S &serializer) const
virtual void relocate(size_t base_offset)
virtual size_t lookup_inst_size() const
static InstanceLayoutPiece< N, T > * deserialize_new(S &deserializer)
virtual void print(std::ostream &os) const
virtual InstanceLayoutPiece< N, T > * clone(void) const
Definition cuda_access.h:132
bool serialize(S &serializer) const
virtual ExternalInstanceResource * clone(void) const
ExternalCudaArrayResource(int _cuda_device_id, cudaArray *_array)
ExternalCudaArrayResource(int _cuda_device_id, CUarray_st *_array)
virtual bool satisfies(const InstanceLayoutGeneric &layout) const
static Serialization::PolymorphicSerdezSubclass< ExternalInstanceResource, ExternalCudaArrayResource > serdez_subclass
Definition cuda_access.h:155
CUarray_st * array
Definition cuda_access.h:161
int cuda_device_id
Definition cuda_access.h:160
virtual void print(std::ostream &os) const
static ExternalInstanceResource * deserialize_new(S &deserializer)
Definition cuda_access.h:95
uintptr_t base
Definition cuda_access.h:127
static ExternalInstanceResource * deserialize_new(S &deserializer)
size_t size_in_bytes
Definition cuda_access.h:128
bool read_only
Definition cuda_access.h:129
int cuda_device_id
Definition cuda_access.h:126
bool serialize(S &serializer) const
ExternalCudaMemoryResource(int _cuda_device_id, void *_base, size_t _size_in_bytes)
virtual void print(std::ostream &os) const
ExternalCudaMemoryResource(int _cuda_device_id, const void *_base, size_t _size_in_bytes)
virtual ExternalInstanceResource * clone(void) const
virtual bool satisfies(const InstanceLayoutGeneric &layout) const
static Serialization::PolymorphicSerdezSubclass< ExternalInstanceResource, ExternalCudaMemoryResource > serdez_subclass
Definition cuda_access.h:121
ExternalCudaMemoryResource(int _cuda_device_id, uintptr_t _base, size_t _size_in_bytes, bool _read_only)
Definition cuda_access.h:164
virtual ExternalInstanceResource * clone(void) const
ExternalCudaPinnedHostResource(const void *_base, size_t _size_in_bytes)
ExternalCudaPinnedHostResource(uintptr_t _base, size_t _size_in_bytes, bool _read_only)
virtual void print(std::ostream &os) const
static Serialization::PolymorphicSerdezSubclass< ExternalInstanceResource, ExternalCudaPinnedHostResource > serdez_subclass
Definition cuda_access.h:187
ExternalCudaPinnedHostResource(void *_base, size_t _size_in_bytes)
bool serialize(S &serializer) const
static ExternalInstanceResource * deserialize_new(S &deserializer)
Definition instance.h:405
Definition instance.h:443
Definition inst_layout.h:164
Definition inst_layout.h:289
Definition memory.h:33
#define REALM_CUDA_HD
Definition compiler_support.h:95
#define REALM_PUBLIC_API
Definition compiler_support.h:217
unsigned char LayoutType
Definition inst_layout.h:256
unsigned char Opcode
Definition inst_layout.h:121
Definition activemsg.h:38
Definition cuda_access.h:36
int offset[3]
Definition cuda_access.h:37
Definition cuda_access.h:78
REALM_CUDA_HD unsigned delta() const
REALM_CUDA_HD const Instruction * next() const
CUarray_st * array
Definition cuda_access.h:86
CudaArrayPiece(unsigned next_delta)
int offset[3]
Definition cuda_access.h:87
Definition inst_layout.h:131
Definition point.h:55