Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
cuda_memcpy.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 Stanford University, NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef CUDA_MEMCPY_H
19#define CUDA_MEMCPY_H
20
21#include <cstddef>
22#include <cstdint>
23#include <vector>
24#include "realm/point.h"
25
26#define CUDA_MAX_FIELD_BYTES 64
27#define CUDA_MAX_BLOCKS_PER_GRID 2048
28
29namespace Realm {
30 namespace Cuda {
31
32 template <size_t N, typename Offset_t = size_t>
33 struct alignas(8) AffineSubRect {
34 using FieldID = int;
35 // Extent of the ND array
36 Offset_t strides[N - 1];
37 // Address of the ND array
38 uintptr_t addr;
39
41 size_t num_fields;
42 Offset_t field_stride;
43 };
44
45 template <size_t N, typename Offset_t = size_t>
46 struct alignas(AffineSubRect<N, Offset_t>) AffineCopyPair {
49 // Extent of the ND sub-rect
50 Offset_t extents[N];
51 // Product of the extents for fast lookup, which is the same across
52 // the pair
53 Offset_t volume;
54 };
55
56 template <size_t N, typename Offset_t = size_t>
57 struct alignas(8) AffineFillRect { // Describes one N-dimensional rectangle for a fill; 8-byte aligned
58 Offset_t strides[N - 1]; // N - 1 strides (presumably for the non-innermost dimensions; units not shown here -- TODO confirm)
59 // Product of the extents, cached for fast lookup (this struct describes a
60 // single rectangle, not a source/destination pair)
61 Offset_t volume;
62 // Extent of the ND sub-rect
63 Offset_t extents[N];
64 uintptr_t addr; // presumably the base address of the rectangle to fill -- TODO confirm against the fill kernel
65 };
66
67 static const size_t MAX_CUDA_PARAM_CONSTBANK_SIZE = 4 * 1024; // 4 KiB; the default MAX_RECTS template arguments below are derived from this size
68
69 template <size_t N, typename Offset_t = size_t,
70 size_t MAX_RECTS = (MAX_CUDA_PARAM_CONSTBANK_SIZE - 20) /
73 enum
74 {
75 MAX_NUM_RECTS = MAX_RECTS,
76 DIM = N
77 };
78 alignas(16) unsigned char fill_value[16];
79 unsigned short num_rects;
81 };
82
83 // Fills the ND rectangle starting at addr with the contents of the first element,
84 // whose size is given by fill_elem_size. (This assumes the fill data has already
85 // been copied to the first element.)
86 template <size_t N, typename Offset_t = size_t>
88 enum
89 {
90 DIM = N
91 };
92 Offset_t extents[DIM];
93 Offset_t strides[DIM - 1];
94 Offset_t volume;
96 uintptr_t addr;
97 };
98
99 template <size_t N, typename Offset_t = size_t,
100 size_t MAX_RECTS = (MAX_CUDA_PARAM_CONSTBANK_SIZE - 2) /
102 struct alignas(AffineCopyPair<N, Offset_t>) AffineCopyInfo {
103 enum
104 {
105 MAX_NUM_RECTS = MAX_RECTS,
106 DIM = N
107 };
108
110 unsigned short num_rects;
111 };
112
113 template <typename Offset_t>
115 Offset_t extents[3];
116 Offset_t src_strides[2];
117 Offset_t dst_strides[2];
118 Offset_t tile_size;
119 uintptr_t dst;
120 uintptr_t src;
121 };
122
123 template <size_t N, typename Offset_t = size_t>
125 Offset_t volume;
126 Offset_t field_size;
127 Offset_t src_strides[N];
128 Offset_t dst_strides[N];
129 uintptr_t src_ind_addr;
130 uintptr_t dst_ind_addr;
131 uintptr_t src_addr;
132 uintptr_t dst_addr;
133 };
134
135 static const size_t CUDA_MAX_DIM = REALM_MAX_DIM < 3 ? REALM_MAX_DIM : 3; // REALM_MAX_DIM capped at 3, i.e. min(REALM_MAX_DIM, 3)
136 } // namespace Cuda
137} // namespace Realm
138
139#endif // CUDA_MEMCPY_H
Definition activemsg.h:38
#define REALM_MAX_DIM
Definition realm_config.h:34
Definition cuda_memcpy.h:102
unsigned short num_rects
Definition cuda_memcpy.h:110
@ MAX_NUM_RECTS
Definition cuda_memcpy.h:105
@ DIM
Definition cuda_memcpy.h:106
AffineCopyPair< N, Offset_t > subrects[MAX_RECTS]
Definition cuda_memcpy.h:109
Definition cuda_memcpy.h:46
Offset_t volume
Definition cuda_memcpy.h:53
Offset_t extents[N]
Definition cuda_memcpy.h:50
AffineSubRect< N, Offset_t > dst
Definition cuda_memcpy.h:48
AffineSubRect< N, Offset_t > src
Definition cuda_memcpy.h:47
Definition cuda_memcpy.h:72
unsigned char fill_value[16]
Definition cuda_memcpy.h:78
unsigned short num_rects
Definition cuda_memcpy.h:79
AffineFillRect< N, Offset_t > subrects[MAX_RECTS]
Definition cuda_memcpy.h:80
@ DIM
Definition cuda_memcpy.h:76
@ MAX_NUM_RECTS
Definition cuda_memcpy.h:75
Definition cuda_memcpy.h:57
Offset_t strides[N - 1]
Definition cuda_memcpy.h:58
uintptr_t addr
Definition cuda_memcpy.h:64
Offset_t volume
Definition cuda_memcpy.h:61
Offset_t extents[N]
Definition cuda_memcpy.h:63
Definition cuda_memcpy.h:87
@ DIM
Definition cuda_memcpy.h:90
Offset_t strides[DIM - 1]
Definition cuda_memcpy.h:93
Offset_t volume
Definition cuda_memcpy.h:94
Offset_t fill_elem_size
Definition cuda_memcpy.h:95
Offset_t extents[DIM]
Definition cuda_memcpy.h:92
uintptr_t addr
Definition cuda_memcpy.h:96
Definition cuda_memcpy.h:33
Offset_t field_stride
Definition cuda_memcpy.h:42
Offset_t strides[N - 1]
Definition cuda_memcpy.h:36
const FieldID * fields
Definition cuda_memcpy.h:40
size_t num_fields
Definition cuda_memcpy.h:41
int FieldID
Definition cuda_memcpy.h:34
uintptr_t addr
Definition cuda_memcpy.h:38
Definition cuda_memcpy.h:124
Offset_t dst_strides[N]
Definition cuda_memcpy.h:128
Offset_t field_size
Definition cuda_memcpy.h:126
uintptr_t src_ind_addr
Definition cuda_memcpy.h:129
uintptr_t dst_ind_addr
Definition cuda_memcpy.h:130
Offset_t src_strides[N]
Definition cuda_memcpy.h:127
uintptr_t dst_addr
Definition cuda_memcpy.h:132
Offset_t volume
Definition cuda_memcpy.h:125
uintptr_t src_addr
Definition cuda_memcpy.h:131
Definition cuda_memcpy.h:114
Offset_t dst_strides[2]
Definition cuda_memcpy.h:117
Offset_t tile_size
Definition cuda_memcpy.h:118
uintptr_t dst
Definition cuda_memcpy.h:119
uintptr_t src
Definition cuda_memcpy.h:120
Offset_t src_strides[2]
Definition cuda_memcpy.h:116
Offset_t extents[3]
Definition cuda_memcpy.h:115