Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
cuda_memcpy.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 Stanford University, NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef CUDA_MEMCPY_H
19#define CUDA_MEMCPY_H
20
21#include <cstddef>
22#include <cstdint>
23#include <vector>
24#include "realm/point.h"
25
// NOTE(review): neither macro is referenced anywhere else in this listing; the
// descriptions below are inferred from the names only - confirm at the use sites.
// Presumably the largest field size, in bytes, handled by the CUDA copy path.
26#define CUDA_MAX_FIELD_BYTES 64
// Presumably an upper bound on the number of blocks in a kernel launch grid.
27#define CUDA_MAX_BLOCKS_PER_GRID 2048
28
29namespace Realm {
30 namespace Cuda {
31
// Base address plus N-1 strides of an N-dimensional array; the struct is
// 8-byte aligned. Holds no extents of its own.
// NOTE(review): the stride units (bytes vs. elements) and which dimension has
// no stored stride are not visible in this header - confirm against the users.
// NOTE(review): for N == 1 `strides` is a zero-length array, which ISO C++ does
// not allow (accepted by common compilers as an extension).
32 template <size_t N, typename Offset_t = size_t>
33 struct alignas(8) AffineSubRect {
34 // Strides of the ND array (N - 1 entries)
35 Offset_t strides[N - 1];
36 // Address of the ND array
37 uintptr_t addr;
38 };
39
// A source/destination pair of affine sub-rects that share a single set of
// extents and a single volume. The struct takes its alignment from
// AffineSubRect<N, Offset_t>.
40 template <size_t N, typename Offset_t = size_t>
41 struct alignas(AffineSubRect<N, Offset_t>) AffineCopyPair {
// Source and destination sub-rects. Listing lines 42-43 were dropped from the
// extracted listing and are restored here from this page's member index.
42 AffineSubRect<N, Offset_t> src;
43 AffineSubRect<N, Offset_t> dst;
44 // Extent of the ND sub-rect
45 Offset_t extents[N];
46 // Product of the extents for fast lookup, which is the same across
47 // the pair
48 Offset_t volume;
49 };
50
// One affine N-dimensional rectangle to fill: strides, volume, extents and
// base address packed into a single 8-byte-aligned struct.
// NOTE(review): for N == 1 `strides` is a zero-length array, which ISO C++ does
// not allow (accepted by common compilers as an extension).
51 template <size_t N, typename Offset_t = size_t>
52 struct alignas(8) AffineFillRect {
// Strides of the ND array (N - 1 entries)
53 Offset_t strides[N - 1];
54 // Product of the extents for fast lookup
55 //
56 Offset_t volume;
57 // Extent of the ND sub-rect
58 Offset_t extents[N];
// Address of the ND array
59 uintptr_t addr;
60 };
61
// Size budget (4 * 1024) that the default MAX_RECTS template arguments in this
// header are computed from.
// NOTE(review): going by the name this is the size in bytes of the constant
// bank that holds kernel parameters - confirm.
62 static const size_t MAX_CUDA_PARAM_CONSTBANK_SIZE = 4 * 1024;
63
// NOTE(review): this definition is incomplete in the extracted listing.
// Listing lines 66-67 (the rest of the MAX_RECTS default and the struct header,
// including the struct's name) and line 75 are missing. This page's member
// index lists `AffineFillRect< N, Offset_t > subrects[MAX_RECTS]` at line 75.
// NOTE(review): the 20 subtracted from the budget presumably reserves room for
// the non-array members - confirm against the full header.
64 template <size_t N, typename Offset_t = size_t,
65 size_t MAX_RECTS = (MAX_CUDA_PARAM_CONSTBANK_SIZE - 20) /
68 enum
69 {
// Re-exports the template arguments as compile-time constants
70 MAX_NUM_RECTS = MAX_RECTS,
71 DIM = N
72 };
// 16-byte, 16-byte-aligned buffer for the fill value
73 alignas(16) unsigned char fill_value[16];
// Rect count; presumably the number of subrects entries in use - confirm
74 unsigned short num_rects;
76 };
77
78 // Fills the ND rectangle starting at addr with the contents of the first element
79 // whose size is given by fill_elem_size. (This assumes the fill data is already
80 // copied to the first element)
// NOTE(review): this definition is incomplete in the extracted listing.
// Listing line 82 (the struct header, including the struct's name) and line 90
// are missing. This page's member index lists `Offset_t fill_elem_size` at
// line 90.
81 template <size_t N, typename Offset_t = size_t>
83 enum
84 {
// Re-exports the template argument as a compile-time constant
85 DIM = N
86 };
// Extents of the rectangle, one per dimension
87 Offset_t extents[DIM];
// Strides of the rectangle (DIM - 1 entries)
88 Offset_t strides[DIM - 1];
// NOTE(review): presumably the product of the extents, as in the other
// structs of this header - confirm
89 Offset_t volume;
// Start address of the rectangle (see the comment at the top of this struct)
91 uintptr_t addr;
92 };
93
// Batch of affine copy pairs; the struct takes its alignment from
// AffineCopyPair<N, Offset_t>.
// NOTE(review): this definition is incomplete in the extracted listing.
// Listing line 96 (the rest of the MAX_RECTS default) and line 104 are missing.
// This page's member index lists
// `AffineCopyPair< N, Offset_t > subrects[MAX_RECTS]` at line 104.
// NOTE(review): the 2 subtracted from the budget presumably reserves room for
// num_rects - confirm against the full header.
94 template <size_t N, typename Offset_t = size_t,
95 size_t MAX_RECTS = (MAX_CUDA_PARAM_CONSTBANK_SIZE - 2) /
97 struct alignas(AffineCopyPair<N, Offset_t>) AffineCopyInfo {
98 enum
99 {
// Re-exports the template arguments as compile-time constants
100 MAX_NUM_RECTS = MAX_RECTS,
101 DIM = N
102 };
103
// Rect count; presumably the number of subrects entries in use - confirm
105 unsigned short num_rects;
106 };
107
// NOTE(review): this definition is incomplete in the extracted listing.
// Listing line 109 (the struct header, including the struct's name) is missing.
// Visible members: three extents, two strides each for source and destination,
// a tile size, and destination/source addresses.
// NOTE(review): the units of the strides and of tile_size are not visible in
// this header - confirm against the users.
108 template <typename Offset_t>
110 Offset_t extents[3];
111 Offset_t src_strides[2];
112 Offset_t dst_strides[2];
113 Offset_t tile_size;
114 uintptr_t dst;
115 uintptr_t src;
116 };
117
// NOTE(review): this definition is incomplete in the extracted listing.
// Listing line 119 (the struct header, including the struct's name) is missing.
// Visible members: a volume, a field size, N strides each for source and
// destination, and four addresses.
// NOTE(review): src_ind_addr / dst_ind_addr are presumably the addresses of
// indirection data for the source and destination - confirm against the users.
118 template <size_t N, typename Offset_t = size_t>
120 Offset_t volume;
121 Offset_t field_size;
122 Offset_t src_strides[N];
123 Offset_t dst_strides[N];
124 uintptr_t src_ind_addr;
125 uintptr_t dst_ind_addr;
126 uintptr_t src_addr;
127 uintptr_t dst_addr;
128 };
129
// Evaluates to the smaller of REALM_MAX_DIM and 3.
130 static const size_t CUDA_MAX_DIM = REALM_MAX_DIM < 3 ? REALM_MAX_DIM : 3;
131 } // namespace Cuda
132} // namespace Realm
133
134#endif // CUDA_MEMCPY_H
Definition activemsg.h:38
#define REALM_MAX_DIM
Definition realm_config.h:34
Definition cuda_memcpy.h:97
unsigned short num_rects
Definition cuda_memcpy.h:105
@ MAX_NUM_RECTS
Definition cuda_memcpy.h:100
@ DIM
Definition cuda_memcpy.h:101
AffineCopyPair< N, Offset_t > subrects[MAX_RECTS]
Definition cuda_memcpy.h:104
Definition cuda_memcpy.h:41
Offset_t volume
Definition cuda_memcpy.h:48
Offset_t extents[N]
Definition cuda_memcpy.h:45
AffineSubRect< N, Offset_t > dst
Definition cuda_memcpy.h:43
AffineSubRect< N, Offset_t > src
Definition cuda_memcpy.h:42
Definition cuda_memcpy.h:67
unsigned char fill_value[16]
Definition cuda_memcpy.h:73
unsigned short num_rects
Definition cuda_memcpy.h:74
AffineFillRect< N, Offset_t > subrects[MAX_RECTS]
Definition cuda_memcpy.h:75
@ DIM
Definition cuda_memcpy.h:71
@ MAX_NUM_RECTS
Definition cuda_memcpy.h:70
Definition cuda_memcpy.h:52
Offset_t strides[N - 1]
Definition cuda_memcpy.h:53
uintptr_t addr
Definition cuda_memcpy.h:59
Offset_t volume
Definition cuda_memcpy.h:56
Offset_t extents[N]
Definition cuda_memcpy.h:58
Definition cuda_memcpy.h:82
@ DIM
Definition cuda_memcpy.h:85
Offset_t strides[DIM - 1]
Definition cuda_memcpy.h:88
Offset_t volume
Definition cuda_memcpy.h:89
Offset_t fill_elem_size
Definition cuda_memcpy.h:90
Offset_t extents[DIM]
Definition cuda_memcpy.h:87
uintptr_t addr
Definition cuda_memcpy.h:91
Definition cuda_memcpy.h:33
Offset_t strides[N - 1]
Definition cuda_memcpy.h:35
uintptr_t addr
Definition cuda_memcpy.h:37
Definition cuda_memcpy.h:119
Offset_t dst_strides[N]
Definition cuda_memcpy.h:123
Offset_t field_size
Definition cuda_memcpy.h:121
uintptr_t src_ind_addr
Definition cuda_memcpy.h:124
uintptr_t dst_ind_addr
Definition cuda_memcpy.h:125
Offset_t src_strides[N]
Definition cuda_memcpy.h:122
uintptr_t dst_addr
Definition cuda_memcpy.h:127
Offset_t volume
Definition cuda_memcpy.h:120
uintptr_t src_addr
Definition cuda_memcpy.h:126
Definition cuda_memcpy.h:109
Offset_t dst_strides[2]
Definition cuda_memcpy.h:112
Offset_t tile_size
Definition cuda_memcpy.h:113
uintptr_t dst
Definition cuda_memcpy.h:114
uintptr_t src
Definition cuda_memcpy.h:115
Offset_t src_strides[2]
Definition cuda_memcpy.h:111
Offset_t extents[3]
Definition cuda_memcpy.h:110