45 void apply(LHS &lhs, RHS rhs)
const;
48 static const RHS identity;
49 void fold(RHS &rhs1, RHS rhs2)
const;
53 template <
typename REDOP>
66 size_t rhs_stride,
size_t count,
const void *
userdata);
68 size_t rhs_stride,
size_t count,
const void *
userdata);
70 size_t rhs2_stride,
size_t count,
const void *
userdata);
72 size_t rhs2_stride,
size_t count,
const void *
userdata);
78 void *cuda_apply_excl_fn, *cuda_apply_nonexcl_fn;
79 void *cuda_fold_excl_fn, *cuda_fold_nonexcl_fn;
80 void *cuda_apply_excl_fn_advanced, *cuda_apply_nonexcl_fn_advanced;
81 void *cuda_fold_excl_fn_advanced, *cuda_fold_nonexcl_fn_advanced;
82 void *cuda_apply_excl_fn_transpose, *cuda_apply_nonexcl_fn_transpose;
83 void *cuda_fold_excl_fn_transpose, *cuda_fold_nonexcl_fn_transpose;
88 void *cudaLaunchKernel_fn;
91 void *cudaGetFuncBySymbol_fn;
97 void *hip_apply_excl_fn, *hip_apply_nonexcl_fn;
98 void *hip_fold_excl_fn, *hip_fold_nonexcl_fn;
113 , cuda_apply_excl_fn(0)
114 , cuda_apply_nonexcl_fn(0)
115 , cuda_fold_excl_fn(0)
116 , cuda_fold_nonexcl_fn(0)
117 , cuda_apply_excl_fn_advanced(0)
118 , cuda_apply_nonexcl_fn_advanced(0)
119 , cuda_fold_excl_fn_advanced(0)
120 , cuda_fold_nonexcl_fn_advanced(0)
121 , cuda_apply_excl_fn_transpose(0)
122 , cuda_apply_nonexcl_fn_transpose(0)
123 , cuda_fold_excl_fn_transpose(0)
124 , cuda_fold_nonexcl_fn_transpose(0)
125 , cudaLaunchKernel_fn(0)
126 , cudaGetFuncBySymbol_fn(0)
129 , hip_apply_excl_fn(0)
130 , hip_apply_nonexcl_fn(0)
131 , hip_fold_excl_fn(0)
132 , hip_fold_nonexcl_fn(0)
136 template <
class REDOP>
147 static_assert(std::is_trivially_copyable<ReductionOp<REDOP>>::value &&
148 std::is_trivially_destructible<ReductionOp<REDOP>>::value,
149 "ReductionOp<REDOP> must be trivially copyable/destructible");
162 namespace ReductionKernels {
163 template <
typename REDOP,
bool EXCL>
165 size_t rhs_stride,
size_t count,
const void *userdata)
167 const REDOP *redop =
static_cast<const REDOP *
>(userdata);
168 for(
size_t i = 0; i < count; i++) {
169 redop->template apply<EXCL>(*
static_cast<typename REDOP::LHS *
>(lhs_ptr),
170 *
static_cast<const typename REDOP::RHS *
>(rhs_ptr));
171 lhs_ptr =
static_cast<char *
>(lhs_ptr) + lhs_stride;
172 rhs_ptr =
static_cast<const char *
>(rhs_ptr) + rhs_stride;
176 template <
typename REDOP,
bool EXCL>
178 size_t rhs2_stride,
size_t count,
const void *userdata)
180 const REDOP *redop =
static_cast<const REDOP *
>(userdata);
181 for(
size_t i = 0; i < count; i++) {
182 redop->template fold<EXCL>(*
static_cast<typename REDOP::RHS *
>(rhs1_ptr),
183 *
static_cast<const typename REDOP::RHS *
>(rhs2_ptr));
184 rhs1_ptr =
static_cast<char *
>(rhs1_ptr) + rhs1_stride;
185 rhs2_ptr =
static_cast<const char *
>(rhs2_ptr) + rhs2_stride;
190#if defined(REALM_USE_CUDA) && defined(__CUDACC__)
194 template <
typename T>
195 struct HasHasCudaReductions {
202 struct AltnerativeDefinition {
203 static const bool has_cuda_reductions =
false;
205 template <
typename T2>
206 struct Combined :
public T2,
public AltnerativeDefinition {};
207 template <
typename T2, T2>
208 struct CheckAmbiguous {};
209 template <
typename T2>
211 has_member(CheckAmbiguous<
const bool *, &Combined<T2>::has_cuda_reductions> *);
212 template <
typename T2>
213 static YES has_member(...);
214 const static bool value =
sizeof(has_member<T>(0)) ==
sizeof(YES);
217 template <
typename T,
bool OK>
218 struct MaybeAddCudaReductions;
219 template <
typename T>
220 struct MaybeAddCudaReductions<T, false> {
221 static void if_member_exists(ReductionOpUntyped *redop){};
222 static void if_member_is_true(ReductionOpUntyped *redop){};
224 template <
typename T>
225 struct MaybeAddCudaReductions<T, true> {
226 static void if_member_exists(ReductionOpUntyped *redop)
228 MaybeAddCudaReductions<T, T::has_cuda_reductions>::if_member_is_true(redop);
230 static void if_member_is_true(ReductionOpUntyped *redop)
232 Cuda::add_cuda_redop_kernels<T>(redop);
237#if defined(REALM_USE_HIP) && (defined(__CUDACC__) || defined(__HIPCC__))
241 template <
typename T>
242 struct HasHasHipReductions {
249 struct AltnerativeDefinition {
250 static const bool has_hip_reductions =
false;
252 template <
typename T2>
253 struct Combined :
public T2,
public AltnerativeDefinition {};
254 template <
typename T2, T2>
255 struct CheckAmbiguous {};
256 template <
typename T2>
258 has_member(CheckAmbiguous<
const bool *, &Combined<T2>::has_hip_reductions> *);
259 template <
typename T2>
260 static YES has_member(...);
261 const static bool value =
sizeof(has_member<T>(0)) ==
sizeof(YES);
264 template <
typename T,
bool OK>
265 struct MaybeAddHipReductions;
266 template <
typename T>
267 struct MaybeAddHipReductions<T, false> {
268 static void if_member_exists(ReductionOpUntyped *redop){};
269 static void if_member_is_true(ReductionOpUntyped *redop){};
271 template <
typename T>
272 struct MaybeAddHipReductions<T, true> {
273 static void if_member_exists(ReductionOpUntyped *redop)
275 MaybeAddHipReductions<T, T::has_hip_reductions>::if_member_is_true(redop);
277 static void if_member_is_true(ReductionOpUntyped *redop)
279 Hip::add_hip_redop_kernels<T>(redop);
284 template <
typename REDOP>
304#if defined(REALM_USE_CUDA) && defined(__CUDACC__)
307 MaybeAddCudaReductions<REDOP, HasHasCudaReductions<REDOP>::value>::if_member_exists(
310#if defined(REALM_USE_HIP) && (defined(__CUDACC__) || defined(__HIPCC__))
313 MaybeAddHipReductions<REDOP, HasHasHipReductions<REDOP>::value>::if_member_exists(
Realm::ReductionOp< REDOP > ReductionOp
Definition prealm.h:88
void cpu_apply_wrapper(void *lhs_ptr, size_t lhs_stride, const void *rhs_ptr, size_t rhs_stride, size_t count, const void *userdata)
Definition redop.h:164
void cpu_fold_wrapper(void *rhs1_ptr, size_t rhs1_stride, const void *rhs2_ptr, size_t rhs2_stride, size_t count, const void *userdata)
Definition redop.h:177
Definition activemsg.h:38
size_t sizeof_lhs
Definition redop.h:58
static ReductionOpUntyped * create_reduction_op(void)
Definition redop.h:137
void(* cpu_apply_nonexcl_fn)(void *lhs_ptr, size_t lhs_stride, const void *rhs_ptr, size_t rhs_stride, size_t count, const void *userdata)
Definition redop.h:67
size_t sizeof_this
Definition redop.h:57
void(* cpu_fold_nonexcl_fn)(void *rhs1_ptr, size_t rhs1_stride, const void *rhs2_ptr, size_t rhs2_stride, size_t count, const void *userdata)
Definition redop.h:71
ReductionOpUntyped()
Definition redop.h:101
static ReductionOpUntyped * clone_reduction_op(const ReductionOpUntyped *redop)
void * identity
Definition redop.h:61
void * userdata
Definition redop.h:62
size_t sizeof_rhs
Definition redop.h:59
size_t sizeof_userdata
Definition redop.h:60
void(* cpu_fold_excl_fn)(void *rhs1_ptr, size_t rhs1_stride, const void *rhs2_ptr, size_t rhs2_stride, size_t count, const void *userdata)
Definition redop.h:69
void(* cpu_apply_excl_fn)(void *lhs_ptr, size_t lhs_stride, const void *rhs_ptr, size_t rhs_stride, size_t count, const void *userdata)
Definition redop.h:65
ReductionOp()
Definition redop.h:290
REDOP userdata_val
Definition redop.h:288
REDOP::RHS identity_val
Definition redop.h:287