18#ifndef REALM_HIP_REDOP_H
19#define REALM_HIP_REDOP_H
23#if defined(__CUDACC__) || defined(__HIPCC__)
25#include <hip/hip_runtime.h>
33#if defined(__CUDACC__) || defined(__HIPCC__)
36 namespace ReductionKernels {
37 template <
typename REDOP,
bool EXCL>
38 __global__
void apply_hip_kernel(uintptr_t lhs_base, uintptr_t lhs_stride,
39 uintptr_t rhs_base, uintptr_t rhs_stride,
40 size_t count, REDOP redop)
42 size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
43 for(
size_t idx = tid; tid < count; tid += blockDim.x * gridDim.x)
44 redop.template apply_hip<EXCL>(
45 *
reinterpret_cast<typename REDOP::LHS *
>(lhs_base + idx * lhs_stride),
46 *
reinterpret_cast<const typename REDOP::RHS *
>(rhs_base +
50 template <
typename REDOP,
bool EXCL>
51 __global__
void fold_hip_kernel(uintptr_t rhs1_base, uintptr_t rhs1_stride,
52 uintptr_t rhs2_base, uintptr_t rhs2_stride,
53 size_t count, REDOP redop)
55 size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
56 for(
size_t idx = tid; tid < count; tid += blockDim.x * gridDim.x)
57 redop.template fold_hip<EXCL>(
58 *
reinterpret_cast<typename REDOP::RHS *
>(rhs1_base + idx * rhs1_stride),
59 *
reinterpret_cast<const typename REDOP::RHS *
>(rhs2_base +
66 template <
typename REDOP,
typename T >
67 void add_hip_redop_kernels(T *redop)
71 redop->hip_apply_excl_fn =
72 reinterpret_cast<void *
>(&ReductionKernels::apply_hip_kernel<REDOP, true>);
73 redop->hip_apply_nonexcl_fn =
74 reinterpret_cast<void *
>(&ReductionKernels::apply_hip_kernel<REDOP, false>);
75 redop->hip_fold_excl_fn =
76 reinterpret_cast<void *
>(&ReductionKernels::fold_hip_kernel<REDOP, true>);
77 redop->hip_fold_nonexcl_fn =
78 reinterpret_cast<void *
>(&ReductionKernels::fold_hip_kernel<REDOP, false>);
Definition activemsg.h:38