1 #ifndef __PROTO_REDUCTION_H__ 2 #define __PROTO_REDUCTION_H__ 20 #ifdef PROTO_ACCEL // namespace collision in host builds 23 static constexpr T max(T a, T b) {
return (a > b ? a : b); }
26 static constexpr T min(T a, T b) {
return (a < b ? a : b); }
29 template<
typename T, Operation OP, MemType MEM=MEMTYPE_DEFAULT>
// Update Value.
// Static function taking a_v1 by mutable reference and a_v2 by value; only the
// declaration is visible in this excerpt.
// NOTE(review): presumably folds a_v2 into a_v1 according to the OP template
// argument — confirm against the definition.
68 static void update(T& a_v1,
const T a_v2);
// Compute Reduction.
// a_data is a read-only pointer to elements of type T and a_size is passed by
// value; only the declaration is visible in this excerpt.
// NOTE(review): a_size is presumably the number of elements (not bytes) behind
// a_data — confirm against the definition.
94 void reduce(
const T *a_data,
const size_t a_size);
// Two raw pointers to T.
// NOTE(review): the names suggest device-side counterparts of the m_hostTemp /
// m_hostTotal members — confirm where they are allocated and freed.
107 T *m_deviTemp, *m_deviTotal;
// Integer members.
// NOTE(review): presumably the launch configuration (threads per block, block
// count) and the device warp size — confirm where they are set.
108 int m_numThreads, m_numBlocks, m_warpSize;
112 template<
typename T, Operation OP>
// warpOp: updates `val` in place (taken by reference), parameterized on the
// value type T and the Operation OP.
// Only the head of the function is visible in this excerpt: the body of the
// loop below is selected by the PROTO_HIP / PROTO_CUDA preprocessor branches
// that follow, and those lines are not shown.
// NOTE(review): presumably combines `val` across the lanes of one warp —
// confirm against the elided loop body.
124 template<
typename T, Operation OP>
126 void warpOp(T& val,
size_t idx,
size_t size) {
// All 32 bits set.
// NOTE(review): presumably the participation mask handed to a warp-level
// intrinsic in the PROTO_CUDA branch — confirm.
127 unsigned mask = 0xffffffff;
// delta starts at warpSize/2 and is halved each iteration; the loop ends once
// the integer division brings it to 0.
128 for (
unsigned int delta = warpSize/2; delta > 0; delta /= 2)
130 #if defined PROTO_HIP 133 #elif defined PROTO_CUDA 139 template<
typename T, Operation OP>
// blockOp: updates `val` in place using two warpOp calls, with a shared-memory
// array indexed by warp number in between. The first call is made with the
// caller's (idx, size); the second with (threadIdx.x, warps).
// Only threadIdx.x / blockDim.x are used, i.e. the x dimension of the block.
141 void blockOp(T& val,
int idx,
int size) {
// NOTE(review): presumably guarantees that the number of warps in the block
// does not exceed warpSize, so the final warpOp call can cover every per-warp
// slot — confirm.
142 PR_assert(blockDim.x <= warpSize*warpSize);
// Unsized extern __shared__ array, declared as unsigned int with its alignment
// set to sizeof(T).
143 extern __shared__ __align__(
sizeof(T))
unsigned int shdata[];
// The same storage viewed as an array of T.
144 T *shmem =
reinterpret_cast<T*
>(shdata);
// lane: position of this thread inside its warp.
145 int lane = threadIdx.x % warpSize;
// wid: index of this thread's warp inside the block.
146 int wid = threadIdx.x / warpSize;
// Number of warps in the block, rounded up.
147 int warps = (blockDim.x+warpSize-1)/warpSize;
// First pass, using the caller's index and size.
149 warpOp<T,OP>(val, idx, size);
// A block consisting of a single warp needs no second pass.
150 if (warps == 1)
return;
// Lane 0 of every warp stores its value into that warp's slot.
152 if (!lane) shmem[wid] = val;
// The listing jumps from line 152 to line 157 here; the intervening lines are
// not part of this excerpt.
// NOTE(review): a block-wide barrier and a reload of `val` from shmem would be
// expected between the store above and the call below — confirm.
157 warpOp<T,OP>(val, threadIdx.x, warps);
// kernel: reads `size` elements through `in` and writes one value per block
// into out[blockIdx.x]. Several lines of the original (the opening brace, the
// declaration of `ret`, the loop body, and whatever guards the final store)
// are not part of this excerpt.
// NOTE(review): the role of the `val` parameter is not visible here — confirm.
161 template<
typename T, Operation OP>
163 void kernel(
size_t size,
const T* in, T* out, T* val)
// Flat index of this thread across the whole grid (x dimension only).
166 int idx = blockIdx.x*blockDim.x + threadIdx.x;
// Starting at idx, advance by the total number of threads in the grid until
// `size` is reached; the loop body is not shown in this excerpt.
168 for (
size_t i = idx; i < size; i += blockDim.x*gridDim.x)
// Combine `ret` within the block.
// NOTE(review): `size` is a size_t here but blockOp takes `int size`, so the
// value is narrowed at this call — confirm that sizes above INT_MAX cannot
// reach this point.
172 blockOp<T,OP>(ret, idx, size);
// Store this block's value in the slot for this block.
177 out[blockIdx.x] = ret;
189 #endif // __PROTO_REDUCTION_H__
static ACCEL_DECORATION T init()
Initialize Value.
static ACCEL_DECORATION void update(T &a_v1, const T a_v2)
Update Value.
Operation
Definition: Proto_Reduction.H:15
Definition: Proto_Reduction.H:15
Definition: Proto_Reduction.H:16
void reset()
Reset Reduction.
Definition: Proto_Reduction.H:16
Definition: Proto_Reduction.H:15
#define ACCEL_KERNEL
Definition: Proto_Accel.H:13
constexpr int line
Definition: Proto_Reduction.H:18
T * m_hostTotal
Definition: Proto_Reduction.H:103
Definition: Proto_Reduction.H:15
Definition: Proto_Reduction.H:30
#define PR_assert(stmt)
Definition: Proto_PAssert.H:68
#define ACCEL_DECORATION
Definition: Proto_Accel.H:12
T fetchLocal()
Get Reduction.
T * m_hostTemp
Definition: Proto_Reduction.H:104
Definition: Proto_Reduction.H:16
Definition: Proto_Array.H:17
ACCEL_KERNEL void initKernel(T *ptr)
Definition: Proto_Reduction.H:114
Atomic
Definition: Proto_Reduction.H:16
void reduce(const T *a_data, const size_t a_size)
Compute Reduction.
Definition: Proto_Reduction.H:15
Definition: Proto_Reduction.H:15
Definition: Proto_Reduction.H:15