// Include guard for this header, the Proto_MemType.H dependency, and the two
// dim3 constructors.
// NOTE(review): this text looks like a rendered listing of the header -- the
// leading integers appear to be original line numbers, and the lines between
// them (e.g. the enclosing struct header and the x/y/z member declarations)
// are not shown in this view.
// NOTE(review): identifiers containing a double underscore, such as
// __PROTO_MACROS_H_, are reserved for the implementation in C++.
// Three-argument constructor: copies a_x, a_y and a_z into x, y and z.
1 #ifndef __PROTO_MACROS_H_ 2 #define __PROTO_MACROS_H_ 4 #include "Proto_MemType.H" 18 dim3(
size_t a_x,
size_t a_y,
size_t a_z) : x(a_x),y(a_y),z(a_z) {};
// Default constructor: sets x, y and z to 0.
19 dim3():x(0),y(0),z(0) {};
// CPU-forwarding macro set followed by the first protoLaunchKernelT overload.
//  - protoStream_t is defined as int and protoGetCurrentStream as 0.
//  - protoMalloc / protoFree / protoMemcpy / protoMemcpyAsync /
//    protoMemcpyFromSymbol / protoMemcpyToSymbol / protoDeviceSynchronize all
//    ignore their memtype argument and expand to the matching *CPU call.
//  - protoMemcpy casts `to` and `from` to char* before forwarding;
//    protoMemcpyAsync forwards them uncast.
//    NOTE(review): confirm that difference is intentional.
// NOTE(review): presumably this is the branch used when PROTO_CUDA is not
// defined; the guarding preprocessor directive is not visible in this view.
// protoLaunchKernelT (int nBlocks overload): function template parameterised
// on a Proto::MemType value, a kernel type Ker and the argument pack Args.
// Only the signature is shown here; the body is not visible.
21 #define protoStream_t int 22 #define protoGetCurrentStream 0 24 #define protoMalloc(memtype,PTR,NBYTES) protoMallocCPU(PTR,NBYTES) 25 #define protoFree(memtype,PTR) protoFreeCPU(PTR) 27 #define protoMemcpy(memtype,to,from,size,copyType) protoMemcpyCPU((char*)to,(char*)from,size,copyType) 28 #define protoMemcpyAsync(memtype,to,from,size,copyType,stream) protoMemcpyAsyncCPU(to,from,size,copyType,stream) 29 #define protoMemcpyFromSymbol(memtype,a,b,c,d,e) protoMemcpyFromSymbolCPU(a,b,c,d,e) 30 #define protoMemcpyToSymbol(memtype,a,b,c,d,e) protoMemcpyToSymbolCPU(a,b,c,d,e) 32 #define protoDeviceSynchronize(memtype) protoDeviceSynchronizeCPU() 34 template<Proto::MemType Type,
typename Ker,
typename... Args>
35 inline void protoLaunchKernelT(
int nBlocks,
int nbThreads,
const Args&... args)
// protoLaunchKernelT (dim3 nBlocks overload): same template parameters as the
// int overload (Proto::MemType Type, kernel type Ker, argument pack Args), but
// the block count is passed as a dim3. The arguments are taken by const
// reference. Only the signature is shown in this view; the body is not visible.
41 template<Proto::MemType Type,
typename Ker,
typename... Args>
42 inline void protoLaunchKernelT(
dim3 nBlocks,
int nbThreads,
const Args&... args)
// protoLaunchKernelMemAsyncT (int nBlocks overload): launch entry point that
// additionally takes an int `smem` and a protoStream_t `stream` ahead of the
// forwarded argument pack.
// NOTE(review): `smem` presumably is a shared-memory size in bytes -- confirm.
// Only the signature is shown in this view; the body is not visible.
52 template<Proto::MemType Type,
typename Ker,
typename... Args>
53 inline void protoLaunchKernelMemAsyncT(
int nBlocks,
int nbThreads,
int smem, protoStream_t stream,
const Args&... args)
// protoLaunchKernelMemAsyncT (dim3 nBlocks overload): same parameters as the
// int overload (nbThreads, smem, stream, forwarded args), with the block count
// passed as a dim3.
// Only the signature is shown in this view; the body is not visible.
59 template<Proto::MemType Type,
typename Ker,
typename... Args>
60 inline void protoLaunchKernelMemAsyncT(
dim3 nBlocks,
int nbThreads,
int smem, protoStream_t stream,
const Args&... args)
// A second `#define protoStream_t int`, then the `#else` branch annotated
// "If PROTO_CUDA is defined", then the generalLaunch kernel template.
// In the #else branch each proto* macro tests its memtype argument against
// Proto::MemType::DEVICE and expands to the *GPU call when equal, otherwise
// to the *CPU call, forwarding the remaining arguments unchanged.
// NOTE(review): these macros expand to a bare `if (...) {...} else {...}`
// statement. Used as the unbraced body of an outer if/else, the caller's
// trailing `;` leaves the outer `else` without a matching `if`; the usual
// guard is a `do { ... } while(0)` wrapper.
// NOTE(review): `memtype` is substituted without parentheses, so an argument
// containing a lower-precedence operator would change the comparison.
// NOTE(review): the original lines between 99 and 118 are not visible here.
69 #define protoStream_t int 71 #else //If PROTO_CUDA is defined 73 #define protoMalloc(memtype,PTR,NBYTES) \ 74 if(memtype==Proto::MemType::DEVICE) { protoMallocGPU(PTR,NBYTES); }\ 75 else { protoMallocCPU(PTR,NBYTES);} 77 #define protoFree(memtype,PTR) \ 78 if(memtype==Proto::MemType::DEVICE) { protoFreeGPU(PTR); } \ 79 else { protoFreeCPU(PTR);} 81 #define protoMemcpy(memtype,to,from,size,copyType) \ 82 if(memtype==Proto::MemType::DEVICE) { protoMemcpyGPU(to,from,size,copyType); } \ 83 else { protoMemcpyCPU(to,from,size,copyType);} 85 #define protoMemcpyAsync(memtype,to,from,size,copyType,stream) \ 86 if(memtype==Proto::MemType::DEVICE) { protoMemcpyAsyncGPU(to,from,size,copyType,stream); } \ 87 else { protoMemcpyAsyncCPU(to,from,size,copyType,stream);} 89 #define protoMemcpyFromSymbol(memtype,a,b,c,d,e) \ 90 if(memtype==Proto::MemType::DEVICE) { protoMemcpyFromSymbolGPU(a,b,c,d,e); } \ 91 else { protoMemcpyFromSymbolCPU(a,b,c,d,e); } 93 #define protoMemcpyToSymbol(memtype,a,b,c,d,e) \ 94 if(memtype==Proto::MemType::DEVICE) { protoMemcpyToSymbolGPU(a,b,c,d,e);} \ 95 else { protoMemcpyToSymbolCPU(a,b,c,d,e);} 97 #define protoDeviceSynchronize(memtype) \ 98 if(memtype==Proto::MemType::DEVICE) { protoDeviceSynchronizeGPU();} \ 99 else { protoDeviceSynchronizeCPU();} 118 template<
typename Ker,
typename... Args>
// generalLaunch: __global__ function template parameterised on Ker and Args;
// the argument pack is received by value. It is the symbol handed to
// protoLaunchKernelGPU / protoLaunchKernelMemAsyncGPU by the launch helpers
// in this header. Its body is not visible in this view.
119 __global__
void generalLaunch(
const Args... args)
// MAXTHREADS (1024) is the upper bound the launch helpers assert on nbThreads.
// protoLaunchKernelT (int nBlocks overload): hands Ker, nBlocks and nbThreads
// to PRINT_KERNEL_NAME_ARGS, then -- when Type is Proto::MemType::DEVICE --
// validates the launch configuration, launches generalLaunch<Ker, Args...>
// through protoLaunchKernelGPU and checks the reported error status.
// NOTE(review): the braces and any non-DEVICE branch are not visible in this
// view, so what happens for other Type values cannot be determined from here.
124 #define MAXTHREADS 1024 126 template<Proto::MemType Type,
typename Ker,
typename... Args>
127 inline void protoLaunchKernelT(
int nBlocks,
int nbThreads,
const Args&... args)
129 PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
// Type is a template parameter, so this comparison is a compile-time constant.
130 if(Type == Proto::MemType::DEVICE)
// Both the block count and the thread count must be strictly positive.
132 assert(nBlocks > 0 && nbThreads > 0);
// The thread count may not exceed MAXTHREADS.
133 assert(nbThreads<=MAXTHREADS);
// Launch generalLaunch<Ker, Args...> with this configuration, forwarding args.
134 protoLaunchKernelGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads, args...);
// Feed the value returned by protoGetLastError() to GPU_CHECK.
135 GPU_CHECK(protoGetLastError());
// protoLaunchKernelT (dim3 nBlocks overload): hands Ker, nBlocks and nbThreads
// to PRINT_KERNEL_NAME_ARGS, then -- when Type is Proto::MemType::DEVICE --
// validates the launch configuration, launches generalLaunch<Ker, Args...>
// through protoLaunchKernelGPU and checks the reported error status.
// NOTE(review): the braces and any non-DEVICE branch are not visible in this
// view, so what happens for other Type values cannot be determined from here.
141 template<Proto::MemType Type,
typename Ker,
typename... Args>
142 inline void protoLaunchKernelT(
dim3 nBlocks,
int nbThreads,
const Args&... args)
144 PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
// Type is a template parameter, so this comparison is a compile-time constant.
145 if(Type == Proto::MemType::DEVICE)
// Every component of the block count and the thread count must be > 0.
147 assert(nBlocks.x> 0 && nBlocks.y> 0 && nBlocks.z> 0 && nbThreads > 0);
// The thread count may not exceed MAXTHREADS.
148 assert(nbThreads<=MAXTHREADS);
// Launch generalLaunch<Ker, Args...> with this configuration, forwarding args.
149 protoLaunchKernelGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads,args...);
// Feed the value returned by protoGetLastError() to GPU_CHECK.
150 GPU_CHECK(protoGetLastError());
// protoLaunchKernelMemAsyncT (int nBlocks overload): like protoLaunchKernelT,
// but also passes `smem` and `stream` through to protoLaunchKernelMemAsyncGPU.
// When Type is Proto::MemType::DEVICE it validates the launch configuration,
// launches generalLaunch<Ker, Args...> and checks the reported error status.
// NOTE(review): `smem` presumably is a shared-memory size in bytes -- confirm.
// NOTE(review): the braces and any non-DEVICE branch are not visible in this
// view, so what happens for other Type values cannot be determined from here.
157 template<Proto::MemType Type,
typename Ker,
typename... Args>
158 inline void protoLaunchKernelMemAsyncT(
int nBlocks,
int nbThreads,
int smem, protoStream_t stream,
const Args&... args)
160 PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
// Type is a template parameter, so this comparison is a compile-time constant.
161 if(Type == Proto::MemType::DEVICE)
// Both the block count and the thread count must be strictly positive.
163 assert(nBlocks > 0 && nbThreads > 0);
// The thread count may not exceed MAXTHREADS.
164 assert(nbThreads<=MAXTHREADS);
// Launch generalLaunch<Ker, Args...> with smem and stream, forwarding args.
165 protoLaunchKernelMemAsyncGPU((generalLaunch<Ker, Args...>),nBlocks, nbThreads, smem, stream, args...);
// Feed the value returned by protoGetLastError() to GPU_CHECK.
166 GPU_CHECK(protoGetLastError());
// protoLaunchKernelMemAsyncT (dim3 nBlocks overload): passes `smem` and
// `stream` through to protoLaunchKernelMemAsyncGPU with a dim3 block count.
// When Type is Proto::MemType::DEVICE it validates the launch configuration,
// launches generalLaunch<Ker, Args...> and checks the reported error status.
// NOTE(review): `smem` presumably is a shared-memory size in bytes -- confirm.
// NOTE(review): the braces and any non-DEVICE branch are not visible in this
// view, so what happens for other Type values cannot be determined from here.
173 template<Proto::MemType Type,
typename Ker,
typename... Args>
174 inline void protoLaunchKernelMemAsyncT(
dim3 nBlocks,
int nbThreads,
int smem, protoStream_t stream,
const Args&... args)
176 PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
// Type is a template parameter, so this comparison is a compile-time constant.
177 if(Type == Proto::MemType::DEVICE)
// Every component of the block count and the thread count must be > 0.
179 assert(nBlocks.x> 0 && nBlocks.y> 0 && nBlocks.z> 0 && nbThreads > 0);
// The thread count may not exceed MAXTHREADS.
180 assert(nbThreads<=MAXTHREADS);
// Launch generalLaunch<Ker, Args...> with smem and stream, forwarding args.
181 protoLaunchKernelMemAsyncGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads, smem, stream, args...);
// Feed the value returned by protoGetLastError() to GPU_CHECK.
182 GPU_CHECK(protoGetLastError());
Definition: Proto_macros.H:13