Proto
Proto_macros.H
1 #ifndef __PROTO_MACROS_H_
2 #define __PROTO_MACROS_H_
3 
4 #include "Proto_MemType.H"
5 
6 #include "Proto_gpu.H"
7 
8 #include "Proto_cpu.H"
9 #include<cassert>
10 
11 #ifndef PROTO_CUDA
12 //Some of the device-agnostic code needs definitions of dim3, protoStream_t, protoGetCurrentStream
/**
 * Host-side definition of dim3, compiled only when PROTO_CUDA is not defined
 * so that device-agnostic code can still name the type.
 *
 * Stores three size_t extents (x, y, z) as public members.
 */
class dim3 {
public:
  size_t x;
  size_t y;
  size_t z;

  /// Builds a dim3 from one explicit extent per axis.
  dim3(size_t a_x, size_t a_y, size_t a_z)
    : x(a_x)
    , y(a_y)
    , z(a_z)
  {
  }

  /// Builds a dim3 whose three extents are all zero.
  dim3()
    : dim3(0, 0, 0)
  {
  }
};
// Host-only stand-ins for the stream type and the current-stream accessor.
#define protoStream_t int
#define protoGetCurrentStream 0

// Host-only build (PROTO_CUDA not defined): every proto* macro below ignores
// its `memtype` argument and forwards to the matching *CPU implementation.
#define protoMalloc(memtype,PTR,NBYTES) protoMallocCPU(PTR,NBYTES)
#define protoFree(memtype,PTR) protoFreeCPU(PTR)

// `to` and `from` are parenthesized before the cast so that an expression
// argument such as `dst + n` is cast as a whole. Without the parentheses the
// cast binds to `dst` alone and the offset is applied in bytes, not elements.
#define protoMemcpy(memtype,to,from,size,copyType) protoMemcpyCPU((char*)(to),(char*)(from),size,copyType)
#define protoMemcpyAsync(memtype,to,from,size,copyType,stream) protoMemcpyAsyncCPU(to,from,size,copyType,stream)
#define protoMemcpyFromSymbol(memtype,a,b,c,d,e) protoMemcpyFromSymbolCPU(a,b,c,d,e)
#define protoMemcpyToSymbol(memtype,a,b,c,d,e) protoMemcpyToSymbolCPU(a,b,c,d,e)

#define protoDeviceSynchronize(memtype) protoDeviceSynchronizeCPU()
33 
34 template<Proto::MemType Type, typename Ker, typename... Args>
35 inline void protoLaunchKernelT(int nBlocks, int nbThreads, const Args&... args)
36 {
37  //protoLaunchKernelCPU(Ker::cpu, nBlocks, nbThreads, args...);
38  Ker::cpu(args...);
39 }
40 
41 template<Proto::MemType Type, typename Ker, typename... Args>
42 inline void protoLaunchKernelT(dim3 nBlocks, int nbThreads, const Args&... args)
43 {
44  //protoLaunchKernelCPU(Ker::cpu, nBlocks, nbThreads, args...);
45  Ker::cpu(args...);
46 }
47 
52 template<Proto::MemType Type, typename Ker, typename... Args>
53 inline void protoLaunchKernelMemAsyncT(int nBlocks, int nbThreads, int smem, protoStream_t stream, const Args&... args)
54 {
55  //protoLaunchKernelMemAsyncCPU(Ker::cpu, nBlocks, nbThreads, smem, stream, args...);
56  Ker::cpu(args...);
57 }
58 
59 template<Proto::MemType Type, typename Ker, typename... Args>
60 inline void protoLaunchKernelMemAsyncT(dim3 nBlocks, int nbThreads, int smem, protoStream_t stream, const Args&... args)
61 {
62  //protoLaunchKernelMemAsyncCPU(Ker::cpu, nBlocks, nbThreads, smem, stream, args...);
63  Ker::cpu(args...);
64 }
69 #define protoStream_t int
70 
71 #else //If PROTO_CUDA is defined
72 
// CUDA build: each proto* macro selects the *GPU implementation when
// `memtype` equals Proto::MemType::DEVICE and the *CPU implementation
// otherwise.
//
// Every body is wrapped in do { ... } while(0) so that the macro expands to a
// single statement. A bare `if {...} else {...}` expansion cannot be used as
// the body of a caller's own `if (...) MACRO(...); else ...`. Callers end the
// macro invocation with a semicolon. `memtype` is parenthesized so that an
// expression argument is compared as a whole.

#define protoMalloc(memtype,PTR,NBYTES) \
  do { \
    if((memtype)==Proto::MemType::DEVICE) { protoMallocGPU(PTR,NBYTES); } \
    else { protoMallocCPU(PTR,NBYTES); } \
  } while(0)

#define protoFree(memtype,PTR) \
  do { \
    if((memtype)==Proto::MemType::DEVICE) { protoFreeGPU(PTR); } \
    else { protoFreeCPU(PTR); } \
  } while(0)

#define protoMemcpy(memtype,to,from,size,copyType) \
  do { \
    if((memtype)==Proto::MemType::DEVICE) { protoMemcpyGPU(to,from,size,copyType); } \
    else { protoMemcpyCPU(to,from,size,copyType); } \
  } while(0)

#define protoMemcpyAsync(memtype,to,from,size,copyType,stream) \
  do { \
    if((memtype)==Proto::MemType::DEVICE) { protoMemcpyAsyncGPU(to,from,size,copyType,stream); } \
    else { protoMemcpyAsyncCPU(to,from,size,copyType,stream); } \
  } while(0)

#define protoMemcpyFromSymbol(memtype,a,b,c,d,e) \
  do { \
    if((memtype)==Proto::MemType::DEVICE) { protoMemcpyFromSymbolGPU(a,b,c,d,e); } \
    else { protoMemcpyFromSymbolCPU(a,b,c,d,e); } \
  } while(0)

#define protoMemcpyToSymbol(memtype,a,b,c,d,e) \
  do { \
    if((memtype)==Proto::MemType::DEVICE) { protoMemcpyToSymbolGPU(a,b,c,d,e); } \
    else { protoMemcpyToSymbolCPU(a,b,c,d,e); } \
  } while(0)

#define protoDeviceSynchronize(memtype) \
  do { \
    if((memtype)==Proto::MemType::DEVICE) { protoDeviceSynchronizeGPU(); } \
    else { protoDeviceSynchronizeCPU(); } \
  } while(0)
100 
118 template<typename Ker, typename... Args>
119 __global__ void generalLaunch(const Args... args)
120 {
121  Ker::gpu(args...);
122 }
123 
124 #define MAXTHREADS 1024
125 
126 template<Proto::MemType Type, typename Ker, typename... Args>
127 inline void protoLaunchKernelT(int nBlocks, int nbThreads, const Args&... args)
128 {
129  PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
130  if(Type == Proto::MemType::DEVICE)
131  {
132  assert(nBlocks > 0 && nbThreads > 0);
133  assert(nbThreads<=MAXTHREADS);
134  protoLaunchKernelGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads, args...);
135  GPU_CHECK(protoGetLastError());
136  }
137  else
138  Ker::cpu( args...);
139 }
140 
141 template<Proto::MemType Type, typename Ker, typename... Args>
142 inline void protoLaunchKernelT(dim3 nBlocks, int nbThreads, const Args&... args)
143 {
144  PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
145  if(Type == Proto::MemType::DEVICE)
146  {
147  assert(nBlocks.x> 0 && nBlocks.y> 0 && nBlocks.z> 0 && nbThreads > 0);
148  assert(nbThreads<=MAXTHREADS);
149  protoLaunchKernelGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads,args...);
150  GPU_CHECK(protoGetLastError());
151  }
152  else
153  Ker::cpu( args...);
154 }
155 
156 
157 template<Proto::MemType Type, typename Ker, typename... Args>
158 inline void protoLaunchKernelMemAsyncT(int nBlocks, int nbThreads, int smem, protoStream_t stream, const Args&... args)
159 {
160  PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
161  if(Type == Proto::MemType::DEVICE)
162  {
163  assert(nBlocks > 0 && nbThreads > 0);
164  assert(nbThreads<=MAXTHREADS);
165  protoLaunchKernelMemAsyncGPU((generalLaunch<Ker, Args...>),nBlocks, nbThreads, smem, stream, args...);
166  GPU_CHECK(protoGetLastError());
167  }
168  else
169  Ker::cpu( args...);
170 }
171 
172 
173 template<Proto::MemType Type, typename Ker, typename... Args>
174 inline void protoLaunchKernelMemAsyncT(dim3 nBlocks, int nbThreads, int smem, protoStream_t stream, const Args&... args)
175 {
176  PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
177  if(Type == Proto::MemType::DEVICE)
178  {
179  assert(nBlocks.x> 0 && nBlocks.y> 0 && nBlocks.z> 0 && nbThreads > 0);
180  assert(nbThreads<=MAXTHREADS);
181  protoLaunchKernelMemAsyncGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads, smem, stream, args...);
182  GPU_CHECK(protoGetLastError());
183  }
184  else
185  Ker::cpu( args...);
186 }
187 
188 #endif
189 
190 /*
191 #define GPU(name) __global__ \
192 void gpu_##name
193 
194 #define CPU(name) void cpu_##name
195 
196 #define FUNCTOR(name) template<> struct base##name{ \
197  };\
198 \
199  template<> struct base##name<true>\
200  {\
201  template<typename... T>\
202  void operator()(unsigned int nbBlocks, unsigned int nbThreads, T... args)\
203  {\
204  gpu_##name<<<nbBlocks,nbThreads>>>(args...);\
205  }\
206  };\
207  template<> struct base##name<false>\
208  {\
209  template<typename... T>\
210  void operator()(unsigned int nbBlocks, unsigned int nbThreads, T... args)\
211  {\
212  cpu_##name(args...);\
213  }\
214  };\
215  base##name<true> gpu##name;\
216  base##name<false> cpu##name;
217 */
218 
219 #endif
Definition: Proto_macros.H:13