Proto  3.2
Proto_Macros.H
Go to the documentation of this file.
1 #ifndef __PROTO_MACROS_H_
2 #define __PROTO_MACROS_H_
3 
4 #include "Proto_MemType.H"
5 #include "Proto_CPU.H"
6 
7 #ifndef PROTO_ACCEL
8 //Some of the device-agnostic code needs definitions of dim3, protoStream_t, protoGetCurrentStream
// Host-only stand-in for the accelerator runtime's dim3 extent triple, so that
// device-agnostic code still compiles when PROTO_ACCEL is not defined.
//
// Components that are not supplied default to 1 (not 0), matching the CUDA
// dim3 convention. A zero extent would describe an empty launch and would
// violate the "every extent > 0" checks performed by the accelerated launch
// wrappers in this file.
class dim3 {
public:
  size_t x;
  size_t y;
  size_t z;

  // Intentionally not explicit: callers may pass a plain block count where a
  // dim3 is expected, as they can with the accelerator runtime's own dim3.
  dim3(size_t a_x = 1, size_t a_y = 1, size_t a_z = 1) : x(a_x), y(a_y), z(a_z) {}
};
// Host-only build (PROTO_ACCEL undefined): the stream handle is a plain int
// and the "current stream" is the constant 0.
#define protoStream_t int
#define protoGetCurrentStream 0

// Memory wrappers. The memtype argument is accepted so call sites read the
// same as in an accelerated build, but it is ignored: everything is routed to
// the *CPU implementation.
#define protoMalloc(memtype,PTR,NBYTES) protoMallocCPU(PTR,NBYTES)
#define protoFree(memtype,PTR) protoFreeCPU(PTR)

// The pointer arguments are parenthesized before the cast so that an
// expression argument such as `buf + offset` is cast as a whole; without the
// parentheses the cast binds to `buf` only and the offset is applied in bytes.
#define protoMemcpy(memtype,to,from,size,copyType) protoMemcpyCPU((char*)(to),(char*)(from),size,copyType)
#ifndef PROTO_DEBUG
#define protoMemcpyAsync(memtype,to,from,size,copyType,stream) protoMemcpyAsyncCPU(to,from,size,copyType,stream)
#else
// Debug builds replace the asynchronous copy with the blocking one; the
// stream argument is dropped.
#define protoMemcpyAsync(memtype,to,from,size,copyType,stream) protoMemcpyCPU((char*)(to),(char*)(from),size,copyType)
#endif
#define protoMemcpyFromSymbol(memtype,a,b,c,d,e) protoMemcpyFromSymbolCPU(a,b,c,d,e)
#define protoMemcpyToSymbol(memtype,a,b,c,d,e) protoMemcpyToSymbolCPU(a,b,c,d,e)

#define protoDeviceSynchronize(memtype) protoDeviceSynchronizeCPU()
34 
35 template<Proto::MemType Type, typename Ker, typename... Args>
36 inline void protoLaunchKernelT(int nBlocks, int nbThreads, const Args&... args)
37 {
38  //protoLaunchKernelCPU(Ker::cpu, nBlocks, nbThreads, args...);
39  Ker::cpu(args...);
40 }
41 
42 template<Proto::MemType Type, typename Ker, typename... Args>
43 inline void protoLaunchKernelT(dim3 nBlocks, int nbThreads, const Args&... args)
44 {
45  //protoLaunchKernelCPU(Ker::cpu, nBlocks, nbThreads, args...);
46  Ker::cpu(args...);
47 }
48 
49 /**
50 // #define protoLaunchKernelMem(memtype,Ker, nbBlocks, nbThreads, smem, args...) \
51 // protoLaunchKernelMemCPU(Ker, nbBlocks, nbThreads, smem, args)
52 */
53 
54 template<Proto::MemType Type, typename Ker, typename... Args>
55 inline void protoLaunchKernelMemAsyncT(dim3 nBlocks, int nbThreads, int smem, protoStream_t stream, const Args&... args)
56 {
57 #ifndef PROTO_DEBUG
58  //protoLaunchKernelMemAsyncCPU(Ker::cpu, nBlocks, nbThreads, smem, stream, args...);
59  Ker::cpu(args...);
60 #else
61  //protoLaunchKernelMemCPU(Ker::cpu, nBlocks, nbThreads, smem, args...);
62  Ker::cpu(args...);
63 #endif
64 }
65 /**
66 // #define protoLaunchKernelMemAsync(memtype,Ker, nbBlocks, nbThreads, smem, stream, args...) \
67 // protoLaunchKernelMemAsyncCPU(Ker, nbBlocks, nbThreads, smem, stream, args)
68 **/
69 
70 #else //If PROTO_ACCEL is defined
71 #include "Proto_GPU.H"
72 
// Accelerated build: each wrapper picks the *GPU implementation when memtype
// equals Proto::MemType::DEVICE and the *CPU implementation otherwise.
//
// memtype is parenthesized inside the comparison so that an expression
// argument (for example a conditional expression) is compared as a whole.
//
// NOTE(review): every wrapper expands to a bare if/else statement. It must be
// used as a statement of its own and cannot serve as the unbraced body of an
// enclosing `if` that has its own `else`.
#define protoMalloc(memtype,PTR,NBYTES) \
  if((memtype)==Proto::MemType::DEVICE) { protoMallocGPU(PTR,NBYTES); }\
  else { protoMallocCPU(PTR,NBYTES);}

#define protoFree(memtype,PTR) \
  if((memtype)==Proto::MemType::DEVICE) { protoFreeGPU(PTR); } \
  else { protoFreeCPU(PTR);}

#define protoMemcpy(memtype,to,from,size,copyType) \
  if((memtype)==Proto::MemType::DEVICE) { protoMemcpyGPU(to,from,size,copyType); } \
  else { protoMemcpyCPU(to,from,size,copyType);}

// Debug builds replace the asynchronous copy with the blocking one; the
// stream argument is dropped.
#ifndef PROTO_DEBUG
#define protoMemcpyAsync(memtype,to,from,size,copyType,stream) \
  if((memtype)==Proto::MemType::DEVICE) { protoMemcpyAsyncGPU(to,from,size,copyType,stream); } \
  else { protoMemcpyAsyncCPU(to,from,size,copyType,stream);}
#else
#define protoMemcpyAsync(memtype,to,from,size,copyType,stream) \
  if((memtype)==Proto::MemType::DEVICE) { protoMemcpyGPU(to,from,size,copyType); } \
  else { protoMemcpyCPU(to,from,size,copyType);}
#endif

#define protoMemcpyFromSymbol(memtype,a,b,c,d,e) \
  if((memtype)==Proto::MemType::DEVICE) { protoMemcpyFromSymbolGPU(a,b,c,d,e); } \
  else { protoMemcpyFromSymbolCPU(a,b,c,d,e); }

#define protoMemcpyToSymbol(memtype,a,b,c,d,e) \
  if((memtype)==Proto::MemType::DEVICE) { protoMemcpyToSymbolGPU(a,b,c,d,e);} \
  else { protoMemcpyToSymbolCPU(a,b,c,d,e);}

#define protoDeviceSynchronize(memtype) \
  if((memtype)==Proto::MemType::DEVICE) { protoDeviceSynchronizeGPU();} \
  else { protoDeviceSynchronizeCPU();}
106 /**
107 // #define protoLaunchKernel(memtype,Ker, nbBlocks, nbThreads, args...) \
108 // if(memtype==Proto::MemType::DEVICE) \
109 // protoLaunchKernelGPU(Ker##GPU, nbBlocks, nbThreads, args) \
110 // else \
111 // protoLaunchKernelCPU(Ker##CPU, nbBlocks, nbThreads, args)
112 
113 // #define protoLaunchKernelMem(memtype,Ker, nbBlocks, nbThreads, smem, args...) \
114 // if(memtype==Proto::MemType::DEVICE) \
115 // protoLaunchKernelMemGPU(Ker##GPU, nbBlocks, nbThreads, smem, args); \
116 // else \
117 // protoLaunchKernelMemCPU(Ker##CPU, nbBlocks, nbThreads, smem, args);
118 // #ifndef PROTO_DEBUG
119 // #define protoLaunchKernelMemAsync(memtype,Ker, nbBlocks, nbThreads, smem, stream, args...) \
120 // if(memtype==Proto::MemType::DEVICE) \
121 // protoLaunchKernelMemAsyncGPU(Ker##GPU, nbBlocks, nbThreads, smem, stream, args); \
122 // else \
123 // protoLaunchKernelMemAsyncCPU(Ker##CPU, nbBlocks, nbThreads, smem, stream, args);
124 // #else
125 // #define protoLaunchKernelMemAsync(memtype,Ker, nbBlocks, nbThreads, smem, stream, args...) \
126 // if(memtype==Proto::MemType::DEVICE) \
127 // protoLaunchKernelMemGPU(Ker##GPU, nbBlocks, nbThreads, smem, args); \
128 // else \
129 // protoLaunchKernelMemCPU(Ker##CPU, nbBlocks, nbThreads, smem, args);
130 // #endif
131 */
132 template<typename Ker, typename... Args>
133 __global__ void generalLaunch(const Args... args)
134 {
135  Ker::gpu(args...);
136 }
137 
138 #define MAXTHREADS 1024
139 
140 template<Proto::MemType Type, typename Ker, typename... Args>
141 inline void protoLaunchKernelT(dim3 nBlocks, int nbThreads, const Args&... args)
142 {
143  PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
144  if(Type == Proto::MemType::DEVICE)
145  {
146  assert(nBlocks.x> 0 && nBlocks.y> 0 && nBlocks.z> 0 && nbThreads > 0);
147  assert(nbThreads<=MAXTHREADS);
148  protoLaunchKernelGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads,args...);
149  GPU_CHECK(protoGetLastError());
150  }
151  else
152  Ker::cpu( args...);
153 }
154 
155 template<Proto::MemType Type, typename Ker, typename... Args>
156 inline void protoLaunchKernelMemAsyncT(dim3 nBlocks, int nbThreads, int smem, protoStream_t stream, const Args&... args)
157 {
158  PRINT_KERNEL_NAME_ARGS(Ker,nBlocks,nbThreads);
159  if(Type == Proto::MemType::DEVICE)
160  {
161  assert(nBlocks.x> 0 && nBlocks.y> 0 && nBlocks.z> 0 && nbThreads > 0);
162  assert(nbThreads<=MAXTHREADS);
163 #ifndef PROTO_DEBUG
164  protoLaunchKernelMemAsyncGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads, smem, stream, args...);
165 #else
166  protoLaunchKernelMemGPU((generalLaunch<Ker, Args...>), nBlocks, nbThreads, smem, args...);
167 #endif
168  GPU_CHECK(protoGetLastError());
169  }
170  else
171  Ker::cpu( args...);
172 }
173 
174 #endif
175 
176 /*
177 #define GPU(name) __global__ \
178 void gpu_##name
179 
180 #define CPU(name) void cpu_##name
181 
182 #define FUNCTOR(name) template<> struct base##name{ \
183  };\
184 \
185  template<> struct base##name<true>\
186  {\
187  template<typename... T>\
188  void operator()(unsigned int nbBlocks, unsigned int nbThreads, T... args)\
189  {\
190  gpu_##name<<<nbBlocks,nbThreads>>>(args...);\
191  }\
192  };\
193  template<> struct base##name<false>\
194  {\
195  template<typename... T>\
196  void operator()(unsigned int nbBlocks, unsigned int nbThreads, T... args)\
197  {\
198  cpu_##name(args...);\
199  }\
200  };\
201  base##name<true> gpu##name;\
202  base##name<false> cpu##name;
203 */
204 
205 #endif
size_t y
Definition: Proto_Macros.H:12
dim3(size_t a_x, size_t a_y, size_t a_z)
Definition: Proto_Macros.H:14
Definition: Proto_MemType.H:7
dim3(size_t a_x)
Definition: Proto_Macros.H:15
MemType
Definition: Proto_MemType.H:7
void protoLaunchKernelMemAsyncT(dim3 nBlocks, int nbThreads, int smem, protoStream_t stream, const Args &... args)
Definition: Proto_Macros.H:55
#define GPU_CHECK(in)
Data Types / classes.
Definition: Proto_GPU.H:81
dim3()
Definition: Proto_Macros.H:16
void protoLaunchKernelT(int nBlocks, int nbThreads, const Args &... args)
Definition: Proto_Macros.H:36
#define PRINT_KERNEL_NAME_ARGS(IN, BLOCKS, THREADS)
Functions.
Definition: Proto_GPU.H:220
size_t z
Definition: Proto_Macros.H:13
size_t x
Definition: Proto_Macros.H:11
Definition: Proto_Macros.H:9
#define protoStream_t
Definition: Proto_Macros.H:18