Chombo + EB  3.2
SPMDI.H
Go to the documentation of this file.
1 #ifdef CH_LANG_CC
2 /*
3  * _______ __
4  * / ___/ / ___ __ _ / / ___
5  * / /__/ _ \/ _ \/ V \/ _ \/ _ \
6  * \___/_//_/\___/_/_/_/_.__/\___/
7  * Please refer to Copyright.txt, in Chombo's root directory.
8  */
9 #endif
10 
11 #ifndef _SPMDI_H_
12 #define _SPMDI_H_
13 
#include <climits>
#include <cstring>

#include "memtrack.H"
#include "parstream.H"
#include "BaseNamespaceHeader.H"
17 
18 // default implementations for linearization routines.
19 
/// Default linearSize: forwards to the member function T::linearSize()
/// and returns its result as an int.
template <class T>
int linearSize(const T& inputT)
{
  const int numBytes = inputT.linearSize();
  return numBytes;
}
25 
/// Default linearIn: forwards to the member function T::linearIn(),
/// handing it the input buffer unchanged.
template <class T>
void linearIn(T& a_outputT, const void* const inBuf)
{
  const void* const source = inBuf;
  a_outputT.linearIn(source);
}
31 
/// Default linearOut: forwards to the member function T::linearOut(),
/// handing it the output buffer unchanged.
template <class T>
void linearOut(void* const a_outBuf, const T& inputT)
{
  void* const destination = a_outBuf;
  inputT.linearOut(destination);
}
37 
38 #ifdef CH_MPI
39 
40 extern void AttachDebugger(int);
41 /*****************************/
42 //gather a_input into a_outVec
43 /*****************************/
44 template <class T>
45 inline void
46 gather(Vector<T>& a_outVec, const T& a_input, int a_dest)
47 {
48  CH_assert (a_dest >= 0);
49  CH_assert(a_dest < numProc());
50  //now THIS size lives on THIS processor
51  int isize = linearSize(a_input);
52 
53  //make stuff for linearout
54  void* loclBuf = mallocMT(isize);
55  if (loclBuf == NULL)
56  MayDay::Error("out of memory in gather 1");
57 
58  //put linearized T into its proper buffer
59  linearOut(loclBuf, a_input);
60 
61  int nProcess = numProc();
62  int sendCount = 1;
63  int recdCount = 1;
64 
65  //need to gather isizes onto processor a_dest
66  int* vectSize = NULL;
67  int* vectDisp = NULL;
68  void* sendBuf = static_cast<void*>(&isize);
69  //allocate received buffer
70  if (procID() == a_dest)
71  {
72  vectSize = new int[nProcess];
73  vectDisp = new int[nProcess];
74  }
75 
76  int result1 = MPI_Gather(sendBuf, sendCount, MPI_INT,
77  vectSize,recdCount, MPI_INT,
78  a_dest, Chombo_MPI::comm);
79 
80  if (result1 != MPI_SUCCESS)
81  MayDay::Error("Gather<T> failed in MPI_Gather 1");
82 
83  //make memory for gather, linearin
84  void* recdBuf = NULL;
85  if (procID() == a_dest)
86  {
87  size_t itotsize=0;
88  for (int iproc = 0; iproc < nProcess; iproc++)
89  {
90  vectDisp[iproc] = itotsize;
91  itotsize += vectSize[iproc];
92  }
93  recdBuf = mallocMT(itotsize);
94  if (recdBuf == NULL)
95  {
96  MayDay::Error("out of memory in gather 2");
97  }
98  }
99 
100  //gather data
101  int result2 = MPI_Gatherv(loclBuf, isize, MPI_BYTE,
102  recdBuf, vectSize, vectDisp, MPI_BYTE,
103  a_dest, Chombo_MPI::comm);
104  if (result2 != MPI_SUCCESS)
105  MayDay::Error("Gather<T> failed in MPI_Gather 2");
106 
107  if (procID() == a_dest)
108  {
109  //calculate offset into array for current processor
110  int ioffset = 0;
111  a_outVec.resize(nProcess);
112  //need to cast to char* to do pointer arithmetic
113  char* arithPtr = (char*)recdBuf;
114  for (int iproc = 0; iproc < nProcess; iproc++)
115  {
116  ioffset = vectDisp[iproc];
117  char* thisProcBuf = arithPtr + ioffset;
118  linearIn(a_outVec[iproc], thisProcBuf);
119  }
120 
121  //delete memory for dest-specific arrays
122  delete[] vectSize;
123  delete[] vectDisp;
124  freeMT(recdBuf);
125  }
126 
127  //delete memory for local buffer
128  freeMT(loclBuf);
129 }
130 
131 /*****************************/
132 //broadcast T everywhere
133 /*****************************/
134 template <class T>
135 inline void
136 broadcast(T& a_inAndOut, int a_src)
137 {
138  CH_assert (a_src >= 0);
139  CH_assert(a_src < numProc());
140  int isize;
141  if (procID() == a_src)
142  {
143  isize = linearSize(a_inAndOut);
144  }
145 
146  MPI_Bcast(&isize, 1, MPI_INT, a_src, Chombo_MPI::comm);
147 
148  void* broadBuf = mallocMT(isize);
149 
150  if (broadBuf == NULL)
151  {
152  MayDay::Error("out of memory in broadcast");
153  }
154 
155  //take inAndOut from src and put it into broadBuf
156  if (procID() == a_src)
157  {
158  linearOut(broadBuf, a_inAndOut);
159  }
160 
161  //broadcast broadBuf to all procs
162  MPI_Bcast(broadBuf, isize, MPI_BYTE, a_src, Chombo_MPI::comm);
163 
164  if (procID()==a_src)
165  {
166  CH_MaxMPISendSize = Max<long long>(CH_MaxMPISendSize, isize);
167  }
168  else
169  {
170  CH_MaxMPIRecvSize = Max<long long>(CH_MaxMPIRecvSize, isize);
171  }
172  //take broadBuf and put back into inAndOut if not src
173  if (procID() != a_src)
174  {
175  linearIn(a_inAndOut, broadBuf);
176  }
177 
178  //delete memory for buffer
179  freeMT(broadBuf);
180 }
181 
182 /*****************************/
183 // simple Barrier
184 /*****************************/
185 inline void
186 barrier(void)
187 {
188  MPI_Barrier(Chombo_MPI::comm);
189 }
190 
191 #else
192 /*****************************/
193 //non-mpi version
194 /*****************************/
195 template <class T>
196 inline void
197 gather(Vector<T>& a_outVec, const T& a_input, int a_dest)
198 {
199  a_outVec.resize(1);
200  a_outVec[0] = a_input;
201 }
202 /*****************************/
203 //non-mpi version
204 /*****************************/
/// Non-MPI broadcast: a no-op, since with a single process the input and
/// the output are the same object.
template <class T>
inline void broadcast(T& a_inAndOut, int a_src)
{
  //neither argument is used; a_inAndOut is left exactly as it was
  static_cast<void>(a_inAndOut);
  static_cast<void>(a_src);
}
211 /*****************************/
212 //non-mpi version
213 /*****************************/
/// Non-MPI barrier: nothing to synchronize in serial, so this is a no-op.
inline void barrier(void)
{
  return;
}
219 
220 #endif //the mpi thing
221 
222 //*************************************
223 //These should work independent of MPI
224 //*************************************
225 
226 //Vector<T> specialization of linearIn
227 template <class T>
228 void
229 linearListIn(Vector<T>& a_outputT, const void* const a_inBuf)
230 {
231  //first entry is the size of the vector
232  const int* const intBuf = (int*)a_inBuf;
233  int vecsize = intBuf[0];
234  Vector<int> vecOffset(vecsize);
235  //next vecsize entries are offsets of data into buffer
236  for (int ivec = 0; ivec < vecsize; ivec++)
237  {
238  vecOffset[ivec] = intBuf[ivec+1];
239  }
240  //next vecsize entries are the actual data
241  //yes I could do this in one loop but that would
242  // either
243  // a) make it less symmetric with linearOut
244  // and/or
245  // b) make both of them far less readable
246  a_outputT.resize(vecsize);
247  const char* const charbuf = (char*)a_inBuf;
248  for (int ivec = 0; ivec < vecsize; ivec++)
249  {
250  const char* const dataLoc = charbuf + vecOffset[ivec];
251  linearIn(a_outputT[ivec], dataLoc);
252  }
253 }
254 
255 //Vector<T> specialization of linearOut
256 template <class T>
257 void
258 linearListOut(void* const a_outBuf, const Vector<T>& a_input)
259 {
260  //first entry is the size of the vector
261  int* const intBuf = (int*)a_outBuf;
262  intBuf[0] = a_input.size();
263  int vecsize = intBuf[0];
264  Vector<int> vecOffset(vecsize);
265  //next vecsize entries are offsets of data into buffer
266  //next vecsize entries are the actual data
267  int ioffset = (vecsize+1)*sizeof(int);
268  for (int ivec = 0; ivec < vecsize; ivec++)
269  {
270  intBuf[ivec+1] = ioffset;
271  vecOffset[ivec] = ioffset;
272  ioffset += linearSize(a_input[ivec]);
273  }
274  //yes I could do this in one loop but that would
275  // either
276  // a) make it less symmetric with linearIn
277  // and/or
278  // b) make both of them far less readable
279  char* const charBuf = (char*)a_outBuf;
280  for (int ivec = 0; ivec < vecsize; ivec++)
281  {
282  char* const dataLoc = charBuf + vecOffset[ivec];
283  linearOut(dataLoc, a_input[ivec]);
284  }
285 }
286 
287 //Vector<T> specialization of linearSize
288 template <class T>
289 int
290 linearListSize(const Vector<T>& a_input)
291 {
292  //first entry is the size of the vector (int)
293  //next vecsize entries are offsets of data into buffer (int)
294  //next vecsize entries are the actual data
295  int itotsize = (a_input.size() + 1)*sizeof(int);
296  for (unsigned int ivec = 0; ivec < a_input.size(); ivec++)
297  {
298  itotsize += linearSize(a_input[ivec]);
299  }
300  return itotsize;
301 }
302 
303 #include "BaseNamespaceFooter.H"
304 
305 #endif
#define freeMT(a_a)
Definition: memtrack.H:160
#define CH_assert(cond)
Definition: CHArray.H:37
void linearOut(void *const a_outBuf, const T &inputT)
Definition: SPMDI.H:33
void barrier(void)
all ranks wait here to sync-up
Definition: SPMDI.H:215
one dimensional dynamic array
Definition: Vector.H:53
#define mallocMT(a_a)
Definition: memtrack.H:159
void AttachDebugger(int a_sig=4)
void linearIn(T &a_outputT, const void *const inBuf)
Definition: SPMDI.H:27
unsigned long long CH_MaxMPIRecvSize
unsigned long long CH_MaxMPISendSize
unsigned int numProc()
number of parallel processes
int linearSize(const T &inputT)
Definition: SPMDI.H:21
void resize(unsigned int isize)
Definition: Vector.H:346
void gather(Vector< T > &a_outVec, const T &a_input, int a_dest)
Definition: SPMDI.H:197
void linearListIn(Vector< T > &a_outputT, const void *const a_inBuf)
Definition: SPMDI.H:229
size_t size() const
Definition: Vector.H:192
static void Error(const char *const a_msg=m_nullString, int m_exitCode=CH_DEFAULT_ERROR_CODE)
Print out message to cerr and exit with the specified exit code.
void linearListOut(void *const a_outBuf, const Vector< T > &a_input)
Definition: SPMDI.H:258
int linearListSize(const Vector< T > &a_input)
Definition: SPMDI.H:290
int procID()
local process ID
void broadcast(T &a_inAndOut, int a_src)
broadcast to every process
Definition: SPMDI.H:207