Chombo + EB + MF  3.2
BoxLayoutDataI.H
Go to the documentation of this file.
1 #ifdef CH_LANG_CC
2 /*
3  * _______ __
4  * / ___/ / ___ __ _ / / ___
5  * / /__/ _ \/ _ \/ V \/ _ \/ _ \
6  * \___/_//_/\___/_/_/_/_.__/\___/
7  * Please refer to Copyright.txt, in Chombo's root directory.
8  */
9 #endif
10 
11 #ifndef _BOXLAYOUTDATAI_H_
12 #define _BOXLAYOUTDATAI_H_
13 
14 #include <cstdlib>
15 #include <algorithm>
16 #include <limits.h>
17 #include <list>
18 #include "CH_OpenMP.H"
19 #include "parstream.H"
20 #include "memtrack.H"
21 #include "Misc.H"
22 #include "CH_Timer.H"
23 #include "BaseFabMacros.H"
24 #include "NamespaceHeader.H"
25 
26 
27 using std::sort;
28 
29 
30 template <class T>
32  int ncomps,
33  const DataIndex& a_datInd) const
34 {
35  return new T(box, ncomps);
36 }
37 
38 template<class T>
39 inline bool BoxLayoutData<T>::isDefined() const
40 {
41  return m_isdefined;
42 }
43 
44 template <class T>
46  const Interval& srcComps,
47  const Interval& destComps)
48 {
49  if(&da != this)
50  {
51  DataIterator it=this->dataIterator();
52  int nbox=it.size();
53 #pragma omp parallel for if(this->m_threadSafe)
54  for(int box=0; box<nbox; box++)
55  {
56  this->m_vector[it[box].datInd()]->copy( this->box(it[box]), destComps,
57  this->box(it[box]), da[it[box]], srcComps);
58  }
59  }
60 }
61 
62 template<class T>
63 inline void BoxLayoutData<T>::define(const BoxLayoutData<T>& da, const Interval& comps,
64  const DataFactory<T>& factory)
65 {
66  if (this == &da)
67  {
68  MayDay::Error("BoxLayoutData<T>::define(const LayoutData<T>& da,.....) called with 'this'");
69  }
70  CH_assert(comps.size()>0);
71  CH_assert(comps.end()<=m_comps);
72  //AD: why are the two different
73  // CH_assert(comps.end()<=da.m_comps);
74  CH_assert(comps.begin()>=0);
75  this->m_boxLayout = da.boxLayout();
76 
77  this->m_comps = comps.size();
78  this->m_threadSafe = factory.threadSafe();
79  //this->m_threadSafe = false;
80 
81  Interval dest(0, m_comps-1);
82  allocateGhostVector(factory);
83  setVector(da, comps, dest);
84 }
85 
86 template<class T>
87 inline void BoxLayoutData<T>::define(const BoxLayout& boxes, int comps,
88  const DataFactory<T>& factory)
89 {
90  CH_assert(boxes.isClosed());
91  this->m_boxLayout = boxes;
92  m_comps = comps;
93  this->m_threadSafe = factory.threadSafe();
94  // this->m_threadSafe = false;
95  m_isdefined = true;
96  allocateGhostVector(factory);
97 
98 }
99 
100 template<class T>
101 inline void BoxLayoutData<T>::define(const BoxLayout& boxes)
102 {
103  MayDay::Error("BoxLayoutData<T>::define(const BoxLayout& boxes)...needs comps");
104 }
105 
106 template <class T>
108 {
109  m_isdefined = false;
110 #ifdef CH_MPI
111 
112 #endif
113 }
114 template<class T>
115 inline BoxLayoutData<T>::BoxLayoutData(const BoxLayout& boxes, int comps,
116  const DataFactory<T>& factory)
117  :m_comps(comps),m_buff(NULL)
118 {
119  CH_assert(boxes.isClosed());
120  this->m_boxLayout = boxes;
121  m_isdefined = true;
122  allocateGhostVector(factory);
123 #ifdef CH_MPI
124 
125 #endif
126 }
127 
128 template<class T>
130 {
131  CH_TIME("~BoxLayoutData");
132  //completePendingSends();
133 }
134 
135 template<class T>
137  const DataFactory<T>& factory)
138 {
139  if (this != &da)
140  {
142  this->m_boxLayout = da.boxLayout();
143  m_comps = da.nComp();
144  this->m_threadSafe = factory.threadSafe();
145  //this->m_threadSafe = false;
146  Interval srcAnddest(0, m_comps-1);
147  allocateGhostVector(factory);
148  setVector(da, srcAnddest, srcAnddest);
149  }
150 
151 }
152 template<class T>
154 {
155  if (this->m_callDelete == true)
156  {
157  for (unsigned int i=0; i<this->m_vector.size(); ++i)
158  {
159  delete this->m_vector[i];
160  this->m_vector[i] = NULL;
161  }
162  }
163  m_isdefined = false;
164 }
165 
166 template<class T>
167 inline void BoxLayoutData<T>::allocateGhostVector(const DataFactory<T>& factory, const IntVect& ghost)
168 {
169  if (this->m_callDelete == true)
170  {
171  for (unsigned int i=0; i<this->m_vector.size(); ++i)
172  {
173  delete this->m_vector[i];
174  this->m_vector[i] = NULL;
175  }
176  }
177 
178  this->m_callDelete = factory.callDelete();
179 
180  DataIterator it(this->dataIterator()); int nbox=it.size();
181  this->m_vector.resize(it.size(), NULL);
182 #pragma omp parallel for if(this->m_threadSafe)
183  for(int i=0; i<nbox; i++)
184  {
185  unsigned int index = it[i].datInd();
186  Box abox = this->box(it[i]);
187  abox.grow(ghost);
188  this->m_vector[index] = factory.create(abox, m_comps, it[i]);
189  if (this->m_vector[index] == NULL)
190  {
191  MayDay::Error("OutOfMemory in BoxLayoutData::allocateGhostVector");
192  }
193  }
194 }
195 
196 template<class T>
197 inline void BoxLayoutData<T>::apply(void (*a_func)(const Box& box, int comps, T& t))
198 {
199  DataIterator it(this->dataIterator()); int nbox=it.size();
200 #pragma omp parallel for
201  for(int i=0; i<nbox; i++)
202 
203  {
204  a_func(this->box(it[i]), m_comps, *(this->m_vector[ it[i].datInd() ]));
205  }
206 }
207 
208 //======================================================================
209 template <class T>
211 {
212  define(a_original, interval);
213 }
214 
215 template <class T>
217 {
218  m_origPointer = a_original;
219  m_interval = interval;
220 }
221 
222 template <class T>
223 T* AliasDataFactory<T>::create(const Box& a_box, int ncomps, const DataIndex& a_dataInd) const
224 {
225  //CH_assert(this->box(a_dataInd) == a_box);
226  CH_assert(ncomps = m_interval.size());
227  T* rtn = new T(m_interval, m_origPointer->operator[](a_dataInd));
228  return rtn;
229 }
230 
231 template<class T>
232 void BoxLayoutData<T>::makeItSo(const Interval& a_srcComps,
233  const BoxLayoutData<T>& a_src,
234  BoxLayoutData<T>& a_dest,
235  const Interval& a_destComps,
236  const Copier& a_copier,
237  const LDOperator<T>& a_op) const
238 {
239  if(s_verbosity > 0)
240  {
241  pout() << "makeit so copier = " << endl;
242  a_copier.print();
243  }
244  makeItSoBegin(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
245  makeItSoLocalCopy(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
246  a_dest.makeItSoEnd(a_destComps, a_op);
247 }
248 
249 template<class T>
251  const BoxLayoutData<T>& a_src,
252  BoxLayoutData<T>& a_dest,
253  const Interval& a_destComps,
254  const Copier& a_copier,
255  const LDOperator<T>& a_op) const
256 {
257  // The following five functions are nullOps in uniprocessor mode
258 
259 #ifdef CH_MPI
260 
261  allocateBuffers(a_src, a_srcComps,
262  a_dest, a_destComps,
263  a_copier,
264  a_op); //monkey with buffers, set up 'fromMe' and 'toMe' queues
265 
266  writeSendDataFromMeIntoBuffers(a_src, a_srcComps, a_op);
267 
268  // If there is nothing to recv/send, don't go into these functions
269  // and allocate memory that will not be freed later. (ndk)
270  // The #ifdef CH_MPI is for the m_buff->m_toMe and m_buff->m_fromMe
271  {
272  CH_TIME("post_messages");
273  this->m_buff->numReceives = m_buff->m_toMe.size();
274 
275  if (this->m_buff->numReceives > 0)
276  {
277  postReceivesToMe(); // all non-blocking
278  }
279 
280 
281  this->m_buff->numSends = m_buff->m_fromMe.size();
282  if (this->m_buff->numSends > 0)
283  {
284  postSendsFromMe(); // all non-blocking
285  }
286  }
287 #endif
288 }
289 
290 template<class T>
292  const BoxLayoutData<T>& a_src,
293  BoxLayoutData<T>& a_dest,
294  const Interval& a_destComps,
295  const Copier& a_copier,
296  const LDOperator<T>& a_op) const
297 {
298 
299  CH_TIME("local copying");
300  CopyIterator it(a_copier, CopyIterator::LOCAL);
301  int items=it.size();
302 #ifdef _OPENMP
303  bool threadSafe = m_threadSafe && (a_op.threadSafe());
304 #endif
305 #pragma omp parallel for if(threadSafe)
306  for (int n=0; n<items; n++)
307  {
308  const MotionItem& item = it[n];
309 //debugging bit to force serial code to run parallel bits
310 #if 0
311  const T & srcFAB = a_src[item.fromIndex];
312  T & dstFAB = a_dest[item.toIndex];
313  size_t bufsize_src = a_op.size(srcFAB, item.fromRegion, a_srcComps);
314  size_t bufsize_dst = a_op.size(srcFAB, item.fromRegion, a_srcComps);
315  if(bufsize_src != bufsize_dst)
316  {
317  MayDay::Error("buffer size mismatch");
318  }
319  char* charbuffer = new char[bufsize_src];
320  a_op.linearOut(srcFAB, charbuffer, item.fromRegion, a_srcComps);
321  a_op.linearIn (dstFAB, charbuffer, item.toRegion, a_destComps);
322 
323  delete[] charbuffer;
324 #else
325  a_op.op(a_dest[item.toIndex], item.fromRegion,
326  a_destComps,
327  item.toRegion,
328  a_src[item.fromIndex],
329  a_srcComps);
330 #endif
331 
332  }
333 }
334 template<class T>
336  const Interval& a_destComps,
337  const LDOperator<T>& a_op)
338 {
339  // Uncomment and Move this out of unpackReceivesToMe() (ndk)
340  completePendingSends(); // wait for sends from possible previous operation
341 
342  unpackReceivesToMe(a_destComps, a_op); // nullOp in uniprocessor mode
343 
344 }
345 
346 #ifndef CH_MPI
347 // uniprocessor version of all these nullop functions.
348 template<class T>
350 {
351 }
352 
353 template<class T>
355  const Interval& a_srcComps,
356  const BoxLayoutData<T>& a_dest,
357  const Interval& a_destComps,
358  const Copier& a_copier,
359  const LDOperator<T>& a_op
360  ) const
361 {
362 }
363 
364 template<class T>
366  const Interval& a_srcComps,
367  const LDOperator<T>& a_op) const
368 {
369 }
370 
371 template<class T>
373 {
374 }
375 
376 template<class T>
378 {
379 }
380 
381 template<class T>
383  const Interval& a_destComps,
384  const LDOperator<T>& a_op)
385 {
386 }
387 
388 template<class T>
390  const Interval& a_destComps,
391  int ncomp,
392  const DataFactory<T>& factory,
393  const LDOperator<T>& a_op) const
394 {
395 }
396 
397 #else
398 
399 // MPI versions of the above codes.
400 
401 template<class T>
403 {
404  CH_TIME("completePendingSends");
405  if (this->m_buff->numSends > 0)
406  {
407  CH_TIME("MPI_Waitall");
408  m_buff->m_sendStatus.resize(this->m_buff->numSends);
409  int result = MPI_Waitall(this->m_buff->numSends, &(m_buff->m_sendRequests[0]), &(m_buff->m_sendStatus[0]));
410  if (result != MPI_SUCCESS)
411  {
412  //hell if I know what to do about failed messaging here
413  }
414  }
415  this->m_buff->numSends = 0;
416 }
417 
418 template<class T>
420  const Interval& a_srcComps,
421  const BoxLayoutData<T>& a_dest,
422  const Interval& a_destComps,
423  const Copier& a_copier,
424  const LDOperator<T>& a_op) const
425 {
426  CH_TIME("MPI_allocateBuffers");
427  m_buff = &(((Copier&)a_copier).m_buffers);
428  a_dest.m_buff = m_buff;
429 
430  CH_assert(a_srcComps.size() == a_destComps.size());
431  if (m_buff->isDefined(a_srcComps.size()) && T::preAllocatable()<2) return;
432 
433  if(s_verbosity > 0)
434  {
435  pout() << " allocate buffers srcComps = " << a_srcComps << ", dest comps = " << a_destComps << endl;
436  }
437  m_buff->m_ncomps = a_srcComps.size();
438 
439  m_buff->m_fromMe.resize(0);
440  m_buff->m_toMe.resize(0);
441  size_t sendBufferSize = 0;
442  size_t recBufferSize = 0;
443  // two versions of code here. one for preAllocatable T, one not.
444 
445  T dummy;
446  for (CopyIterator it(a_copier, CopyIterator::FROM); it.ok(); ++it)
447  {
448  const MotionItem& item = it();
449  CopierBuffer::bufEntry b;
450  b.item = &item;
451  b.size = a_op.size(a_src[item.fromIndex], item.fromRegion, a_srcComps);
452  sendBufferSize+=b.size;
453  b.procID = item.procID;
454  m_buff->m_fromMe.push_back(b);
455  }
456  sort(m_buff->m_fromMe.begin(), m_buff->m_fromMe.end());
457  for (CopyIterator it(a_copier, CopyIterator::TO); it.ok(); ++it)
458  {
459  const MotionItem& item = it();
460  CopierBuffer::bufEntry b;
461  b.item = &item;
462  if (T::preAllocatable() == 0)
463  {
464  b.size = a_op.size(dummy, item.fromRegion, a_destComps);
465  recBufferSize+=b.size;
466  }
467  else if (T::preAllocatable() == 1)
468  {
469  b.size = a_op.size(a_dest[item.toIndex], item.fromRegion, a_destComps);
470  recBufferSize+=b.size;
471  }
472  b.procID = item.procID;
473  m_buff->m_toMe.push_back(b);
474  }
475  sort(m_buff->m_toMe.begin(), m_buff->m_toMe.end());
476 
477  if (T::preAllocatable() == 2) // dynamic allocatable, need two pass
478  {
479  CH_TIME("MPI_ Phase 1 of 2 Phase: preAllocatable==2");
480  if (s_verbosity > 0) pout()<<"preAllocatable==2\n";
481 
482  // in the non-preallocatable case, I need to message the
483  // values for the m_buff->m_toMe[*].size
484  Vector<unsigned long> fdata;
485  Vector<unsigned long> tdata;
486  int count = 1;
487  int scount = 1;
488  if (m_buff->m_toMe.size() > 0)
489  {
490  tdata.resize(m_buff->m_toMe.size(), ULONG_MAX);
491  m_buff->m_receiveRequests.resize(numProc()-1);
492  m_buff->m_receiveStatus.resize(numProc()-1);
493  MPI_Request* Rptr = &(m_buff->m_receiveRequests[0]);
494 
495  unsigned int lastProc = m_buff->m_toMe[0].procID;
496  int messageSize = 1;
497  unsigned long * dataPtr = &(tdata[0]);
498  unsigned int i = 1;
499 
500  for (;i<m_buff->m_toMe.size(); ++i)
501  {
502  CopierBuffer::bufEntry& b = m_buff->m_toMe[i];
503  if (b.procID == lastProc)
504  messageSize++;
505  else
506  {
507 
508  MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
509  1, Chombo_MPI::comm, Rptr);
510  Rptr++;
511 
512  lastProc = b.procID;
513  messageSize = 1;
514  dataPtr = &(tdata[i]);
515  count++;
516  }
517  }
518 
519  MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
520  1, Chombo_MPI::comm, Rptr );
521  }
522 
523  if (m_buff->m_fromMe.size() > 0)
524  {
525  fdata.resize(m_buff->m_fromMe.size());
526  fdata[0]=m_buff->m_fromMe[0].size;
527  m_buff->m_sendRequests.resize(numProc()-1);
528  m_buff->m_sendStatus.resize(numProc()-1);
529  MPI_Request* Rptr = &(m_buff->m_sendRequests[0]);
530 
531  unsigned int lastProc = m_buff->m_fromMe[0].procID;
532  int messageSize = 1;
533  unsigned long * dataPtr = &(fdata[0]);
534  unsigned int i = 1;
535  for (;i<m_buff->m_fromMe.size(); ++i)
536  {
537  fdata[i] = m_buff->m_fromMe[i].size;
538  CopierBuffer::bufEntry& b = m_buff->m_fromMe[i];
539  if (b.procID == lastProc)
540  messageSize++;
541  else
542  {
543  MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
544  1, Chombo_MPI::comm, Rptr);
545 
546  Rptr++;
547  lastProc = b.procID;
548  messageSize = 1;
549  dataPtr = &(fdata[i]);
550  scount++;
551  }
552  }
553 
554  MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
555  1, Chombo_MPI::comm, Rptr);
556  }
557 
558  if (m_buff->m_toMe.size() > 0)
559  {
560 
561  int result = MPI_Waitall(count, &(m_buff->m_receiveRequests[0]), &(m_buff->m_receiveStatus[0]));
562  if (result != MPI_SUCCESS)
563  {
564  MayDay::Error("First pass of two-phase communication failed");
565  }
566 
567  for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
568  {
569  CH_assert(tdata[i] != ULONG_MAX);
570  m_buff->m_toMe[i].size = tdata[i];
571  recBufferSize+= tdata[i];
572  }
573  }
574 
575  if (m_buff->m_fromMe.size() > 0)
576  {
577 
578  int result = MPI_Waitall(scount, &(m_buff->m_sendRequests[0]), &(m_buff->m_sendStatus[0]));
579  if (result != MPI_SUCCESS)
580  {
581  MayDay::Error("First pass of two-phase communication failed");
582  }
583 
584  }
585  }
586 
587  // allocate send and receive buffer space.
588 
589  if (sendBufferSize > m_buff->m_sendcapacity)
590  {
592  if (s_verbosity > 0) pout()<<"malloc send buffer "<<sendBufferSize<<std::endl;
593  (m_buff->m_sendbuffer) = mallocMT(sendBufferSize);
594  if ((m_buff->m_sendbuffer) == NULL)
595  {
596  MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
597  }
598  m_buff->m_sendcapacity = sendBufferSize;
599  }
600 
601  if (recBufferSize > m_buff->m_reccapacity)
602  {
604  if (s_verbosity > 0) pout()<<"malloc receive buffer "<<recBufferSize<<std::endl;
605  m_buff->m_recbuffer = mallocMT(recBufferSize);
606  if (m_buff->m_recbuffer == NULL)
607  {
608  MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
609  }
610  m_buff->m_reccapacity = recBufferSize;
611  }
612 
613  /*
614  pout()<<"\n";
615  for (int i=0; i<m_buff->m_fromMe.size(); i++)
616  pout()<<m_buff->m_fromMe[i].item->region<<"{"<<m_buff->m_fromMe[i].procID<<"}"<<" ";
617  pout() <<"::::";
618  for (int i=0; i<m_buff->m_toMe.size(); i++)
619  pout()<<m_buff->m_toMe[i].item->region<<"{"<<m_buff->m_toMe[i].procID<<"}"<<" ";
620  pout() << endl;
621  */
622 
623  char* nextFree = (char*)(m_buff->m_sendbuffer);
624  if (m_buff->m_fromMe.size() > 0)
625  {
626  for (unsigned int i=0; i<m_buff->m_fromMe.size(); ++i)
627  {
628  m_buff->m_fromMe[i].bufPtr = nextFree;
629  nextFree += m_buff->m_fromMe[i].size;
630  }
631  }
632 
633  nextFree = (char*)m_buff->m_recbuffer;
634  if (m_buff->m_toMe.size() > 0)
635  {
636  for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
637  {
638  m_buff->m_toMe[i].bufPtr = nextFree;
639  nextFree += m_buff->m_toMe[i].size;
640  }
641  }
642 
643  // since fromMe and toMe are sorted based on procID, messages can now be grouped
644  // together on a per-processor basis.
645 
646 }
647 
648 template<class T>
650  const Interval& a_srcComps,
651  const LDOperator<T>& a_op) const
652 {
653  CH_TIME("write Data to buffers");
654  int isize = m_buff->m_fromMe.size();
655 #ifdef _OPENMP
656  bool threadSafe = m_threadSafe && (a_op.threadSafe());
657 #endif
658 #pragma omp parallel for if(threadSafe)
659  for (unsigned int i=0; i< isize; ++i)
660  {
661  const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[i];
662  a_op.linearOut(a_src[entry.item->fromIndex], entry.bufPtr,
663  entry.item->fromRegion, a_srcComps);
664  }
665 }
666 
667 template<class T>
669 {
670  CH_TIME("post_Sends");
671  // now we get the magic of message coalescence
672  // fromMe has already been sorted in the allocateBuffers() step.
673 
674  this->m_buff->numSends = m_buff->m_fromMe.size();
675 
676  if (this->m_buff->numSends > 1)
677  {
678  for (unsigned int i=m_buff->m_fromMe.size()-1; i>0; --i)
679  {
680  if (m_buff->m_fromMe[i].procID == m_buff->m_fromMe[i-1].procID)
681  {
682  this->m_buff->numSends--;
683  m_buff->m_fromMe[i-1].size = m_buff->m_fromMe[i-1].size + m_buff->m_fromMe[i].size;
684  m_buff->m_fromMe[i].size = 0;
685  }
686  }
687  }
688  m_buff->m_sendRequests.resize(this->m_buff->numSends);
689  std::list<MPI_Request> extraRequests;
690 
691  unsigned int next=0;
692  long long maxSize = 0;
693  for (int i=0; i<this->m_buff->numSends; ++i)
694  {
695  const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[next];
696  char* buffer = (char*)entry.bufPtr;
697  std::size_t bsize = entry.size;
698  int idtag=0;
699  while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
700  {
701  extraRequests.push_back(MPI_Request());
702  {
703  //CH_TIME("MPI_Isend");
704  MPI_Isend(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
705  idtag, Chombo_MPI::comm, &(extraRequests.back()));
706  }
707  maxSize = CH_MAX_MPI_MESSAGE_SIZE;
708  bsize -= CH_MAX_MPI_MESSAGE_SIZE;
709  buffer+=CH_MAX_MPI_MESSAGE_SIZE;
710  idtag++;
711  }
712  {
713  //CH_TIME("MPI_Isend");
714  MPI_Isend(buffer, bsize, MPI_BYTE, entry.procID,
715  idtag, Chombo_MPI::comm, &(m_buff->m_sendRequests[i]));
716  }
717  maxSize = Max<long long>(bsize, maxSize);
718  ++next;
719  while (next < m_buff->m_fromMe.size() && m_buff->m_fromMe[next].size == 0) ++next;
720  }
721  for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
722  {
723  m_buff->m_sendRequests.push_back(*it);
724  }
725  this->m_buff->numSends = m_buff->m_sendRequests.size();
726 
727  CH_MaxMPISendSize = Max<long long>(CH_MaxMPISendSize, maxSize);
728 
729 }
730 
731 template<class T>
733 {
734  CH_TIME("post_Receives");
735  this->m_buff->numReceives = m_buff->m_toMe.size();
736 
737  if (this->m_buff->numReceives > 1)
738  {
739  for (unsigned int i=m_buff->m_toMe.size()-1; i>0; --i)
740  {
741  if (m_buff->m_toMe[i].procID == m_buff->m_toMe[i-1].procID)
742  {
743  this->m_buff->numReceives--;
744  m_buff->m_toMe[i-1].size += m_buff->m_toMe[i].size;
745  m_buff->m_toMe[i].size = 0;
746  }
747 
748  }
749  }
750  m_buff->m_receiveRequests.resize(this->m_buff->numReceives);
751  std::list<MPI_Request> extraRequests;
752  unsigned int next=0;
753  long long maxSize = 0;
754  for (int i=0; i<this->m_buff->numReceives; ++i)
755  {
756  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[next];
757  char* buffer = (char*)entry.bufPtr;
758  size_t bsize = entry.size;
759  int idtag=0;
760  while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
761  {
762  extraRequests.push_back(MPI_Request());
763  {
764  //CH_TIME("MPI_Irecv");
765  MPI_Irecv(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
766  idtag, Chombo_MPI::comm, &(extraRequests.back()));
767  }
768  maxSize = CH_MAX_MPI_MESSAGE_SIZE;
769  bsize -= CH_MAX_MPI_MESSAGE_SIZE;
770  buffer+=CH_MAX_MPI_MESSAGE_SIZE;
771  idtag++;
772  }
773  {
774  //CH_TIME("MPI_Irecv");
775  MPI_Irecv(buffer, bsize, MPI_BYTE, entry.procID,
776  idtag, Chombo_MPI::comm, &(m_buff->m_receiveRequests[i]));
777  }
778  ++next;
779  maxSize = Max<long long>(bsize, maxSize);
780  while (next < m_buff->m_toMe.size() && m_buff->m_toMe[next].size == 0) ++next;
781  }
782  for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
783  {
784  m_buff->m_receiveRequests.push_back(*it);
785  }
786  this->m_buff->numReceives = m_buff->m_receiveRequests.size();
787 
788  CH_MaxMPIRecvSize = Max<long long>(CH_MaxMPIRecvSize, maxSize);
789  //pout()<<"maxSize="<<maxSize<<" posted "<<this->m_buff->numReceives<<" receives\n";
790 
791 }
792 
793 template<class T>
795  const Interval& a_destComps,
796  const LDOperator<T>& a_op)
797 {
798 
799  CH_TIME("unpack_messages");
800 
801  if (this->m_buff->numReceives > 0)
802  {
803  m_buff->m_receiveStatus.resize(this->m_buff->numReceives);
804  int result;
805  {
806  CH_TIME("MPI_Waitall");
807  result = MPI_Waitall(this->m_buff->numReceives, &(m_buff->m_receiveRequests[0]),
808  &(m_buff->m_receiveStatus[0]));
809  }
810  if (result != MPI_SUCCESS)
811  {
812  //hell if I know what to do about failed messaging here
813  //maybe a mayday::warning?
814  }
815 
816  int isize = m_buff->m_toMe.size();
817 #ifdef _OPENMP
818  bool threadSafe = m_threadSafe && (a_op.threadSafe());
819 #endif
820 #pragma omp parallel for if(threadSafe)
821  for (unsigned int i=0; i< isize; ++i)
822  {
823  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
824  a_op.linearIn(this->operator[](entry.item->toIndex), entry.bufPtr,
825  entry.item->toRegion, a_destComps);
826  }
827  }
828  this->m_buff->numReceives = 0;
829 }
830 
831 template<class T>
833  const Interval& a_destComps,
834  int ncomp,
835  const DataFactory<T>& factory,
836 
837  const LDOperator<T>& a_op) const
838 {
839 
840  if (this->m_buff->numReceives > 0)
841  {
842  m_buff->m_receiveStatus.resize(this->m_buff->numReceives);
843  int result;
844  {
845  CH_TIME("MPI_Waitall");
846  result = MPI_Waitall(this->m_buff->numReceives, &(m_buff->m_receiveRequests[0]),
847  &(m_buff->m_receiveStatus[0]));
848  }
849  if (result != MPI_SUCCESS)
850  {
851  //hell if I know what to do about failed messaging here
852  }
853  int isize = m_buff->m_toMe.size();
854  // NOT thread-safe, because of a_dest[item.toIndex].push_back(newT);
855  // #pragma omp parallel for if(this->m_threadSafe)
856  for (int i=0; i< isize; ++i)
857  {
858  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
859  const MotionItem& item = *(entry.item);
860  RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );;
861 
862  a_op.linearIn(*newT, entry.bufPtr, item.toRegion, a_destComps);
863  a_dest[item.toIndex].push_back(newT);
864  }
865  }
866 
867  this->m_buff->numReceives = 0;
868 }
869 #endif
870 
871 template <class T>
873  LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
874  const Interval& a_srcComps,
875  const ProblemDomain& a_domain,
876  const Copier& a_copier,
877  const DataFactory<T>& factory) const
878 {
879 
880  CH_assert(T::preAllocatable() == 0);
881  a_dest.define(a_destGrids);
882 
883  LDOperator<T> a_op;
884 
885  int ncomp = a_srcComps.size();
886  Interval destComps(0, ncomp-1);
887  allocateBuffers(*this, a_srcComps,
888  *this, destComps,
889  a_copier, a_op);
890 
891  writeSendDataFromMeIntoBuffers(*this, a_srcComps, a_op);
892 
893  // If there is nothing to recv/send, don't go into these functions
894  // and allocate memory that will not be freed later. (ndk)
895  // The #ifdef CH_MPI is for the m_buff->m_toMe and m_buff->m_fromMe
896 #ifdef CH_MPI
897  this->m_buff->numReceives = m_buff->m_toMe.size();
898  if (this->m_buff->numReceives > 0)
899  {
900  postReceivesToMe(); // all non-blocking
901  }
902 
903  this->m_buff->numSends = m_buff->m_fromMe.size();
904  if (this->m_buff->numSends > 0)
905  {
906  postSendsFromMe(); // all non-blocking
907  }
908 #endif
909 
910  // perform local copy
911  CopyIterator it(a_copier, CopyIterator::LOCAL);
912  int items=it.size();
913 
914 //brian says this does not need conditionals because everyone is getting different buffers
915  // NOT thread-safe, because of a_dest[item.toIndex].push_back(newT);
916  // #pragma omp parallel for
917  for(int i=0; i<items; ++i)
918  {
919  const MotionItem& item = it[i];
920  RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );
921 
922  a_op.op(*newT, item.fromRegion,
923  destComps,
924  item.toRegion,
925  this->operator[](item.fromIndex),
926  a_srcComps);
927  a_dest[item.toIndex].push_back(newT);
928  }
929  // }
930  // Uncomment and Move this out of unpackReceivesToMe() (ndk)
931  completePendingSends(); // wait for sends from possible previous operation
932 
933  unpackReceivesToMe_append(a_dest, destComps, ncomp, factory, a_op); // nullOp in uniprocessor mode
934 }
935 
936 template <class T>
938  LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
939  const Interval& a_srcComps,
940  const ProblemDomain& a_domain,
941  const DataFactory<T>& factory) const
942 {
943  Copier copier;
944  copier.define(this->m_boxLayout, a_destGrids, a_domain, IntVect::Zero);
945 
946  generalCopyTo(a_destGrids, a_dest, a_srcComps, a_domain, copier, factory);
947 }
948 
949 template <class T>
950 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
951  BoxLayoutData<T>& a_dest,
952  const Interval& a_destComps,
953  const ProblemDomain& a_domain) const
954 {
955  Copier copier;
956  copier.define(this->m_boxLayout, a_dest.m_boxLayout, a_domain, IntVect::Zero);
957  addTo(a_srcComps, a_dest, a_destComps, a_domain, copier);
958 }
959 
960 template <class T>
961 class LDaddOp : public LDOperator<T>
962 {
963 public:
964  virtual void op(T& dest,
965  const Box& RegionFrom,
966  const Interval& Cdest,
967  const Box& RegionTo,
968  const T& src,
969  const Interval& Csrc) const
970  {
971  dest.plus(src, RegionFrom, RegionTo, Csrc.begin(), Cdest.begin(), Cdest.size());
972  }
973  virtual void linearIn(T& arg, void* buf, const Box& R,
974  const Interval& comps) const
975  {
976  Real* buffer = (Real*)buf;
977 
978  ForAllXBNNnoindx(Real, arg, R, comps.begin(), comps.size())
979  {
980  argR+=*buffer;
981  buffer++;
982  } EndFor
983 
984  }
985 
986  virtual bool threadSafe() const
987  {
988  return false;
989  }
990 };
991 
992 template <class T>
993 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
994  BoxLayoutData<T>& a_dest,
995  const Interval& a_destComps,
996  const ProblemDomain& a_domain,
997  const Copier& a_copier) const
998 {
999  CH_TIME("addTo");
1000  LDaddOp<T> addOp;
1001  addToBegin(a_srcComps, a_dest, a_destComps, a_copier);
1002  a_dest.addToEnd(a_destComps);
1003  //makeItSoBegin(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1004  //makeItSoLocalCopy(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1005  //a_dest.makeItSoEnd(a_destComps, addOp);
1006  //makeItSo(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1007 }
1008 
1009 
1010 template <class T>
1012  BoxLayoutData<T>& a_dest,
1013  const Interval& a_destComps,
1014  const Copier& a_copier) const
1015 {
1016  CH_TIME("addToBegin");
1017  LDaddOp<T> addOp;
1018  makeItSoBegin(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1019  makeItSoLocalCopy(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1020 
1021 }
1022 
1023 
1024 template <class T>
1025 void BoxLayoutData<T>::addToEnd(const Interval& a_destComps)
1026 {
1027  CH_TIME("addToEnd");
1028  LDaddOp<T> addOp;
1029 
1030  makeItSoEnd(a_destComps, addOp);
1031 
1032 }
1033 
1034 
1035 
1036 #include "NamespaceFooter.H"
1037 #endif
std::ostream & pout()
Use this in place of std::cout for program output.
virtual bool threadSafe() const
Definition: BoxLayoutData.H:301
int numReceives
Definition: Copier.H:124
void addToBegin(const Interval &a_srcComps, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier) const
Definition: BoxLayoutDataI.H:1011
int m_comps
Definition: BoxLayoutData.H:403
CopierBuffer * m_buff
Definition: BoxLayoutData.H:481
virtual void define(const BoxLayout &boxes, int comps, const DataFactory< T > &factory=DefaultDataFactory< T >())
Definition: BoxLayoutDataI.H:87
bool m_callDelete
Definition: LayoutData.H:131
void define(BoxLayoutData< T > *a_original, const Interval &interval)
Definition: BoxLayoutDataI.H:216
A reference-counting handle class.
Definition: RefCountedPtr.H:173
#define freeMT(a_a)
Definition: memtrack.H:160
#define CH_assert(cond)
Definition: CHArray.H:37
A class to facilitate interaction with physical boundary conditions.
Definition: ProblemDomain.H:141
Vector< T * > m_vector
Definition: LayoutData.H:124
void unpackReceivesToMe_append(LayoutData< Vector< RefCountedPtr< T > > > &a_dest, const Interval &a_destComps, int ncomp, const DataFactory< T > &factory, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:389
int m_ncomps
Definition: Copier.H:78
A not-necessarily-disjoint collective of boxes.
Definition: BoxLayout.H:145
one dimensional dynamic array
Definition: Vector.H:53
void writeSendDataFromMeIntoBuffers(const BoxLayoutData< T > &a_src, const Interval &a_srcComps, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:365
virtual ~BoxLayoutData()
Definition: BoxLayoutDataI.H:129
int nComp() const
Definition: BoxLayoutData.H:306
Data that maintains a one-to-one mapping of T to the boxes in a BoxLayout.
Definition: BoxLayout.H:26
A strange but true thing to make copying from one boxlayoutdata to another fast.
Definition: Copier.H:152
#define mallocMT(a_a)
Definition: memtrack.H:159
int size() const
Definition: DataIterator.H:218
std::vector< bufEntry > m_fromMe
Definition: Copier.H:116
void unpackReceivesToMe(const Interval &a_destComps, const LDOperator< T > &a_op)
Definition: BoxLayoutDataI.H:382
virtual bool callDelete() const
Definition: BoxLayoutData.H:43
Definition: Copier.H:388
void setVector(const BoxLayoutData< T > &da, const Interval &srcComps, const Interval &destComps)
Definition: BoxLayoutDataI.H:45
size_t m_sendcapacity
Definition: Copier.H:82
Definition: DataIterator.H:190
bool isDefined(int ncomps) const
Definition: Copier.H:75
size_t size()
Definition: Copier.H:404
void completePendingSends() const
Definition: BoxLayoutDataI.H:349
unsigned long long CH_MaxMPIRecvSize
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutData.H:204
virtual void clear()
Definition: BoxLayoutDataI.H:153
unsigned long long CH_MaxMPISendSize
unsigned int numProc()
number of parallel processes
Definition: Copier.H:389
Definition: Copier.H:38
void postReceivesToMe() const
Definition: BoxLayoutDataI.H:377
int size() const
Definition: Interval.H:75
BoxLayout m_boxLayout
Definition: LayoutData.H:118
void generalCopyTo(const BoxLayout &a_destGrids, LayoutData< Vector< RefCountedPtr< T > > > &a_dest, const Interval &a_interval, const ProblemDomain &a_domain, const DataFactory< T > &factory=DefaultDataFactory< T >()) const
General data copying operation.
Definition: BoxLayoutDataI.H:937
int procID
Definition: Copier.H:44
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const
Factory function; creates a new 'T' object.
Definition: BoxLayoutDataI.H:31
virtual void linearOut(const T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutData.H:184
Definition: EBInterface.H:45
void resize(unsigned int isize)
Definition: Vector.H:346
virtual void apply(void(*a_Function)(const Box &box, int comps, T &t))
Definition: BoxLayoutDataI.H:197
DataIndex toIndex
Definition: Copier.H:41
size_t m_reccapacity
Definition: Copier.H:85
void * m_recbuffer
Definition: Copier.H:83
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutData.H:200
#define CH_TIME(name)
Definition: CH_Timer.H:82
Structure for passing component ranges in code.
Definition: Interval.H:23
int numSends
Definition: Copier.H:124
void allocateGhostVector(const DataFactory< T > &factory, const IntVect &ghost=IntVect::Zero)
Definition: BoxLayoutDataI.H:167
virtual bool isDefined() const
Definition: BoxLayoutDataI.H:39
void * m_sendbuffer
Definition: Copier.H:80
Interval interval() const
Definition: BoxLayoutData.H:312
virtual int size(const T &arg, const Box &b, const Interval &comps) const
Definition: BoxLayoutData.H:180
Data on a BoxLayout.
Definition: BoxLayoutData.H:97
int s_verbosity
Definition: BoxLayoutData.H:400
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutDataI.H:973
double Real
Definition: REAL.H:33
void addTo(const Interval &a_srcComps, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const ProblemDomain &a_domain) const
Definition: BoxLayoutDataI.H:950
virtual void define(const DisjointBoxLayout &dp, int comps, const IntVect &ghost=IntVect::Zero, const DataFactory< T > &a_factory=DefaultDataFactory< T >())
Definition: LevelDataI.H:80
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutData.H:189
void makeItSoLocalCopy(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:291
Box toRegion
Definition: Copier.H:43
bool isClosed() const
Definition: BoxLayout.H:730
static void Error(const char *const a_msg=m_nullString, int m_exitCode=CH_DEFAULT_ERROR_CODE)
Print out message to cerr and exit with the specified exit code.
int begin() const
Definition: Interval.H:99
const BoxLayout & boxLayout() const
Definition: LayoutData.H:107
static const IntVect Zero
Definition: IntVect.H:658
void postSendsFromMe() const
Definition: BoxLayoutDataI.H:372
AliasDataFactory(BoxLayoutData< T > *a_original, const Interval &interval)
Definition: BoxLayoutDataI.H:210
A Rectangular Domain on an Integer Lattice.
Definition: Box.H:469
Definition: DataIndex.H:114
bool ok() const
Definition: Copier.H:449
bool m_isdefined
Definition: BoxLayoutData.H:405
unsigned long long CH_MAX_MPI_MESSAGE_SIZE
An integer Vector in SpaceDim-dimensional space.
Definition: CHArray.H:42
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutDataI.H:986
DataIterator dataIterator() const
Definition: LayoutDataI.H:78
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const =0
factory function. creates a new 'T' object
size_t size() const
Definition: Vector.H:192
Definition: Copier.H:387
Factory object that creates the data members of a BoxLayoutData container.
Definition: BoxLayoutData.H:30
void makeItSoEnd(const Interval &a_destComps, const LDOperator< T > &a_op=LDOperator< T >())
Definition: BoxLayoutDataI.H:335
void makeItSoBegin(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:250
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const
Definition: BoxLayoutDataI.H:223
Box & grow(int i)
grow functions
Definition: Box.H:2263
virtual bool threadSafe() const
Definition: BoxLayoutData.H:49
Definition: BoxLayoutData.H:173
int end() const
Definition: Interval.H:104
bool m_threadSafe
Definition: BoxLayoutData.H:404
virtual void define(const DisjointBoxLayout &a_level, const BoxLayout &a_dest, bool a_exchange=false, IntVect a_shift=IntVect::Zero)
Definition: BoxLayoutDataI.H:961
DataIndex fromIndex
Definition: Copier.H:41
int print() const
Box box(const DataIndex &a_index) const
Definition: LayoutDataI.H:66
Box fromRegion
Definition: Copier.H:42
std::vector< bufEntry > m_toMe
Definition: Copier.H:117
Definition: Copier.H:382
void makeItSo(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:232
BoxLayoutData()
Definition: BoxLayoutDataI.H:107
void addToEnd(const Interval &a_destComps)
you call this on the destination BoxLayoutData, not the source.
Definition: BoxLayoutDataI.H:1025
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutDataI.H:964
void allocateBuffers(const BoxLayoutData< T > &a_src, const Interval &a_srcComps, const BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:354