Chombo + EB  3.2
BoxLayoutDataI.H
Go to the documentation of this file.
1 #ifdef CH_LANG_CC
2 /*
3  * _______ __
4  * / ___/ / ___ __ _ / / ___
5  * / /__/ _ \/ _ \/ V \/ _ \/ _ \
6  * \___/_//_/\___/_/_/_/_.__/\___/
7  * Please refer to Copyright.txt, in Chombo's root directory.
8  */
9 #endif
10 
11 #ifndef _BOXLAYOUTDATAI_H_
12 #define _BOXLAYOUTDATAI_H_
13 
14 #include <cstdlib>
15 #include <algorithm>
16 #include <limits.h>
17 #include <list>
18 #include "CH_OpenMP.H"
19 #include "parstream.H"
20 #include "memtrack.H"
21 #include "Misc.H"
22 #include "CH_Timer.H"
23 #include "NamespaceHeader.H"
24 #include "BaseFabMacros.H"
25 
26 using std::sort;
27 
28 template<class T>
30 
31 template <class T>
33  int ncomps,
34  const DataIndex& a_datInd) const
35 {
36  return new T(box, ncomps);
37 }
38 
39 template<class T>
40 inline bool BoxLayoutData<T>::isDefined() const
41 {
42  return m_isdefined;
43 }
44 
45 template <class T>
47  const Interval& srcComps,
48  const Interval& destComps)
49 {
50  if(&da != this)
51  {
52  DataIterator it=this->dataIterator();
53  int nbox=it.size();
54 #pragma omp parallel for if(this->m_threadSafe)
55  for(int box=0; box<nbox; box++)
56  {
57  this->m_vector[it[box].datInd()]->copy( this->box(it[box]), destComps,
58  this->box(it[box]), da[it[box]], srcComps);
59  }
60  }
61 }
62 
63 template<class T>
64 inline void BoxLayoutData<T>::define(const BoxLayoutData<T>& da, const Interval& comps,
65  const DataFactory<T>& factory)
66 {
67  if (this == &da)
68  {
69  MayDay::Error("BoxLayoutData<T>::define(const LayoutData<T>& da,.....) called with 'this'");
70  }
71  CH_assert(comps.size()>0);
72  CH_assert(comps.end()<=m_comps);
73  //AD: why are the two different
74  // CH_assert(comps.end()<=da.m_comps);
75  CH_assert(comps.begin()>=0);
76  this->m_boxLayout = da.boxLayout();
77 
78  this->m_comps = comps.size();
79  this->m_threadSafe = factory.threadSafe();
80  //this->m_threadSafe = false;
81 
82  Interval dest(0, m_comps-1);
83  allocateGhostVector(factory);
84  setVector(da, comps, dest);
85 }
86 
87 template<class T>
88 inline void BoxLayoutData<T>::define(const BoxLayout& boxes, int comps,
89  const DataFactory<T>& factory)
90 {
91  CH_assert(boxes.isClosed());
92  this->m_boxLayout = boxes;
93  m_comps = comps;
94  this->m_threadSafe = factory.threadSafe();
95  // this->m_threadSafe = false;
96  m_isdefined = true;
97  allocateGhostVector(factory);
98 
99 }
100 
101 template<class T>
102 inline void BoxLayoutData<T>::define(const BoxLayout& boxes)
103 {
104  MayDay::Error("BoxLayoutData<T>::define(const BoxLayout& boxes)...needs comps");
105 }
106 
107 template <class T>
109 {
110  m_isdefined = false;
111 #ifdef CH_MPI
112  this->numSends = 0;
113  this->numReceives = 0;
114 #endif
115 }
116 template<class T>
117 inline BoxLayoutData<T>::BoxLayoutData(const BoxLayout& boxes, int comps,
118  const DataFactory<T>& factory)
119  :m_comps(comps),m_buff(NULL)
120 {
121  CH_assert(boxes.isClosed());
122  this->m_boxLayout = boxes;
123  m_isdefined = true;
124  allocateGhostVector(factory);
125 #ifdef CH_MPI
126 
127  this->numSends = 0;
128  this->numReceives = 0;
129 #endif
130 }
131 
132 template<class T>
134 {
135  CH_TIME("~BoxLayoutData");
137 }
138 
139 template<class T>
141  const DataFactory<T>& factory)
142 {
143  if (this != &da)
144  {
146  this->m_boxLayout = da.boxLayout();
147  m_comps = da.nComp();
148  this->m_threadSafe = factory.threadSafe();
149  //this->m_threadSafe = false;
150  Interval srcAnddest(0, m_comps-1);
151  allocateGhostVector(factory);
152  setVector(da, srcAnddest, srcAnddest);
153  }
154 
155 }
156 template<class T>
158 {
159  if (this->m_callDelete == true)
160  {
161  for (unsigned int i=0; i<this->m_vector.size(); ++i)
162  {
163  delete this->m_vector[i];
164  this->m_vector[i] = NULL;
165  }
166  }
167  m_isdefined = false;
168 }
169 
170 template<class T>
171 inline void BoxLayoutData<T>::allocateGhostVector(const DataFactory<T>& factory, const IntVect& ghost)
172 {
173  if (this->m_callDelete == true)
174  {
175  for (unsigned int i=0; i<this->m_vector.size(); ++i)
176  {
177  delete this->m_vector[i];
178  this->m_vector[i] = NULL;
179  }
180  }
181 
182  this->m_callDelete = factory.callDelete();
183 
184  DataIterator it(this->dataIterator()); int nbox=it.size();
185  this->m_vector.resize(it.size(), NULL);
186 #pragma omp parallel for if(this->m_threadSafe)
187  for(int i=0; i<nbox; i++)
188  {
189  unsigned int index = it[i].datInd();
190  Box abox = this->box(it[i]);
191  abox.grow(ghost);
192  this->m_vector[index] = factory.create(abox, m_comps, it[i]);
193  if (this->m_vector[index] == NULL)
194  {
195  MayDay::Error("OutOfMemory in BoxLayoutData::allocateGhostVector");
196  }
197  }
198 }
199 
200 template<class T>
201 inline void BoxLayoutData<T>::apply(void (*a_func)(const Box& box, int comps, T& t))
202 {
203  DataIterator it(this->dataIterator()); int nbox=it.size();
204 #pragma omp parallel for
205  for(int i=0; i<nbox; i++)
206 
207  {
208  a_func(this->box(it[i]), m_comps, *(this->m_vector[ it[i].datInd() ]));
209  }
210 }
211 
212 //======================================================================
213 template <class T>
215 {
216  define(a_original, interval);
217 }
218 
219 template <class T>
221 {
222  m_origPointer = a_original;
223  m_interval = interval;
224 }
225 
226 template <class T>
227 T* AliasDataFactory<T>::create(const Box& a_box, int ncomps, const DataIndex& a_dataInd) const
228 {
229  //CH_assert(this->box(a_dataInd) == a_box);
230  CH_assert(ncomps = m_interval.size());
231  T* rtn = new T(m_interval, m_origPointer->operator[](a_dataInd));
232  return rtn;
233 }
234 
235 template<class T>
236 void BoxLayoutData<T>::makeItSo(const Interval& a_srcComps,
237  const BoxLayoutData<T>& a_src,
238  BoxLayoutData<T>& a_dest,
239  const Interval& a_destComps,
240  const Copier& a_copier,
241  const LDOperator<T>& a_op) const
242 {
243  makeItSoBegin(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
244  makeItSoLocalCopy(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
245  makeItSoEnd(a_dest, a_destComps, a_op);
246 }
247 
248 template<class T>
250  const BoxLayoutData<T>& a_src,
251  BoxLayoutData<T>& a_dest,
252  const Interval& a_destComps,
253  const Copier& a_copier,
254  const LDOperator<T>& a_op) const
255 {
256  // The following five functions are nullOps in uniprocessor mode
257 
258 #ifdef CH_MPI
259 
260  allocateBuffers(a_src, a_srcComps,
261  a_dest, a_destComps,
262  a_copier,
263  a_op); //monkey with buffers, set up 'fromMe' and 'toMe' queues
264 
265  writeSendDataFromMeIntoBuffers(a_src, a_srcComps, a_op);
266 
267  // If there is nothing to recv/send, don't go into these functions
268  // and allocate memory that will not be freed later. (ndk)
269  // The #ifdef CH_MPI is for the m_buff->m_toMe and m_buff->m_fromMe
270  {
271  CH_TIME("post_messages");
272  this->numReceives = m_buff->m_toMe.size();
273 
274  if (this->numReceives > 0)
275  {
276  postReceivesToMe(); // all non-blocking
277  }
278 
279 
280  this->numSends = m_buff->m_fromMe.size();
281  if (this->numSends > 0)
282  {
283  postSendsFromMe(); // all non-blocking
284  }
285  }
286 #endif
287 }
288 
289 template<class T>
291  const BoxLayoutData<T>& a_src,
292  BoxLayoutData<T>& a_dest,
293  const Interval& a_destComps,
294  const Copier& a_copier,
295  const LDOperator<T>& a_op) const
296 {
297 
298  CH_TIME("local copying");
299  CopyIterator it(a_copier, CopyIterator::LOCAL);
300  int items=it.size();
301 #ifdef _OPENMP
302  bool threadSafe = m_threadSafe && (a_op.threadSafe());
303 #endif
304 #pragma omp parallel for if(threadSafe)
305  for (int n=0; n<items; n++)
306  {
307  const MotionItem& item = it[n];
308  a_op.op(a_dest[item.toIndex], item.fromRegion,
309  a_destComps,
310  item.toRegion,
311  a_src[item.fromIndex],
312  a_srcComps);
313 
314  }
315 }
316 template<class T>
318  const Interval& a_destComps,
319  const LDOperator<T>& a_op) const
320 {
321  // Uncomment and Move this out of unpackReceivesToMe() (ndk)
322  completePendingSends(); // wait for sends from possible previous operation
323 
324  unpackReceivesToMe(a_dest, a_destComps, a_op); // nullOp in uniprocessor mode
325 
326 }
327 
328 #ifndef CH_MPI
329 // uniprocessor version of all these nullop functions.
330 template<class T>
332 {
333 }
334 
335 template<class T>
337  const Interval& a_srcComps,
338  const BoxLayoutData<T>& a_dest,
339  const Interval& a_destComps,
340  const Copier& a_copier,
341  const LDOperator<T>& a_op
342  ) const
343 {
344 }
345 
346 template<class T>
348  const Interval& a_srcComps,
349  const LDOperator<T>& a_op) const
350 {
351 }
352 
353 template<class T>
355 {
356 }
357 
358 template<class T>
360 {
361 }
362 
363 template<class T>
365  const Interval& a_destComps,
366  const LDOperator<T>& a_op) const
367 {
368 }
369 
370 template<class T>
372  const Interval& a_destComps,
373  int ncomp,
374  const DataFactory<T>& factory,
375  const LDOperator<T>& a_op) const
376 {
377 }
378 
379 #else
380 
381 // MPI versions of the above codes.
382 
383 template<class T>
385 {
386  CH_TIME("completePendingSends");
387  if (this->numSends > 0)
388  {
389  CH_TIME("MPI_Waitall");
390  m_sendStatus.resize(this->numSends);
391  int result = MPI_Waitall(this->numSends, &(m_sendRequests[0]), &(m_sendStatus[0]));
392  if (result != MPI_SUCCESS)
393  {
394  //hell if I know what to do about failed messaging here
395  }
396  }
397  this->numSends = 0;
398 }
399 
400 template<class T>
402  const Interval& a_srcComps,
403  const BoxLayoutData<T>& a_dest,
404  const Interval& a_destComps,
405  const Copier& a_copier,
406  const LDOperator<T>& a_op) const
407 {
408  CH_TIME("MPI_allocateBuffers");
409  m_buff = &(((Copier&)a_copier).m_buffers);
410  if (m_buff->isDefined(a_srcComps.size()) && T::preAllocatable()<2) return;
411 
412  m_buff->m_ncomps = a_srcComps.size();
413 
414  m_buff->m_fromMe.resize(0);
415  m_buff->m_toMe.resize(0);
416  size_t sendBufferSize = 0;
417  size_t recBufferSize = 0;
418  // two versions of code here. one for preAllocatable T, one not.
419 
420  T dummy;
421  for (CopyIterator it(a_copier, CopyIterator::FROM); it.ok(); ++it)
422  {
423  const MotionItem& item = it();
424  CopierBuffer::bufEntry b;
425  b.item = &item;
426  b.size = a_op.size(a_src[item.fromIndex], item.fromRegion, a_srcComps);
427  sendBufferSize+=b.size;
428  b.procID = item.procID;
429  m_buff->m_fromMe.push_back(b);
430  }
431  sort(m_buff->m_fromMe.begin(), m_buff->m_fromMe.end());
432  for (CopyIterator it(a_copier, CopyIterator::TO); it.ok(); ++it)
433  {
434  const MotionItem& item = it();
435  CopierBuffer::bufEntry b;
436  b.item = &item;
437  if (T::preAllocatable() == 0)
438  {
439  b.size = a_op.size(dummy, item.fromRegion, a_destComps);
440  recBufferSize+=b.size;
441  }
442  else if (T::preAllocatable() == 1)
443  {
444  b.size = a_op.size(a_dest[item.toIndex], item.fromRegion, a_destComps);
445  recBufferSize+=b.size;
446  }
447  b.procID = item.procID;
448  m_buff->m_toMe.push_back(b);
449  }
450  sort(m_buff->m_toMe.begin(), m_buff->m_toMe.end());
451 
452  if (T::preAllocatable() == 2) // dynamic allocatable, need two pass
453  {
454  CH_TIME("MPI_ Phase 1 of 2 Phase: preAllocatable==2");
455  if (s_verbosity > 0) pout()<<"preAllocatable==2\n";
456 
457  // in the non-preallocatable case, I need to message the
458  // values for the m_buff->m_toMe[*].size
459  Vector<unsigned long> fdata;
460  Vector<unsigned long> tdata;
461  int count = 1;
462  int scount = 1;
463  if (m_buff->m_toMe.size() > 0)
464  {
465  tdata.resize(m_buff->m_toMe.size(), ULONG_MAX);
466  m_receiveRequests.resize(numProc()-1);
467  m_receiveStatus.resize(numProc()-1);
468  MPI_Request* Rptr = &(m_receiveRequests[0]);
469 
470  unsigned int lastProc = m_buff->m_toMe[0].procID;
471  int messageSize = 1;
472  unsigned long * dataPtr = &(tdata[0]);
473  unsigned int i = 1;
474 
475  for (;i<m_buff->m_toMe.size(); ++i)
476  {
477  CopierBuffer::bufEntry& b = m_buff->m_toMe[i];
478  if (b.procID == lastProc)
479  messageSize++;
480  else
481  {
482 
483  MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
484  1, Chombo_MPI::comm, Rptr);
485  Rptr++;
486 
487  lastProc = b.procID;
488  messageSize = 1;
489  dataPtr = &(tdata[i]);
490  count++;
491  }
492  }
493 
494  MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
495  1, Chombo_MPI::comm, Rptr );
496  }
497 
498  if (m_buff->m_fromMe.size() > 0)
499  {
500  fdata.resize(m_buff->m_fromMe.size());
501  fdata[0]=m_buff->m_fromMe[0].size;
502  m_sendRequests.resize(numProc()-1);
503  m_sendStatus.resize(numProc()-1);
504  MPI_Request* Rptr = &(m_sendRequests[0]);
505 
506  unsigned int lastProc = m_buff->m_fromMe[0].procID;
507  int messageSize = 1;
508  unsigned long * dataPtr = &(fdata[0]);
509  unsigned int i = 1;
510  for (;i<m_buff->m_fromMe.size(); ++i)
511  {
512  fdata[i] = m_buff->m_fromMe[i].size;
513  CopierBuffer::bufEntry& b = m_buff->m_fromMe[i];
514  if (b.procID == lastProc)
515  messageSize++;
516  else
517  {
518  MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
519  1, Chombo_MPI::comm, Rptr);
520 
521  Rptr++;
522  lastProc = b.procID;
523  messageSize = 1;
524  dataPtr = &(fdata[i]);
525  scount++;
526  }
527  }
528 
529  MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
530  1, Chombo_MPI::comm, Rptr);
531  }
532 
533  if (m_buff->m_toMe.size() > 0)
534  {
535 
536  int result = MPI_Waitall(count, &(m_receiveRequests[0]), &(m_receiveStatus[0]));
537  if (result != MPI_SUCCESS)
538  {
539  MayDay::Error("First pass of two-phase communication failed");
540  }
541 
542  for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
543  {
544  CH_assert(tdata[i] != ULONG_MAX);
545  m_buff->m_toMe[i].size = tdata[i];
546  recBufferSize+= tdata[i];
547  }
548  }
549 
550  if (m_buff->m_fromMe.size() > 0)
551  {
552 
553  int result = MPI_Waitall(scount, &(m_sendRequests[0]), &(m_sendStatus[0]));
554  if (result != MPI_SUCCESS)
555  {
556  MayDay::Error("First pass of two-phase communication failed");
557  }
558 
559  }
560  }
561 
562  // allocate send and receive buffer space.
563 
564  if (sendBufferSize > m_buff->m_sendcapacity)
565  {
567  if (s_verbosity > 0) pout()<<"malloc send buffer "<<sendBufferSize<<std::endl;
568  (m_buff->m_sendbuffer) = mallocMT(sendBufferSize);
569  if ((m_buff->m_sendbuffer) == NULL)
570  {
571  MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
572  }
573  m_buff->m_sendcapacity = sendBufferSize;
574  }
575 
576  if (recBufferSize > m_buff->m_reccapacity)
577  {
579  if (s_verbosity > 0) pout()<<"malloc receive buffer "<<recBufferSize<<std::endl;
580  m_buff->m_recbuffer = mallocMT(recBufferSize);
581  if (m_buff->m_recbuffer == NULL)
582  {
583  MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
584  }
585  m_buff->m_reccapacity = recBufferSize;
586  }
587 
588  /*
589  pout()<<"\n";
590  for (int i=0; i<m_buff->m_fromMe.size(); i++)
591  pout()<<m_buff->m_fromMe[i].item->region<<"{"<<m_buff->m_fromMe[i].procID<<"}"<<" ";
592  pout() <<"::::";
593  for (int i=0; i<m_buff->m_toMe.size(); i++)
594  pout()<<m_buff->m_toMe[i].item->region<<"{"<<m_buff->m_toMe[i].procID<<"}"<<" ";
595  pout() << endl;
596  */
597 
598  char* nextFree = (char*)(m_buff->m_sendbuffer);
599  if (m_buff->m_fromMe.size() > 0)
600  {
601  for (unsigned int i=0; i<m_buff->m_fromMe.size(); ++i)
602  {
603  m_buff->m_fromMe[i].bufPtr = nextFree;
604  nextFree += m_buff->m_fromMe[i].size;
605  }
606  }
607 
608  nextFree = (char*)m_buff->m_recbuffer;
609  if (m_buff->m_toMe.size() > 0)
610  {
611  for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
612  {
613  m_buff->m_toMe[i].bufPtr = nextFree;
614  nextFree += m_buff->m_toMe[i].size;
615  }
616  }
617 
618  // since fromMe and toMe are sorted based on procID, messages can now be grouped
619  // together on a per-processor basis.
620 
621 }
622 
623 template<class T>
625  const Interval& a_srcComps,
626  const LDOperator<T>& a_op) const
627 {
628  CH_TIME("write Data to buffers");
629  int isize = m_buff->m_fromMe.size();
630 #ifdef _OPENMP
631  bool threadSafe = m_threadSafe && (a_op.threadSafe());
632 #endif
633 #pragma omp parallel for if(threadSafe)
634  for (unsigned int i=0; i< isize; ++i)
635  {
636  const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[i];
637  a_op.linearOut(a_src[entry.item->fromIndex], entry.bufPtr,
638  entry.item->fromRegion, a_srcComps);
639  }
640 }
641 
642 template<class T>
644 {
645  CH_TIME("post_Sends");
646  // now we get the magic of message coalescence
647  // fromMe has already been sorted in the allocateBuffers() step.
648 
649  this->numSends = m_buff->m_fromMe.size();
650 
651  if (this->numSends > 1)
652  {
653  for (unsigned int i=m_buff->m_fromMe.size()-1; i>0; --i)
654  {
655  if (m_buff->m_fromMe[i].procID == m_buff->m_fromMe[i-1].procID)
656  {
657  this->numSends--;
658  m_buff->m_fromMe[i-1].size = m_buff->m_fromMe[i-1].size + m_buff->m_fromMe[i].size;
659  m_buff->m_fromMe[i].size = 0;
660  }
661  }
662  }
663  m_sendRequests.resize(this->numSends);
664  std::list<MPI_Request> extraRequests;
665 
666  unsigned int next=0;
667  long long maxSize = 0;
668  for (int i=0; i<this->numSends; ++i)
669  {
670  const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[next];
671  char* buffer = (char*)entry.bufPtr;
672  std::size_t bsize = entry.size;
673  int idtag=0;
674  while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
675  {
676  extraRequests.push_back(MPI_Request());
677  {
678  //CH_TIME("MPI_Isend");
679  MPI_Isend(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
680  idtag, Chombo_MPI::comm, &(extraRequests.back()));
681  }
682  maxSize = CH_MAX_MPI_MESSAGE_SIZE;
683  bsize -= CH_MAX_MPI_MESSAGE_SIZE;
684  buffer+=CH_MAX_MPI_MESSAGE_SIZE;
685  idtag++;
686  }
687  {
688  //CH_TIME("MPI_Isend");
689  MPI_Isend(buffer, bsize, MPI_BYTE, entry.procID,
690  idtag, Chombo_MPI::comm, &(m_sendRequests[i]));
691  }
692  maxSize = Max<long long>(bsize, maxSize);
693  ++next;
694  while (next < m_buff->m_fromMe.size() && m_buff->m_fromMe[next].size == 0) ++next;
695  }
696  for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
697  {
698  m_sendRequests.push_back(*it);
699  }
700  this->numSends = m_sendRequests.size();
701 
702  CH_MaxMPISendSize = Max<long long>(CH_MaxMPISendSize, maxSize);
703 
704 }
705 
706 template<class T>
708 {
709  CH_TIME("post_Receives");
710  this->numReceives = m_buff->m_toMe.size();
711 
712  if (this->numReceives > 1)
713  {
714  for (unsigned int i=m_buff->m_toMe.size()-1; i>0; --i)
715  {
716  if (m_buff->m_toMe[i].procID == m_buff->m_toMe[i-1].procID)
717  {
718  this->numReceives--;
719  m_buff->m_toMe[i-1].size += m_buff->m_toMe[i].size;
720  m_buff->m_toMe[i].size = 0;
721  }
722 
723  }
724  }
725  m_receiveRequests.resize(this->numReceives);
726  std::list<MPI_Request> extraRequests;
727  unsigned int next=0;
728  long long maxSize = 0;
729  for (int i=0; i<this->numReceives; ++i)
730  {
731  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[next];
732  char* buffer = (char*)entry.bufPtr;
733  size_t bsize = entry.size;
734  int idtag=0;
735  while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
736  {
737  extraRequests.push_back(MPI_Request());
738  {
739  //CH_TIME("MPI_Irecv");
740  MPI_Irecv(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
741  idtag, Chombo_MPI::comm, &(extraRequests.back()));
742  }
743  maxSize = CH_MAX_MPI_MESSAGE_SIZE;
744  bsize -= CH_MAX_MPI_MESSAGE_SIZE;
745  buffer+=CH_MAX_MPI_MESSAGE_SIZE;
746  idtag++;
747  }
748  {
749  //CH_TIME("MPI_Irecv");
750  MPI_Irecv(buffer, bsize, MPI_BYTE, entry.procID,
751  idtag, Chombo_MPI::comm, &(m_receiveRequests[i]));
752  }
753  ++next;
754  maxSize = Max<long long>(bsize, maxSize);
755  while (next < m_buff->m_toMe.size() && m_buff->m_toMe[next].size == 0) ++next;
756  }
757  for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
758  {
759  m_receiveRequests.push_back(*it);
760  }
761  this->numReceives = m_receiveRequests.size();
762 
763  CH_MaxMPIRecvSize = Max<long long>(CH_MaxMPIRecvSize, maxSize);
764  //pout()<<"maxSize="<<maxSize<<" posted "<<this->numReceives<<" receives\n";
765 
766 }
767 
768 template<class T>
770  const Interval& a_destComps,
771  const LDOperator<T>& a_op) const
772 {
773 
774  CH_TIME("unpack_messages");
775 
776  if (this->numReceives > 0)
777  {
778  m_receiveStatus.resize(this->numReceives);
779  int result;
780  {
781  CH_TIME("MPI_Waitall");
782  result = MPI_Waitall(this->numReceives, &(m_receiveRequests[0]),
783  &(m_receiveStatus[0]));
784  }
785  if (result != MPI_SUCCESS)
786  {
787  //hell if I know what to do about failed messaging here
788  //maybe a mayday::warning?
789  }
790 
791  int isize = m_buff->m_toMe.size();
792 #ifdef _OPENMP
793  bool threadSafe = m_threadSafe && (a_op.threadSafe());
794 #endif
795 #pragma omp parallel for if(threadSafe)
796  for (unsigned int i=0; i< isize; ++i)
797  {
798  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
799  a_op.linearIn(a_dest[entry.item->toIndex], entry.bufPtr, entry.item->toRegion, a_destComps);
800  }
801  }
802  this->numReceives = 0;
803 }
804 
805 template<class T>
807  const Interval& a_destComps,
808  int ncomp,
809  const DataFactory<T>& factory,
810 
811  const LDOperator<T>& a_op) const
812 {
813 
814  if (this->numReceives > 0)
815  {
816  m_receiveStatus.resize(this->numReceives);
817  int result;
818  {
819  CH_TIME("MPI_Waitall");
820  result = MPI_Waitall(this->numReceives, &(m_receiveRequests[0]),
821  &(m_receiveStatus[0]));
822  }
823  if (result != MPI_SUCCESS)
824  {
825  //hell if I know what to do about failed messaging here
826  }
827  int isize = m_buff->m_toMe.size();
828  // NOT thread-safe, because of a_dest[item.toIndex].push_back(newT);
829  // #pragma omp parallel for if(this->m_threadSafe)
830  for (int i=0; i< isize; ++i)
831  {
832  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
833  const MotionItem& item = *(entry.item);
834  RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );;
835 
836  a_op.linearIn(*newT, entry.bufPtr, item.toRegion, a_destComps);
837  a_dest[item.toIndex].push_back(newT);
838  }
839  }
840 
841  this->numReceives = 0;
842 }
843 #endif
844 
845 template <class T>
847  LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
848  const Interval& a_srcComps,
849  const ProblemDomain& a_domain,
850  const Copier& a_copier,
851  const DataFactory<T>& factory) const
852 {
853 
854  CH_assert(T::preAllocatable() == 0);
855  a_dest.define(a_destGrids);
856 
857  LDOperator<T> a_op;
858 
859  int ncomp = a_srcComps.size();
860  Interval destComps(0, ncomp-1);
861  allocateBuffers(*this, a_srcComps,
862  *this, destComps,
863  a_copier, a_op);
864 
865  writeSendDataFromMeIntoBuffers(*this, a_srcComps, a_op);
866 
867  // If there is nothing to recv/send, don't go into these functions
868  // and allocate memory that will not be freed later. (ndk)
869  // The #ifdef CH_MPI is for the m_buff->m_toMe and m_buff->m_fromMe
870 #ifdef CH_MPI
871  this->numReceives = m_buff->m_toMe.size();
872  if (this->numReceives > 0)
873  {
874  postReceivesToMe(); // all non-blocking
875  }
876 
877  this->numSends = m_buff->m_fromMe.size();
878  if (this->numSends > 0)
879  {
880  postSendsFromMe(); // all non-blocking
881  }
882 #endif
883 
884  // perform local copy
885  CopyIterator it(a_copier, CopyIterator::LOCAL);
886  int items=it.size();
887 
888 //brian says this does not need conditionals because everyone is getting different buffers
889  // NOT thread-safe, because of a_dest[item.toIndex].push_back(newT);
890  // #pragma omp parallel for
891  for(int i=0; i<items; ++i)
892  {
893  const MotionItem& item = it[i];
894  RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );
895 
896  a_op.op(*newT, item.fromRegion,
897  destComps,
898  item.toRegion,
899  this->operator[](item.fromIndex),
900  a_srcComps);
901  a_dest[item.toIndex].push_back(newT);
902  }
903  // }
904  // Uncomment and Move this out of unpackReceivesToMe() (ndk)
905  completePendingSends(); // wait for sends from possible previous operation
906 
907  unpackReceivesToMe_append(a_dest, destComps, ncomp, factory, a_op); // nullOp in uniprocessor mode
908 }
909 
910 template <class T>
912  LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
913  const Interval& a_srcComps,
914  const ProblemDomain& a_domain,
915  const DataFactory<T>& factory) const
916 {
917  Copier copier;
918  copier.define(this->m_boxLayout, a_destGrids, a_domain, IntVect::Zero);
919 
920  generalCopyTo(a_destGrids, a_dest, a_srcComps, a_domain, copier, factory);
921 }
922 
923 template <class T>
924 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
925  BoxLayoutData<T>& a_dest,
926  const Interval& a_destComps,
927  const ProblemDomain& a_domain) const
928 {
929  Copier copier;
930  copier.define(this->m_boxLayout, a_dest.m_boxLayout, a_domain, IntVect::Zero);
931  addTo(a_srcComps, a_dest, a_destComps, a_domain, copier);
932 }
933 
934 template <class T>
935 class LDaddOp : public LDOperator<T>
936 {
937 public:
938  virtual void op(T& dest,
939  const Box& RegionFrom,
940  const Interval& Cdest,
941  const Box& RegionTo,
942  const T& src,
943  const Interval& Csrc) const
944  {
945  dest.plus(src, RegionFrom, RegionTo, Csrc.begin(), Cdest.begin(), Cdest.size());
946  }
947  virtual void linearIn(T& arg, void* buf, const Box& R,
948  const Interval& comps) const
949  {
950  Real* buffer = (Real*)buf;
951 
952  ForAllXBNNnoindx(Real, arg, R, comps.begin(), comps.size())
953  {
954  argR+=*buffer;
955  buffer++;
956  } EndFor
957 
958  }
959 
960  virtual bool threadSafe() const
961  {
962  return false;
963  }
964 };
965 
966 template <class T>
967 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
968  BoxLayoutData<T>& a_dest,
969  const Interval& a_destComps,
970  const ProblemDomain& a_domain,
971  const Copier& a_copier) const
972 {
973  CH_TIME("addTo");
974  LDaddOp<T> addOp;
975  makeItSo(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
976 }
977 
978 #include "NamespaceFooter.H"
979 #endif
std::ostream & pout()
Use this in place of std::cout for program output.
virtual bool threadSafe() const
Definition: BoxLayoutData.H:301
int m_comps
Definition: BoxLayoutData.H:387
CopierBuffer * m_buff
Definition: BoxLayoutData.H:466
virtual void define(const BoxLayout &boxes, int comps, const DataFactory< T > &factory=DefaultDataFactory< T >())
Definition: BoxLayoutDataI.H:88
bool m_callDelete
Definition: LayoutData.H:131
void define(BoxLayoutData< T > *a_original, const Interval &interval)
Definition: BoxLayoutDataI.H:220
A reference-counting handle class.
Definition: RefCountedPtr.H:173
#define freeMT(a_a)
Definition: memtrack.H:160
#define CH_assert(cond)
Definition: CHArray.H:37
void makeItSoEnd(BoxLayoutData< T > &a_dest, const Interval &a_destComps, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:317
A class to facilitate interaction with physical boundary conditions.
Definition: ProblemDomain.H:141
Vector< T * > m_vector
Definition: LayoutData.H:124
void unpackReceivesToMe_append(LayoutData< Vector< RefCountedPtr< T > > > &a_dest, const Interval &a_destComps, int ncomp, const DataFactory< T > &factory, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:371
int m_ncomps
Definition: Copier.H:76
A not-necessarily-disjoint collective of boxes.
Definition: BoxLayout.H:145
one dimensional dynamic array
Definition: Vector.H:53
void writeSendDataFromMeIntoBuffers(const BoxLayoutData< T > &a_src, const Interval &a_srcComps, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:347
virtual ~BoxLayoutData()
Definition: BoxLayoutDataI.H:133
int nComp() const
Definition: BoxLayoutData.H:306
Data that maintains a one-to-one mapping of T to the boxes in a BoxLayout.
Definition: BoxLayout.H:26
A strange but true thing to make copying from one boxlayoutdata to another fast.
Definition: Copier.H:145
#define mallocMT(a_a)
Definition: memtrack.H:159
int size() const
Definition: DataIterator.H:218
std::vector< bufEntry > m_fromMe
Definition: Copier.H:114
virtual bool callDelete() const
Definition: BoxLayoutData.H:43
Definition: Copier.H:381
void setVector(const BoxLayoutData< T > &da, const Interval &srcComps, const Interval &destComps)
Definition: BoxLayoutDataI.H:46
size_t m_sendcapacity
Definition: Copier.H:80
Definition: DataIterator.H:190
bool isDefined(int ncomps) const
Definition: Copier.H:73
size_t size()
Definition: Copier.H:397
void completePendingSends() const
Definition: BoxLayoutDataI.H:331
unsigned long long CH_MaxMPIRecvSize
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutData.H:204
virtual void clear()
Definition: BoxLayoutDataI.H:157
unsigned long long CH_MaxMPISendSize
unsigned int numProc()
number of parallel processes
Definition: Copier.H:382
Definition: Copier.H:36
void postReceivesToMe() const
Definition: BoxLayoutDataI.H:359
int size() const
Definition: Interval.H:75
BoxLayout m_boxLayout
Definition: LayoutData.H:118
void generalCopyTo(const BoxLayout &a_destGrids, LayoutData< Vector< RefCountedPtr< T > > > &a_dest, const Interval &a_interval, const ProblemDomain &a_domain, const DataFactory< T > &factory=DefaultDataFactory< T >()) const
General data copying operation.
Definition: BoxLayoutDataI.H:911
int procID
Definition: Copier.H:42
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const
factory function. creates a new 'T' object
Definition: BoxLayoutDataI.H:32
virtual void linearOut(const T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutData.H:184
Definition: EBInterface.H:45
void resize(unsigned int isize)
Definition: Vector.H:346
virtual void apply(void(*a_Function)(const Box &box, int comps, T &t))
Definition: BoxLayoutDataI.H:201
DataIndex toIndex
Definition: Copier.H:39
size_t m_reccapacity
Definition: Copier.H:83
void * m_recbuffer
Definition: Copier.H:81
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutData.H:200
#define CH_TIME(name)
Definition: CH_Timer.H:82
Structure for passing component ranges in code.
Definition: Interval.H:23
void allocateGhostVector(const DataFactory< T > &factory, const IntVect &ghost=IntVect::Zero)
Definition: BoxLayoutDataI.H:171
virtual bool isDefined() const
Definition: BoxLayoutDataI.H:40
void * m_sendbuffer
Definition: Copier.H:78
Interval interval() const
Definition: BoxLayoutData.H:312
virtual int size(const T &arg, const Box &b, const Interval &comps) const
Definition: BoxLayoutData.H:180
Data on a BoxLayout.
Definition: BoxLayoutData.H:97
static int s_verbosity
Definition: BoxLayoutData.H:384
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutDataI.H:947
double Real
Definition: REAL.H:33
void addTo(const Interval &a_srcComps, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const ProblemDomain &a_domain) const
Definition: BoxLayoutDataI.H:924
virtual void define(const DisjointBoxLayout &dp, int comps, const IntVect &ghost=IntVect::Zero, const DataFactory< T > &a_factory=DefaultDataFactory< T >())
Definition: LevelDataI.H:83
void unpackReceivesToMe(BoxLayoutData< T > &a_dest, const Interval &a_destComps, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:364
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutData.H:189
void makeItSoLocalCopy(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:290
Box toRegion
Definition: Copier.H:41
bool isClosed() const
Definition: BoxLayout.H:729
static void Error(const char *const a_msg=m_nullString, int m_exitCode=CH_DEFAULT_ERROR_CODE)
Print out message to cerr and exit with the specified exit code.
int begin() const
Definition: Interval.H:97
const BoxLayout & boxLayout() const
Definition: LayoutData.H:107
static const IntVect Zero
Definition: IntVect.H:654
void postSendsFromMe() const
Definition: BoxLayoutDataI.H:354
AliasDataFactory(BoxLayoutData< T > *a_original, const Interval &interval)
Definition: BoxLayoutDataI.H:214
A Rectangular Domain on an Integer Lattice.
Definition: Box.H:465
Definition: DataIndex.H:112
bool ok() const
Definition: Copier.H:442
bool m_isdefined
Definition: BoxLayoutData.H:389
unsigned long long CH_MAX_MPI_MESSAGE_SIZE
An integer Vector in SpaceDim-dimensional space.
Definition: CHArray.H:42
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutDataI.H:960
DataIterator dataIterator() const
Definition: LayoutDataI.H:78
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const =0
factory function. creates a new 'T' object
size_t size() const
Definition: Vector.H:192
Definition: Copier.H:380
Factory object to data members of a BoxLayoutData container.
Definition: BoxLayoutData.H:30
void makeItSoBegin(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:249
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const
Definition: BoxLayoutDataI.H:227
Box & grow(int i)
grow functions
Definition: Box.H:2247
virtual bool threadSafe() const
Definition: BoxLayoutData.H:49
Definition: BoxLayoutData.H:173
int end() const
Definition: Interval.H:102
bool m_threadSafe
Definition: BoxLayoutData.H:388
virtual void define(const DisjointBoxLayout &a_level, const BoxLayout &a_dest, bool a_exchange=false, IntVect a_shift=IntVect::Zero)
Definition: BoxLayoutDataI.H:935
DataIndex fromIndex
Definition: Copier.H:39
Box box(const DataIndex &a_index) const
Definition: LayoutDataI.H:66
Box fromRegion
Definition: Copier.H:40
std::vector< bufEntry > m_toMe
Definition: Copier.H:115
Definition: Copier.H:375
void makeItSo(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:236
BoxLayoutData()
Definition: BoxLayoutDataI.H:108
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutDataI.H:938
void allocateBuffers(const BoxLayoutData< T > &a_src, const Interval &a_srcComps, const BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:336