Chombo + EB  3.2
BoxLayoutDataI.H
Go to the documentation of this file.
1 #ifdef CH_LANG_CC
2 /*
3  * _______ __
4  * / ___/ / ___ __ _ / / ___
5  * / /__/ _ \/ _ \/ V \/ _ \/ _ \
6  * \___/_//_/\___/_/_/_/_.__/\___/
7  * Please refer to Copyright.txt, in Chombo's root directory.
8  */
9 #endif
10 
11 #ifndef _BOXLAYOUTDATAI_H_
12 #define _BOXLAYOUTDATAI_H_
13 
14 #include <cstdlib>
15 #include <algorithm>
16 #include <limits.h>
17 #include <list>
18 #include "CH_OpenMP.H"
19 #include "parstream.H"
20 #include "memtrack.H"
21 #include "Misc.H"
22 #include "CH_Timer.H"
23 #include "BaseFabMacros.H"
24 #include "NamespaceHeader.H"
25 
26 
27 using std::sort;
28 
29 template<class T>
31 
32 template <class T>
34  int ncomps,
35  const DataIndex& a_datInd) const
36 {
37  return new T(box, ncomps);
38 }
39 
40 template<class T>
41 inline bool BoxLayoutData<T>::isDefined() const
42 {
43  return m_isdefined;
44 }
45 
46 template <class T>
48  const Interval& srcComps,
49  const Interval& destComps)
50 {
51  if(&da != this)
52  {
53  DataIterator it=this->dataIterator();
54  int nbox=it.size();
55 #pragma omp parallel for if(this->m_threadSafe)
56  for(int box=0; box<nbox; box++)
57  {
58  this->m_vector[it[box].datInd()]->copy( this->box(it[box]), destComps,
59  this->box(it[box]), da[it[box]], srcComps);
60  }
61  }
62 }
63 
64 template<class T>
65 inline void BoxLayoutData<T>::define(const BoxLayoutData<T>& da, const Interval& comps,
66  const DataFactory<T>& factory)
67 {
68  if (this == &da)
69  {
70  MayDay::Error("BoxLayoutData<T>::define(const LayoutData<T>& da,.....) called with 'this'");
71  }
72  CH_assert(comps.size()>0);
73  CH_assert(comps.end()<=m_comps);
74  //AD: why are the two different
75  // CH_assert(comps.end()<=da.m_comps);
76  CH_assert(comps.begin()>=0);
77  this->m_boxLayout = da.boxLayout();
78 
79  this->m_comps = comps.size();
80  this->m_threadSafe = factory.threadSafe();
81  //this->m_threadSafe = false;
82 
83  Interval dest(0, m_comps-1);
84  allocateGhostVector(factory);
85  setVector(da, comps, dest);
86 }
87 
88 template<class T>
89 inline void BoxLayoutData<T>::define(const BoxLayout& boxes, int comps,
90  const DataFactory<T>& factory)
91 {
92  CH_assert(boxes.isClosed());
93  this->m_boxLayout = boxes;
94  m_comps = comps;
95  this->m_threadSafe = factory.threadSafe();
96  // this->m_threadSafe = false;
97  m_isdefined = true;
98  allocateGhostVector(factory);
99 
100 }
101 
102 template<class T>
103 inline void BoxLayoutData<T>::define(const BoxLayout& boxes)
104 {
105  MayDay::Error("BoxLayoutData<T>::define(const BoxLayout& boxes)...needs comps");
106 }
107 
108 template <class T>
110 {
111  m_isdefined = false;
112 #ifdef CH_MPI
113  this->numSends = 0;
114  this->numReceives = 0;
115 #endif
116 }
117 template<class T>
118 inline BoxLayoutData<T>::BoxLayoutData(const BoxLayout& boxes, int comps,
119  const DataFactory<T>& factory)
120  :m_comps(comps),m_buff(NULL)
121 {
122  CH_assert(boxes.isClosed());
123  this->m_boxLayout = boxes;
124  m_isdefined = true;
125  allocateGhostVector(factory);
126 #ifdef CH_MPI
127 
128  this->numSends = 0;
129  this->numReceives = 0;
130 #endif
131 }
132 
133 template<class T>
135 {
136  CH_TIME("~BoxLayoutData");
138 }
139 
140 template<class T>
142  const DataFactory<T>& factory)
143 {
144  if (this != &da)
145  {
147  this->m_boxLayout = da.boxLayout();
148  m_comps = da.nComp();
149  this->m_threadSafe = factory.threadSafe();
150  //this->m_threadSafe = false;
151  Interval srcAnddest(0, m_comps-1);
152  allocateGhostVector(factory);
153  setVector(da, srcAnddest, srcAnddest);
154  }
155 
156 }
157 template<class T>
159 {
160  if (this->m_callDelete == true)
161  {
162  for (unsigned int i=0; i<this->m_vector.size(); ++i)
163  {
164  delete this->m_vector[i];
165  this->m_vector[i] = NULL;
166  }
167  }
168  m_isdefined = false;
169 }
170 
171 template<class T>
172 inline void BoxLayoutData<T>::allocateGhostVector(const DataFactory<T>& factory, const IntVect& ghost)
173 {
174  if (this->m_callDelete == true)
175  {
176  for (unsigned int i=0; i<this->m_vector.size(); ++i)
177  {
178  delete this->m_vector[i];
179  this->m_vector[i] = NULL;
180  }
181  }
182 
183  this->m_callDelete = factory.callDelete();
184 
185  DataIterator it(this->dataIterator()); int nbox=it.size();
186  this->m_vector.resize(it.size(), NULL);
187 #pragma omp parallel for if(this->m_threadSafe)
188  for(int i=0; i<nbox; i++)
189  {
190  unsigned int index = it[i].datInd();
191  Box abox = this->box(it[i]);
192  abox.grow(ghost);
193  this->m_vector[index] = factory.create(abox, m_comps, it[i]);
194  if (this->m_vector[index] == NULL)
195  {
196  MayDay::Error("OutOfMemory in BoxLayoutData::allocateGhostVector");
197  }
198  }
199 }
200 
201 template<class T>
202 inline void BoxLayoutData<T>::apply(void (*a_func)(const Box& box, int comps, T& t))
203 {
204  DataIterator it(this->dataIterator()); int nbox=it.size();
205 #pragma omp parallel for
206  for(int i=0; i<nbox; i++)
207 
208  {
209  a_func(this->box(it[i]), m_comps, *(this->m_vector[ it[i].datInd() ]));
210  }
211 }
212 
213 //======================================================================
214 template <class T>
216 {
217  define(a_original, interval);
218 }
219 
220 template <class T>
222 {
223  m_origPointer = a_original;
224  m_interval = interval;
225 }
226 
227 template <class T>
228 T* AliasDataFactory<T>::create(const Box& a_box, int ncomps, const DataIndex& a_dataInd) const
229 {
230  //CH_assert(this->box(a_dataInd) == a_box);
231  CH_assert(ncomps = m_interval.size());
232  T* rtn = new T(m_interval, m_origPointer->operator[](a_dataInd));
233  return rtn;
234 }
235 
236 template<class T>
237 void BoxLayoutData<T>::makeItSo(const Interval& a_srcComps,
238  const BoxLayoutData<T>& a_src,
239  BoxLayoutData<T>& a_dest,
240  const Interval& a_destComps,
241  const Copier& a_copier,
242  const LDOperator<T>& a_op) const
243 {
244  makeItSoBegin(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
245  makeItSoLocalCopy(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
246  makeItSoEnd(a_dest, a_destComps, a_op);
247 }
248 
249 template<class T>
251  const BoxLayoutData<T>& a_src,
252  BoxLayoutData<T>& a_dest,
253  const Interval& a_destComps,
254  const Copier& a_copier,
255  const LDOperator<T>& a_op) const
256 {
257  // The following five functions are nullOps in uniprocessor mode
258 
259 #ifdef CH_MPI
260 
261  allocateBuffers(a_src, a_srcComps,
262  a_dest, a_destComps,
263  a_copier,
264  a_op); //monkey with buffers, set up 'fromMe' and 'toMe' queues
265 
266  writeSendDataFromMeIntoBuffers(a_src, a_srcComps, a_op);
267 
268  // If there is nothing to recv/send, don't go into these functions
269  // and allocate memory that will not be freed later. (ndk)
270  // The #ifdef CH_MPI is for the m_buff->m_toMe and m_buff->m_fromMe
271  {
272  CH_TIME("post_messages");
273  this->numReceives = m_buff->m_toMe.size();
274 
275  if (this->numReceives > 0)
276  {
277  postReceivesToMe(); // all non-blocking
278  }
279 
280 
281  this->numSends = m_buff->m_fromMe.size();
282  if (this->numSends > 0)
283  {
284  postSendsFromMe(); // all non-blocking
285  }
286  }
287 #endif
288 }
289 
290 template<class T>
292  const BoxLayoutData<T>& a_src,
293  BoxLayoutData<T>& a_dest,
294  const Interval& a_destComps,
295  const Copier& a_copier,
296  const LDOperator<T>& a_op) const
297 {
298 
299  CH_TIME("local copying");
300  CopyIterator it(a_copier, CopyIterator::LOCAL);
301  int items=it.size();
302 #ifdef _OPENMP
303  bool threadSafe = m_threadSafe && (a_op.threadSafe());
304 #endif
305 #pragma omp parallel for if(threadSafe)
306  for (int n=0; n<items; n++)
307  {
308  const MotionItem& item = it[n];
309  a_op.op(a_dest[item.toIndex], item.fromRegion,
310  a_destComps,
311  item.toRegion,
312  a_src[item.fromIndex],
313  a_srcComps);
314 
315  }
316 }
317 template<class T>
319  const Interval& a_destComps,
320  const LDOperator<T>& a_op) const
321 {
322  // Uncomment and Move this out of unpackReceivesToMe() (ndk)
323  completePendingSends(); // wait for sends from possible previous operation
324 
325  unpackReceivesToMe(a_dest, a_destComps, a_op); // nullOp in uniprocessor mode
326 
327 }
328 
329 #ifndef CH_MPI
330 // uniprocessor version of all these nullop functions.
331 template<class T>
333 {
334 }
335 
336 template<class T>
338  const Interval& a_srcComps,
339  const BoxLayoutData<T>& a_dest,
340  const Interval& a_destComps,
341  const Copier& a_copier,
342  const LDOperator<T>& a_op
343  ) const
344 {
345 }
346 
347 template<class T>
349  const Interval& a_srcComps,
350  const LDOperator<T>& a_op) const
351 {
352 }
353 
354 template<class T>
356 {
357 }
358 
359 template<class T>
361 {
362 }
363 
364 template<class T>
366  const Interval& a_destComps,
367  const LDOperator<T>& a_op) const
368 {
369 }
370 
371 template<class T>
373  const Interval& a_destComps,
374  int ncomp,
375  const DataFactory<T>& factory,
376  const LDOperator<T>& a_op) const
377 {
378 }
379 
380 #else
381 
382 // MPI versions of the above codes.
383 
384 template<class T>
386 {
387  CH_TIME("completePendingSends");
388  if (this->numSends > 0)
389  {
390  CH_TIME("MPI_Waitall");
391  m_sendStatus.resize(this->numSends);
392  int result = MPI_Waitall(this->numSends, &(m_sendRequests[0]), &(m_sendStatus[0]));
393  if (result != MPI_SUCCESS)
394  {
395  //hell if I know what to do about failed messaging here
396  }
397  }
398  this->numSends = 0;
399 }
400 
401 template<class T>
403  const Interval& a_srcComps,
404  const BoxLayoutData<T>& a_dest,
405  const Interval& a_destComps,
406  const Copier& a_copier,
407  const LDOperator<T>& a_op) const
408 {
409  CH_TIME("MPI_allocateBuffers");
410  m_buff = &(((Copier&)a_copier).m_buffers);
411  if (m_buff->isDefined(a_srcComps.size()) && T::preAllocatable()<2) return;
412 
413  m_buff->m_ncomps = a_srcComps.size();
414 
415  m_buff->m_fromMe.resize(0);
416  m_buff->m_toMe.resize(0);
417  size_t sendBufferSize = 0;
418  size_t recBufferSize = 0;
419  // two versions of code here. one for preAllocatable T, one not.
420 
421  T dummy;
422  for (CopyIterator it(a_copier, CopyIterator::FROM); it.ok(); ++it)
423  {
424  const MotionItem& item = it();
425  CopierBuffer::bufEntry b;
426  b.item = &item;
427  b.size = a_op.size(a_src[item.fromIndex], item.fromRegion, a_srcComps);
428  sendBufferSize+=b.size;
429  b.procID = item.procID;
430  m_buff->m_fromMe.push_back(b);
431  }
432  sort(m_buff->m_fromMe.begin(), m_buff->m_fromMe.end());
433  for (CopyIterator it(a_copier, CopyIterator::TO); it.ok(); ++it)
434  {
435  const MotionItem& item = it();
436  CopierBuffer::bufEntry b;
437  b.item = &item;
438  if (T::preAllocatable() == 0)
439  {
440  b.size = a_op.size(dummy, item.fromRegion, a_destComps);
441  recBufferSize+=b.size;
442  }
443  else if (T::preAllocatable() == 1)
444  {
445  b.size = a_op.size(a_dest[item.toIndex], item.fromRegion, a_destComps);
446  recBufferSize+=b.size;
447  }
448  b.procID = item.procID;
449  m_buff->m_toMe.push_back(b);
450  }
451  sort(m_buff->m_toMe.begin(), m_buff->m_toMe.end());
452 
453  if (T::preAllocatable() == 2) // dynamic allocatable, need two pass
454  {
455  CH_TIME("MPI_ Phase 1 of 2 Phase: preAllocatable==2");
456  if (s_verbosity > 0) pout()<<"preAllocatable==2\n";
457 
458  // in the non-preallocatable case, I need to message the
459  // values for the m_buff->m_toMe[*].size
460  Vector<unsigned long> fdata;
461  Vector<unsigned long> tdata;
462  int count = 1;
463  int scount = 1;
464  if (m_buff->m_toMe.size() > 0)
465  {
466  tdata.resize(m_buff->m_toMe.size(), ULONG_MAX);
467  m_receiveRequests.resize(numProc()-1);
468  m_receiveStatus.resize(numProc()-1);
469  MPI_Request* Rptr = &(m_receiveRequests[0]);
470 
471  unsigned int lastProc = m_buff->m_toMe[0].procID;
472  int messageSize = 1;
473  unsigned long * dataPtr = &(tdata[0]);
474  unsigned int i = 1;
475 
476  for (;i<m_buff->m_toMe.size(); ++i)
477  {
478  CopierBuffer::bufEntry& b = m_buff->m_toMe[i];
479  if (b.procID == lastProc)
480  messageSize++;
481  else
482  {
483 
484  MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
485  1, Chombo_MPI::comm, Rptr);
486  Rptr++;
487 
488  lastProc = b.procID;
489  messageSize = 1;
490  dataPtr = &(tdata[i]);
491  count++;
492  }
493  }
494 
495  MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
496  1, Chombo_MPI::comm, Rptr );
497  }
498 
499  if (m_buff->m_fromMe.size() > 0)
500  {
501  fdata.resize(m_buff->m_fromMe.size());
502  fdata[0]=m_buff->m_fromMe[0].size;
503  m_sendRequests.resize(numProc()-1);
504  m_sendStatus.resize(numProc()-1);
505  MPI_Request* Rptr = &(m_sendRequests[0]);
506 
507  unsigned int lastProc = m_buff->m_fromMe[0].procID;
508  int messageSize = 1;
509  unsigned long * dataPtr = &(fdata[0]);
510  unsigned int i = 1;
511  for (;i<m_buff->m_fromMe.size(); ++i)
512  {
513  fdata[i] = m_buff->m_fromMe[i].size;
514  CopierBuffer::bufEntry& b = m_buff->m_fromMe[i];
515  if (b.procID == lastProc)
516  messageSize++;
517  else
518  {
519  MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
520  1, Chombo_MPI::comm, Rptr);
521 
522  Rptr++;
523  lastProc = b.procID;
524  messageSize = 1;
525  dataPtr = &(fdata[i]);
526  scount++;
527  }
528  }
529 
530  MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
531  1, Chombo_MPI::comm, Rptr);
532  }
533 
534  if (m_buff->m_toMe.size() > 0)
535  {
536 
537  int result = MPI_Waitall(count, &(m_receiveRequests[0]), &(m_receiveStatus[0]));
538  if (result != MPI_SUCCESS)
539  {
540  MayDay::Error("First pass of two-phase communication failed");
541  }
542 
543  for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
544  {
545  CH_assert(tdata[i] != ULONG_MAX);
546  m_buff->m_toMe[i].size = tdata[i];
547  recBufferSize+= tdata[i];
548  }
549  }
550 
551  if (m_buff->m_fromMe.size() > 0)
552  {
553 
554  int result = MPI_Waitall(scount, &(m_sendRequests[0]), &(m_sendStatus[0]));
555  if (result != MPI_SUCCESS)
556  {
557  MayDay::Error("First pass of two-phase communication failed");
558  }
559 
560  }
561  }
562 
563  // allocate send and receveive buffer space.
564 
565  if (sendBufferSize > m_buff->m_sendcapacity)
566  {
568  if (s_verbosity > 0) pout()<<"malloc send buffer "<<sendBufferSize<<std::endl;
569  (m_buff->m_sendbuffer) = mallocMT(sendBufferSize);
570  if ((m_buff->m_sendbuffer) == NULL)
571  {
572  MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
573  }
574  m_buff->m_sendcapacity = sendBufferSize;
575  }
576 
577  if (recBufferSize > m_buff->m_reccapacity)
578  {
580  if (s_verbosity > 0) pout()<<"malloc receive buffer "<<recBufferSize<<std::endl;
581  m_buff->m_recbuffer = mallocMT(recBufferSize);
582  if (m_buff->m_recbuffer == NULL)
583  {
584  MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
585  }
586  m_buff->m_reccapacity = recBufferSize;
587  }
588 
589  /*
590  pout()<<"\n";
591  for (int i=0; i<m_buff->m_fromMe.size(); i++)
592  pout()<<m_buff->m_fromMe[i].item->region<<"{"<<m_buff->m_fromMe[i].procID<<"}"<<" ";
593  pout() <<"::::";
594  for (int i=0; i<m_buff->m_toMe.size(); i++)
595  pout()<<m_buff->m_toMe[i].item->region<<"{"<<m_buff->m_toMe[i].procID<<"}"<<" ";
596  pout() << endl;
597  */
598 
599  char* nextFree = (char*)(m_buff->m_sendbuffer);
600  if (m_buff->m_fromMe.size() > 0)
601  {
602  for (unsigned int i=0; i<m_buff->m_fromMe.size(); ++i)
603  {
604  m_buff->m_fromMe[i].bufPtr = nextFree;
605  nextFree += m_buff->m_fromMe[i].size;
606  }
607  }
608 
609  nextFree = (char*)m_buff->m_recbuffer;
610  if (m_buff->m_toMe.size() > 0)
611  {
612  for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
613  {
614  m_buff->m_toMe[i].bufPtr = nextFree;
615  nextFree += m_buff->m_toMe[i].size;
616  }
617  }
618 
619  // since fromMe and toMe are sorted based on procID, messages can now be grouped
620  // together on a per-processor basis.
621 
622 }
623 
624 template<class T>
626  const Interval& a_srcComps,
627  const LDOperator<T>& a_op) const
628 {
629  CH_TIME("write Data to buffers");
630  int isize = m_buff->m_fromMe.size();
631 #ifdef _OPENMP
632  bool threadSafe = m_threadSafe && (a_op.threadSafe());
633 #endif
634 #pragma omp parallel for if(threadSafe)
635  for (unsigned int i=0; i< isize; ++i)
636  {
637  const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[i];
638  a_op.linearOut(a_src[entry.item->fromIndex], entry.bufPtr,
639  entry.item->fromRegion, a_srcComps);
640  }
641 }
642 
643 template<class T>
645 {
646  CH_TIME("post_Sends");
647  // now we get the magic of message coalescence
648  // fromMe has already been sorted in the allocateBuffers() step.
649 
650  this->numSends = m_buff->m_fromMe.size();
651 
652  if (this->numSends > 1)
653  {
654  for (unsigned int i=m_buff->m_fromMe.size()-1; i>0; --i)
655  {
656  if (m_buff->m_fromMe[i].procID == m_buff->m_fromMe[i-1].procID)
657  {
658  this->numSends--;
659  m_buff->m_fromMe[i-1].size = m_buff->m_fromMe[i-1].size + m_buff->m_fromMe[i].size;
660  m_buff->m_fromMe[i].size = 0;
661  }
662  }
663  }
664  m_sendRequests.resize(this->numSends);
665  std::list<MPI_Request> extraRequests;
666 
667  unsigned int next=0;
668  long long maxSize = 0;
669  for (int i=0; i<this->numSends; ++i)
670  {
671  const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[next];
672  char* buffer = (char*)entry.bufPtr;
673  std::size_t bsize = entry.size;
674  int idtag=0;
675  while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
676  {
677  extraRequests.push_back(MPI_Request());
678  {
679  //CH_TIME("MPI_Isend");
680  MPI_Isend(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
681  idtag, Chombo_MPI::comm, &(extraRequests.back()));
682  }
683  maxSize = CH_MAX_MPI_MESSAGE_SIZE;
684  bsize -= CH_MAX_MPI_MESSAGE_SIZE;
685  buffer+=CH_MAX_MPI_MESSAGE_SIZE;
686  idtag++;
687  }
688  {
689  //CH_TIME("MPI_Isend");
690  MPI_Isend(buffer, bsize, MPI_BYTE, entry.procID,
691  idtag, Chombo_MPI::comm, &(m_sendRequests[i]));
692  }
693  maxSize = Max<long long>(bsize, maxSize);
694  ++next;
695  while (next < m_buff->m_fromMe.size() && m_buff->m_fromMe[next].size == 0) ++next;
696  }
697  for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
698  {
699  m_sendRequests.push_back(*it);
700  }
701  this->numSends = m_sendRequests.size();
702 
703  CH_MaxMPISendSize = Max<long long>(CH_MaxMPISendSize, maxSize);
704 
705 }
706 
707 template<class T>
709 {
710  CH_TIME("post_Receives");
711  this->numReceives = m_buff->m_toMe.size();
712 
713  if (this->numReceives > 1)
714  {
715  for (unsigned int i=m_buff->m_toMe.size()-1; i>0; --i)
716  {
717  if (m_buff->m_toMe[i].procID == m_buff->m_toMe[i-1].procID)
718  {
719  this->numReceives--;
720  m_buff->m_toMe[i-1].size += m_buff->m_toMe[i].size;
721  m_buff->m_toMe[i].size = 0;
722  }
723 
724  }
725  }
726  m_receiveRequests.resize(this->numReceives);
727  std::list<MPI_Request> extraRequests;
728  unsigned int next=0;
729  long long maxSize = 0;
730  for (int i=0; i<this->numReceives; ++i)
731  {
732  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[next];
733  char* buffer = (char*)entry.bufPtr;
734  size_t bsize = entry.size;
735  int idtag=0;
736  while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
737  {
738  extraRequests.push_back(MPI_Request());
739  {
740  //CH_TIME("MPI_Irecv");
741  MPI_Irecv(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
742  idtag, Chombo_MPI::comm, &(extraRequests.back()));
743  }
744  maxSize = CH_MAX_MPI_MESSAGE_SIZE;
745  bsize -= CH_MAX_MPI_MESSAGE_SIZE;
746  buffer+=CH_MAX_MPI_MESSAGE_SIZE;
747  idtag++;
748  }
749  {
750  //CH_TIME("MPI_Irecv");
751  MPI_Irecv(buffer, bsize, MPI_BYTE, entry.procID,
752  idtag, Chombo_MPI::comm, &(m_receiveRequests[i]));
753  }
754  ++next;
755  maxSize = Max<long long>(bsize, maxSize);
756  while (next < m_buff->m_toMe.size() && m_buff->m_toMe[next].size == 0) ++next;
757  }
758  for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
759  {
760  m_receiveRequests.push_back(*it);
761  }
762  this->numReceives = m_receiveRequests.size();
763 
764  CH_MaxMPIRecvSize = Max<long long>(CH_MaxMPIRecvSize, maxSize);
765  //pout()<<"maxSize="<<maxSize<<" posted "<<this->numReceives<<" receives\n";
766 
767 }
768 
769 template<class T>
771  const Interval& a_destComps,
772  const LDOperator<T>& a_op) const
773 {
774 
775  CH_TIME("unpack_messages");
776 
777  if (this->numReceives > 0)
778  {
779  m_receiveStatus.resize(this->numReceives);
780  int result;
781  {
782  CH_TIME("MPI_Waitall");
783  result = MPI_Waitall(this->numReceives, &(m_receiveRequests[0]),
784  &(m_receiveStatus[0]));
785  }
786  if (result != MPI_SUCCESS)
787  {
788  //hell if I know what to do about failed messaging here
789  //maybe a mayday::warning?
790  }
791 
792  int isize = m_buff->m_toMe.size();
793 #ifdef _OPENMP
794  bool threadSafe = m_threadSafe && (a_op.threadSafe());
795 #endif
796 #pragma omp parallel for if(threadSafe)
797  for (unsigned int i=0; i< isize; ++i)
798  {
799  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
800  a_op.linearIn(a_dest[entry.item->toIndex], entry.bufPtr, entry.item->toRegion, a_destComps);
801  }
802  }
803  this->numReceives = 0;
804 }
805 
806 template<class T>
808  const Interval& a_destComps,
809  int ncomp,
810  const DataFactory<T>& factory,
811 
812  const LDOperator<T>& a_op) const
813 {
814 
815  if (this->numReceives > 0)
816  {
817  m_receiveStatus.resize(this->numReceives);
818  int result;
819  {
820  CH_TIME("MPI_Waitall");
821  result = MPI_Waitall(this->numReceives, &(m_receiveRequests[0]),
822  &(m_receiveStatus[0]));
823  }
824  if (result != MPI_SUCCESS)
825  {
826  //hell if I know what to do about failed messaging here
827  }
828  int isize = m_buff->m_toMe.size();
829  // NOT thread-safe, because of a_dest[item.toIndex].push_back(newT);
830  // #pragma omp parallel for if(this->m_threadSafe)
831  for (int i=0; i< isize; ++i)
832  {
833  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
834  const MotionItem& item = *(entry.item);
835  RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );;
836 
837  a_op.linearIn(*newT, entry.bufPtr, item.toRegion, a_destComps);
838  a_dest[item.toIndex].push_back(newT);
839  }
840  }
841 
842  this->numReceives = 0;
843 }
844 #endif
845 
846 template <class T>
848  LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
849  const Interval& a_srcComps,
850  const ProblemDomain& a_domain,
851  const Copier& a_copier,
852  const DataFactory<T>& factory) const
853 {
854 
855  CH_assert(T::preAllocatable() == 0);
856  a_dest.define(a_destGrids);
857 
858  LDOperator<T> a_op;
859 
860  int ncomp = a_srcComps.size();
861  Interval destComps(0, ncomp-1);
862  allocateBuffers(*this, a_srcComps,
863  *this, destComps,
864  a_copier, a_op);
865 
866  writeSendDataFromMeIntoBuffers(*this, a_srcComps, a_op);
867 
868  // If there is nothing to recv/send, don't go into these functions
869  // and allocate memory that will not be freed later. (ndk)
870  // The #ifdef CH_MPI is for the m_buff->m_toMe and m_buff->m_fromMe
871 #ifdef CH_MPI
872  this->numReceives = m_buff->m_toMe.size();
873  if (this->numReceives > 0)
874  {
875  postReceivesToMe(); // all non-blocking
876  }
877 
878  this->numSends = m_buff->m_fromMe.size();
879  if (this->numSends > 0)
880  {
881  postSendsFromMe(); // all non-blocking
882  }
883 #endif
884 
885  // perform local copy
886  CopyIterator it(a_copier, CopyIterator::LOCAL);
887  int items=it.size();
888 
889 //brian says this does not need conditionals because everyone is getting different buffers
890  // NOT thread-safe, because of a_dest[item.toIndex].push_back(newT);
891  // #pragma omp parallel for
892  for(int i=0; i<items; ++i)
893  {
894  const MotionItem& item = it[i];
895  RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );
896 
897  a_op.op(*newT, item.fromRegion,
898  destComps,
899  item.toRegion,
900  this->operator[](item.fromIndex),
901  a_srcComps);
902  a_dest[item.toIndex].push_back(newT);
903  }
904  // }
905  // Uncomment and Move this out of unpackReceivesToMe() (ndk)
906  completePendingSends(); // wait for sends from possible previous operation
907 
908  unpackReceivesToMe_append(a_dest, destComps, ncomp, factory, a_op); // nullOp in uniprocessor mode
909 }
910 
911 template <class T>
913  LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
914  const Interval& a_srcComps,
915  const ProblemDomain& a_domain,
916  const DataFactory<T>& factory) const
917 {
918  Copier copier;
919  copier.define(this->m_boxLayout, a_destGrids, a_domain, IntVect::Zero);
920 
921  generalCopyTo(a_destGrids, a_dest, a_srcComps, a_domain, copier, factory);
922 }
923 
924 template <class T>
925 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
926  BoxLayoutData<T>& a_dest,
927  const Interval& a_destComps,
928  const ProblemDomain& a_domain) const
929 {
930  Copier copier;
931  copier.define(this->m_boxLayout, a_dest.m_boxLayout, a_domain, IntVect::Zero);
932  addTo(a_srcComps, a_dest, a_destComps, a_domain, copier);
933 }
934 
935 template <class T>
936 class LDaddOp : public LDOperator<T>
937 {
938 public:
939  virtual void op(T& dest,
940  const Box& RegionFrom,
941  const Interval& Cdest,
942  const Box& RegionTo,
943  const T& src,
944  const Interval& Csrc) const
945  {
946  dest.plus(src, RegionFrom, RegionTo, Csrc.begin(), Cdest.begin(), Cdest.size());
947  }
948  virtual void linearIn(T& arg, void* buf, const Box& R,
949  const Interval& comps) const
950  {
951  Real* buffer = (Real*)buf;
952 
953  ForAllXBNNnoindx(Real, arg, R, comps.begin(), comps.size())
954  {
955  argR+=*buffer;
956  buffer++;
957  } EndFor
958 
959  }
960 
961  virtual bool threadSafe() const
962  {
963  return false;
964  }
965 };
966 
967 /// LDaddOp for face-centered data
968 template <class T>
969 class LDaddFaceOp : public LDOperator<T>
970 {
971 public:
972  virtual void op(T& dest,
973  const Box& RegionFrom,
974  const Interval& Cdest,
975  const Box& RegionTo,
976  const T& src,
977  const Interval& Csrc) const
978  {
979  dest.plus(RegionFrom, Cdest,RegionTo, src, Csrc);
980  }
981 
982  virtual void linearIn(T& arg, void* buf, const Box& R,
983  const Interval& comps) const
984 
985  {
986  Real* buffer = (Real*) buf;
987  for (int dir=0; dir<SpaceDim; dir++)
988  {
989  //CH_assert(arg[dir] != NULL);
990  Box Rdir(surroundingNodes(R,dir));
991  FArrayBox& argdir = arg[dir];
992  ForAllXBNNnoindx(Real, argdir, Rdir, comps.begin(), comps.size())
993  {
994  argdirR+=*buffer;
995  buffer++;
996  } EndFor
997  }
998 
999  }
1000 
1001  virtual bool threadSafe() const
1002  {
1003  return false;
1004  }
1005 };
1006 
1007 
1008 /// LDaddOp for edge-centered data
1009 template <class T>
1010 class LDaddEdgeOp : public LDOperator<T>
1011 {
1012 public:
1013  virtual void op(T& dest,
1014  const Box& RegionFrom,
1015  const Interval& Cdest,
1016  const Box& RegionTo,
1017  const T& src,
1018  const Interval& Csrc) const
1019  {
1020  dest.plus(RegionFrom, Cdest,RegionTo, src, Csrc);
1021  }
1022 
1023  virtual void linearIn(T& arg, void* buf, const Box& R,
1024  const Interval& comps) const
1025 
1026  {
1027 
1028  Real* buffer = (Real*) buf;
1029  for (int dir=0; dir<SpaceDim; dir++)
1030  {
1031  //CH_assert(arg[dir] != NULL);
1032  Box Rdir(surroundingNodes(R));
1033  Rdir.enclosedCells(dir);
1034  FArrayBox& argdir = arg[dir];
1035  ForAllXBNNnoindx(Real, argdir, Rdir, comps.begin(), comps.size())
1036  {
1037  argdirR+=*buffer;
1038  buffer++;
1039  } EndFor
1040  }
1041 
1042  }
1043 
1044  virtual bool threadSafe() const
1045  {
1046  return false;
1047  }
1048 };
1049 
1050 // LDaddOp for node-centered data
1051 template <class T>
1052 class LDaddNodeOp : public LDOperator<T>
1053 {
1054 public:
1055  virtual void op(T& dest,
1056  const Box& RegionFrom,
1057  const Interval& Cdest,
1058  const Box& RegionTo,
1059  const T& src,
1060  const Interval& Csrc) const
1061  {
1062  //dest.plus(RegionFrom, Cdest,RegionTo, src, Csrc);
1063  CH_assert(Csrc.size() == Cdest.size());
1064  dest.plus(src, RegionFrom, RegionTo, Csrc.begin(), Cdest.begin(),
1065  Csrc.size());
1066  }
1067 
1068  virtual void linearIn(T& arg, void* buf, const Box& R,
1069  const Interval& comps) const
1070 
1071  {
1072 
1073  Real* buffer = (Real*) buf;
1074 
1075  Box Rnode(surroundingNodes(R));
1076  //Box thisBox = arg.box();
1077  //Rnode &= thisBox;
1078  FArrayBox& arg_fab = arg.getFab();
1079  ForAllXBNNnoindx(Real, arg_fab, Rnode, comps.begin(), comps.size())
1080  {
1081  arg_fabR+=*buffer;
1082  buffer++;
1083  } EndFor
1084 
1085  }
1086 
1087  virtual bool threadSafe() const
1088  {
1089  return false;
1090  }
1091 };
1092 
1093 template <class T>
1094 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
1095  BoxLayoutData<T>& a_dest,
1096  const Interval& a_destComps,
1097  const ProblemDomain& a_domain,
1098  const Copier& a_copier) const
1099 {
1100  CH_TIME("addTo");
1101  LDaddOp<T> addOp;
1102  makeItSo(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1103 }
1104 
1105 #include "NamespaceFooter.H"
1106 #endif
void unpackReceivesToMe(BoxLayoutData< T > &a_dest, const Interval &a_destComps, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:365
std::ostream & pout()
Use this in place of std::cout for program output.
bool isDefined(int ncomps) const
Definition: Copier.H:73
void postSendsFromMe() const
Definition: BoxLayoutDataI.H:355
void postReceivesToMe() const
Definition: BoxLayoutDataI.H:360
void allocateBuffers(const BoxLayoutData< T > &a_src, const Interval &a_srcComps, const BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:337
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutDataI.H:939
Definition: BoxLayoutDataI.H:1052
int m_comps
Definition: BoxLayoutData.H:387
CopierBuffer * m_buff
Definition: BoxLayoutData.H:466
virtual void define(const BoxLayout &boxes, int comps, const DataFactory< T > &factory=DefaultDataFactory< T >())
Definition: BoxLayoutDataI.H:89
void addTo(const Interval &a_srcComps, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const ProblemDomain &a_domain) const
Definition: BoxLayoutDataI.H:925
bool m_callDelete
Definition: LayoutData.H:131
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutDataI.H:982
void define(BoxLayoutData< T > *a_original, const Interval &interval)
Definition: BoxLayoutDataI.H:221
A reference-counting handle class.
Definition: RefCountedPtr.H:173
Interval interval() const
Definition: BoxLayoutData.H:312
#define freeMT(a_a)
Definition: memtrack.H:160
#define CH_assert(cond)
Definition: CHArray.H:37
A class to facilitate interaction with physical boundary conditions.
Definition: ProblemDomain.H:141
Vector< T * > m_vector
Definition: LayoutData.H:124
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutDataI.H:1087
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutData.H:189
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutData.H:200
int m_ncomps
Definition: Copier.H:76
void makeItSoLocalCopy(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:291
A not-necessarily-disjoint collective of boxes.
Definition: BoxLayout.H:145
one dimensional dynamic array
Definition: Vector.H:53
virtual ~BoxLayoutData()
Definition: BoxLayoutDataI.H:134
Data that maintains a one-to-one mapping of T to the boxes in a BoxLayout.
Definition: BoxLayout.H:26
int size() const
Definition: Interval.H:75
A strange but true thing to make copying from one boxlayoutdata to another fast.
Definition: Copier.H:145
int begin() const
Definition: Interval.H:97
#define mallocMT(a_a)
Definition: memtrack.H:159
LDaddOp for edge-centered data.
Definition: BoxLayoutDataI.H:1010
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutDataI.H:1068
std::vector< bufEntry > m_fromMe
Definition: Copier.H:114
const BoxLayout & boxLayout() const
Definition: LayoutData.H:107
DataIterator dataIterator() const
Definition: LayoutDataI.H:78
Definition: Copier.H:401
void makeItSoBegin(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:250
void completePendingSends() const
Definition: BoxLayoutDataI.H:332
void setVector(const BoxLayoutData< T > &da, const Interval &srcComps, const Interval &destComps)
Definition: BoxLayoutDataI.H:47
Box box(const DataIndex &a_index) const
Definition: LayoutDataI.H:66
size_t m_sendcapacity
Definition: Copier.H:80
Definition: DataIterator.H:190
size_t size()
Definition: Copier.H:417
unsigned long long CH_MaxMPIRecvSize
virtual bool callDelete() const
Definition: BoxLayoutData.H:43
virtual void clear()
Definition: BoxLayoutDataI.H:158
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutDataI.H:961
unsigned long long CH_MaxMPISendSize
unsigned int numProc()
number of parallel processes
Definition: Copier.H:402
Definition: Copier.H:36
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutData.H:204
BoxLayout m_boxLayout
Definition: LayoutData.H:118
const int SpaceDim
Definition: SPACE.H:38
int procID
Definition: Copier.H:42
Definition: EBInterface.H:45
Box & enclosedCells()
void resize(unsigned int isize)
Definition: Vector.H:346
virtual void apply(void(*a_Function)(const Box &box, int comps, T &t))
Definition: BoxLayoutDataI.H:202
DataIndex toIndex
Definition: Copier.H:39
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutDataI.H:1044
int size() const
Definition: DataIterator.H:218
size_t m_reccapacity
Definition: Copier.H:83
void * m_recbuffer
Definition: Copier.H:81
virtual int size(const T &arg, const Box &b, const Interval &comps) const
Definition: BoxLayoutData.H:180
#define CH_TIME(name)
Definition: CH_Timer.H:82
Structure for passing component ranges in code.
Definition: Interval.H:23
void unpackReceivesToMe_append(LayoutData< Vector< RefCountedPtr< T > > > &a_dest, const Interval &a_destComps, int ncomp, const DataFactory< T > &factory, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:372
void allocateGhostVector(const DataFactory< T > &factory, const IntVect &ghost=IntVect::Zero)
Definition: BoxLayoutDataI.H:172
void * m_sendbuffer
Definition: Copier.H:78
Data on a BoxLayout.
Definition: BoxLayoutData.H:97
static int s_verbosity
Definition: BoxLayoutData.H:384
double Real
Definition: REAL.H:33
Box surroundingNodes(const Box &b, int dir)
Definition: Box.H:2145
void makeItSo(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:237
virtual bool isDefined() const
Definition: BoxLayoutDataI.H:41
size_t size() const
Definition: Vector.H:192
void generalCopyTo(const BoxLayout &a_destGrids, LayoutData< Vector< RefCountedPtr< T > > > &a_dest, const Interval &a_interval, const ProblemDomain &a_domain, const DataFactory< T > &factory=DefaultDataFactory< T >()) const
General data copying operation.
Definition: BoxLayoutDataI.H:912
Box toRegion
Definition: Copier.H:41
static void Error(const char *const a_msg=m_nullString, int m_exitCode=CH_DEFAULT_ERROR_CODE)
Print out message to cerr and exit with the specified exit code.
virtual bool threadSafe() const
Definition: BoxLayoutData.H:49
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutDataI.H:972
static const IntVect Zero
Definition: IntVect.H:654
AliasDataFactory(BoxLayoutData< T > *a_original, const Interval &interval)
Definition: BoxLayoutDataI.H:215
A Rectangular Domain on an Integer Lattice.
Definition: Box.H:465
void makeItSoEnd(BoxLayoutData< T > &a_dest, const Interval &a_destComps, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:318
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutDataI.H:1055
int nComp() const
Definition: BoxLayoutData.H:306
Definition: DataIndex.H:112
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutDataI.H:1013
void writeSendDataFromMeIntoBuffers(const BoxLayoutData< T > &a_src, const Interval &a_srcComps, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:348
bool m_isdefined
Definition: BoxLayoutData.H:389
unsigned long long CH_MAX_MPI_MESSAGE_SIZE
An integer Vector in SpaceDim-dimensional space.
Definition: CHArray.H:42
Definition: FArrayBox.H:45
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const =0
factory function. creates a new &#39;T&#39; object
Definition: Copier.H:400
Factory object to data members of a BoxLayoutData container.
Definition: BoxLayoutData.H:30
Box & grow(int i)
grow functions
Definition: Box.H:2247
virtual bool threadSafe() const
Definition: BoxLayoutData.H:301
virtual void linearOut(const T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutData.H:184
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutDataI.H:948
int end() const
Definition: Interval.H:102
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutDataI.H:1023
bool isClosed() const
Definition: BoxLayout.H:729
Definition: BoxLayoutData.H:173
LDaddOp for face-centered data.
Definition: BoxLayoutDataI.H:969
bool m_threadSafe
Definition: BoxLayoutData.H:388
virtual void define(const DisjointBoxLayout &a_level, const BoxLayout &a_dest, bool a_exchange=false, IntVect a_shift=IntVect::Zero)
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutDataI.H:1001
Definition: BoxLayoutDataI.H:936
bool ok() const
Definition: Copier.H:462
DataIndex fromIndex
Definition: Copier.H:39
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const
factory function. creates a new &#39;T&#39; object
Definition: BoxLayoutDataI.H:33
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const
Definition: BoxLayoutDataI.H:228
Box fromRegion
Definition: Copier.H:40
std::vector< bufEntry > m_toMe
Definition: Copier.H:115
Definition: Copier.H:395
BoxLayoutData()
Definition: BoxLayoutDataI.H:109