Chombo + EB + MF  3.2
BoxLayoutDataI.H
Go to the documentation of this file.
1 #ifdef CH_LANG_CC
2 /*
3  * _______ __
4  * / ___/ / ___ __ _ / / ___
5  * / /__/ _ \/ _ \/ V \/ _ \/ _ \
6  * \___/_//_/\___/_/_/_/_.__/\___/
7  * Please refer to Copyright.txt, in Chombo's root directory.
8  */
9 #endif
10 
11 #ifndef _BOXLAYOUTDATAI_H_
12 #define _BOXLAYOUTDATAI_H_
13 
14 #include <cstdlib>
15 #include <algorithm>
16 #include <limits.h>
17 #include <list>
18 #include "CH_OpenMP.H"
19 #include "parstream.H"
20 #include "memtrack.H"
21 #include "Misc.H"
22 #include "CH_Timer.H"
23 #include "NamespaceHeader.H"
24 #include "BaseFabMacros.H"
25 
26 using std::sort;
27 
28 
29 template <class T>
31  int ncomps,
32  const DataIndex& a_datInd) const
33 {
34  return new T(box, ncomps);
35 }
36 
37 template<class T>
38 inline bool BoxLayoutData<T>::isDefined() const
39 {
40  return m_isdefined;
41 }
42 
43 template <class T>
45  const Interval& srcComps,
46  const Interval& destComps)
47 {
48  if(&da != this)
49  {
50  DataIterator it=this->dataIterator();
51  int nbox=it.size();
52 #pragma omp parallel for if(this->m_threadSafe)
53  for(int box=0; box<nbox; box++)
54  {
55  this->m_vector[it[box].datInd()]->copy( this->box(it[box]), destComps,
56  this->box(it[box]), da[it[box]], srcComps);
57  }
58  }
59 }
60 
61 template<class T>
62 inline void BoxLayoutData<T>::define(const BoxLayoutData<T>& da, const Interval& comps,
63  const DataFactory<T>& factory)
64 {
65  if (this == &da)
66  {
67  MayDay::Error("BoxLayoutData<T>::define(const LayoutData<T>& da,.....) called with 'this'");
68  }
69  CH_assert(comps.size()>0);
70  CH_assert(comps.end()<=m_comps);
71  //AD: why are the two different
72  // CH_assert(comps.end()<=da.m_comps);
73  CH_assert(comps.begin()>=0);
74  this->m_boxLayout = da.boxLayout();
75 
76  this->m_comps = comps.size();
77  this->m_threadSafe = factory.threadSafe();
78  //this->m_threadSafe = false;
79 
80  Interval dest(0, m_comps-1);
81  allocateGhostVector(factory);
82  setVector(da, comps, dest);
83 }
84 
85 template<class T>
86 inline void BoxLayoutData<T>::define(const BoxLayout& boxes, int comps,
87  const DataFactory<T>& factory)
88 {
89  CH_assert(boxes.isClosed());
90  this->m_boxLayout = boxes;
91  m_comps = comps;
92  this->m_threadSafe = factory.threadSafe();
93  // this->m_threadSafe = false;
94  m_isdefined = true;
95  allocateGhostVector(factory);
96 
97 }
98 
99 template<class T>
100 inline void BoxLayoutData<T>::define(const BoxLayout& boxes)
101 {
102  MayDay::Error("BoxLayoutData<T>::define(const BoxLayout& boxes)...needs comps");
103 }
104 
105 template <class T>
107 {
108  m_isdefined = false;
109 #ifdef CH_MPI
110 
111 #endif
112 }
113 template<class T>
114 inline BoxLayoutData<T>::BoxLayoutData(const BoxLayout& boxes, int comps,
115  const DataFactory<T>& factory)
116  :m_comps(comps),m_buff(NULL)
117 {
118  CH_assert(boxes.isClosed());
119  this->m_boxLayout = boxes;
120  m_isdefined = true;
121  allocateGhostVector(factory);
122 #ifdef CH_MPI
123 
124 #endif
125 }
126 
127 template<class T>
129 {
130  CH_TIME("~BoxLayoutData");
131  //completePendingSends();
132 }
133 
134 template<class T>
136  const DataFactory<T>& factory)
137 {
138  if (this != &da)
139  {
141  this->m_boxLayout = da.boxLayout();
142  m_comps = da.nComp();
143  this->m_threadSafe = factory.threadSafe();
144  //this->m_threadSafe = false;
145  Interval srcAnddest(0, m_comps-1);
146  allocateGhostVector(factory);
147  setVector(da, srcAnddest, srcAnddest);
148  }
149 
150 }
151 template<class T>
153 {
154  if (this->m_callDelete == true)
155  {
156  for (unsigned int i=0; i<this->m_vector.size(); ++i)
157  {
158  delete this->m_vector[i];
159  this->m_vector[i] = NULL;
160  }
161  }
162  m_isdefined = false;
163 }
164 
165 template<class T>
166 inline void BoxLayoutData<T>::allocateGhostVector(const DataFactory<T>& factory, const IntVect& ghost)
167 {
168  if (this->m_callDelete == true)
169  {
170  for (unsigned int i=0; i<this->m_vector.size(); ++i)
171  {
172  delete this->m_vector[i];
173  this->m_vector[i] = NULL;
174  }
175  }
176 
177  this->m_callDelete = factory.callDelete();
178 
179  DataIterator it(this->dataIterator()); int nbox=it.size();
180  this->m_vector.resize(it.size(), NULL);
181 #pragma omp parallel for if(this->m_threadSafe)
182  for(int i=0; i<nbox; i++)
183  {
184  unsigned int index = it[i].datInd();
185  Box abox = this->box(it[i]);
186  abox.grow(ghost);
187  this->m_vector[index] = factory.create(abox, m_comps, it[i]);
188  if (this->m_vector[index] == NULL)
189  {
190  MayDay::Error("OutOfMemory in BoxLayoutData::allocateGhostVector");
191  }
192  }
193 }
194 
195 template<class T>
196 inline void BoxLayoutData<T>::apply(void (*a_func)(const Box& box, int comps, T& t))
197 {
198  DataIterator it(this->dataIterator()); int nbox=it.size();
199 #pragma omp parallel for
200  for(int i=0; i<nbox; i++)
201 
202  {
203  a_func(this->box(it[i]), m_comps, *(this->m_vector[ it[i].datInd() ]));
204  }
205 }
206 
207 //======================================================================
208 template <class T>
210 {
211  define(a_original, interval);
212 }
213 
214 template <class T>
216 {
217  m_origPointer = a_original;
218  m_interval = interval;
219 }
220 
221 template <class T>
222 T* AliasDataFactory<T>::create(const Box& a_box, int ncomps, const DataIndex& a_dataInd) const
223 {
224  //CH_assert(this->box(a_dataInd) == a_box);
225  CH_assert(ncomps = m_interval.size());
226  T* rtn = new T(m_interval, m_origPointer->operator[](a_dataInd));
227  return rtn;
228 }
229 
230 template<class T>
231 void BoxLayoutData<T>::makeItSo(const Interval& a_srcComps,
232  const BoxLayoutData<T>& a_src,
233  BoxLayoutData<T>& a_dest,
234  const Interval& a_destComps,
235  const Copier& a_copier,
236  const LDOperator<T>& a_op) const
237 {
238  if(s_verbosity > 0)
239  {
240  pout() << "makeit so copier = " << endl;
241  a_copier.print();
242  }
243  makeItSoBegin(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
244  makeItSoLocalCopy(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
245  a_dest.makeItSoEnd(a_destComps, a_op);
246 }
247 
248 template<class T>
250  const BoxLayoutData<T>& a_src,
251  BoxLayoutData<T>& a_dest,
252  const Interval& a_destComps,
253  const Copier& a_copier,
254  const LDOperator<T>& a_op) const
255 {
256  // The following five functions are nullOps in uniprocessor mode
257 
258 #ifdef CH_MPI
259 
260  allocateBuffers(a_src, a_srcComps,
261  a_dest, a_destComps,
262  a_copier,
263  a_op); //monkey with buffers, set up 'fromMe' and 'toMe' queues
264 
265  writeSendDataFromMeIntoBuffers(a_src, a_srcComps, a_op);
266 
267  // If there is nothing to recv/send, don't go into these functions
268  // and allocate memory that will not be freed later. (ndk)
269  // The #ifdef CH_MPI is for the m_buff->m_toMe and m_buff->m_fromMe
270  {
271  CH_TIME("post_messages");
272  this->m_buff->numReceives = m_buff->m_toMe.size();
273 
274  if (this->m_buff->numReceives > 0)
275  {
276  postReceivesToMe(); // all non-blocking
277  }
278 
279 
280  this->m_buff->numSends = m_buff->m_fromMe.size();
281  if (this->m_buff->numSends > 0)
282  {
283  postSendsFromMe(); // all non-blocking
284  }
285  }
286 #endif
287 }
288 
289 template<class T>
291  const BoxLayoutData<T>& a_src,
292  BoxLayoutData<T>& a_dest,
293  const Interval& a_destComps,
294  const Copier& a_copier,
295  const LDOperator<T>& a_op) const
296 {
297 
298  CH_TIME("local copying");
299  CopyIterator it(a_copier, CopyIterator::LOCAL);
300  int items=it.size();
301 #ifdef _OPENMP
302  bool threadSafe = m_threadSafe && (a_op.threadSafe());
303 #endif
304 #pragma omp parallel for if(threadSafe)
305  for (int n=0; n<items; n++)
306  {
307  const MotionItem& item = it[n];
308 //debugging bit to force serial code to run parallel bits
309 #if 0
310  const T & srcFAB = a_src[item.fromIndex];
311  T & dstFAB = a_dest[item.toIndex];
312  size_t bufsize_src = a_op.size(srcFAB, item.fromRegion, a_srcComps);
313  size_t bufsize_dst = a_op.size(srcFAB, item.fromRegion, a_srcComps);
314  if(bufsize_src != bufsize_dst)
315  {
316  MayDay::Error("buffer size mismatch");
317  }
318  char* charbuffer = new char[bufsize_src];
319  a_op.linearOut(srcFAB, charbuffer, item.fromRegion, a_srcComps);
320  a_op.linearIn (dstFAB, charbuffer, item.toRegion, a_destComps);
321 
322  delete[] charbuffer;
323 #else
324  a_op.op(a_dest[item.toIndex], item.fromRegion,
325  a_destComps,
326  item.toRegion,
327  a_src[item.fromIndex],
328  a_srcComps);
329 #endif
330 
331  }
332 }
333 template<class T>
335  const Interval& a_destComps,
336  const LDOperator<T>& a_op)
337 {
338  // Uncomment and Move this out of unpackReceivesToMe() (ndk)
339  completePendingSends(); // wait for sends from possible previous operation
340 
341  unpackReceivesToMe(a_destComps, a_op); // nullOp in uniprocessor mode
342 
343 }
344 
345 #ifndef CH_MPI
346 // uniprocessor version of all these nullop functions.
347 template<class T>
349 {
350 }
351 
352 template<class T>
354  const Interval& a_srcComps,
355  const BoxLayoutData<T>& a_dest,
356  const Interval& a_destComps,
357  const Copier& a_copier,
358  const LDOperator<T>& a_op
359  ) const
360 {
361 }
362 
363 template<class T>
365  const Interval& a_srcComps,
366  const LDOperator<T>& a_op) const
367 {
368 }
369 
370 template<class T>
372 {
373 }
374 
375 template<class T>
377 {
378 }
379 
380 template<class T>
382  const Interval& a_destComps,
383  const LDOperator<T>& a_op)
384 {
385 }
386 
387 template<class T>
389  const Interval& a_destComps,
390  int ncomp,
391  const DataFactory<T>& factory,
392  const LDOperator<T>& a_op) const
393 {
394 }
395 
396 #else
397 
398 // MPI versions of the above codes.
399 
400 template<class T>
402 {
403  CH_TIME("completePendingSends");
404  if (this->m_buff->numSends > 0)
405  {
406  CH_TIME("MPI_Waitall");
407  m_buff->m_sendStatus.resize(this->m_buff->numSends);
408  int result = MPI_Waitall(this->m_buff->numSends, &(m_buff->m_sendRequests[0]), &(m_buff->m_sendStatus[0]));
409  if (result != MPI_SUCCESS)
410  {
411  //hell if I know what to do about failed messaging here
412  }
413  }
414  this->m_buff->numSends = 0;
415 }
416 
417 template<class T>
419  const Interval& a_srcComps,
420  const BoxLayoutData<T>& a_dest,
421  const Interval& a_destComps,
422  const Copier& a_copier,
423  const LDOperator<T>& a_op) const
424 {
425  CH_TIME("MPI_allocateBuffers");
426  m_buff = &(((Copier&)a_copier).m_buffers);
427  a_dest.m_buff = m_buff;
428 
429  CH_assert(a_srcComps.size() == a_destComps.size());
430  if (m_buff->isDefined(a_srcComps.size()) && T::preAllocatable()<2) return;
431 
432  if(s_verbosity > 0)
433  {
434  pout() << " allocate buffers srcComps = " << a_srcComps << ", dest comps = " << a_destComps << endl;
435  }
436  m_buff->m_ncomps = a_srcComps.size();
437 
438  m_buff->m_fromMe.resize(0);
439  m_buff->m_toMe.resize(0);
440  size_t sendBufferSize = 0;
441  size_t recBufferSize = 0;
442  // two versions of code here. one for preAllocatable T, one not.
443 
444  T dummy;
445  for (CopyIterator it(a_copier, CopyIterator::FROM); it.ok(); ++it)
446  {
447  const MotionItem& item = it();
448  CopierBuffer::bufEntry b;
449  b.item = &item;
450  b.size = a_op.size(a_src[item.fromIndex], item.fromRegion, a_srcComps);
451  sendBufferSize+=b.size;
452  b.procID = item.procID;
453  m_buff->m_fromMe.push_back(b);
454  }
455  sort(m_buff->m_fromMe.begin(), m_buff->m_fromMe.end());
456  for (CopyIterator it(a_copier, CopyIterator::TO); it.ok(); ++it)
457  {
458  const MotionItem& item = it();
459  CopierBuffer::bufEntry b;
460  b.item = &item;
461  if (T::preAllocatable() == 0)
462  {
463  b.size = a_op.size(dummy, item.fromRegion, a_destComps);
464  recBufferSize+=b.size;
465  }
466  else if (T::preAllocatable() == 1)
467  {
468  b.size = a_op.size(a_dest[item.toIndex], item.fromRegion, a_destComps);
469  recBufferSize+=b.size;
470  }
471  b.procID = item.procID;
472  m_buff->m_toMe.push_back(b);
473  }
474  sort(m_buff->m_toMe.begin(), m_buff->m_toMe.end());
475 
476  if (T::preAllocatable() == 2) // dynamic allocatable, need two pass
477  {
478  CH_TIME("MPI_ Phase 1 of 2 Phase: preAllocatable==2");
479  if (s_verbosity > 0) pout()<<"preAllocatable==2\n";
480 
481  // in the non-preallocatable case, I need to message the
482  // values for the m_buff->m_toMe[*].size
483  Vector<unsigned long> fdata;
484  Vector<unsigned long> tdata;
485  int count = 1;
486  int scount = 1;
487  if (m_buff->m_toMe.size() > 0)
488  {
489  tdata.resize(m_buff->m_toMe.size(), ULONG_MAX);
490  m_buff->m_receiveRequests.resize(numProc()-1);
491  m_buff->m_receiveStatus.resize(numProc()-1);
492  MPI_Request* Rptr = &(m_buff->m_receiveRequests[0]);
493 
494  unsigned int lastProc = m_buff->m_toMe[0].procID;
495  int messageSize = 1;
496  unsigned long * dataPtr = &(tdata[0]);
497  unsigned int i = 1;
498 
499  for (;i<m_buff->m_toMe.size(); ++i)
500  {
501  CopierBuffer::bufEntry& b = m_buff->m_toMe[i];
502  if (b.procID == lastProc)
503  messageSize++;
504  else
505  {
506 
507  MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
508  1, Chombo_MPI::comm, Rptr);
509  Rptr++;
510 
511  lastProc = b.procID;
512  messageSize = 1;
513  dataPtr = &(tdata[i]);
514  count++;
515  }
516  }
517 
518  MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
519  1, Chombo_MPI::comm, Rptr );
520  }
521 
522  if (m_buff->m_fromMe.size() > 0)
523  {
524  fdata.resize(m_buff->m_fromMe.size());
525  fdata[0]=m_buff->m_fromMe[0].size;
526  m_buff->m_sendRequests.resize(numProc()-1);
527  m_buff->m_sendStatus.resize(numProc()-1);
528  MPI_Request* Rptr = &(m_buff->m_sendRequests[0]);
529 
530  unsigned int lastProc = m_buff->m_fromMe[0].procID;
531  int messageSize = 1;
532  unsigned long * dataPtr = &(fdata[0]);
533  unsigned int i = 1;
534  for (;i<m_buff->m_fromMe.size(); ++i)
535  {
536  fdata[i] = m_buff->m_fromMe[i].size;
537  CopierBuffer::bufEntry& b = m_buff->m_fromMe[i];
538  if (b.procID == lastProc)
539  messageSize++;
540  else
541  {
542  MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
543  1, Chombo_MPI::comm, Rptr);
544 
545  Rptr++;
546  lastProc = b.procID;
547  messageSize = 1;
548  dataPtr = &(fdata[i]);
549  scount++;
550  }
551  }
552 
553  MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
554  1, Chombo_MPI::comm, Rptr);
555  }
556 
557  if (m_buff->m_toMe.size() > 0)
558  {
559 
560  int result = MPI_Waitall(count, &(m_buff->m_receiveRequests[0]), &(m_buff->m_receiveStatus[0]));
561  if (result != MPI_SUCCESS)
562  {
563  MayDay::Error("First pass of two-phase communication failed");
564  }
565 
566  for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
567  {
568  CH_assert(tdata[i] != ULONG_MAX);
569  m_buff->m_toMe[i].size = tdata[i];
570  recBufferSize+= tdata[i];
571  }
572  }
573 
574  if (m_buff->m_fromMe.size() > 0)
575  {
576 
577  int result = MPI_Waitall(scount, &(m_buff->m_sendRequests[0]), &(m_buff->m_sendStatus[0]));
578  if (result != MPI_SUCCESS)
579  {
580  MayDay::Error("First pass of two-phase communication failed");
581  }
582 
583  }
584  }
585 
586  // allocate send and receveive buffer space.
587 
588  if (sendBufferSize > m_buff->m_sendcapacity)
589  {
591  if (s_verbosity > 0) pout()<<"malloc send buffer "<<sendBufferSize<<std::endl;
592  (m_buff->m_sendbuffer) = mallocMT(sendBufferSize);
593  if ((m_buff->m_sendbuffer) == NULL)
594  {
595  MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
596  }
597  m_buff->m_sendcapacity = sendBufferSize;
598  }
599 
600  if (recBufferSize > m_buff->m_reccapacity)
601  {
603  if (s_verbosity > 0) pout()<<"malloc receive buffer "<<recBufferSize<<std::endl;
604  m_buff->m_recbuffer = mallocMT(recBufferSize);
605  if (m_buff->m_recbuffer == NULL)
606  {
607  MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
608  }
609  m_buff->m_reccapacity = recBufferSize;
610  }
611 
612  /*
613  pout()<<"\n";
614  for (int i=0; i<m_buff->m_fromMe.size(); i++)
615  pout()<<m_buff->m_fromMe[i].item->region<<"{"<<m_buff->m_fromMe[i].procID<<"}"<<" ";
616  pout() <<"::::";
617  for (int i=0; i<m_buff->m_toMe.size(); i++)
618  pout()<<m_buff->m_toMe[i].item->region<<"{"<<m_buff->m_toMe[i].procID<<"}"<<" ";
619  pout() << endl;
620  */
621 
622  char* nextFree = (char*)(m_buff->m_sendbuffer);
623  if (m_buff->m_fromMe.size() > 0)
624  {
625  for (unsigned int i=0; i<m_buff->m_fromMe.size(); ++i)
626  {
627  m_buff->m_fromMe[i].bufPtr = nextFree;
628  nextFree += m_buff->m_fromMe[i].size;
629  }
630  }
631 
632  nextFree = (char*)m_buff->m_recbuffer;
633  if (m_buff->m_toMe.size() > 0)
634  {
635  for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
636  {
637  m_buff->m_toMe[i].bufPtr = nextFree;
638  nextFree += m_buff->m_toMe[i].size;
639  }
640  }
641 
642  // since fromMe and toMe are sorted based on procID, messages can now be grouped
643  // together on a per-processor basis.
644 
645 }
646 
647 template<class T>
649  const Interval& a_srcComps,
650  const LDOperator<T>& a_op) const
651 {
652  CH_TIME("write Data to buffers");
653  int isize = m_buff->m_fromMe.size();
654 #ifdef _OPENMP
655  bool threadSafe = m_threadSafe && (a_op.threadSafe());
656 #endif
657 #pragma omp parallel for if(threadSafe)
658  for (unsigned int i=0; i< isize; ++i)
659  {
660  const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[i];
661  a_op.linearOut(a_src[entry.item->fromIndex], entry.bufPtr,
662  entry.item->fromRegion, a_srcComps);
663  }
664 }
665 
666 template<class T>
668 {
669  CH_TIME("post_Sends");
670  // now we get the magic of message coalescence
671  // fromMe has already been sorted in the allocateBuffers() step.
672 
673  this->m_buff->numSends = m_buff->m_fromMe.size();
674 
675  if (this->m_buff->numSends > 1)
676  {
677  for (unsigned int i=m_buff->m_fromMe.size()-1; i>0; --i)
678  {
679  if (m_buff->m_fromMe[i].procID == m_buff->m_fromMe[i-1].procID)
680  {
681  this->m_buff->numSends--;
682  m_buff->m_fromMe[i-1].size = m_buff->m_fromMe[i-1].size + m_buff->m_fromMe[i].size;
683  m_buff->m_fromMe[i].size = 0;
684  }
685  }
686  }
687  m_buff->m_sendRequests.resize(this->m_buff->numSends);
688  std::list<MPI_Request> extraRequests;
689 
690  unsigned int next=0;
691  long long maxSize = 0;
692  for (int i=0; i<this->m_buff->numSends; ++i)
693  {
694  const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[next];
695  char* buffer = (char*)entry.bufPtr;
696  std::size_t bsize = entry.size;
697  int idtag=0;
698  while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
699  {
700  extraRequests.push_back(MPI_Request());
701  {
702  //CH_TIME("MPI_Isend");
703  MPI_Isend(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
704  idtag, Chombo_MPI::comm, &(extraRequests.back()));
705  }
706  maxSize = CH_MAX_MPI_MESSAGE_SIZE;
707  bsize -= CH_MAX_MPI_MESSAGE_SIZE;
708  buffer+=CH_MAX_MPI_MESSAGE_SIZE;
709  idtag++;
710  }
711  {
712  //CH_TIME("MPI_Isend");
713  MPI_Isend(buffer, bsize, MPI_BYTE, entry.procID,
714  idtag, Chombo_MPI::comm, &(m_buff->m_sendRequests[i]));
715  }
716  maxSize = Max<long long>(bsize, maxSize);
717  ++next;
718  while (next < m_buff->m_fromMe.size() && m_buff->m_fromMe[next].size == 0) ++next;
719  }
720  for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
721  {
722  m_buff->m_sendRequests.push_back(*it);
723  }
724  this->m_buff->numSends = m_buff->m_sendRequests.size();
725 
726  CH_MaxMPISendSize = Max<long long>(CH_MaxMPISendSize, maxSize);
727 
728 }
729 
730 template<class T>
732 {
733  CH_TIME("post_Receives");
734  this->m_buff->numReceives = m_buff->m_toMe.size();
735 
736  if (this->m_buff->numReceives > 1)
737  {
738  for (unsigned int i=m_buff->m_toMe.size()-1; i>0; --i)
739  {
740  if (m_buff->m_toMe[i].procID == m_buff->m_toMe[i-1].procID)
741  {
742  this->m_buff->numReceives--;
743  m_buff->m_toMe[i-1].size += m_buff->m_toMe[i].size;
744  m_buff->m_toMe[i].size = 0;
745  }
746 
747  }
748  }
749  m_buff->m_receiveRequests.resize(this->m_buff->numReceives);
750  std::list<MPI_Request> extraRequests;
751  unsigned int next=0;
752  long long maxSize = 0;
753  for (int i=0; i<this->m_buff->numReceives; ++i)
754  {
755  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[next];
756  char* buffer = (char*)entry.bufPtr;
757  size_t bsize = entry.size;
758  int idtag=0;
759  while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
760  {
761  extraRequests.push_back(MPI_Request());
762  {
763  //CH_TIME("MPI_Irecv");
764  MPI_Irecv(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
765  idtag, Chombo_MPI::comm, &(extraRequests.back()));
766  }
767  maxSize = CH_MAX_MPI_MESSAGE_SIZE;
768  bsize -= CH_MAX_MPI_MESSAGE_SIZE;
769  buffer+=CH_MAX_MPI_MESSAGE_SIZE;
770  idtag++;
771  }
772  {
773  //CH_TIME("MPI_Irecv");
774  MPI_Irecv(buffer, bsize, MPI_BYTE, entry.procID,
775  idtag, Chombo_MPI::comm, &(m_buff->m_receiveRequests[i]));
776  }
777  ++next;
778  maxSize = Max<long long>(bsize, maxSize);
779  while (next < m_buff->m_toMe.size() && m_buff->m_toMe[next].size == 0) ++next;
780  }
781  for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
782  {
783  m_buff->m_receiveRequests.push_back(*it);
784  }
785  this->m_buff->numReceives = m_buff->m_receiveRequests.size();
786 
787  CH_MaxMPIRecvSize = Max<long long>(CH_MaxMPIRecvSize, maxSize);
788  //pout()<<"maxSize="<<maxSize<<" posted "<<this->m_buff->numReceives<<" receives\n";
789 
790 }
791 
792 template<class T>
794  const Interval& a_destComps,
795  const LDOperator<T>& a_op)
796 {
797 
798  CH_TIME("unpack_messages");
799 
800  if (this->m_buff->numReceives > 0)
801  {
802  m_buff->m_receiveStatus.resize(this->m_buff->numReceives);
803  int result;
804  {
805  CH_TIME("MPI_Waitall");
806  result = MPI_Waitall(this->m_buff->numReceives, &(m_buff->m_receiveRequests[0]),
807  &(m_buff->m_receiveStatus[0]));
808  }
809  if (result != MPI_SUCCESS)
810  {
811  //hell if I know what to do about failed messaging here
812  //maybe a mayday::warning?
813  }
814 
815  int isize = m_buff->m_toMe.size();
816 #ifdef _OPENMP
817  bool threadSafe = m_threadSafe && (a_op.threadSafe());
818 #endif
819 #pragma omp parallel for if(threadSafe)
820  for (unsigned int i=0; i< isize; ++i)
821  {
822  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
823  a_op.linearIn(this->operator[](entry.item->toIndex), entry.bufPtr,
824  entry.item->toRegion, a_destComps);
825  }
826  }
827  this->m_buff->numReceives = 0;
828 }
829 
830 template<class T>
832  const Interval& a_destComps,
833  int ncomp,
834  const DataFactory<T>& factory,
835 
836  const LDOperator<T>& a_op) const
837 {
838 
839  if (this->m_buff->numReceives > 0)
840  {
841  m_buff->m_receiveStatus.resize(this->m_buff->numReceives);
842  int result;
843  {
844  CH_TIME("MPI_Waitall");
845  result = MPI_Waitall(this->m_buff->numReceives, &(m_buff->m_receiveRequests[0]),
846  &(m_buff->m_receiveStatus[0]));
847  }
848  if (result != MPI_SUCCESS)
849  {
850  //hell if I know what to do about failed messaging here
851  }
852  int isize = m_buff->m_toMe.size();
853  // NOT thread-safe, because of a_dest[item.toIndex].push_back(newT);
854  // #pragma omp parallel for if(this->m_threadSafe)
855  for (int i=0; i< isize; ++i)
856  {
857  const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
858  const MotionItem& item = *(entry.item);
859  RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );;
860 
861  a_op.linearIn(*newT, entry.bufPtr, item.toRegion, a_destComps);
862  a_dest[item.toIndex].push_back(newT);
863  }
864  }
865 
866  this->m_buff->numReceives = 0;
867 }
868 #endif
869 
870 template <class T>
872  LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
873  const Interval& a_srcComps,
874  const ProblemDomain& a_domain,
875  const Copier& a_copier,
876  const DataFactory<T>& factory) const
877 {
878 
879  CH_assert(T::preAllocatable() == 0);
880  a_dest.define(a_destGrids);
881 
882  LDOperator<T> a_op;
883 
884  int ncomp = a_srcComps.size();
885  Interval destComps(0, ncomp-1);
886  allocateBuffers(*this, a_srcComps,
887  *this, destComps,
888  a_copier, a_op);
889 
890  writeSendDataFromMeIntoBuffers(*this, a_srcComps, a_op);
891 
892  // If there is nothing to recv/send, don't go into these functions
893  // and allocate memory that will not be freed later. (ndk)
894  // The #ifdef CH_MPI is for the m_buff->m_toMe and m_buff->m_fromMe
895 #ifdef CH_MPI
896  this->m_buff->numReceives = m_buff->m_toMe.size();
897  if (this->m_buff->numReceives > 0)
898  {
899  postReceivesToMe(); // all non-blocking
900  }
901 
902  this->m_buff->numSends = m_buff->m_fromMe.size();
903  if (this->m_buff->numSends > 0)
904  {
905  postSendsFromMe(); // all non-blocking
906  }
907 #endif
908 
909  // perform local copy
910  CopyIterator it(a_copier, CopyIterator::LOCAL);
911  int items=it.size();
912 
913 //brian says this does not need conditionals because everyone is getting different buffers
914  // NOT thread-safe, because of a_dest[item.toIndex].push_back(newT);
915  // #pragma omp parallel for
916  for(int i=0; i<items; ++i)
917  {
918  const MotionItem& item = it[i];
919  RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );
920 
921  a_op.op(*newT, item.fromRegion,
922  destComps,
923  item.toRegion,
924  this->operator[](item.fromIndex),
925  a_srcComps);
926  a_dest[item.toIndex].push_back(newT);
927  }
928  // }
929  // Uncomment and Move this out of unpackReceivesToMe() (ndk)
930  completePendingSends(); // wait for sends from possible previous operation
931 
932  unpackReceivesToMe_append(a_dest, destComps, ncomp, factory, a_op); // nullOp in uniprocessor mode
933 }
934 
935 template <class T>
937  LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
938  const Interval& a_srcComps,
939  const ProblemDomain& a_domain,
940  const DataFactory<T>& factory) const
941 {
942  Copier copier;
943  copier.define(this->m_boxLayout, a_destGrids, a_domain, IntVect::Zero);
944 
945  generalCopyTo(a_destGrids, a_dest, a_srcComps, a_domain, copier, factory);
946 }
947 
948 template <class T>
949 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
950  BoxLayoutData<T>& a_dest,
951  const Interval& a_destComps,
952  const ProblemDomain& a_domain) const
953 {
954  Copier copier;
955  copier.define(this->m_boxLayout, a_dest.m_boxLayout, a_domain, IntVect::Zero);
956  addTo(a_srcComps, a_dest, a_destComps, a_domain, copier);
957 }
958 
959 template <class T>
960 class LDaddOp : public LDOperator<T>
961 {
962 public:
963  virtual void op(T& dest,
964  const Box& RegionFrom,
965  const Interval& Cdest,
966  const Box& RegionTo,
967  const T& src,
968  const Interval& Csrc) const
969  {
970  dest.plus(src, RegionFrom, RegionTo, Csrc.begin(), Cdest.begin(), Cdest.size());
971  }
972  virtual void linearIn(T& arg, void* buf, const Box& R,
973  const Interval& comps) const
974  {
975  Real* buffer = (Real*)buf;
976 
977  ForAllXBNNnoindx(Real, arg, R, comps.begin(), comps.size())
978  {
979  argR+=*buffer;
980  buffer++;
981  } EndFor
982 
983  }
984 
985  virtual bool threadSafe() const
986  {
987  return false;
988  }
989 };
990 
991 template <class T>
992 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
993  BoxLayoutData<T>& a_dest,
994  const Interval& a_destComps,
995  const ProblemDomain& a_domain,
996  const Copier& a_copier) const
997 {
998  CH_TIME("addTo");
999  LDaddOp<T> addOp;
1000  addToBegin(a_srcComps, a_dest, a_destComps, a_copier);
1001  a_dest.addToEnd(a_destComps);
1002  //makeItSoBegin(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1003  //makeItSoLocalCopy(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1004  //a_dest.makeItSoEnd(a_destComps, addOp);
1005  //makeItSo(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1006 }
1007 
1008 
1009 template <class T>
1011  BoxLayoutData<T>& a_dest,
1012  const Interval& a_destComps,
1013  const Copier& a_copier) const
1014 {
1015  CH_TIME("addToBegin");
1016  LDaddOp<T> addOp;
1017  makeItSoBegin(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1018  makeItSoLocalCopy(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
1019 
1020 }
1021 
1022 
1023 template <class T>
1024 void BoxLayoutData<T>::addToEnd(const Interval& a_destComps)
1025 {
1026  CH_TIME("addToEnd");
1027  LDaddOp<T> addOp;
1028 
1029  makeItSoEnd(a_destComps, addOp);
1030 
1031 }
1032 
1033 
1034 
1035 #include "NamespaceFooter.H"
1036 #endif
std::ostream & pout()
Use this in place of std::cout for program output.
virtual bool threadSafe() const
Definition: BoxLayoutData.H:301
int numReceives
Definition: Copier.H:124
void addToBegin(const Interval &a_srcComps, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier) const
Definition: BoxLayoutDataI.H:1010
int m_comps
Definition: BoxLayoutData.H:403
CopierBuffer * m_buff
Definition: BoxLayoutData.H:481
virtual void define(const BoxLayout &boxes, int comps, const DataFactory< T > &factory=DefaultDataFactory< T >())
Definition: BoxLayoutDataI.H:86
bool m_callDelete
Definition: LayoutData.H:131
void define(BoxLayoutData< T > *a_original, const Interval &interval)
Definition: BoxLayoutDataI.H:215
A reference-counting handle class.
Definition: RefCountedPtr.H:173
#define freeMT(a_a)
Definition: memtrack.H:160
#define CH_assert(cond)
Definition: CHArray.H:37
A class to facilitate interaction with physical boundary conditions.
Definition: ProblemDomain.H:141
Vector< T * > m_vector
Definition: LayoutData.H:124
void unpackReceivesToMe_append(LayoutData< Vector< RefCountedPtr< T > > > &a_dest, const Interval &a_destComps, int ncomp, const DataFactory< T > &factory, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:388
int m_ncomps
Definition: Copier.H:78
A not-necessarily-disjoint collective of boxes.
Definition: BoxLayout.H:145
one dimensional dynamic array
Definition: Vector.H:53
void writeSendDataFromMeIntoBuffers(const BoxLayoutData< T > &a_src, const Interval &a_srcComps, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:364
virtual ~BoxLayoutData()
Definition: BoxLayoutDataI.H:128
int nComp() const
Definition: BoxLayoutData.H:306
Data that maintains a one-to-one mapping of T to the boxes in a BoxLayout.
Definition: BoxLayout.H:26
A strange but true thing to make copying from one boxlayoutdata to another fast.
Definition: Copier.H:152
#define mallocMT(a_a)
Definition: memtrack.H:159
int size() const
Definition: DataIterator.H:218
std::vector< bufEntry > m_fromMe
Definition: Copier.H:116
void unpackReceivesToMe(const Interval &a_destComps, const LDOperator< T > &a_op)
Definition: BoxLayoutDataI.H:381
virtual bool callDelete() const
Definition: BoxLayoutData.H:43
Definition: Copier.H:388
void setVector(const BoxLayoutData< T > &da, const Interval &srcComps, const Interval &destComps)
Definition: BoxLayoutDataI.H:44
size_t m_sendcapacity
Definition: Copier.H:82
Definition: DataIterator.H:190
bool isDefined(int ncomps) const
Definition: Copier.H:75
size_t size()
Definition: Copier.H:404
void completePendingSends() const
Definition: BoxLayoutDataI.H:348
unsigned long long CH_MaxMPIRecvSize
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutData.H:204
virtual void clear()
Definition: BoxLayoutDataI.H:152
unsigned long long CH_MaxMPISendSize
unsigned int numProc()
number of parallel processes
Definition: Copier.H:389
Definition: Copier.H:38
void postReceivesToMe() const
Definition: BoxLayoutDataI.H:376
int size() const
Definition: Interval.H:75
BoxLayout m_boxLayout
Definition: LayoutData.H:118
void generalCopyTo(const BoxLayout &a_destGrids, LayoutData< Vector< RefCountedPtr< T > > > &a_dest, const Interval &a_interval, const ProblemDomain &a_domain, const DataFactory< T > &factory=DefaultDataFactory< T >()) const
General data copying operation.
Definition: BoxLayoutDataI.H:936
int procID
Definition: Copier.H:44
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const
factory function. creates a new 'T' object
Definition: BoxLayoutDataI.H:30
virtual void linearOut(const T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutData.H:184
Definition: EBInterface.H:45
void resize(unsigned int isize)
Definition: Vector.H:346
virtual void apply(void(*a_Function)(const Box &box, int comps, T &t))
Definition: BoxLayoutDataI.H:196
DataIndex toIndex
Definition: Copier.H:41
size_t m_reccapacity
Definition: Copier.H:85
void * m_recbuffer
Definition: Copier.H:83
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutData.H:200
#define CH_TIME(name)
Definition: CH_Timer.H:82
Structure for passing component ranges in code.
Definition: Interval.H:23
int numSends
Definition: Copier.H:124
void allocateGhostVector(const DataFactory< T > &factory, const IntVect &ghost=IntVect::Zero)
Definition: BoxLayoutDataI.H:166
virtual bool isDefined() const
Definition: BoxLayoutDataI.H:38
void * m_sendbuffer
Definition: Copier.H:80
Interval interval() const
Definition: BoxLayoutData.H:312
virtual int size(const T &arg, const Box &b, const Interval &comps) const
Definition: BoxLayoutData.H:180
Data on a BoxLayout.
Definition: BoxLayoutData.H:97
int s_verbosity
Definition: BoxLayoutData.H:400
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutDataI.H:972
double Real
Definition: REAL.H:33
void addTo(const Interval &a_srcComps, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const ProblemDomain &a_domain) const
Definition: BoxLayoutDataI.H:949
virtual void define(const DisjointBoxLayout &dp, int comps, const IntVect &ghost=IntVect::Zero, const DataFactory< T > &a_factory=DefaultDataFactory< T >())
Definition: LevelDataI.H:80
virtual void linearIn(T &arg, void *buf, const Box &R, const Interval &comps) const
Definition: BoxLayoutData.H:189
void makeItSoLocalCopy(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:290
Box toRegion
Definition: Copier.H:43
bool isClosed() const
Definition: BoxLayout.H:730
static void Error(const char *const a_msg=m_nullString, int m_exitCode=CH_DEFAULT_ERROR_CODE)
Print out message to cerr and exit with the specified exit code.
int begin() const
Definition: Interval.H:99
const BoxLayout & boxLayout() const
Definition: LayoutData.H:107
static const IntVect Zero
Definition: IntVect.H:658
void postSendsFromMe() const
Definition: BoxLayoutDataI.H:371
AliasDataFactory(BoxLayoutData< T > *a_original, const Interval &interval)
Definition: BoxLayoutDataI.H:209
A Rectangular Domain on an Integer Lattice.
Definition: Box.H:469
Definition: DataIndex.H:114
bool ok() const
Definition: Copier.H:449
bool m_isdefined
Definition: BoxLayoutData.H:405
unsigned long long CH_MAX_MPI_MESSAGE_SIZE
An integer Vector in SpaceDim-dimensional space.
Definition: CHArray.H:42
virtual bool threadSafe() const
this boolean only has to do with whether the op(...) function is thread safe
Definition: BoxLayoutDataI.H:985
DataIterator dataIterator() const
Definition: LayoutDataI.H:78
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const =0
factory function. creates a new 'T' object
size_t size() const
Definition: Vector.H:192
Definition: Copier.H:387
Factory object that creates the data members of a BoxLayoutData container.
Definition: BoxLayoutData.H:30
void makeItSoEnd(const Interval &a_destComps, const LDOperator< T > &a_op=LDOperator< T >())
Definition: BoxLayoutDataI.H:334
void makeItSoBegin(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:249
virtual T * create(const Box &box, int ncomps, const DataIndex &a_datInd) const
Definition: BoxLayoutDataI.H:222
Box & grow(int i)
grow functions
Definition: Box.H:2263
virtual bool threadSafe() const
Definition: BoxLayoutData.H:49
Definition: BoxLayoutData.H:173
int end() const
Definition: Interval.H:104
bool m_threadSafe
Definition: BoxLayoutData.H:404
virtual void define(const DisjointBoxLayout &a_level, const BoxLayout &a_dest, bool a_exchange=false, IntVect a_shift=IntVect::Zero)
Definition: BoxLayoutDataI.H:960
DataIndex fromIndex
Definition: Copier.H:41
int print() const
Box box(const DataIndex &a_index) const
Definition: LayoutDataI.H:66
Box fromRegion
Definition: Copier.H:42
std::vector< bufEntry > m_toMe
Definition: Copier.H:117
Definition: Copier.H:382
void makeItSo(const Interval &a_srcComps, const BoxLayoutData< T > &a_src, BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op=LDOperator< T >()) const
Definition: BoxLayoutDataI.H:231
BoxLayoutData()
Definition: BoxLayoutDataI.H:106
void addToEnd(const Interval &a_destComps)
Call this on the destination BoxLayoutData, not the source.
Definition: BoxLayoutDataI.H:1024
virtual void op(T &dest, const Box &RegionFrom, const Interval &Cdest, const Box &RegionTo, const T &src, const Interval &Csrc) const
Definition: BoxLayoutDataI.H:963
void allocateBuffers(const BoxLayoutData< T > &a_src, const Interval &a_srcComps, const BoxLayoutData< T > &a_dest, const Interval &a_destComps, const Copier &a_copier, const LDOperator< T > &a_op) const
Definition: BoxLayoutDataI.H:353