00001 #ifdef CH_LANG_CC
00002
00003
00004
00005
00006
00007
00008
00009 #endif
00010
00011 #ifndef _BOXLAYOUTDATAI_H_
00012 #define _BOXLAYOUTDATAI_H_
00013
00014 #include <cstdlib>
00015 #include <algorithm>
00016 #include <limits.h>
00017 #include <list>
00018 #include "CH_OpenMP.H"
00019 #include "parstream.H"
00020 #include "memtrack.H"
00021 #include "Misc.H"
00022 #include "CH_Timer.H"
00023 #include "NamespaceHeader.H"
00024 #include "BaseFabMacros.H"
00025
00026 using std::sort;
00027
00028 template<class T>
00029 int BoxLayoutData<T>::s_verbosity = 0;
00030
00031 template <class T>
00032 T* DefaultDataFactory<T>::create(const Box& box,
00033 int ncomps,
00034 const DataIndex& a_datInd) const
00035 {
00036 return new T(box, ncomps);
00037 }
00038
00039 template<class T>
00040 inline bool BoxLayoutData<T>::isDefined() const
00041 {
00042 return m_isdefined;
00043 }
00044
00045 template <class T>
00046 inline void BoxLayoutData<T>::setVector(const BoxLayoutData<T>& da,
00047 const Interval& srcComps,
00048 const Interval& destComps)
00049 {
00050 if(&da != this)
00051 {
00052 DataIterator it=this->dataIterator();
00053 int nbox=it.size();
00054 #pragma omp parallel for if(this->m_threadSafe)
00055 for(int box=0; box<nbox; box++)
00056 {
00057 this->m_vector[it[box].datInd()]->copy( this->box(it[box]), destComps,
00058 this->box(it[box]), da[it[box]], srcComps);
00059 }
00060 }
00061 }
00062
00063 template<class T>
00064 inline void BoxLayoutData<T>::define(const BoxLayoutData<T>& da, const Interval& comps,
00065 const DataFactory<T>& factory)
00066 {
00067 if (this == &da)
00068 {
00069 MayDay::Error("BoxLayoutData<T>::define(const LayoutData<T>& da,.....) called with 'this'");
00070 }
00071 CH_assert(comps.size()>0);
00072 CH_assert(comps.end()<=m_comps);
00073
00074
00075 CH_assert(comps.begin()>=0);
00076 this->m_boxLayout = da.boxLayout();
00077
00078 this->m_comps = comps.size();
00079 this->m_threadSafe = factory.threadSafe();
00080
00081
00082 Interval dest(0, m_comps-1);
00083 allocateGhostVector(factory);
00084 setVector(da, comps, dest);
00085 }
00086
00087 template<class T>
00088 inline void BoxLayoutData<T>::define(const BoxLayout& boxes, int comps,
00089 const DataFactory<T>& factory)
00090 {
00091 CH_assert(boxes.isClosed());
00092 this->m_boxLayout = boxes;
00093 m_comps = comps;
00094 this->m_threadSafe = factory.threadSafe();
00095
00096 m_isdefined = true;
00097 allocateGhostVector(factory);
00098
00099 }
00100
00101 template<class T>
00102 inline void BoxLayoutData<T>::define(const BoxLayout& boxes)
00103 {
00104 MayDay::Error("BoxLayoutData<T>::define(const BoxLayout& boxes)...needs comps");
00105 }
00106
00107 template <class T>
00108 inline BoxLayoutData<T>::BoxLayoutData():m_comps(0) ,m_buff(0)
00109 {
00110 m_isdefined = false;
00111 #ifdef CH_MPI
00112 this->numSends = 0;
00113 this->numReceives = 0;
00114 #endif
00115 }
00116 template<class T>
00117 inline BoxLayoutData<T>::BoxLayoutData(const BoxLayout& boxes, int comps,
00118 const DataFactory<T>& factory)
00119 :m_comps(comps),m_buff(NULL)
00120 {
00121 CH_assert(boxes.isClosed());
00122 this->m_boxLayout = boxes;
00123 m_isdefined = true;
00124 allocateGhostVector(factory);
00125 #ifdef CH_MPI
00126
00127 this->numSends = 0;
00128 this->numReceives = 0;
00129 #endif
00130 }
00131
00132 template<class T>
00133 BoxLayoutData<T>::~BoxLayoutData()
00134 {
00135 CH_TIME("~BoxLayoutData");
00136 completePendingSends();
00137 }
00138
00139 template<class T>
00140 inline void BoxLayoutData<T>::define(const BoxLayoutData<T>& da,
00141 const DataFactory<T>& factory)
00142 {
00143 if (this != &da)
00144 {
00145 m_isdefined = da.m_isdefined;
00146 this->m_boxLayout = da.boxLayout();
00147 m_comps = da.nComp();
00148 this->m_threadSafe = factory.threadSafe();
00149
00150 Interval srcAnddest(0, m_comps-1);
00151 allocateGhostVector(factory);
00152 setVector(da, srcAnddest, srcAnddest);
00153 }
00154
00155 }
00156 template<class T>
00157 inline void BoxLayoutData<T>::clear()
00158 {
00159 if (this->m_callDelete == true)
00160 {
00161 for (unsigned int i=0; i<this->m_vector.size(); ++i)
00162 {
00163 delete this->m_vector[i];
00164 this->m_vector[i] = NULL;
00165 }
00166 }
00167 m_isdefined = false;
00168 }
00169
00170 template<class T>
00171 inline void BoxLayoutData<T>::allocateGhostVector(const DataFactory<T>& factory, const IntVect& ghost)
00172 {
00173 if (this->m_callDelete == true)
00174 {
00175 for (unsigned int i=0; i<this->m_vector.size(); ++i)
00176 {
00177 delete this->m_vector[i];
00178 this->m_vector[i] = NULL;
00179 }
00180 }
00181
00182 this->m_callDelete = factory.callDelete();
00183
00184 DataIterator it(this->dataIterator()); int nbox=it.size();
00185 this->m_vector.resize(it.size(), NULL);
00186 #pragma omp parallel for if(this->m_threadSafe)
00187 for(int i=0; i<nbox; i++)
00188 {
00189 unsigned int index = it[i].datInd();
00190 Box abox = this->box(it[i]);
00191 abox.grow(ghost);
00192 this->m_vector[index] = factory.create(abox, m_comps, it[i]);
00193 if (this->m_vector[index] == NULL)
00194 {
00195 MayDay::Error("OutOfMemory in BoxLayoutData::allocateGhostVector");
00196 }
00197 }
00198 }
00199
00200 template<class T>
00201 inline void BoxLayoutData<T>::apply(void (*a_func)(const Box& box, int comps, T& t))
00202 {
00203 DataIterator it(this->dataIterator()); int nbox=it.size();
00204 #pragma omp parallel for
00205 for(int i=0; i<nbox; i++)
00206
00207 {
00208 a_func(this->box(it[i]), m_comps, *(this->m_vector[ it[i].datInd() ]));
00209 }
00210 }
00211
00212
00213 template <class T>
00214 AliasDataFactory<T>::AliasDataFactory(BoxLayoutData<T>* a_original, const Interval& interval)
00215 {
00216 define(a_original, interval);
00217 }
00218
00219 template <class T>
00220 void AliasDataFactory<T>::define(BoxLayoutData<T>* a_original, const Interval& interval)
00221 {
00222 m_origPointer = a_original;
00223 m_interval = interval;
00224 }
00225
00226 template <class T>
00227 T* AliasDataFactory<T>::create(const Box& a_box, int ncomps, const DataIndex& a_dataInd) const
00228 {
00229
00230 CH_assert(ncomps = m_interval.size());
00231 T* rtn = new T(m_interval, m_origPointer->operator[](a_dataInd));
00232 return rtn;
00233 }
00234
00235 template<class T>
00236 void BoxLayoutData<T>::makeItSo(const Interval& a_srcComps,
00237 const BoxLayoutData<T>& a_src,
00238 BoxLayoutData<T>& a_dest,
00239 const Interval& a_destComps,
00240 const Copier& a_copier,
00241 const LDOperator<T>& a_op) const
00242 {
00243 makeItSoBegin(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
00244 makeItSoLocalCopy(a_srcComps, a_src, a_dest, a_destComps, a_copier, a_op);
00245 makeItSoEnd(a_dest, a_destComps, a_op);
00246 }
00247
00248 template<class T>
00249 void BoxLayoutData<T>::makeItSoBegin(const Interval& a_srcComps,
00250 const BoxLayoutData<T>& a_src,
00251 BoxLayoutData<T>& a_dest,
00252 const Interval& a_destComps,
00253 const Copier& a_copier,
00254 const LDOperator<T>& a_op) const
00255 {
00256
00257
00258 #ifdef CH_MPI
00259
00260 allocateBuffers(a_src, a_srcComps,
00261 a_dest, a_destComps,
00262 a_copier,
00263 a_op);
00264
00265 writeSendDataFromMeIntoBuffers(a_src, a_srcComps, a_op);
00266
00267
00268
00269
00270 {
00271 CH_TIME("post_messages");
00272 this->numReceives = m_buff->m_toMe.size();
00273
00274 if (this->numReceives > 0)
00275 {
00276 postReceivesToMe();
00277 }
00278
00279
00280 this->numSends = m_buff->m_fromMe.size();
00281 if (this->numSends > 0)
00282 {
00283 postSendsFromMe();
00284 }
00285 }
00286 #endif
00287 }
00288
00289 template<class T>
00290 void BoxLayoutData<T>::makeItSoLocalCopy(const Interval& a_srcComps,
00291 const BoxLayoutData<T>& a_src,
00292 BoxLayoutData<T>& a_dest,
00293 const Interval& a_destComps,
00294 const Copier& a_copier,
00295 const LDOperator<T>& a_op) const
00296 {
00297
00298 CH_TIME("local copying");
00299 CopyIterator it(a_copier, CopyIterator::LOCAL);
00300 int items=it.size();
00301 #ifdef _OPENMP
00302 bool threadSafe = m_threadSafe && (a_op.threadSafe());
00303 #endif
00304 #pragma omp parallel for if(threadSafe)
00305 for (int n=0; n<items; n++)
00306 {
00307 const MotionItem& item = it[n];
00308 a_op.op(a_dest[item.toIndex], item.fromRegion,
00309 a_destComps,
00310 item.toRegion,
00311 a_src[item.fromIndex],
00312 a_srcComps);
00313
00314 }
00315 }
00316 template<class T>
00317 void BoxLayoutData<T>::makeItSoEnd(BoxLayoutData<T>& a_dest,
00318 const Interval& a_destComps,
00319 const LDOperator<T>& a_op) const
00320 {
00321
00322 completePendingSends();
00323
00324 unpackReceivesToMe(a_dest, a_destComps, a_op);
00325
00326 }
00327
00328 #ifndef CH_MPI
00329
00330 template<class T>
00331 void BoxLayoutData<T>::completePendingSends() const
00332 {
00333 }
00334
00335 template<class T>
00336 void BoxLayoutData<T>::allocateBuffers(const BoxLayoutData<T>& a_src,
00337 const Interval& a_srcComps,
00338 const BoxLayoutData<T>& a_dest,
00339 const Interval& a_destComps,
00340 const Copier& a_copier,
00341 const LDOperator<T>& a_op
00342 ) const
00343 {
00344 }
00345
00346 template<class T>
00347 void BoxLayoutData<T>::writeSendDataFromMeIntoBuffers(const BoxLayoutData<T>& a_src,
00348 const Interval& a_srcComps,
00349 const LDOperator<T>& a_op) const
00350 {
00351 }
00352
00353 template<class T>
00354 void BoxLayoutData<T>::postSendsFromMe() const
00355 {
00356 }
00357
00358 template<class T>
00359 void BoxLayoutData<T>::postReceivesToMe() const
00360 {
00361 }
00362
00363 template<class T>
00364 void BoxLayoutData<T>::unpackReceivesToMe(BoxLayoutData<T>& a_dest,
00365 const Interval& a_destComps,
00366 const LDOperator<T>& a_op) const
00367 {
00368 }
00369
00370 template<class T>
00371 void BoxLayoutData<T>::unpackReceivesToMe_append(LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
00372 const Interval& a_destComps,
00373 int ncomp,
00374 const DataFactory<T>& factory,
00375 const LDOperator<T>& a_op) const
00376 {
00377 }
00378
00379 #else
00380
00381
00382
00383 template<class T>
00384 void BoxLayoutData<T>::completePendingSends() const
00385 {
00386 CH_TIME("completePendingSends");
00387 if (this->numSends > 0)
00388 {
00389 CH_TIME("MPI_Waitall");
00390 m_sendStatus.resize(this->numSends);
00391 int result = MPI_Waitall(this->numSends, &(m_sendRequests[0]), &(m_sendStatus[0]));
00392 if (result != MPI_SUCCESS)
00393 {
00394
00395 }
00396 }
00397 this->numSends = 0;
00398 }
00399
00400 template<class T>
00401 void BoxLayoutData<T>::allocateBuffers(const BoxLayoutData<T>& a_src,
00402 const Interval& a_srcComps,
00403 const BoxLayoutData<T>& a_dest,
00404 const Interval& a_destComps,
00405 const Copier& a_copier,
00406 const LDOperator<T>& a_op) const
00407 {
00408 CH_TIME("MPI_allocateBuffers");
00409 m_buff = &(((Copier&)a_copier).m_buffers);
00410 if (m_buff->isDefined(a_srcComps.size()) && T::preAllocatable()<2) return;
00411
00412 m_buff->m_ncomps = a_srcComps.size();
00413
00414 m_buff->m_fromMe.resize(0);
00415 m_buff->m_toMe.resize(0);
00416 size_t sendBufferSize = 0;
00417 size_t recBufferSize = 0;
00418
00419
00420 T dummy;
00421 for (CopyIterator it(a_copier, CopyIterator::FROM); it.ok(); ++it)
00422 {
00423 const MotionItem& item = it();
00424 CopierBuffer::bufEntry b;
00425 b.item = &item;
00426 b.size = a_op.size(a_src[item.fromIndex], item.fromRegion, a_srcComps);
00427 sendBufferSize+=b.size;
00428 b.procID = item.procID;
00429 m_buff->m_fromMe.push_back(b);
00430 }
00431 sort(m_buff->m_fromMe.begin(), m_buff->m_fromMe.end());
00432 for (CopyIterator it(a_copier, CopyIterator::TO); it.ok(); ++it)
00433 {
00434 const MotionItem& item = it();
00435 CopierBuffer::bufEntry b;
00436 b.item = &item;
00437 if (T::preAllocatable() == 0)
00438 {
00439 b.size = a_op.size(dummy, item.fromRegion, a_destComps);
00440 recBufferSize+=b.size;
00441 }
00442 else if (T::preAllocatable() == 1)
00443 {
00444 b.size = a_op.size(a_dest[item.toIndex], item.fromRegion, a_destComps);
00445 recBufferSize+=b.size;
00446 }
00447 b.procID = item.procID;
00448 m_buff->m_toMe.push_back(b);
00449 }
00450 sort(m_buff->m_toMe.begin(), m_buff->m_toMe.end());
00451
00452 if (T::preAllocatable() == 2)
00453 {
00454 CH_TIME("MPI_ Phase 1 of 2 Phase: preAllocatable==2");
00455 if (s_verbosity > 0) pout()<<"preAllocatable==2\n";
00456
00457
00458
00459 Vector<unsigned long> fdata;
00460 Vector<unsigned long> tdata;
00461 int count = 1;
00462 int scount = 1;
00463 if (m_buff->m_toMe.size() > 0)
00464 {
00465 tdata.resize(m_buff->m_toMe.size(), ULONG_MAX);
00466 m_receiveRequests.resize(numProc()-1);
00467 m_receiveStatus.resize(numProc()-1);
00468 MPI_Request* Rptr = &(m_receiveRequests[0]);
00469
00470 unsigned int lastProc = m_buff->m_toMe[0].procID;
00471 int messageSize = 1;
00472 unsigned long * dataPtr = &(tdata[0]);
00473 unsigned int i = 1;
00474
00475 for (;i<m_buff->m_toMe.size(); ++i)
00476 {
00477 CopierBuffer::bufEntry& b = m_buff->m_toMe[i];
00478 if (b.procID == lastProc)
00479 messageSize++;
00480 else
00481 {
00482
00483 MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
00484 1, Chombo_MPI::comm, Rptr);
00485 Rptr++;
00486
00487 lastProc = b.procID;
00488 messageSize = 1;
00489 dataPtr = &(tdata[i]);
00490 count++;
00491 }
00492 }
00493
00494 MPI_Irecv(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
00495 1, Chombo_MPI::comm, Rptr );
00496 }
00497
00498 if (m_buff->m_fromMe.size() > 0)
00499 {
00500 fdata.resize(m_buff->m_fromMe.size());
00501 fdata[0]=m_buff->m_fromMe[0].size;
00502 m_sendRequests.resize(numProc()-1);
00503 m_sendStatus.resize(numProc()-1);
00504 MPI_Request* Rptr = &(m_sendRequests[0]);
00505
00506 unsigned int lastProc = m_buff->m_fromMe[0].procID;
00507 int messageSize = 1;
00508 unsigned long * dataPtr = &(fdata[0]);
00509 unsigned int i = 1;
00510 for (;i<m_buff->m_fromMe.size(); ++i)
00511 {
00512 fdata[i] = m_buff->m_fromMe[i].size;
00513 CopierBuffer::bufEntry& b = m_buff->m_fromMe[i];
00514 if (b.procID == lastProc)
00515 messageSize++;
00516 else
00517 {
00518 MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
00519 1, Chombo_MPI::comm, Rptr);
00520
00521 Rptr++;
00522 lastProc = b.procID;
00523 messageSize = 1;
00524 dataPtr = &(fdata[i]);
00525 scount++;
00526 }
00527 }
00528
00529 MPI_Isend(dataPtr, messageSize, MPI_UNSIGNED_LONG, lastProc,
00530 1, Chombo_MPI::comm, Rptr);
00531 }
00532
00533 if (m_buff->m_toMe.size() > 0)
00534 {
00535
00536 int result = MPI_Waitall(count, &(m_receiveRequests[0]), &(m_receiveStatus[0]));
00537 if (result != MPI_SUCCESS)
00538 {
00539 MayDay::Error("First pass of two-phase communication failed");
00540 }
00541
00542 for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
00543 {
00544 CH_assert(tdata[i] != ULONG_MAX);
00545 m_buff->m_toMe[i].size = tdata[i];
00546 recBufferSize+= tdata[i];
00547 }
00548 }
00549
00550 if (m_buff->m_fromMe.size() > 0)
00551 {
00552
00553 int result = MPI_Waitall(scount, &(m_sendRequests[0]), &(m_sendStatus[0]));
00554 if (result != MPI_SUCCESS)
00555 {
00556 MayDay::Error("First pass of two-phase communication failed");
00557 }
00558
00559 }
00560 }
00561
00562
00563
00564 if (sendBufferSize > m_buff->m_sendcapacity)
00565 {
00566 freeMT((m_buff->m_sendbuffer));
00567 if (s_verbosity > 0) pout()<<"malloc send buffer "<<sendBufferSize<<std::endl;
00568 (m_buff->m_sendbuffer) = mallocMT(sendBufferSize);
00569 if ((m_buff->m_sendbuffer) == NULL)
00570 {
00571 MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
00572 }
00573 m_buff->m_sendcapacity = sendBufferSize;
00574 }
00575
00576 if (recBufferSize > m_buff->m_reccapacity)
00577 {
00578 freeMT(m_buff->m_recbuffer);
00579 if (s_verbosity > 0) pout()<<"malloc receive buffer "<<recBufferSize<<std::endl;
00580 m_buff->m_recbuffer = mallocMT(recBufferSize);
00581 if (m_buff->m_recbuffer == NULL)
00582 {
00583 MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
00584 }
00585 m_buff->m_reccapacity = recBufferSize;
00586 }
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598 char* nextFree = (char*)(m_buff->m_sendbuffer);
00599 if (m_buff->m_fromMe.size() > 0)
00600 {
00601 for (unsigned int i=0; i<m_buff->m_fromMe.size(); ++i)
00602 {
00603 m_buff->m_fromMe[i].bufPtr = nextFree;
00604 nextFree += m_buff->m_fromMe[i].size;
00605 }
00606 }
00607
00608 nextFree = (char*)m_buff->m_recbuffer;
00609 if (m_buff->m_toMe.size() > 0)
00610 {
00611 for (unsigned int i=0; i<m_buff->m_toMe.size(); ++i)
00612 {
00613 m_buff->m_toMe[i].bufPtr = nextFree;
00614 nextFree += m_buff->m_toMe[i].size;
00615 }
00616 }
00617
00618
00619
00620
00621 }
00622
00623 template<class T>
00624 void BoxLayoutData<T>::writeSendDataFromMeIntoBuffers(const BoxLayoutData<T>& a_src,
00625 const Interval& a_srcComps,
00626 const LDOperator<T>& a_op) const
00627 {
00628 CH_TIME("write Data to buffers");
00629 int isize = m_buff->m_fromMe.size();
00630 #ifdef _OPENMP
00631 bool threadSafe = m_threadSafe && (a_op.threadSafe());
00632 #endif
00633 #pragma omp parallel for if(threadSafe)
00634 for (unsigned int i=0; i< isize; ++i)
00635 {
00636 const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[i];
00637 a_op.linearOut(a_src[entry.item->fromIndex], entry.bufPtr,
00638 entry.item->fromRegion, a_srcComps);
00639 }
00640 }
00641
00642 template<class T>
00643 void BoxLayoutData<T>::postSendsFromMe() const
00644 {
00645 CH_TIME("post_Sends");
00646
00647
00648
00649 this->numSends = m_buff->m_fromMe.size();
00650
00651 if (this->numSends > 1)
00652 {
00653 for (unsigned int i=m_buff->m_fromMe.size()-1; i>0; --i)
00654 {
00655 if (m_buff->m_fromMe[i].procID == m_buff->m_fromMe[i-1].procID)
00656 {
00657 this->numSends--;
00658 m_buff->m_fromMe[i-1].size = m_buff->m_fromMe[i-1].size + m_buff->m_fromMe[i].size;
00659 m_buff->m_fromMe[i].size = 0;
00660 }
00661 }
00662 }
00663 m_sendRequests.resize(this->numSends);
00664 std::list<MPI_Request> extraRequests;
00665
00666 unsigned int next=0;
00667 long long maxSize = 0;
00668 for (int i=0; i<this->numSends; ++i)
00669 {
00670 const CopierBuffer::bufEntry& entry = m_buff->m_fromMe[next];
00671 char* buffer = (char*)entry.bufPtr;
00672 std::size_t bsize = entry.size;
00673 int idtag=0;
00674 while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
00675 {
00676 extraRequests.push_back(MPI_Request());
00677 {
00678
00679 MPI_Isend(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
00680 idtag, Chombo_MPI::comm, &(extraRequests.back()));
00681 }
00682 maxSize = CH_MAX_MPI_MESSAGE_SIZE;
00683 bsize -= CH_MAX_MPI_MESSAGE_SIZE;
00684 buffer+=CH_MAX_MPI_MESSAGE_SIZE;
00685 idtag++;
00686 }
00687 {
00688
00689 MPI_Isend(buffer, bsize, MPI_BYTE, entry.procID,
00690 idtag, Chombo_MPI::comm, &(m_sendRequests[i]));
00691 }
00692 maxSize = Max<long long>(bsize, maxSize);
00693 ++next;
00694 while (next < m_buff->m_fromMe.size() && m_buff->m_fromMe[next].size == 0) ++next;
00695 }
00696 for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
00697 {
00698 m_sendRequests.push_back(*it);
00699 }
00700 this->numSends = m_sendRequests.size();
00701
00702 CH_MaxMPISendSize = Max<long long>(CH_MaxMPISendSize, maxSize);
00703
00704 }
00705
00706 template<class T>
00707 void BoxLayoutData<T>::postReceivesToMe() const
00708 {
00709 CH_TIME("post_Receives");
00710 this->numReceives = m_buff->m_toMe.size();
00711
00712 if (this->numReceives > 1)
00713 {
00714 for (unsigned int i=m_buff->m_toMe.size()-1; i>0; --i)
00715 {
00716 if (m_buff->m_toMe[i].procID == m_buff->m_toMe[i-1].procID)
00717 {
00718 this->numReceives--;
00719 m_buff->m_toMe[i-1].size += m_buff->m_toMe[i].size;
00720 m_buff->m_toMe[i].size = 0;
00721 }
00722
00723 }
00724 }
00725 m_receiveRequests.resize(this->numReceives);
00726 std::list<MPI_Request> extraRequests;
00727 unsigned int next=0;
00728 long long maxSize = 0;
00729 for (int i=0; i<this->numReceives; ++i)
00730 {
00731 const CopierBuffer::bufEntry& entry = m_buff->m_toMe[next];
00732 char* buffer = (char*)entry.bufPtr;
00733 size_t bsize = entry.size;
00734 int idtag=0;
00735 while (bsize > CH_MAX_MPI_MESSAGE_SIZE)
00736 {
00737 extraRequests.push_back(MPI_Request());
00738 {
00739
00740 MPI_Irecv(buffer, CH_MAX_MPI_MESSAGE_SIZE, MPI_BYTE, entry.procID,
00741 idtag, Chombo_MPI::comm, &(extraRequests.back()));
00742 }
00743 maxSize = CH_MAX_MPI_MESSAGE_SIZE;
00744 bsize -= CH_MAX_MPI_MESSAGE_SIZE;
00745 buffer+=CH_MAX_MPI_MESSAGE_SIZE;
00746 idtag++;
00747 }
00748 {
00749
00750 MPI_Irecv(buffer, bsize, MPI_BYTE, entry.procID,
00751 idtag, Chombo_MPI::comm, &(m_receiveRequests[i]));
00752 }
00753 ++next;
00754 maxSize = Max<long long>(bsize, maxSize);
00755 while (next < m_buff->m_toMe.size() && m_buff->m_toMe[next].size == 0) ++next;
00756 }
00757 for (std::list<MPI_Request>::iterator it = extraRequests.begin(); it != extraRequests.end(); ++it)
00758 {
00759 m_receiveRequests.push_back(*it);
00760 }
00761 this->numReceives = m_receiveRequests.size();
00762
00763 CH_MaxMPIRecvSize = Max<long long>(CH_MaxMPIRecvSize, maxSize);
00764
00765
00766 }
00767
00768 template<class T>
00769 void BoxLayoutData<T>::unpackReceivesToMe(BoxLayoutData<T>& a_dest,
00770 const Interval& a_destComps,
00771 const LDOperator<T>& a_op) const
00772 {
00773
00774 CH_TIME("unpack_messages");
00775
00776 if (this->numReceives > 0)
00777 {
00778 m_receiveStatus.resize(this->numReceives);
00779 int result;
00780 {
00781 CH_TIME("MPI_Waitall");
00782 result = MPI_Waitall(this->numReceives, &(m_receiveRequests[0]),
00783 &(m_receiveStatus[0]));
00784 }
00785 if (result != MPI_SUCCESS)
00786 {
00787
00788
00789 }
00790
00791 int isize = m_buff->m_toMe.size();
00792 #ifdef _OPENMP
00793 bool threadSafe = m_threadSafe && (a_op.threadSafe());
00794 #endif
00795 #pragma omp parallel for if(threadSafe)
00796 for (unsigned int i=0; i< isize; ++i)
00797 {
00798 const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
00799 a_op.linearIn(a_dest[entry.item->toIndex], entry.bufPtr, entry.item->toRegion, a_destComps);
00800 }
00801 }
00802 this->numReceives = 0;
00803 }
00804
00805 template<class T>
00806 void BoxLayoutData<T>::unpackReceivesToMe_append(LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
00807 const Interval& a_destComps,
00808 int ncomp,
00809 const DataFactory<T>& factory,
00810
00811 const LDOperator<T>& a_op) const
00812 {
00813
00814 if (this->numReceives > 0)
00815 {
00816 m_receiveStatus.resize(this->numReceives);
00817 int result;
00818 {
00819 CH_TIME("MPI_Waitall");
00820 result = MPI_Waitall(this->numReceives, &(m_receiveRequests[0]),
00821 &(m_receiveStatus[0]));
00822 }
00823 if (result != MPI_SUCCESS)
00824 {
00825
00826 }
00827 int isize = m_buff->m_toMe.size();
00828
00829
00830 for (int i=0; i< isize; ++i)
00831 {
00832 const CopierBuffer::bufEntry& entry = m_buff->m_toMe[i];
00833 const MotionItem& item = *(entry.item);
00834 RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );;
00835
00836 a_op.linearIn(*newT, entry.bufPtr, item.toRegion, a_destComps);
00837 a_dest[item.toIndex].push_back(newT);
00838 }
00839 }
00840
00841 this->numReceives = 0;
00842 }
00843 #endif
00844
00845 template <class T>
00846 void BoxLayoutData<T>::generalCopyTo(const BoxLayout& a_destGrids,
00847 LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
00848 const Interval& a_srcComps,
00849 const ProblemDomain& a_domain,
00850 const Copier& a_copier,
00851 const DataFactory<T>& factory) const
00852 {
00853
00854 CH_assert(T::preAllocatable() == 0);
00855 a_dest.define(a_destGrids);
00856
00857 LDOperator<T> a_op;
00858
00859 int ncomp = a_srcComps.size();
00860 Interval destComps(0, ncomp-1);
00861 allocateBuffers(*this, a_srcComps,
00862 *this, destComps,
00863 a_copier, a_op);
00864
00865 writeSendDataFromMeIntoBuffers(*this, a_srcComps, a_op);
00866
00867
00868
00869
00870 #ifdef CH_MPI
00871 this->numReceives = m_buff->m_toMe.size();
00872 if (this->numReceives > 0)
00873 {
00874 postReceivesToMe();
00875 }
00876
00877 this->numSends = m_buff->m_fromMe.size();
00878 if (this->numSends > 0)
00879 {
00880 postSendsFromMe();
00881 }
00882 #endif
00883
00884
00885 CopyIterator it(a_copier, CopyIterator::LOCAL);
00886 int items=it.size();
00887
00888
00889
00890
00891 for(int i=0; i<items; ++i)
00892 {
00893 const MotionItem& item = it[i];
00894 RefCountedPtr<T> newT( factory.create(item.toRegion, ncomp, item.toIndex) );
00895
00896 a_op.op(*newT, item.fromRegion,
00897 destComps,
00898 item.toRegion,
00899 this->operator[](item.fromIndex),
00900 a_srcComps);
00901 a_dest[item.toIndex].push_back(newT);
00902 }
00903
00904
00905 completePendingSends();
00906
00907 unpackReceivesToMe_append(a_dest, destComps, ncomp, factory, a_op);
00908 }
00909
00910 template <class T>
00911 void BoxLayoutData<T>::generalCopyTo(const BoxLayout& a_destGrids,
00912 LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
00913 const Interval& a_srcComps,
00914 const ProblemDomain& a_domain,
00915 const DataFactory<T>& factory) const
00916 {
00917 Copier copier;
00918 copier.define(this->m_boxLayout, a_destGrids, a_domain, IntVect::Zero);
00919
00920 generalCopyTo(a_destGrids, a_dest, a_srcComps, a_domain, copier, factory);
00921 }
00922
00923 template <class T>
00924 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
00925 BoxLayoutData<T>& a_dest,
00926 const Interval& a_destComps,
00927 const ProblemDomain& a_domain) const
00928 {
00929 Copier copier;
00930 copier.define(this->m_boxLayout, a_dest.m_boxLayout, a_domain, IntVect::Zero);
00931 addTo(a_srcComps, a_dest, a_destComps, a_domain, copier);
00932 }
00933
00934 template <class T>
00935 class LDaddOp : public LDOperator<T>
00936 {
00937 public:
00938 virtual void op(T& dest,
00939 const Box& RegionFrom,
00940 const Interval& Cdest,
00941 const Box& RegionTo,
00942 const T& src,
00943 const Interval& Csrc) const
00944 {
00945 dest.plus(src, RegionFrom, RegionTo, Csrc.begin(), Cdest.begin(), Cdest.size());
00946 }
00947 virtual void linearIn(T& arg, void* buf, const Box& R,
00948 const Interval& comps) const
00949 {
00950 Real* buffer = (Real*)buf;
00951
00952 ForAllXBNNnoindx(Real, arg, R, comps.begin(), comps.size())
00953 {
00954 argR+=*buffer;
00955 buffer++;
00956 } EndFor
00957
00958 }
00959
00960 virtual bool threadSafe() const
00961 {
00962 return false;
00963 }
00964 };
00965
00966 template <class T>
00967 void BoxLayoutData<T>::addTo(const Interval& a_srcComps,
00968 BoxLayoutData<T>& a_dest,
00969 const Interval& a_destComps,
00970 const ProblemDomain& a_domain,
00971 const Copier& a_copier) const
00972 {
00973 CH_TIME("addTo");
00974 LDaddOp<T> addOp;
00975 makeItSo(a_srcComps, *this, a_dest, a_destComps, a_copier, addOp);
00976 }
00977
00978 #include "NamespaceFooter.H"
00979 #endif