#include <cstdlib>
#include <algorithm>
#include "parstream.H"
#include "memtrack.H"

using std::sort;
template<class T>
void BoxLayoutData<T>::makeItSo(const Interval&         a_srcComps,
                                const BoxLayoutData<T>& a_src,
                                BoxLayoutData<T>&       a_dest,
                                const Interval&         a_destComps,
                                const Copier&           a_copier,
                                const LDOperator<T>&    a_op) const
{
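  // The exchange proceeds in phases:
  //   1. (MPI) allocate and partition send/receive buffers for this Copier,
  //   2. pack the outgoing data into the send buffer,
  //   3. (MPI) post the non-blocking receives and sends,
  //   4. perform the purely local copies while messages are in flight,
  //   5. wait for the sends to complete and unpack what was received.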
#ifdef MPI
  // Buffers are allocated lazily and reused across calls with the same Copier;
  // they are rebuilt when sizes cannot be precomputed (preAllocatable() == 2),
  // this Copier has not been used yet, or a different Copier was used last.
  static Copier* lastCopier = NULL;

  if(T::preAllocatable() == 2 || !a_copier.bufferAllocated() ||
     (m_fromMe.size() + m_toMe.size() == 0) || lastCopier != &a_copier){
    allocateBuffers(a_src,  a_srcComps,
                    a_dest, a_destComps,
                    a_copier,
                    a_op);
    a_copier.setBufferAllocated(true);
  }
  lastCopier = (Copier*)(&a_copier);

#endif

  // pack the data destined for other processors into the send buffer
  writeSendDataFromMeIntoBuffers(a_src, a_srcComps, a_op);

#ifdef MPI
  // post the non-blocking receives first, then the sends
  if (m_toMe.size() > 0) {
    postReceivesToMe();
  }

  if (m_fromMe.size() > 0) {
    postSendsFromMe();
  }
#endif

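  // local (on-processor) copies; under MPI these overlap with the messages
  // posted above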
  for(CopyIterator it(a_copier, CopyIterator::LOCAL); it.ok(); ++it)
    {
      const MotionItem& item = it();
      a_op.op(a_dest[item.toIndex], item.fromRegion,
              a_destComps,
              item.toRegion,
              a_src[item.fromIndex],
              a_srcComps);
    }

  // wait for the sends to finish, then unpack what was received
  completePendingSends();

  unpackReceivesToMe(a_dest, a_destComps, a_op);
}

#ifndef MPI
// Serial build: there is no remote communication, so these methods are no-ops.

template<class T>
void BoxLayoutData<T>::completePendingSends() const
{;}

template<class T>
void BoxLayoutData<T>::allocateBuffers(const BoxLayoutData<T>& a_src,
                                       const Interval&         a_srcComps,
                                       const BoxLayoutData<T>& a_dest,
                                       const Interval&         a_destComps,
                                       const Copier&           a_copier,
                                       const LDOperator<T>&    a_op) const
{;}

template<class T>
void BoxLayoutData<T>::writeSendDataFromMeIntoBuffers(const BoxLayoutData<T>& a_src,
                                                      const Interval&         a_srcComps,
                                                      const LDOperator<T>&    a_op) const
{;}

template<class T>
void BoxLayoutData<T>::postSendsFromMe() const
{;}

template<class T>
void BoxLayoutData<T>::postReceivesToMe() const
{;}

template<class T>
void BoxLayoutData<T>::unpackReceivesToMe(BoxLayoutData<T>&    a_dest,
                                          const Interval&      a_destComps,
                                          const LDOperator<T>& a_op) const
{;}

template<class T>
void BoxLayoutData<T>::unpackReceivesToMe_append(LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
                                                 const Interval&       a_destComps,
                                                 int                   ncomp,
                                                 const DataFactory<T>& factory,
                                                 const LDOperator<T>&  a_op) const
{;}

#else

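// MPI build: buffer bookkeeping and the non-blocking send/receive machinery
// used by makeItSo and generalCopyTo.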
template<class T>
void BoxLayoutData<T>::completePendingSends() const
{
  if(numSends > 0){
    int result = MPI_Waitall(numSends, m_sendRequests, m_sendStatus);
    if(result != MPI_SUCCESS)
      {
        MayDay::Error("MPI_Waitall failed in BoxLayoutData::completePendingSends");
      }

    delete[] m_sendRequests;
    delete[] m_sendStatus;
  }
  numSends = 0;
}

template<class T>
void BoxLayoutData<T>::allocateBuffers(const BoxLayoutData<T>& a_src,
                                       const Interval&         a_srcComps,
                                       const BoxLayoutData<T>& a_dest,
                                       const Interval&         a_destComps,
                                       const Copier&           a_copier,
                                       const LDOperator<T>&    a_op) const
{
  m_fromMe.resize(0);
  m_toMe.resize(0);
  size_t sendBufferSize = 0;
  size_t recBufferSize  = 0;

  // compute the buffer size each motion item requires
  T dummy;
  for(CopyIterator it(a_copier, CopyIterator::FROM); it.ok(); ++it)
    {
      const MotionItem& item = it();
      bufEntry b;
      b.item = &item;
      b.size = a_op.size(a_src[item.fromIndex], item.fromRegion, a_srcComps);
      sendBufferSize += b.size;
      b.procID = item.procID;
      m_fromMe.push_back(b);
    }
  sort(m_fromMe.begin(), m_fromMe.end());
  for(CopyIterator it(a_copier, CopyIterator::TO); it.ok(); ++it)
    {
      const MotionItem& item = it();
      bufEntry b;
      b.item = &item;
      if(T::preAllocatable() == 0)
        {
          b.size = a_op.size(dummy, item.toRegion, a_destComps);
          recBufferSize += b.size;
        }
      else if (T::preAllocatable() == 1)
        {
          b.size = a_op.size(a_dest[item.toIndex], item.toRegion, a_destComps);
          recBufferSize += b.size;
        }
      b.procID = item.procID;
      m_toMe.push_back(b);
    }
  sort(m_toMe.begin(), m_toMe.end());

  if(T::preAllocatable() == 2)
    {
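      // T cannot report its linear size without the actual object
      // (preAllocatable() == 2), so receive sizes are learned from the senders:
      // each sender posts a fire-and-forget MPI_Isend of every slice size, and
      // each receiver posts matching MPI_Irecv's and waits before sizing the
      // receive buffer.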
      if(m_fromMe.size() > 0)
        {
          MPI_Request nullrequest;

          int lastProc = -1;
          int messageIndex = 0;
          for(int i=0; i<m_fromMe.size(); ++i)
            {
              bufEntry& b = m_fromMe[i];
              // consecutive entries for the same processor get increasing tags
              if(b.procID == lastProc) messageIndex++;
              else                     messageIndex = 0;
              lastProc = b.procID;
              MPI_Isend(&(b.size), 1, MPI_UNSIGNED_LONG, b.procID,
                        messageIndex, Chombo_MPI::comm, &(nullrequest));
              MPI_Request_free(&(nullrequest));
            }
        }
      if(m_toMe.size() > 0)
        {
          m_receiveRequests = new MPI_Request[m_toMe.size()];
          m_receiveStatus   = new MPI_Status[m_toMe.size()];
          int lastProc = -1;
          int messageIndex = 0;
          for(int i=0; i<m_toMe.size(); ++i)
            {
              bufEntry& b = m_toMe[i];
              if(b.procID == lastProc) messageIndex++;
              else                     messageIndex = 0;
              lastProc = b.procID;
              MPI_Irecv(&(b.size), 1, MPI_UNSIGNED_LONG, b.procID,
                        messageIndex, Chombo_MPI::comm, m_receiveRequests+i);
            }

          int result = MPI_Waitall(m_toMe.size(), m_receiveRequests, m_receiveStatus);
          if(result != MPI_SUCCESS)
            {
              MayDay::Error("First pass of two-phase communication failed");
            }
          for(int i=0; i<m_toMe.size(); ++i) recBufferSize += m_toMe[i].size;
          delete[] m_receiveRequests;
          delete[] m_receiveStatus;
        }
    }

  // grow the persistent send/receive buffers if the new requirements exceed
  // their current capacity
  if(sendBufferSize > m_sendcapacity)
    {
      free(m_sendbuffer);
      m_sendbuffer = malloc(sendBufferSize);
      if(m_sendbuffer == NULL)
        {
          MayDay::Error("Out of memory in BoxLayoutData::allocateBuffers");
        }
      m_sendcapacity = sendBufferSize;
    }

  if(recBufferSize > m_reccapacity)
    {
      free(m_recbuffer);
      m_recbuffer = malloc(recBufferSize);
      if(m_recbuffer == NULL)
        {
          MayDay::Error("Out of memory in BoxLayoutData::allocateBuffers");
        }
      m_reccapacity = recBufferSize;
    }
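  // carve the send and receive buffers into contiguous slices, one per motion
  // item, in the same sorted order used when posting messages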
  char* nextFree = (char*)m_sendbuffer;
  if(m_fromMe.size() > 0)
    {
      for(unsigned int i=0; i<m_fromMe.size(); ++i)
        {
          m_fromMe[i].bufPtr = nextFree;
          nextFree += m_fromMe[i].size;
        }
    }

  nextFree = (char*)m_recbuffer;
  if(m_toMe.size() > 0)
    {
      for(unsigned int i=0; i<m_toMe.size(); ++i)
        {
          m_toMe[i].bufPtr = nextFree;
          nextFree += m_toMe[i].size;
        }
    }
}

template<class T>
void BoxLayoutData<T>::writeSendDataFromMeIntoBuffers(const BoxLayoutData<T>& a_src,
                                                      const Interval&         a_srcComps,
                                                      const LDOperator<T>&    a_op) const
{
  // serialize each outgoing region directly into its slice of the send buffer
  for(unsigned int i=0; i<m_fromMe.size(); ++i)
    {
      const bufEntry& entry = m_fromMe[i];
      a_op.linearOut(a_src[entry.item->fromIndex], entry.bufPtr,
                     entry.item->fromRegion, a_srcComps);
    }
}

template<class T>
void BoxLayoutData<T>::postSendsFromMe() const
{
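  // m_fromMe is sorted by processor and its slices are contiguous in the send
  // buffer, so consecutive entries bound for the same processor are merged
  // into a single message by accumulating their sizes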
  numSends = m_fromMe.size();
  if(numSends > 1){
    for(unsigned int i=m_fromMe.size()-1; i>0; --i)
      {
        if(m_fromMe[i].procID == m_fromMe[i-1].procID)
          {
            numSends--;
            m_fromMe[i-1].size += m_fromMe[i].size;
            m_fromMe[i].size = 0;
          }
      }
  }
  m_sendRequests = new MPI_Request[numSends];
  m_sendStatus   = new MPI_Status[numSends];

  // post one MPI_Isend per (coalesced) destination processor
  unsigned int next = 0;
  for(int i=0; i<numSends; ++i)
    {
      const bufEntry& entry = m_fromMe[next];
      MPI_Isend(entry.bufPtr, entry.size, MPI_BYTE, entry.procID,
                0, Chombo_MPI::comm, m_sendRequests+i);
      ++next;
      // skip entries whose data was folded into the message just posted
      while(next < m_fromMe.size() && m_fromMe[next].size == 0) ++next;
    }
}

template<class T>
void BoxLayoutData<T>::postReceivesToMe() const
{
  numReceives = m_toMe.size();

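  // merge consecutive receives from the same processor, mirroring the
  // coalescing done in postSendsFromMe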
  if(numReceives > 1){
    for(unsigned int i=m_toMe.size()-1; i>0; --i)
      {
        if(m_toMe[i].procID == m_toMe[i-1].procID)
          {
            numReceives--;
            m_toMe[i-1].size += m_toMe[i].size;
            m_toMe[i].size = 0;
          }
      }
  }
  m_receiveRequests = new MPI_Request[numReceives];
  m_receiveStatus   = new MPI_Status[numReceives];

  // post one MPI_Irecv per (coalesced) source processor
  unsigned int next = 0;
  for(int i=0; i<numReceives; ++i)
    {
      const bufEntry& entry = m_toMe[next];
      MPI_Irecv(entry.bufPtr, entry.size, MPI_BYTE, entry.procID,
                0, Chombo_MPI::comm, m_receiveRequests+i);
      ++next;
      while(next < m_toMe.size() && m_toMe[next].size == 0) ++next;
    }
}

template<class T>
void BoxLayoutData<T>::unpackReceivesToMe(BoxLayoutData<T>&    a_dest,
                                          const Interval&      a_destComps,
                                          const LDOperator<T>& a_op) const
{
  if(numReceives > 0){
    int result = MPI_Waitall(numReceives, m_receiveRequests, m_receiveStatus);
    if(result != MPI_SUCCESS)
      {
        MayDay::Error("MPI_Waitall failed in BoxLayoutData::unpackReceivesToMe");
      }

    // deserialize every received slice into its destination box
    for(unsigned int i=0; i<m_toMe.size(); ++i)
      {
        const bufEntry& entry = m_toMe[i];
        a_op.linearIn(a_dest[entry.item->toIndex], entry.bufPtr, entry.item->toRegion, a_destComps);
      }

    delete[] m_receiveRequests;
    delete[] m_receiveStatus;
  }
  numReceives = 0;
}

template<class T>
void BoxLayoutData<T>::unpackReceivesToMe_append(LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
                                                 const Interval&       a_destComps,
                                                 int                   ncomp,
                                                 const DataFactory<T>& factory,
                                                 const LDOperator<T>&  a_op) const
{
  if(numReceives > 0){
    int result = MPI_Waitall(numReceives, m_receiveRequests, m_receiveStatus);
    if(result != MPI_SUCCESS)
      {
        MayDay::Error("MPI_Waitall failed in BoxLayoutData::unpackReceivesToMe_append");
      }

    // for each received slice, build a fresh T over the destination region and
    // append it to the Vector held at that index
    for(unsigned int i=0; i<m_toMe.size(); ++i)
      {
        const bufEntry& entry = m_toMe[i];
        const MotionItem& item = *(entry.item);
        RefCountedPtr<T> newT = factory.create(item.toRegion, ncomp, item.toIndex);

        a_op.linearIn(*newT, entry.bufPtr, item.toRegion, a_destComps);
        a_dest[item.toIndex].push_back(newT);
      }

    delete[] m_receiveRequests;
    delete[] m_receiveStatus;
  }
  numReceives = 0;
}

#endif

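// generalCopyTo copies a_srcComps of this data onto an arbitrary BoxLayout,
// producing for each destination box the Vector of newly created pieces that
// intersect it.  It follows the same pack / post / local-copy / unpack
// sequence as makeItSo, but builds a temporary Copier and appends objects made
// by the DataFactory instead of writing into an existing BoxLayoutData.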
template <class T>
void BoxLayoutData<T>::generalCopyTo(const BoxLayout&                        a_destGrids,
                                     LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
                                     const Interval&       a_srcComps,
                                     const ProblemDomain&  a_domain,
                                     const DataFactory<T>& factory) const
{
  assert(T::preAllocatable() == 0);
  LDOperator<T> a_op;

  a_dest.define(a_destGrids);
  Copier copier;
  copier.define(this->m_boxLayout, a_destGrids, a_domain, IntVect::Zero);

  int ncomp = a_srcComps.size();
  Interval destComps(0, ncomp-1);
  allocateBuffers(*this, a_srcComps,
                  *this, destComps,
                  copier, a_op);

  writeSendDataFromMeIntoBuffers(*this, a_srcComps, a_op);

#ifdef MPI
  if (m_toMe.size() > 0) {
    postReceivesToMe();
  }

  if (m_fromMe.size() > 0) {
    postSendsFromMe();
  }
#endif

  // local copies: create a new T for each local intersection and append it
  for(CopyIterator it(copier, CopyIterator::LOCAL); it.ok(); ++it)
    {
      const MotionItem& item = it();
      RefCountedPtr<T> newT = factory.create(item.toRegion, ncomp, item.toIndex);

      a_op.op(*newT, item.fromRegion,
              destComps,
              item.toRegion,
              this->operator[](item.fromIndex),
              a_srcComps);
      a_dest[item.toIndex].push_back(newT);
    }

  completePendingSends();

  unpackReceivesToMe_append(a_dest, destComps, ncomp, factory, a_op);
}
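
// Usage sketch (illustrative assumption, not taken from this file): gather all
// pieces of a one-component FArrayBox field that overlap a different set of
// grids.  The names grids, destGrids, and domain below are placeholders.
//
//   BoxLayoutData<FArrayBox> field(grids, 1);
//   LayoutData<Vector<RefCountedPtr<FArrayBox> > > pieces;
//   DefaultDataFactory<FArrayBox> factory;
//   field.generalCopyTo(destGrids, pieces, Interval(0, 0), domain, factory);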