
BoxLayoutDataI.H

#include <cstdlib>
#include <algorithm>
#include <cassert>   // for the assert() in generalCopyTo
#include "parstream.H"
#include "memtrack.H"

using std::sort;

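// makeItSo carries out the data motion described by a_copier: the a_srcComps
// components of a_src are copied into the a_destComps components of a_dest,
// with a_op performing the actual data movement.  The sequence is:
//   1. allocate (or re-use) the flat send/receive buffers       (MPI only)
//   2. linearize the outgoing regions into the send buffer
//   3. post the non-blocking receives and sends                 (MPI only)
//   4. perform the purely local, on-processor copies
//   5. wait for the sends, then wait for the receives and unpack into a_dest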
template<class T>
void BoxLayoutData<T>::makeItSo(const Interval&         a_srcComps,
                                const BoxLayoutData<T>& a_src,
                                BoxLayoutData<T>&       a_dest,
                                const Interval&         a_destComps,
                                const Copier&           a_copier,
                                const LDOperator<T>&    a_op) const
{
  // The following communication functions are nullOps in uniprocessor mode.

  // Instead of doing this here, do it at the end of makeItSo (ndk):
  //completePendingSends(); // wait for sends from a possible previous operation

  // Logic to determine under what conditions we can simply re-use our
  // messaging pattern and buffers from the last call to makeItSo.  It is
  // fairly elaborate.  (bvs)
#ifdef MPI
  static Copier* lastCopier = NULL;

#ifndef NDEBUG
  //verifyCommunications();
#endif

  if(T::preAllocatable() == 2 || !a_copier.bufferAllocated() ||
     (m_fromMe.size() + m_toMe.size() == 0) || lastCopier != &a_copier){
    allocateBuffers(a_src,  a_srcComps,
                    a_dest, a_destComps,
                    a_copier,
                    a_op);  // monkey with buffers, set up 'fromMe' and 'toMe' queues
    a_copier.setBufferAllocated(true);
  }
  lastCopier = (Copier*)(&a_copier);

#endif

  writeSendDataFromMeIntoBuffers(a_src, a_srcComps, a_op);

  // If there is nothing to recv/send, don't go into these functions
  // and allocate memory that will not be freed later.  (ndk)
  // The #ifdef MPI is for the m_toMe and m_fromMe
#ifdef MPI
  if (m_toMe.size() > 0) {
    postReceivesToMe(); // all non-blocking
  }

  if (m_fromMe.size() > 0) {
    postSendsFromMe();  // all non-blocking
  }
#endif

  //  Computation that could overlap with communication should really go here
  //  somehow, while all the non-blocking sending and receiving is going on.
  //
  //  One thought is to return from this function, at this point, an object
  //  that encapsulates the argument list above: a "ChomboMessaging" object.
  //  The user can keep a reference to this object and do computations.  When
  //  they reach the limit of what they can compute without this communication
  //  completing, they call the "finalize()" function of their ChomboMessaging
  //  object and the rest of the code below gets executed.
  //  A real question, though, is whether there is really enough computation
  //  to do while messaging is going on to justify the effort, and which
  //  machines really have good enough asynchronous messaging to make the
  //  work worthwhile.
  //
  //  The other approach is to more finely decompose the overlapping of
  //  messaging and computation by using the ChomboMessaging object in the
  //  DataIterator construction.  The DataIterator returns T objects as they
  //  are completed from messaging.  This preserves almost all of the Chombo
  //  code as is, but would be very tricky to actually implement and might
  //  gain little.  This would not be a thing to try until Chombo is heavily
  //  instrumented for performance measuring.  In this design,
  //  unpackReceivesToMe() would become a complicated process interwoven with
  //  a DataIterator.

  //  postReceivesToMe();

  // perform local copy
  for(CopyIterator it(a_copier, CopyIterator::LOCAL); it.ok(); ++it)
    {
      const MotionItem& item = it();
      a_op.op(a_dest[item.toIndex], item.fromRegion,
              a_destComps,
              item.toRegion,
              a_src[item.fromIndex],
              a_srcComps);
    }

  // Uncomment and Move this out of unpackReceivesToMe()  (ndk)
  completePendingSends(); // wait for sends from possible previous operation

  unpackReceivesToMe(a_dest, a_destComps, a_op); // nullOp in uniprocessor mode
}

#ifndef MPI
// uniprocessor versions of all these functions: they are nullOps.
template<class T>
void BoxLayoutData<T>::completePendingSends() const
{;}

template<class T>
void BoxLayoutData<T>::allocateBuffers(const BoxLayoutData<T>& a_src,
                                       const Interval&         a_srcComps,
                                       const BoxLayoutData<T>& a_dest,
                                       const Interval&         a_destComps,
                                       const Copier&           a_copier,
                                       const LDOperator<T>&    a_op) const
{;}

template<class T>
void BoxLayoutData<T>::writeSendDataFromMeIntoBuffers(const BoxLayoutData<T>& a_src,
                                                      const Interval&         a_srcComps,
                                                      const LDOperator<T>&    a_op) const
{;}

template<class T>
void BoxLayoutData<T>::postSendsFromMe() const
{;}

template<class T>
void BoxLayoutData<T>::postReceivesToMe() const
{;}

template<class T>
void BoxLayoutData<T>::unpackReceivesToMe(BoxLayoutData<T>&    a_dest,
                                          const Interval&      a_destComps,
                                          const LDOperator<T>& a_op) const
{;}

template<class T>
void BoxLayoutData<T>::unpackReceivesToMe_append(LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
                                                 const Interval&       a_destComps,
                                                 int                   ncomp,
                                                 const DataFactory<T>& factory,
                                                 const LDOperator<T>&  a_op) const
{;}

#else

// MPI versions of the above functions.

template<class T>
void BoxLayoutData<T>::completePendingSends() const
{
  if(numSends > 0){
    int result = MPI_Waitall(numSends, m_sendRequests, m_sendStatus);
    if(result != MPI_SUCCESS)
      {
        // no good recovery strategy for failed messaging here
      }

    delete[] m_sendRequests;
    delete[] m_sendStatus;
  }
  numSends = 0;
}

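// allocateBuffers builds the m_fromMe and m_toMe queues from the Copier's
// FROM and TO motion items, computes the total send and receive buffer sizes
// (exchanging the sizes over MPI when T cannot report them in advance),
// grows the flat m_sendbuffer/m_recbuffer if necessary, and assigns each
// queue entry its offset within those buffers.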
template<class T>
void BoxLayoutData<T>::allocateBuffers(const BoxLayoutData<T>& a_src,
                                       const Interval&         a_srcComps,
                                       const BoxLayoutData<T>& a_dest,
                                       const Interval&         a_destComps,
                                       const Copier&           a_copier,
                                       const LDOperator<T>&    a_op) const
{
  m_fromMe.resize(0);
  m_toMe.resize(0);
  size_t sendBufferSize = 0;
  size_t recBufferSize  = 0;
  // two versions of code here.  one for preAllocatable T, one not.

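  // Each bufEntry records the MotionItem it serves, the linearized byte count
  // of the data it moves (from a_op.size()), and the remote processor.  Both
  // queues are sorted so that entries bound for the same processor end up
  // adjacent, which is what makes the later per-processor message coalescing
  // possible.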
  T dummy;
  for(CopyIterator it(a_copier, CopyIterator::FROM); it.ok(); ++it)
    {
      const MotionItem& item = it();
      bufEntry b;
      b.item = &item;
      b.size = a_op.size(a_src[item.fromIndex], item.fromRegion, a_srcComps);
      sendBufferSize += b.size;
      b.procID = item.procID;
      m_fromMe.push_back(b);
    }
  sort(m_fromMe.begin(), m_fromMe.end());
  for(CopyIterator it(a_copier, CopyIterator::TO); it.ok(); ++it)
    {
      const MotionItem& item = it();
      bufEntry b;
      b.item = &item;
      if(T::preAllocatable() == 0)
        {
          b.size = a_op.size(dummy, item.toRegion, a_destComps);
          recBufferSize += b.size;
        }
      else if (T::preAllocatable() == 1)
        {
          b.size = a_op.size(a_dest[item.toIndex], item.toRegion, a_destComps);
          recBufferSize += b.size;
        }
      b.procID = item.procID;
      m_toMe.push_back(b);
    }
  sort(m_toMe.begin(), m_toMe.end());

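  // When T is not pre-allocatable (preAllocatable() == 2), the receiving side
  // cannot compute the incoming sizes locally, so a first round of messages
  // exchanges each entry's size; the per-destination message index is used as
  // the MPI tag to keep multiple entries between the same pair of processors
  // distinct.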
  if(T::preAllocatable() == 2) // dynamically allocatable, needs two passes
    {
      // in the non-preallocatable case, the values of m_toMe[*].size
      // must be communicated
      if(m_fromMe.size() > 0)
        {
          MPI_Request nullrequest;

          int lastProc = -1;
          int messageIndex = 0;
          for(int i=0; i<m_fromMe.size(); ++i)
            {
              bufEntry& b = m_fromMe[i];
              if(b.procID == lastProc) messageIndex++;
              else                     messageIndex = 0;
              lastProc = b.procID;
              MPI_Isend(&(b.size), 1, MPI_UNSIGNED_LONG, b.procID,
                        messageIndex, Chombo_MPI::comm, &(nullrequest));
              MPI_Request_free(&(nullrequest));  // we don't wait on these sends;
              //    assume this memory really doesn't go anywhere.  Most MPI
              //    systems will fast-path a single-integer message.
            }
        }
      if(m_toMe.size() > 0)
        {
          m_receiveRequests = new MPI_Request[m_toMe.size()];
          m_receiveStatus   = new MPI_Status[m_toMe.size()];
          int lastProc = -1;
          int messageIndex = 0;
          for(int i=0; i<m_toMe.size(); ++i)
            {
              bufEntry& b = m_toMe[i];
              if(b.procID == lastProc) messageIndex++;
              else                     messageIndex = 0;
              lastProc = b.procID;
              MPI_Irecv(&(b.size), 1, MPI_UNSIGNED_LONG, b.procID,
                        messageIndex, Chombo_MPI::comm, m_receiveRequests+i);
            }

          int result = MPI_Waitall(m_toMe.size(), m_receiveRequests, m_receiveStatus);
          if(result != MPI_SUCCESS)
            {
              MayDay::Error("First pass of two-phase communication failed");
            }
          for(int i=0; i<m_toMe.size(); ++i)  recBufferSize += m_toMe[i].size;
          delete[] m_receiveRequests;
          delete[] m_receiveStatus;
        }
    }

  // allocate send and receive buffer space.

  if(sendBufferSize > m_sendcapacity)
    {
      free(m_sendbuffer);
      m_sendbuffer = malloc(sendBufferSize);
      if(m_sendbuffer == NULL)
        {
          MayDay::Error("Out of memory in BoxLayoutData::allocateBuffers");
        }
      m_sendcapacity = sendBufferSize;
    }

  if(recBufferSize > m_reccapacity)
    {
      free(m_recbuffer);
      m_recbuffer = malloc(recBufferSize);
      if(m_recbuffer == NULL)
        {
          MayDay::Error("Out of memory in BoxLayoutData::allocateBuffers");
        }
      m_reccapacity = recBufferSize;
    }

  /*
    pout()<<"\n";
    for(int i=0; i<m_fromMe.size(); i++)
    pout()<<m_fromMe[i].item->region<<"{"<<m_fromMe[i].procID<<"}"<<" ";
    pout() <<"::::";
    for(int i=0; i<m_toMe.size(); i++)
    pout()<<m_toMe[i].item->region<<"{"<<m_toMe[i].procID<<"}"<<" ";
    pout() << endl;
  */

  char* nextFree = (char*)m_sendbuffer;
  if(m_fromMe.size() > 0)
    {
      for(unsigned int i=0; i<m_fromMe.size(); ++i)
        {
          m_fromMe[i].bufPtr = nextFree;
          nextFree += m_fromMe[i].size;
        }
    }

  nextFree = (char*)m_recbuffer;
  if(m_toMe.size() > 0)
    {
      for(unsigned int i=0; i<m_toMe.size(); ++i)
        {
          m_toMe[i].bufPtr = nextFree;
          nextFree += m_toMe[i].size;
        }
    }

  // since fromMe and toMe are sorted based on procID, messages can now be
  // grouped together on a per-processor basis.
}

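// writeSendDataFromMeIntoBuffers linearizes each outgoing region of a_src
// directly into its pre-assigned slot (bufPtr) in the flat send buffer.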
template<class T>
void BoxLayoutData<T>::writeSendDataFromMeIntoBuffers(const BoxLayoutData<T>& a_src,
                                                      const Interval&         a_srcComps,
                                                      const LDOperator<T>&    a_op) const
{
  for(unsigned int i=0; i<m_fromMe.size(); ++i)
    {
      const bufEntry& entry = m_fromMe[i];
      a_op.linearOut(a_src[entry.item->fromIndex], entry.bufPtr,
                     entry.item->fromRegion, a_srcComps);
    }
}

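// postSendsFromMe coalesces outgoing messages: m_fromMe is sorted by processor
// and the buffer offsets were assigned in that same order, so consecutive
// entries bound for the same rank occupy contiguous buffer space.  They are
// merged into the first entry (the rest get size 0) and sent with a single
// MPI_Isend per destination processor.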
template<class T>
void BoxLayoutData<T>::postSendsFromMe() const
{
  // now we get the magic of message coalescence;
  // fromMe has already been sorted in the allocateBuffers() step.

  numSends = m_fromMe.size();
  if(numSends > 1){
    for(unsigned int i=m_fromMe.size()-1; i>0; --i)
      {
        if(m_fromMe[i].procID == m_fromMe[i-1].procID)
          {
            numSends--;
            m_fromMe[i-1].size += m_fromMe[i].size;
            m_fromMe[i].size = 0;
          }
      }
  }
  m_sendRequests = new MPI_Request[numSends];
  m_sendStatus   = new MPI_Status[numSends];

  unsigned int next = 0;
  for(int i=0; i<numSends; ++i)
    {
      const bufEntry& entry = m_fromMe[next];
      // cout<<procID()<< ": sending message of "<<entry.size;
      // cout<<" to proc "<<  entry.procID<<endl;
      MPI_Isend(entry.bufPtr, entry.size, MPI_BYTE, entry.procID,
                0, Chombo_MPI::comm, m_sendRequests+i);
      ++next;
      while(next < m_fromMe.size() && m_fromMe[next].size == 0) ++next;
    }
}

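// postReceivesToMe is the receive-side mirror of postSendsFromMe: m_toMe
// entries for the same processor are coalesced, and one MPI_Irecv is posted
// per sending processor into the contiguous receive buffer.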
template<class T>
void BoxLayoutData<T>::postReceivesToMe() const
{
  numReceives = m_toMe.size();

  if(numReceives > 1){
    for(unsigned int i=m_toMe.size()-1; i>0; --i)
      {
        if(m_toMe[i].procID == m_toMe[i-1].procID)
          {
            numReceives--;
            m_toMe[i-1].size += m_toMe[i].size;
            m_toMe[i].size = 0;
          }
      }
  }
  m_receiveRequests = new MPI_Request[numReceives];
  m_receiveStatus   = new MPI_Status[numReceives];

  unsigned int next = 0;
  for(int i=0; i<numReceives; ++i)
    {
      const bufEntry& entry = m_toMe[next];
      //cout<<procID()<< ": receiving message of "<<entry.size;
      //cout<<" from proc "<<  entry.procID<<endl;
      MPI_Irecv(entry.bufPtr, entry.size, MPI_BYTE, entry.procID,
                0, Chombo_MPI::comm, m_receiveRequests+i);
      ++next;
      while(next < m_toMe.size() && m_toMe[next].size == 0) ++next;
    }
}

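// unpackReceivesToMe waits for all outstanding receives and then linearizes
// each incoming region out of the flat receive buffer into the corresponding
// destination data holder.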
template<class T>
void BoxLayoutData<T>::unpackReceivesToMe(BoxLayoutData<T>&    a_dest,
                                          const Interval&      a_destComps,
                                          const LDOperator<T>& a_op) const
{
  if(numReceives > 0){
    int result = MPI_Waitall(numReceives, m_receiveRequests, m_receiveStatus);
    if(result != MPI_SUCCESS)
      {
        // no good recovery strategy for failed messaging here
      }

    for(unsigned int i=0; i<m_toMe.size(); ++i)
      {
        const bufEntry& entry = m_toMe[i];
        a_op.linearIn(a_dest[entry.item->toIndex], entry.bufPtr,
                      entry.item->toRegion, a_destComps);
      }

    delete[] m_receiveRequests;
    delete[] m_receiveStatus;
  }
  numReceives = 0;
}

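// unpackReceivesToMe_append is the variant used by generalCopyTo: each
// incoming region is unpacked into a freshly created T (built by the
// DataFactory) and appended to the Vector held at the destination index.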
template<class T>
void BoxLayoutData<T>::unpackReceivesToMe_append(LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
                                                 const Interval&       a_destComps,
                                                 int                   ncomp,
                                                 const DataFactory<T>& factory,
                                                 const LDOperator<T>&  a_op) const
{
  if(numReceives > 0){
    int result = MPI_Waitall(numReceives, m_receiveRequests, m_receiveStatus);
    if(result != MPI_SUCCESS)
      {
        // no good recovery strategy for failed messaging here
      }

    for(unsigned int i=0; i<m_toMe.size(); ++i)
      {
        const bufEntry& entry = m_toMe[i];
        const MotionItem& item = *(entry.item);
        RefCountedPtr<T> newT = factory.create(item.toRegion, ncomp, item.toIndex);

        a_op.linearIn(*newT, entry.bufPtr, item.toRegion, a_destComps);
        a_dest[item.toIndex].push_back(newT);
      }

    delete[] m_receiveRequests;
    delete[] m_receiveStatus;
  }
  numReceives = 0;
}

#endif

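// generalCopyTo copies this data onto an arbitrary BoxLayout: it builds a
// Copier between the two layouts, drives the same buffer/messaging machinery
// used by makeItSo, and delivers the result as newly created T objects
// appended into a_dest (one per intersecting region).  It requires a
// pre-allocatable T (preAllocatable() == 0), enforced by the assert below.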
template <class T>
void BoxLayoutData<T>::generalCopyTo(const BoxLayout& a_destGrids,
                                     LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
                                     const Interval& a_srcComps,
                                     const ProblemDomain& a_domain,
                                     const DataFactory<T>& factory) const
{
  assert(T::preAllocatable() == 0);
  LDOperator<T> a_op;

  a_dest.define(a_destGrids);
  Copier copier;
  copier.define(this->m_boxLayout, a_destGrids, a_domain, IntVect::Zero);

  int ncomp = a_srcComps.size();
  Interval destComps(0, ncomp-1);
  allocateBuffers(*this,  a_srcComps,
                  *this,  destComps,
                  copier, a_op);

  writeSendDataFromMeIntoBuffers(*this, a_srcComps, a_op);

  // If there is nothing to recv/send, don't go into these functions
  // and allocate memory that will not be freed later.  (ndk)
  // The #ifdef MPI is for the m_toMe and m_fromMe
#ifdef MPI
  if (m_toMe.size() > 0) {
    postReceivesToMe(); // all non-blocking
  }

  if (m_fromMe.size() > 0) {
    postSendsFromMe();  // all non-blocking
  }
#endif

  // perform local copy
  for(CopyIterator it(copier, CopyIterator::LOCAL); it.ok(); ++it)
    {
      const MotionItem& item = it();
      RefCountedPtr<T> newT = factory.create(item.toRegion, ncomp, item.toIndex);

      a_op.op(*newT, item.fromRegion,
              destComps,
              item.toRegion,
              this->operator[](item.fromIndex),
              a_srcComps);
      a_dest[item.toIndex].push_back(newT);
    }

  // Uncomment and Move this out of unpackReceivesToMe()  (ndk)
  completePendingSends(); // wait for sends from possible previous operation

  unpackReceivesToMe_append(a_dest, destComps, ncomp, factory, a_op); // nullOp in uniprocessor mode
}
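
// A minimal usage sketch for generalCopyTo.  Everything here except the
// generalCopyTo signature is hypothetical: srcData, destGrids, domain, fact
// and ncomp are assumed to exist and are not defined in this file.
//
//   // srcData  : a BoxLayoutData<FArrayBox> with ncomp components
//   // destGrids: the BoxLayout to copy onto, living inside ProblemDomain 'domain'
//   // fact     : some concrete DataFactory<FArrayBox>
//   LayoutData<Vector<RefCountedPtr<FArrayBox> > > pieces;
//   srcData.generalCopyTo(destGrids, pieces, Interval(0, ncomp-1), domain, fact);
//
// On exit, each entry of 'pieces' holds one newly created FArrayBox per region
// of srcData that overlaps the corresponding destGrids box, with the copied
// components renumbered to start at zero.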
