BoxLayoutDataI.H Source File

00001 /*   _______              __
00002     / ___/ /  ___  __ _  / /  ___
00003    / /__/ _ \/ _ \/  V \/ _ \/ _ \
00004    \___/_//_/\___/_/_/_/_.__/\___/
00005 */
00006 // CHOMBO Copyright (c) 2000-2004, The Regents of the University of
00007 // California, through Lawrence Berkeley National Laboratory (subject to
00008 // receipt of any required approvals from U.S. Dept. of Energy).  All
00009 // rights reserved.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are met:
00013 //
00014 // (1) Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 // (2) Redistributions in binary form must reproduce the above copyright
00017 // notice, this list of conditions and the following disclaimer in the
00018 // documentation and/or other materials provided with the distribution.
00019 // (3) Neither the name of Lawrence Berkeley National Laboratory, U.S.
00020 // Dept. of Energy nor the names of its contributors may be used to endorse
00021 // or promote products derived from this software without specific prior
00022 // written permission.
00023 //
00024 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00025 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00026 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00027 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
00028 // OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00029 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00030 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00031 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00032 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00033 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00034 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00035 //
00036 // You are under no obligation whatsoever to provide any bug fixes,
00037 // patches, or upgrades to the features, functionality or performance of
00038 // the source code ("Enhancements") to anyone; however, if you choose to
00039 // make your Enhancements available either publicly, or directly to
00040 // Lawrence Berkeley National Laboratory, without imposing a separate
00041 // written license agreement for such Enhancements, then you hereby grant
00042 // the following license: a non-exclusive, royalty-free perpetual license
00043 // to install, use, modify, prepare derivative works, incorporate into
00044 // other computer software, distribute, and sublicense such Enhancements or
00045 // derivative works thereof, in binary and source code form.
00046 //
00047 // TRADEMARKS. Product and company names mentioned herein may be the
00048 // trademarks of their respective owners.  Any rights not expressly granted
00049 // herein are reserved.
00050 //
00051 
00052 #ifndef _BOXLAYOUTDATAI_H_
00053 #define _BOXLAYOUTDATAI_H_
00054 
00055 #include <cstdlib>
00056 #include <algorithm>
00057 
00058 #include "parstream.H"
00059 #include "memtrack.H"
00060 
00061 using std::sort;
00062 
00063 template<class T>
00064 void BoxLayoutData<T>::makeItSo(const Interval&   a_srcComps,
00065                             const BoxLayoutData<T>& a_src,
00066                             BoxLayoutData<T>& a_dest,
00067                             const Interval&   a_destComps,
00068                             const Copier&     a_copier,
00069                             const LDOperator<T>& a_op) const
00070 {
00071   // The following five functions are nullOps in uniprocessor mode
00072 
00073   // Instead of doing this here, do it an end of makeItSo (ndk)
00074   //completePendingSends(); // wait for sends from possible previous operation
00075 
00076   // new evil logic to determine under what conditions we can just
00077   // re-use our messaging pattern and buffers from the last call to
00078   // makeItSo.  pretty elaborate, I know.  bvs
00079 #ifdef MPI
00080   static Copier* lastCopier=NULL;
00081 
00082 #ifndef NDEBUG
00083   //verifyCommunications();
00084 #endif
00085 
00086   if(T::preAllocatable() == 2 || !a_copier.bufferAllocated() ||
00087      (m_fromMe.size() + m_toMe.size() == 0) ||lastCopier != &a_copier){
00088     allocateBuffers(a_src,  a_srcComps,
00089                     a_dest, a_destComps,
00090                     a_copier,
00091                     a_op);  //monkey with buffers, set up 'fromMe' and 'toMe' queues
00092     a_copier.setBufferAllocated(true);
00093   }
00094   lastCopier = (Copier*)(&a_copier);
00095 
00096 #endif
00097 
00098   writeSendDataFromMeIntoBuffers(a_src, a_srcComps, a_op);
00099 
00100   // If there is nothing to recv/send, don't go into these functions
00101   // and allocate memory that will not be freed later.  (ndk)
00102   // The #ifdef MPI is for the m_toMe and m_fromMe
00103 #ifdef MPI
00104   this->numReceives = m_toMe.size();
00105   if (this->numReceives > 0) {
00106     postReceivesToMe(); // all non-blocking
00107   }
00108 
00109   this->numSends = m_fromMe.size();
00110   if (this->numSends > 0) {
00111     postSendsFromMe();  // all non-blocking
00112   }
00113 #endif
00114 
00115   //  computation that could occur during communication should really
00116   //  go here somehow.  while all the non-blocking sending and receiving is
00117   //  going on.
00118   //
00119   //  my thought is to return from this function at this point an object
00120   //  that encapsulates the argument list above.
00121   //  a "ChomboMessaging" object.
00122   //  The user can keep a reference
00123   //  to this object and do computations.  When they reach the limit of what
00124   //  they can compute without this communication completing, they call the
00125   //  "finalize()" function of their ChomboMessaging object and the rest of this
00126   //  code below gets executed.
00127   //  a real question though is: is there really enough computation to do while
00128   //  messaging is going on to justify the effort, and what machines really have
00129   //  good asynchronous messaging to make the work worthwhile.
00130   //
00131   //  the other approach is to more finely decompose the overlapping of
00132   //  messaging and computation by using the ChomboMessaging object in the
00133   //  DataIterator construction.  The DataIterator returns T objects as they
00134   //  are completed from messaging.  This preserves almost all of the Chombo
00135   //  code as is but would be mucho tricky to actually implement and might only
00136   //  gain little.  This would not be a thing to try unitl Chombo is
00137   //  heavily instrumented for performance measuring.  in this design, unpackRecievesToMe()
00138   //  would become a complicated process interwoven with a DataIterator.
00139 
00140   //  postReceivesToMe();
00141 
00142   // perform local copy
00143   for(CopyIterator it(a_copier, CopyIterator::LOCAL); it.ok(); ++it)
00144     {
00145       const MotionItem& item = it();
00146       a_op.op(a_dest[item.toIndex], item.fromRegion,
00147               a_destComps,
00148               item.toRegion,
00149               a_src[item.fromIndex],
00150               a_srcComps);
00151     }
00152 
00153   // Uncomment and Move this out of unpackReceivesToMe()  (ndk)
00154   completePendingSends(); // wait for sends from possible previous operation
00155 
00156   unpackReceivesToMe(a_dest, a_destComps, a_op); // nullOp in uniprocessor mode
00157 
00158 }
00159 
00160 #ifndef MPI
00161 // uniprocessor version of all these nullop functions.
00162 template<class T>
00163 void BoxLayoutData<T>::completePendingSends() const
00164 {}
00165 
00166 template<class T>
00167 void BoxLayoutData<T>::allocateBuffers(const BoxLayoutData<T>& a_src,
00168                                    const Interval& a_srcComps,
00169                                    const BoxLayoutData<T>& a_dest,
00170                                    const Interval& a_destComps,
00171                                    const Copier&   a_copier,
00172                                    const LDOperator<T>& a_op
00173                                    ) const
00174 {}
00175 
00176 template<class T>
00177 void BoxLayoutData<T>::writeSendDataFromMeIntoBuffers(const BoxLayoutData<T>& a_src,
00178                                                   const Interval&     a_srcComps,
00179                                                   const LDOperator<T>& a_op) const
00180 {}
00181 
00182 template<class T>
00183 void BoxLayoutData<T>::postSendsFromMe() const
00184 {}
00185 
00186 template<class T>
00187 void BoxLayoutData<T>::postReceivesToMe() const
00188 {}
00189 
00190 template<class T>
00191 void BoxLayoutData<T>::unpackReceivesToMe(BoxLayoutData<T>& a_dest,
00192                                       const Interval&   a_destComps,
00193                                       const LDOperator<T>& a_op) const
00194 {}
00195 
00196 template<class T>
00197 void BoxLayoutData<T>::unpackReceivesToMe_append(LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
00198                                                  const Interval&   a_destComps,
00199                                                  int ncomp,
00200                                                  const DataFactory<T>& factory,
00201                                                  const LDOperator<T>& a_op) const
00202 {}
00203 
00204 #else
00205 
00206 // MPI versions of the above codes.
00207 
00208 template<class T>
00209 void BoxLayoutData<T>::completePendingSends() const
00210 {
00211   if(this->numSends > 0){
00212     int result = MPI_Waitall(this->numSends, m_sendRequests, m_sendStatus);
00213     if(result != MPI_SUCCESS)
00214       {
00215         //hell if I know what to do about failed messaging here
00216       }
00217 
00218     delete[] m_sendRequests;
00219     delete[] m_sendStatus;
00220   }
00221   this->numSends = 0;
00222 }
00223 
00224 template<class T>
00225 void BoxLayoutData<T>::allocateBuffers(const BoxLayoutData<T>& a_src,
00226                                    const Interval& a_srcComps,
00227                                    const BoxLayoutData<T>& a_dest,
00228                                    const Interval& a_destComps,
00229                                    const Copier&   a_copier,
00230                                    const LDOperator<T>& a_op) const
00231 {
00232   m_fromMe.resize(0);
00233   m_toMe.resize(0);
00234   size_t sendBufferSize = 0;
00235   size_t recBufferSize  = 0;
00236   // two versions of code here.  one for preAllocatable T, one not.
00237 
00238   T dummy;
00239   for(CopyIterator it(a_copier, CopyIterator::FROM); it.ok(); ++it)
00240     {
00241       const MotionItem& item = it();
00242       bufEntry b;
00243       b.item = &item;
00244       b.size = a_op.size(a_src[item.fromIndex], item.fromRegion, a_srcComps);
00245       sendBufferSize+=b.size;
00246       b.procID = item.procID;
00247       m_fromMe.push_back(b);
00248     }
00249   sort(m_fromMe.begin(), m_fromMe.end());
00250   for(CopyIterator it(a_copier, CopyIterator::TO); it.ok(); ++it)
00251     {
00252       const MotionItem& item = it();
00253       bufEntry b;
00254       b.item = &item;
00255       if(T::preAllocatable() == 0)
00256         {
00257           b.size = a_op.size(dummy, item.toRegion, a_destComps);
00258           recBufferSize+=b.size;
00259         }
00260       else if (T::preAllocatable() == 1)
00261         {
00262           b.size = a_op.size(a_dest[item.toIndex], item.toRegion, a_destComps);
00263           recBufferSize+=b.size;
00264         }
00265       b.procID = item.procID;
00266       m_toMe.push_back(b);
00267     }
00268   sort(m_toMe.begin(), m_toMe.end());
00269 
00270   if(T::preAllocatable() == 2) // dynamic allocatable, need two pass
00271     {
00272       // in the non-preallocatable case, I need to message the
00273       // values for the m_toMe[*].size
00274       if(m_fromMe.size() > 0)
00275         {
00276           MPI_Request nullrequest;
00277 
00278           int lastProc = -1;
00279           int messageIndex = 0;
00280           for(int i=0; i<m_fromMe.size(); ++i)
00281             {
00282               bufEntry& b = m_fromMe[i];
00283               if(b.procID == lastProc) messageIndex++;
00284               else                     messageIndex = 0;
00285               lastProc = b.procID;
00286               MPI_Isend(&(b.size), 1, MPI_UNSIGNED_LONG, b.procID,
00287                         messageIndex, Chombo_MPI::comm, &(nullrequest));
00288               MPI_Request_free(&(nullrequest));  // we don't wait on these sends, assume
00289               //    this memory really doesn't go anywhere.  Most MPI systems will fast message
00290               //    a single integer message
00291             }
00292         }
00293       if(m_toMe.size() > 0)
00294         {
00295           m_receiveRequests = new MPI_Request[m_toMe.size()];
00296           m_receiveStatus   = new MPI_Status[m_toMe.size()];
00297           int lastProc = -1;
00298           int messageIndex = 0;
00299           for(int i=0; i<m_toMe.size(); ++i)
00300             {
00301               bufEntry& b = m_toMe[i];
00302               if(b.procID == lastProc) messageIndex++;
00303               else                     messageIndex = 0;
00304               lastProc = b.procID;
00305               MPI_Irecv(&(b.size), 1, MPI_UNSIGNED_LONG, b.procID,
00306                         messageIndex, Chombo_MPI::comm, m_receiveRequests+i);
00307             }
00308 
00309           int result = MPI_Waitall(m_toMe.size(), m_receiveRequests, m_receiveStatus);
00310           if(result != MPI_SUCCESS)
00311             {
00312               MayDay::Error("First pass of two-phase communication failed");
00313             }
00314           for(int i=0; i<m_toMe.size(); ++i)  recBufferSize+= m_toMe[i].size;
00315           delete[] m_receiveRequests;
00316           delete[] m_receiveStatus;
00317         }
00318     }
00319 
00320   // allocate send and receveive buffer space.
00321 
00322   if(sendBufferSize > m_sendcapacity)
00323     {
00324       free(m_sendbuffer);
00325       m_sendbuffer = malloc(sendBufferSize);
00326       if(m_sendbuffer == NULL)
00327         {
00328           MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
00329         }
00330       m_sendcapacity = sendBufferSize;
00331     }
00332 
00333   if(recBufferSize > m_reccapacity)
00334     {
00335       free(m_recbuffer);
00336       m_recbuffer = malloc(recBufferSize);
00337       if(m_recbuffer == NULL)
00338         {
00339           MayDay::Error("Out of memory in BoxLayoutData::allocatebuffers");
00340         }
00341       m_reccapacity = recBufferSize;
00342     }
00343 
00344   /*
00345     pout()<<"\n";
00346     for(int i=0; i<m_fromMe.size(); i++)
00347     pout()<<m_fromMe[i].item->region<<"{"<<m_fromMe[i].procID<<"}"<<" ";
00348     pout() <<"::::";
00349     for(int i=0; i<m_toMe.size(); i++)
00350     pout()<<m_toMe[i].item->region<<"{"<<m_toMe[i].procID<<"}"<<" ";
00351     pout() << endl;
00352   */
00353 
00354   char* nextFree = (char*)m_sendbuffer;
00355   if(m_fromMe.size() > 0)
00356     {
00357       for(unsigned int i=0; i<m_fromMe.size(); ++i)
00358         {
00359           m_fromMe[i].bufPtr = nextFree;
00360           nextFree += m_fromMe[i].size;
00361         }
00362     }
00363 
00364   nextFree = (char*)m_recbuffer;
00365   if(m_toMe.size() > 0)
00366     {
00367       for(unsigned int i=0; i<m_toMe.size(); ++i)
00368         {
00369           m_toMe[i].bufPtr = nextFree;
00370           nextFree += m_toMe[i].size;
00371         }
00372     }
00373 
00374   // since fromMe and toMe are sorted based on procID, messages can now be grouped
00375   // together on a per-processor basis.
00376 
00377 }
00378 
00379 template<class T>
00380 void BoxLayoutData<T>::writeSendDataFromMeIntoBuffers(const BoxLayoutData<T>& a_src,
00381                                                   const Interval&     a_srcComps,
00382                                                   const LDOperator<T>& a_op) const
00383 {
00384   for(unsigned int i=0; i<m_fromMe.size(); ++i)
00385     {
00386       const bufEntry& entry = m_fromMe[i];
00387       a_op.linearOut(a_src[entry.item->fromIndex], entry.bufPtr,
00388                      entry.item->fromRegion, a_srcComps);
00389     }
00390 }
00391 
00392 template<class T>
00393 void BoxLayoutData<T>::postSendsFromMe() const
00394 {
00395 
00396   // now we get the magic of message coalescence
00397   // fromMe has already been sorted in the allocateBuffers() step.
00398 
00399   this->numSends = m_fromMe.size();
00400   if(this->numSends > 1){
00401     for(unsigned int i=m_fromMe.size()-1; i>0; --i)
00402       {
00403         if(m_fromMe[i].procID == m_fromMe[i-1].procID)
00404           {
00405             this->numSends--;
00406             m_fromMe[i-1].size+=m_fromMe[i].size;
00407             m_fromMe[i].size = 0;
00408           }
00409       }
00410   }
00411   m_sendRequests = new MPI_Request[this->numSends];
00412   m_sendStatus = new MPI_Status[this->numSends];
00413 
00414   unsigned int next=0;
00415   for(int i=0; i<this->numSends; ++i)
00416     {
00417       const bufEntry& entry = m_fromMe[next];
00418       // cout<<procID()<< ": sending message of "<<entry.size;
00419       // cout<<" to proc "<<  entry.procID<<endl;
00420       MPI_Isend(entry.bufPtr, entry.size, MPI_BYTE, entry.procID,
00421                 0, Chombo_MPI::comm, m_sendRequests+i);
00422       ++next;
00423       while(next < m_fromMe.size() && m_fromMe[next].size == 0) ++next;
00424     }
00425 }
00426 
00427 template<class T>
00428 void BoxLayoutData<T>::postReceivesToMe() const
00429 {
00430   this->numReceives = m_toMe.size();
00431 
00432   if(this->numReceives > 1){
00433     for(unsigned int i=m_toMe.size()-1; i>0; --i)
00434       {
00435         if(m_toMe[i].procID == m_toMe[i-1].procID)
00436           {
00437             this->numReceives--;
00438             m_toMe[i-1].size+=m_toMe[i].size;
00439             m_toMe[i].size = 0;
00440           }
00441       }
00442   }
00443   m_receiveRequests = new MPI_Request[this->numReceives];
00444   m_receiveStatus = new MPI_Status[this->numReceives];
00445 
00446   unsigned int next=0;
00447   for(int i=0; i<this->numReceives; ++i)
00448     {
00449       const bufEntry& entry = m_toMe[next];
00450       //cout<<procID()<< ": receiving message of "<<entry.size;
00451       //cout<<" from proc "<<  entry.procID<<endl;
00452       MPI_Irecv(entry.bufPtr, entry.size, MPI_BYTE, entry.procID,
00453                 0, Chombo_MPI::comm, m_receiveRequests+i);
00454       ++next;
00455       while(next < m_toMe.size() && m_toMe[next].size == 0) ++next;
00456     }
00457 
00458 }
00459 
00460 template<class T>
00461 void BoxLayoutData<T>::unpackReceivesToMe(BoxLayoutData<T>& a_dest,
00462                                       const Interval&   a_destComps,
00463                                       const LDOperator<T>& a_op) const
00464 {
00465 
00466   if(this->numReceives > 0){
00467     int result = MPI_Waitall(this->numReceives, m_receiveRequests, m_receiveStatus);
00468     if(result != MPI_SUCCESS)
00469       {
00470         //hell if I know what to do about failed messaging here
00471       }
00472 
00473     for(unsigned int i=0; i<m_toMe.size(); ++i)
00474       {
00475         const bufEntry& entry = m_toMe[i];
00476         a_op.linearIn(a_dest[entry.item->toIndex], entry.bufPtr, entry.item->toRegion, a_destComps);
00477       }
00478 
00479     delete[] m_receiveRequests;
00480     delete[] m_receiveStatus;
00481   }
00482   this->numReceives = 0;
00483 }
00484 
00485 template<class T>
00486 void BoxLayoutData<T>::unpackReceivesToMe_append(LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
00487                                                  const Interval&   a_destComps,
00488                                                  int ncomp,
00489                                                  const DataFactory<T>& factory,
00490 
00491                                                  const LDOperator<T>& a_op) const
00492 {
00493 
00494   if(this->numReceives > 0){
00495     int result = MPI_Waitall(this->numReceives, m_receiveRequests, m_receiveStatus);
00496     if(result != MPI_SUCCESS)
00497       {
00498         //hell if I know what to do about failed messaging here
00499       }
00500 
00501     for(unsigned int i=0; i<m_toMe.size(); ++i)
00502       {
00503         const bufEntry& entry = m_toMe[i];
00504         const MotionItem& item = *(entry.item);
00505         RefCountedPtr<T> newT = factory.create(item.toRegion, ncomp, item.toIndex);
00506 
00507         a_op.linearIn(*newT, entry.bufPtr, item.toRegion, a_destComps);
00508         a_dest[item.toIndex].push_back(newT);
00509       }
00510 
00511     delete[] m_receiveRequests;
00512     delete[] m_receiveStatus;
00513   }
00514   this->numReceives = 0;
00515 }
00516 #endif
00517 
00518 template <class T>
00519 void BoxLayoutData<T>::generalCopyTo(const BoxLayout& a_destGrids,
00520                                      LayoutData<Vector<RefCountedPtr<T> > >& a_dest,
00521                                      const Interval& a_srcComps,
00522                                      const ProblemDomain& a_domain,
00523                                      const DataFactory<T>& factory) const
00524 {
00525 
00526   assert(T::preAllocatable() == 0);
00527   LDOperator<T> a_op;
00528 
00529   a_dest.define(a_destGrids);
00530   Copier copier;
00531   copier.define(this->m_boxLayout, a_destGrids, a_domain, IntVect::Zero);
00532 
00533   int ncomp = a_srcComps.size();
00534   Interval destComps(0, ncomp-1);
00535   allocateBuffers(*this,  a_srcComps,
00536                   *this,  destComps,
00537                   copier, a_op);
00538 
00539   writeSendDataFromMeIntoBuffers(*this, a_srcComps, a_op);
00540 
00541   // If there is nothing to recv/send, don't go into these functions
00542   // and allocate memory that will not be freed later.  (ndk)
00543   // The #ifdef MPI is for the m_toMe and m_fromMe
00544 #ifdef MPI
00545   this->numReceives = m_toMe.size();
00546   if (this->numReceives > 0) {
00547     postReceivesToMe(); // all non-blocking
00548   }
00549 
00550   this->numSends = m_fromMe.size();
00551   if (this->numSends > 0) {
00552     postSendsFromMe();  // all non-blocking
00553   }
00554 #endif
00555 
00556     // perform local copy
00557   for(CopyIterator it(copier, CopyIterator::LOCAL); it.ok(); ++it)
00558     {
00559       const MotionItem& item = it();
00560       RefCountedPtr<T> newT = factory.create(item.toRegion, ncomp, item.toIndex);
00561 
00562       a_op.op(*newT, item.fromRegion,
00563               destComps,
00564               item.toRegion,
00565               this->operator[](item.fromIndex),
00566               a_srcComps);
00567       a_dest[item.toIndex].push_back(newT);
00568     }
00569 
00570   // Uncomment and Move this out of unpackReceivesToMe()  (ndk)
00571   completePendingSends(); // wait for sends from possible previous operation
00572 
00573   unpackReceivesToMe_append(a_dest, destComps, ncomp, factory, a_op); // nullOp in uniprocessor mode
00574 }
00575 
00576 #endif