Chombo&INSwithParticles: SPMDI.H Source File

00001 /* _______              __
00002   / ___/ /  ___  __ _  / /  ___
00003  / /__/ _ \/ _ \/  ' \/ _ \/ _ \
00004  \___/_//_/\___/_/_/_/_.__/\___/ 
00005 */
00006 //
00007 // This software is copyright (C) by the Lawrence Berkeley
00008 // National Laboratory.  Permission is granted to reproduce
00009 // this software for non-commercial purposes provided that
00010 // this notice is left intact.
00011 // 
00012 // It is acknowledged that the U.S. Government has rights to
00013 // this software under Contract DE-AC03-765F00098 between
00014 // the U.S.  Department of Energy and the University of
00015 // California.
00016 //
00017 // This software is provided as a professional and academic
00018 // contribution for joint exchange. Thus it is experimental,
00019 // is provided ``as is'', with no warranties of any kind
00020 // whatsoever, no support, no promise of updates, or printed
00021 // documentation. By using this software, you acknowledge
00022 // that the Lawrence Berkeley National Laboratory and
00023 // Regents of the University of California shall have no
00024 // liability with respect to the infringement of other
00025 // copyrights by any part of this software.
00026 //
00027 
00028 #ifndef _SPMDI_H_
00029 #define _SPMDI_H_
00030 
00031 
// Default implementations of the linearization routines: each one simply
// forwards to the member function of the same name on T.
00033 
// Default linearSize: forwards to the object's own linearSize() member.
//
// inputT  - object to be measured; T must provide a const member
//           function linearSize() whose result converts to int.
// returns - the value of inputT.linearSize().  The gather and broadcast
//           routines in this file use it as a byte count for the buffer
//           handed to linearOut.
template <class T>
int linearSize(const T& inputT)
{
  return inputT.linearSize();
}
00039 
// Default linearIn: forwards to the object's own linearIn() member.
//
// a_outputT - object to be filled in; T must provide a member function
//             linearIn(const void*).
// inBuf     - buffer holding the linearized representation; it is passed
//             through to the member function unchanged.
template <class T>
void linearIn(T& a_outputT, const void* const inBuf)
{
  a_outputT.linearIn(inBuf);
}
00045 
// Default linearOut: forwards to the object's own linearOut() member.
//
// a_outBuf - destination buffer; it is passed through to the member
//            function unchanged.  The callers in this file size it with
//            linearSize(inputT).
// inputT   - object to be written; T must provide a const member
//            function linearOut(void*).
template <class T>
void linearOut(void* const a_outBuf, const T& inputT)
{
  inputT.linearOut(a_outBuf);
}
00051 
00052 #ifdef MPI
00053 
00054 /*****************************/
00055 //gather a_input into a a_outVec
00056 /*****************************/
00057 template <class T>
00058 inline void 
00059 gather(Vector<T>& a_outVec, const T& a_input, int a_dest)
00060 {
00061   assert (a_dest >= 0);
00062   assert(a_dest <  numProc());
00063   //now THIS size lives on THIS processor
00064   int isize = linearSize(a_input);
00065     
00066   //make stuff for linearout
00067   void* loclBuf = malloc(isize);
00068   if(loclBuf == NULL)
00069     MayDay::Error("out of memory in gather 1");
00070     
00071   //put linearized T into its proper buffer
00072   linearOut(loclBuf, a_input);
00073 
00074 
00075   int nProcess = numProc(); 
00076   int sendCount = 1;
00077   int recdCount = 1;
00078     
00079   //need to gather isizes onto processor a_dest
00080   int* vectSize = NULL;
00081   int* vectDisp = NULL;
00082   void* sendBuf = static_cast<void*>(&isize);
00083   //allocate received buffer
00084   if(procID() == a_dest)
00085     {
00086       vectSize = new int[nProcess];
00087       vectDisp = new int[nProcess];
00088     }
00089 
00090   int result1 = MPI_Gather(sendBuf, sendCount, MPI_INT, 
00091                            vectSize,recdCount, MPI_INT,
00092                            a_dest,  Chombo_MPI::comm);
00093 
00094   if(result1 != MPI_SUCCESS)
00095     MayDay::Error("Gather<T> failed in MPI_Gather 1");
00096 
00097   //make memory for gather, linearin
00098   void* recdBuf = NULL;
00099   if(procID() == a_dest)
00100     {
00101       int itotsize=0;
00102       for(int iproc = 0; iproc < nProcess; iproc++)
00103         {
00104           vectDisp[iproc] = itotsize;
00105           itotsize += vectSize[iproc];
00106         }
00107       recdBuf = malloc(itotsize);
00108       if(recdBuf == NULL)
00109         MayDay::Error("out of memory in gather 2");
00110     }
00111 
00112   //gather data 
00113   int result2 = MPI_Gatherv(loclBuf, isize, MPI_BYTE, 
00114                             recdBuf, vectSize, vectDisp, MPI_BYTE,
00115                             a_dest, Chombo_MPI::comm);
00116   if(result2 != MPI_SUCCESS)
00117     MayDay::Error("Gather<T> failed in MPI_Gather 2");
00118 
00119 
00120   if(procID() == a_dest)
00121     {
00122       //calculate offset into array for current processor
00123       int ioffset = 0;
00124       a_outVec.resize(nProcess);
00125       //need to cast to char* to do pointer arithmetic
00126       char* arithPtr = (char*)recdBuf;
00127       for(int iproc = 0; iproc < nProcess; iproc++)
00128         {
00129           ioffset = vectDisp[iproc];
00130           char* thisProcBuf = arithPtr + ioffset;
00131           linearIn(a_outVec[iproc], thisProcBuf);
00132         }
00133 
00134       //delete memory for dest-specific arrays
00135       delete[] vectSize;
00136       delete[] vectDisp;
00137       free(recdBuf);
00138     }
00139 
00140   //delete memory for local buffer
00141   free(loclBuf);
00142 }
00143 
00144 
00145 /*****************************/
00146 //broadcast T everywhere
00147 /*****************************/
00148 template <class T>
00149 inline void 
00150 broadcast(T& a_inAndOut,  int a_src)
00151 {
00152   assert (a_src >= 0);
00153   assert(a_src <  numProc());
00154   int isize;  
00155   if(procID() == a_src)
00156     isize = linearSize(a_inAndOut);
00157 
00158   MPI_Bcast(&isize, 1, MPI_INT, a_src, Chombo_MPI::comm);
00159 
00160   void* broadBuf = malloc(isize);
00161 
00162   if(broadBuf == NULL)
00163     MayDay::Error("out of memory in broadcast");
00164 
00165   //take inAndOut from src and put it into broadBuf
00166   if(procID() == a_src)
00167       linearOut(broadBuf, a_inAndOut);
00168 
00169   //broadcast broadBuf to all procs
00170   MPI_Bcast(broadBuf, isize, MPI_BYTE, a_src, Chombo_MPI::comm);
00171     
00172   //take broadBuf and put back into inAndOut if not src
00173   if(procID() != a_src)
00174       linearIn(a_inAndOut, broadBuf);
00175 
00176   //delete memory for buffer
00177   free(broadBuf);
00178 }
00179 
00180 #else
00181 /*****************************/
00182 //non-mpi version
00183 /*****************************/
00184 template <class T>
00185 inline void 
00186 gather(Vector<T>& a_outVec, const T& a_input, int a_dest)
00187 {
00188   a_outVec.resize(1);
00189   a_outVec[0] = a_input;
00190 }
/*****************************/
// Non-MPI version of broadcast.
//
// a_inAndOut - left exactly as it is.
// a_src      - ignored here; kept so the signature matches the MPI version.
/*****************************/
template <class T>
inline void
broadcast(T& a_inAndOut,  int a_src)
{
  //nothing to do.  in and out are the same with one proc
}
00200 
00201 #endif //the mpi thing
00202 
//*****************************************
// These should work independently of MPI.
//*****************************************
00206 
00207 //Vector<T> specialization of linearIn
00208 template <class T>
00209 void 
00210 linearListIn(Vector<T>& a_outputT, const void* const a_inBuf)
00211 {
00212   //first entry is the size of the vector
00213   const int* const intBuf = (int*)a_inBuf;
00214   int vecsize = intBuf[0];
00215   Vector<int> vecOffset(vecsize);
00216   //next vecsize entries are offsets of data into buffer
00217   for(int ivec = 0; ivec < vecsize; ivec++)
00218     {
00219       vecOffset[ivec] = intBuf[ivec+1];
00220     }
00221   //next vecsize entries are the actual data
00222   //yes I could do this in one loop but that would 
00223   // either 
00224   // a) make it less symmetric with linearOut 
00225   // and/or
00226   // b) make both of them far less readable
00227   a_outputT.resize(vecsize);
00228   const char* const charbuf = (char*)a_inBuf;
00229   for(int ivec = 0; ivec < vecsize; ivec++)
00230     {
00231       const char* const dataLoc = charbuf + vecOffset[ivec];
00232       linearIn(a_outputT[ivec], dataLoc);
00233     }
00234 }
00235 
00236 //Vector<T> specialization of linearOut
00237 template <class T>
00238 void 
00239 linearListOut(void* const a_outBuf, const Vector<T>& a_input)
00240 {
00241   //first entry is the size of the vector
00242   int* const intBuf = (int*)a_outBuf;
00243   intBuf[0] = a_input.size();
00244   int vecsize = intBuf[0];
00245   Vector<int> vecOffset(vecsize);
00246   //next vecsize entries are offsets of data into buffer
00247   //next vecsize entries are the actual data
00248   int ioffset = (vecsize+1)*sizeof(int);
00249   for(int ivec = 0; ivec < vecsize; ivec++)
00250     {
00251       intBuf[ivec+1] = ioffset; 
00252       vecOffset[ivec] = ioffset; 
00253       ioffset += linearSize(a_input[ivec]);
00254     }
00255   //yes I could do this in one loop but that would 
00256   // either 
00257   // a) make it less symmetric with linearIn
00258   // and/or
00259   // b) make both of them far less readable
00260   char* const charBuf = (char*)a_outBuf;
00261   for(int ivec = 0; ivec < vecsize; ivec++)
00262     {
00263       char* const dataLoc = charBuf + vecOffset[ivec];
00264       linearOut(dataLoc, a_input[ivec]);
00265     }
00266 }
00267 
00268 //Vector<T> specialization of linearSize
00269 template <class T>
00270 int 
00271 linearListSize(const Vector<T>& a_input)
00272 {
00273   //first entry is the size of the vector (int)
00274   //next vecsize entries are offsets of data into buffer (int)
00275   //next vecsize entries are the actual data
00276   int itotsize = (a_input.size() + 1)*sizeof(int);
00277   for(int ivec = 0; ivec < a_input.size(); ivec++)
00278     {
00279       itotsize += linearSize(a_input[ivec]);
00280     }
00281   return itotsize;
00282 }
00283 
00284 #endif