00001 #ifdef CH_LANG_CC 00002 /* 00003 * _______ __ 00004 * / ___/ / ___ __ _ / / ___ 00005 * / /__/ _ \/ _ \/ V \/ _ \/ _ \ 00006 * \___/_//_/\___/_/_/_/_.__/\___/ 00007 * Please refer to Copyright.txt, in Chombo's root directory. 00008 */ 00009 #endif 00010 00011 #ifndef _CH_TIMER_H_ 00012 #define _CH_TIMER_H_ 00013 00014 #include <cstdlib> 00015 #include <cstdio> 00016 #include <vector> 00017 #include <iostream> 00018 #include "ClockTicks.H" 00019 #include <sys/time.h> 00020 #ifdef CH_MPI 00021 #include "mpi.h" 00022 #endif 00023 00024 00025 /** TraceTimer class is a self-tracing code instrumentation system 00026 00027 TraceTimer class is a self-tracing code instrumentation system 00028 for Chombo (or any other package really). The user interface is specified 00029 by a small set of macros. The usage model is that you just leave these 00030 timers in the code, for good. Initially, your application will have 'main' 00031 and a few hewavy functions instrumented, and the lower level Chombo library 00032 instrumentation. As your tool or application matures, it 00033 will garner a larger set of instrumentation giving clear views of your 00034 code performance. After a routine has been 00035 cleverly and lovingly optimized, you leave in the timers, to spot when 00036 some later bug fix or *improvement* undoes your previous labors. 00037 00038 \note 00039 You should never need to use or interact with the the classes TraceTimer or 00040 AutoStart. Use the macros. They call the right functions and classes for you. 00041 00042 The first macro is what people will use the most: 00043 \code 00044 CH_TIME("label"); 00045 \endcode 00046 00047 This is the simplest interface for timers. you place this macro call in a function 00048 you wish to be timed. It handles making the timer, calling 'start' when you 00049 enter the function, and calling 'stop' when you leave the function. A good 00050 idea is to use a 'label' specific enough to be unambiguous without being 00051 overwhelming. for instance: 00052 00053 \code 00054 void AMRLevelPolytropicGas::define(AMRLevel* a_coarserLevelPtr, 00055 const ProblemDomain& a_problemDomain, 00056 int a_level, 00057 int a_refRatio) 00058 { 00059 CH_TIME("AMRLevelPolytropicGas::define"); 00060 . 00061 . 00062 } 00063 \endcode 00064 00065 In this case, we have a class with many constructors and define functions that 00066 all funnel into a single general function. We can just call this 'define' and 00067 not worry about naming/instrumenting all the different overloaded instances. If 00068 you slip up and use the same label twice, that is not a real problem, the two 00069 locations will be timed and tracked properly (even if one is a sibling or parent 00070 of the other). The only place it will make things a little harder is in the output 00071 where you might have the same name show up and look confusing. 00072 <br><br> 00073 In serial, you will see a file called <em>time.table</em> (in parallel, you will get a 00074 <em>time.table.n</em> (where n is the rank number) files). If 00075 you want fewer files, you can do 00076 setenv CH_OUTPUT_INTERVAL nproc 00077 and it will only output every nproc processors time.table.n files 00078 (where n%nproc == 0). I won't go into this file 00079 format. It is kind of gprof-ish, with what I consider improvements. The real 00080 benefit here is profiling that understands our Chombo context, a smaller information 00081 set to observe, and the fact that, so far in my testing, the timers have negligible 00082 impact on the run time or memory use of the code. 00083 <br><br> 00084 By default, Chombo compiles in the instructions for the timers wherever the macros 00085 appear. If the compiler macro <b>CH_NTIMER</b> is defined, then all the CH_TIME* macros 00086 evaluate to empty expressions at compile time. 00087 00088 \par So, you put some CH_TIME calls in your code and ran it, and nothing happened: 00089 Chombo looks for the environment variable <b>CH_TIMER</b>. If it is set to anything (even 00090 if it is set to 'false' or 'no' or whatever) then the timers will be active and 00091 reporting will happen. If this environment variable is not set, then all the timers 00092 check a bool and return after doing nothing. 00093 \par 00094 One point of interest with using the environment variable: In parallel jobs using 00095 mpich, only processor 0 inherits the environment variables from the shell where 00096 you invoke 'mpirun', the rest read your .cshrc (.bashrc, etc.) file to get their 00097 environment. To time all your processes, you need to make sure the <b>CH_TIMER</b> 00098 environment variable gets to all your processes. 00099 00100 00101 00102 \par Auto hierarchy: 00103 The timers automatically figure out their parent/child relationships. They 00104 also can be placed in template code. This has some consequences. First, 00105 if you have a low level function instrumented that has no timers near it in 00106 the code call stack, you will see it show up as a child of a high level timer. 00107 the root timer "main" will catch all orphaned timers. So, even though you 00108 might make no call to, say, 'exchange' in your 'main' function, you might 00109 very well call a function, that calls a function, that calls 'exchange'. Since 00110 no code in between was instrumented, this exchange is accounted for at 'main'. 00111 This might look strange, but it should prove very powerful. An expensive orphan 00112 is exactly where you should consider some more timers, or reconsidering code 00113 design. 00114 00115 \par 00116 For performance reasons, child timers have only one parent. As a consequence 00117 each CH_TIME("label") label can show up at multiple places in your output. Each 00118 instance has it's own timer. So, each path through the call graph that arrives 00119 at a low-level function has a unique lineage, with it's own counter and time. 00120 Thus, I can instrument LevelData::copyTo once, but copyTo can appear in many 00121 places in the time.table file. 00122 00123 00124 The next level up in complexity is the set of *four* macros for when you want 00125 sub-function resolution in your timers. For instance, in a really huge function 00126 that you have not figured out how to re-factor, or built with lots of bad cut n paste 00127 code 're-use'. 00128 \code 00129 CH_TIMERS("parent"); 00130 CH_TIMER("child1", t1); 00131 CH_TIMER("child2", t2); 00132 CH_START(t1); 00133 CH_STOP(t1); 00134 CH_START(t2); 00135 CH_STOP(t2); 00136 CH_START(t1); 00137 CH_STOP(t1); 00138 \endcode 00139 00140 CH_TIMERS has the same semantic as CH_TIME, except that you can declare an 00141 arbitrary number of children after it in the same function scope. The 00142 children here do not autostart and autostop, you have to tell them where to 00143 start and stop timing. The children can themselves be parents for timers 00144 in called functions, of course. The children obey a set of mutual exclusions. The 00145 following generate run time errors: 00146 - double start called 00147 - double stop called 00148 - start called when another child is also started 00149 - you leave the function with a child not stopped 00150 00151 the following will generate compile time errors: 00152 - more than one CH_TIME macro in a function 00153 - invoking CH_TIMER("child", t) without having first invoked CH_TIMERS 00154 - re-using the timer handle ie. CH_TIMER("bobby", t1); CH_TIMER("sally", t1) 00155 - mixing CH_TIME macro with CH_TIMER 00156 - mixing CH_TIME macro with CH_TIMERS 00157 00158 You do not have to put any calls in your main routine to activate the clocks 00159 or generate a report at completion, this is handled with static iniitalization 00160 and an atexit function. 00161 <br><br> 00162 There is a larger argument of manual instrumentation being counter to good development. 00163 Profiling the code is supposed to tell you where to expend your optimization effort. 00164 Manual instrumentation opens the door to people wasting time *assuming* what parts of the 00165 code are going to take up lots of time and instrumenting them, before seeing any real 00166 performance data. Good judgement is needed. We have a body of knowledge about Chombo 00167 that will inform us about a good minimal first set of functions to instrument. 00168 */ 00169 class TraceTimer 00170 { 00171 public: 00172 virtual ~TraceTimer(); 00173 void start(char* mutex); 00174 unsigned long long int stop(char* mutex); 00175 static void report(bool a_closeAfter=false); 00176 static void reset(); 00177 00178 00179 void leafStart(); 00180 void leafStop(); 00181 00182 unsigned long long int time() const 00183 { 00184 return m_accumulated_WCtime; 00185 } 00186 00187 int rank() const 00188 { 00189 return m_rank; 00190 } 00191 long long int count() const 00192 { 00193 return m_count; 00194 } 00195 00196 void prune(); 00197 bool isPruned() const {return m_pruned;} 00198 00199 static int initializer(); // don't use 00200 static TraceTimer* getTimer(const char* name); // don't use 00201 const std::vector<TraceTimer*>& children() const ;//don't use. 00202 00203 static void PruneTimersParentChildPercent(double percent); 00204 static void sampleMemUsage() ; 00205 00206 static const char* currentTimer() 00207 { 00208 return s_currentTimer[0]->m_name; 00209 } 00210 00211 private: 00212 TraceTimer(const char* a_name, TraceTimer* parent, int thread_id); 00213 static std::vector<TraceTimer*> s_roots; 00214 static std::vector<TraceTimer*> s_currentTimer; 00215 static bool s_traceMemory; 00216 static long long int s_peak; 00217 static TraceTimer* s_peakTimer; 00218 00219 bool m_pruned; 00220 TraceTimer* m_parent; 00221 std::vector<TraceTimer*> m_children; 00222 const char* m_name; 00223 long long int m_count; 00224 unsigned long long int m_accumulated_WCtime; 00225 unsigned long long int m_last_WCtime_stamp; 00226 mutable int m_rank; 00227 int m_thread_id; 00228 long long int m_memory; 00229 long long int m_last_Memory_Stamp; 00230 long long int m_peak; 00231 00232 void reportTree(FILE* out, const TraceTimer& node, int depth); 00233 const TraceTimer* activeChild() const; 00234 00235 void macroTest(); 00236 void macroTest2(); 00237 00238 void currentize() const ; 00239 int computeRank() const; 00240 static void reportFullTree(FILE* out, const TraceTimer& timer, 00241 unsigned long long int totalTime, int depth); 00242 static void reportOneTree(FILE* out, const TraceTimer& timer); 00243 static void reportMemoryOneTree(FILE* out, const TraceTimer& timer); 00244 static void subReport(FILE* out, const char* header, unsigned long long int totalTime); 00245 static void reset(TraceTimer& timer); 00246 static void PruneTimersParentChildPercent(double threshold, TraceTimer* parent); 00247 00248 }; 00249 00250 00251 class AutoStartLeaf 00252 { 00253 public: 00254 AutoStartLeaf(TraceTimer* a_timer):m_timer(a_timer) {a_timer->leafStart();} 00255 ~AutoStartLeaf(){m_timer->leafStop();} 00256 bool active(); 00257 private: 00258 TraceTimer* m_timer; 00259 }; 00260 00261 class AutoStart 00262 { 00263 public: 00264 AutoStart(TraceTimer* a_timer, char* mutex, char* childMutex) 00265 :m_mutex(mutex), m_childMutex(childMutex),m_timer(a_timer) 00266 {a_timer->start(mutex);} 00267 AutoStart(TraceTimer* a_timer, char* mutex) 00268 :m_mutex(mutex), m_childMutex(&AutoStart::ok),m_timer(a_timer) 00269 {a_timer->start(mutex);} 00270 inline ~AutoStart(); 00271 bool active(); 00272 private: 00273 char* m_mutex; 00274 char* m_childMutex; 00275 TraceTimer* m_timer; 00276 static char ok; 00277 00278 }; 00279 00280 inline 00281 AutoStart::~AutoStart() 00282 { 00283 # ifndef NDEBUG 00284 if(*m_childMutex == 1) 00285 { 00286 std::cerr<<" stop called in timer unexpectedly"; 00287 abort(); 00288 } 00289 # endif 00290 m_timer->stop(m_mutex); 00291 } 00292 00293 inline double TimerGetTimeStampWC(){ 00294 00295 // #ifdef CH_MPI 00296 // return( MPI_Wtime() ); 00297 // #endif 00298 #ifndef CH_NTIMER 00299 struct timeval tv; // Values from call to gettimeofday 00300 struct timezone tz; 00301 gettimeofday(&tv, &tz); 00302 return((double)tv.tv_sec + 0.000001 * (double)tv.tv_usec); 00303 #else 00304 return 0; 00305 #endif 00306 } 00307 00308 inline void TraceTimer::leafStart() 00309 { 00310 if(m_pruned) return; 00311 ++m_count; 00312 m_last_WCtime_stamp = ch_ticks(); 00313 } 00314 00315 inline void TraceTimer::leafStop() 00316 { 00317 if(m_pruned) return; 00318 m_accumulated_WCtime += ch_ticks() - m_last_WCtime_stamp; 00319 m_last_WCtime_stamp=0; 00320 } 00321 00322 #ifdef CH_NTIMER 00323 00324 #define CH_TIMER(name, tpointer) (void)0 00325 #define CH_TIME(name) (void)0 00326 #define CH_TIMELEAF(name) (void)0 00327 #define CH_TIMERS(name) (void)0 00328 #define CH_START(tpointer) (void)0 00329 #define CH_STOP(tpointer) (void)0 00330 #define CH_STOPV(tpointer, val) (void)0 00331 #define CH_TIMER_REPORT() (void)0 00332 #define CH_TIMER_RESET() (void)0 00333 #define CH_TIMER_PRUNE(threshold) (void)0 00334 00335 #else // CH_NTIMER 00336 00337 00338 #define CH_TIMER(name, tpointer) \ 00339 const char* TimerTag_##tpointer = name ; \ 00340 TraceTimer* tpointer = TraceTimer::getTimer(TimerTag_##tpointer) 00341 00342 #define CH_TIME(name) \ 00343 const char* TimerTagA = name ; \ 00344 char CH_TimermutexA = 0; \ 00345 TraceTimer* ch_tpointer = TraceTimer::getTimer(TimerTagA); \ 00346 AutoStart autostart(ch_tpointer, &CH_TimermutexA) 00347 00348 #define CH_TIMELEAF(name) \ 00349 const char* TimerTagA = name ; \ 00350 TraceTimer* ch_tpointer = TraceTimer::getTimer(TimerTagA); \ 00351 AutoStartLeaf autostart(ch_tpointer) 00352 00353 #define CH_TIMERS(name) \ 00354 const char* TimerTagA = name ; \ 00355 char CH_TimermutexA = 0; \ 00356 char CH_Timermutex = 0; \ 00357 TraceTimer* ch_tpointer = TraceTimer::getTimer(TimerTagA); \ 00358 AutoStart autostart(ch_tpointer, &CH_TimermutexA, &CH_Timermutex) 00359 00360 00361 #define CH_START(tpointer) tpointer->start(&CH_Timermutex) 00362 00363 #define CH_STOP(tpointer) tpointer->stop(&CH_Timermutex) 00364 #define CH_STOPV(tpointer, val ) val = tpointer->stop(&CH_Timermutex) 00365 00366 #define CH_TIMER_REPORT() TraceTimer::report() 00367 00368 #define CH_TIMER_RESET() TraceTimer::reset() 00369 00370 #define CH_TIMER_PRUNE(threshold) TraceTimer::PruneTimersParentChildPercent(threshold) 00371 00372 #endif // CH_NTIMER 00373 00374 00375 00376 #endif // CH_TIMER_H