Chombo + EB  3.0
CH_Timer.H
Go to the documentation of this file.
1 #ifdef CH_LANG_CC
2 /*
3  * _______ __
4  * / ___/ / ___ __ _ / / ___
5  * / /__/ _ \/ _ \/ V \/ _ \/ _ \
6  * \___/_//_/\___/_/_/_/_.__/\___/
7  * Please refer to Copyright.txt, in Chombo's root directory.
8  */
9 #endif
10 
11 #ifndef _CH_TIMER_H_
12 #define _CH_TIMER_H_
13 
14 #include <cstdio>
15 #include "REAL.H"
16 #include "MayDay.H"
17 #include "Vector.H"
18 #include "ClockTicks.H"
19 
20 #ifdef CH_MPI
21 #include "mpi.h"
22 #endif
23 
24 
25 #include <list>
26 #include <string>
27 #include "List.H"
28 
29 #include <string>
30 #include <iostream>
31 #include <sys/time.h>
32 
33 #include "BaseNamespaceHeader.H"
34 
35 using namespace std;
36 
37 
38 
39 #ifdef CH_NTIMER
40 
41 #define CH_TIMER(name, tpointer) (void)0
42 #define CH_TIME(name) (void)0
43 #define CH_TIMELEAF(name) (void)0
44 #define CH_TIMERS(name) (void)0
45 #define CH_START(tpointer) (void)0
46 #define CH_STOP(tpointer) (void)0
47 #define CH_STOPV(tpointer, val) (void)0
48 #define CH_TIMER_REPORT() (void)0
49 #define CH_TIMER_RESET() (void)0
50 #define CH_TIMER_PRUNE(threshold) (void)0
51 
52 #else // CH_NTIMER
53 
54 
55 #define CH_TIMER(name, tpointer) \
56  const char* TimerTag_##tpointer = name ; \
57  CH_XD::TraceTimer* tpointer = CH_XD::TraceTimer::getTimer(TimerTag_##tpointer)
58 
59 #define CH_TIME(name) \
60  const char* TimerTagA = name ; \
61  char CH_TimermutexA = 0; \
62  CH_XD::TraceTimer* ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
63  CH_XD::AutoStart autostart(ch_tpointer, &CH_TimermutexA)
64 
65 #define CH_TIMELEAF(name) \
66  const char* TimerTagA = name ; \
67  CH_XD::TraceTimer* ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
68  CH_XD::AutoStartLeaf autostart(ch_tpointer)
69 
70 #define CH_TIMERS(name) \
71  const char* TimerTagA = name ; \
72  char CH_TimermutexA = 0; \
73  char CH_Timermutex = 0; \
74  CH_XD::TraceTimer* ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
75  CH_XD::AutoStart autostart(ch_tpointer, &CH_TimermutexA, &CH_Timermutex)
76 
77 
78 #define CH_START(tpointer) tpointer->start(&CH_Timermutex)
79 
80 #define CH_STOP(tpointer) tpointer->stop(&CH_Timermutex)
81 #define CH_STOPV(tpointer, val ) val = tpointer->stop(&CH_Timermutex)
82 
83 #define CH_TIMER_REPORT() CH_XD::TraceTimer::report()
84 
85 #define CH_TIMER_RESET() CH_XD::TraceTimer::reset()
86 
87 #define CH_TIMER_PRUNE(threshold) CH_XD::TraceTimer::PruneTimersParentChildPercent(threshold)
88 
89 
90 
91 
92  /** TraceTimer class is a self-tracing code instrumentation system
93 
94  TraceTimer class is a self-tracing code instrumentation system
95  for Chombo (or any other package really). The user interface is specified
96  by a small set of macros. The usage model is that you just leave these
97  timers in the code, for good. Initially, your application will have 'main'
98  and a few hewavy functions instrumented, and the lower level Chombo library
99  instrumentation. As your tool or application matures, it
100  will garner a larger set of instrumentation giving clear views of your
101  code performance. After a routine has been
102  cleverly and lovingly optimized, you leave in the timers, to spot when
103  some later bug fix or *improvement* undoes your previous labors.
104 
105  \note
106  You should never need to use or interact with the the classes TraceTimer or
107  AutoStart. Use the macros. They call the right functions and classes for you.
108 
109  The first macro is what people will use the most:
110 \code
111  CH_TIME("label");
112 \endcode
113 
114  This is the simplest interface for timers. you place this macro call in a function
115  you wish to be timed. It handles making the timer, calling 'start' when you
116  enter the function, and calling 'stop' when you leave the function. A good
117  idea is to use a 'label' specific enough to be unambiguous without being
118  overwhelming. for instance:
119 
120 \code
121  void AMRLevelPolytropicGas::define(AMRLevel* a_coarserLevelPtr,
122  const ProblemDomain& a_problemDomain,
123  int a_level,
124  int a_refRatio)
125  {
126  CH_TIME("AMRLevelPolytropicGas::define");
127  .
128  .
129  }
130 \endcode
131 
132  In this case, we have a class with many constructors and define functions that
133  all funnel into a single general function. We can just call this 'define' and
134  not worry about naming/instrumenting all the different overloaded instances. If
135  you slip up and use the same label twice, that is not a real problem, the two
136  locations will be timed and tracked properly (even if one is a sibling or parent
137  of the other). The only place it will make things a little harder is in the output
138  where you might have the same name show up and look confusing.
139  <br><br>
140  In serial, you will see a file called <em>time.table</em> (in parallel, you will get a
141  <em>time.table.n</em> (where n is the rank number) files). If
142  you want fewer files, you can do
143  setenv CH_OUTPUT_INTERVAL nproc
144  and it will only output every nproc processors time.table.n files
145  (where n%nproc == 0). I won't go into this file
146  format. It is kind of gprof-ish, with what I consider improvements. The real
147  benefit here is profiling that understands our Chombo context, a smaller information
148  set to observe, and the fact that, so far in my testing, the timers have negligible
149  impact on the run time or memory use of the code.
150  <br><br>
151  By default, Chombo compiles in the instructions for the timers wherever the macros
152  appear. If the compiler macro <b>CH_NTIMER</b> is defined, then all the CH_TIME* macros
153  evaluate to empty expressions at compile time.
154 
155  \par So, you put some CH_TIME calls in your code and ran it, and nothing happened:
156  Chombo looks for the environment variable <b>CH_TIMER</b>. If it is set to anything (even
157  if it is set to 'false' or 'no' or whatever) then the timers will be active and
158  reporting will happen. If this environment variable is not set, then all the timers
159  check a bool and return after doing nothing.
160  \par
161  One point of interest with using the environment variable: In parallel jobs using
162  mpich, only processor 0 inherits the environment variables from the shell where
163  you invoke 'mpirun', the rest read your .cshrc (.bashrc, etc.) file to get their
164  environment. To time all your processes, you need to make sure the <b>CH_TIMER</b>
165  environment variable gets to all your processes.
166 
167 
168 
169  \par Auto hierarchy:
170  The timers automatically figure out their parent/child relationships. They
171  also can be placed in template code. This has some consequences. First,
172  if you have a low level function instrumented that has no timers near it in
173  the code call stack, you will see it show up as a child of a high level timer.
174  the root timer "main" will catch all orphaned timers. So, even though you
175  might make no call to, say, 'exchange' in your 'main' function, you might
176  very well call a function, that calls a function, that calls 'exchange'. Since
177  no code in between was instrumented, this exchange is accounted for at 'main'.
178  This might look strange, but it should prove very powerful. An expensive orphan
179  is exactly where you should consider some more timers, or reconsidering code
180  design.
181 
182  \par
183  For performance reasons, child timers have only one parent. As a consequence
184  each CH_TIME("label") label can show up at multiple places in your output. Each
185  instance has it's own timer. So, each path through the call graph that arrives
186  at a low-level function has a unique lineage, with it's own counter and time.
187  Thus, I can instrument LevelData::copyTo once, but copyTo can appear in many
188  places in the time.table file.
189 
190 
191  The next level up in complexity is the set of *four* macros for when you want
192  sub-function resolution in your timers. For instance, in a really huge function
193  that you have not figured out how to re-factor, or built with lots of bad cut n paste
194  code 're-use'.
195 \code
196  CH_TIMERS("parent");
197  CH_TIMER("child1", t1);
198  CH_TIMER("child2", t2);
199  CH_START(t1);
200  CH_STOP(t1);
201  CH_START(t2);
202  CH_STOP(t2);
203  CH_START(t1);
204  CH_STOP(t1);
205 \endcode
206 
207  CH_TIMERS has the same semantic as CH_TIME, except that you can declare an
208  arbitrary number of children after it in the same function scope. The
209  children here do not autostart and autostop, you have to tell them where to
210  start and stop timing. The children can themselves be parents for timers
211  in called functions, of course. The children obey a set of mutual exclusions. The
212  following generate run time errors:
213  - double start called
214  - double stop called
215  - start called when another child is also started
216  - you leave the function with a child not stopped
217 
218  the following will generate compile time errors:
219  - more than one CH_TIME macro in a function
220  - invoking CH_TIMER("child", t) without having first invoked CH_TIMERS
221  - re-using the timer handle ie. CH_TIMER("bobby", t1); CH_TIMER("sally", t1)
222  - mixing CH_TIME macro with CH_TIMER
223  - mixing CH_TIME macro with CH_TIMERS
224 
225  You do not have to put any calls in your main routine to activate the clocks
226  or generate a report at completion, this is handled with static iniitalization
227  and an atexit function.
228  <br><br>
229  There is a larger argument of manual instrumentation being counter to good development.
230  Profiling the code is supposed to tell you where to expend your optimization effort.
231  Manual instrumentation opens the door to people wasting time *assuming* what parts of the
232  code are going to take up lots of time and instrumenting them, before seeing any real
233  performance data. Good judgement is needed. We have a body of knowledge about Chombo
234  that will inform us about a good minimal first set of functions to instrument.
235  */
237  {
238  public:
239  virtual ~TraceTimer();
240  void start(char* mutex);
241  unsigned long long int stop(char* mutex);
242  static void report(bool a_closeAfter=false);
243  static void reset();
244 
245 
246  void leafStart();
247  void leafStop();
248 
249  unsigned long long int time() const
250  {
251  return m_accumulated_WCtime;
252  }
253 
254  int rank() const
255  {
256  return m_rank;
257  }
258  long long int count() const
259  {
260  return m_count;
261  }
262 
263  void prune();
264  bool isPruned() const
265  {
266  return m_pruned;
267  }
268 
269  static int initializer(); // don't use
270  static TraceTimer* getTimer(const char* name); // don't use
271  const std::vector<TraceTimer*>& children() const ;//don't use.
272 
273  static void PruneTimersParentChildPercent(double percent);
274  static void sampleMemUsage() ;
275 
276  static const char* currentTimer()
277  {
278  return s_currentTimer[0]->m_name;
279  }
280 
281  private:
282  TraceTimer(const char* a_name, TraceTimer* parent, int thread_id);
283  static std::vector<TraceTimer*> s_roots;
284  static std::vector<TraceTimer*> s_currentTimer;
285  static bool s_traceMemory;
286  static long long int s_peak;
288 
289  bool m_pruned;
291  std::vector<TraceTimer*> m_children;
292  const char* m_name;
293  long long int m_count;
294  unsigned long long int m_accumulated_WCtime;
295  unsigned long long int m_last_WCtime_stamp;
296  mutable int m_rank;
298  long long int m_memory;
299  long long int m_last_Memory_Stamp;
300  long long int m_peak;
301 
302  void reportTree(FILE* out, const TraceTimer& node, int depth);
303  const TraceTimer* activeChild() const;
304 
305  void macroTest();
306  void macroTest2();
307 
308  void currentize() const ;
309  int computeRank() const;
310  static void reportFullTree(FILE* out, const TraceTimer& timer,
311  unsigned long long int totalTime, int depth);
312  static void reportOneTree(FILE* out, const TraceTimer& timer);
313  static void reportMemoryOneTree(FILE* out, const TraceTimer& timer);
314  static void subReport(FILE* out, const char* header, unsigned long long int totalTime);
315  static void reset(TraceTimer& timer);
316  static void PruneTimersParentChildPercent(double threshold, TraceTimer* parent);
317 
318  };
319 
320 
322  {
323  public:
324  AutoStartLeaf(TraceTimer* a_timer):m_timer(a_timer)
325  {
326  a_timer->leafStart();
327  }
328 
330  {
331  m_timer->leafStop();
332  }
333 
334  bool active();
335 
336  private:
338  };
339 
340  class AutoStart
341  {
342  public:
343  AutoStart(TraceTimer* a_timer, char* mutex, char* childMutex)
344  :m_mutex(mutex), m_childMutex(childMutex),m_timer(a_timer)
345  {a_timer->start(mutex);}
346  AutoStart(TraceTimer* a_timer, char* mutex)
347  :m_mutex(mutex), m_childMutex(&AutoStart::ok),m_timer(a_timer)
348  {a_timer->start(mutex);}
349  inline ~AutoStart();
350  bool active();
351  private:
352  char* m_mutex;
355  static char ok;
356 
357  };
358 
359  inline
361  {
362 # ifndef NDEBUG
363  if (*m_childMutex == 1)
364  MayDay::Error("Child timer still running on function exit");
365 # endif
366  m_timer->stop(m_mutex);
367  }
368 
369  inline double TimerGetTimeStampWC()
370  {
371  // #ifdef CH_MPI
372  // return( MPI_Wtime() );
373  // #endif
374 #ifndef CH_NTIMER
375  struct timeval tv; // Values from call to gettimeofday
376  struct timezone tz;
377  gettimeofday(&tv, &tz);
378  return((double)tv.tv_sec + 0.000001 * (double)tv.tv_usec);
379 #else
380  return 0;
381 #endif
382  }
383 
385 {
386  if (m_pruned) return;
387  ++m_count;
388  m_last_WCtime_stamp = ch_ticks();
389 }
390 
391 inline void TraceTimer::leafStop()
392 {
393  if (m_pruned) return;
394  m_accumulated_WCtime += ch_ticks() - m_last_WCtime_stamp;
395  m_last_WCtime_stamp=0;
396 }
397 // Pruning options
398 
399 
400 #endif // CH_NTIMER
401 #include "BaseNamespaceFooter.H"
402 
403 #endif // CH_TIMER_H
static char ok
Definition: CH_Timer.H:355
std::vector< TraceTimer * > m_children
Definition: CH_Timer.H:291
char * m_childMutex
Definition: CH_Timer.H:353
long long int m_peak
Definition: CH_Timer.H:300
unsigned long long int time() const
Definition: CH_Timer.H:249
bool m_pruned
Definition: CH_Timer.H:289
static bool s_traceMemory
Definition: CH_Timer.H:285
AutoStart(TraceTimer *a_timer, char *mutex, char *childMutex)
Definition: CH_Timer.H:343
double TimerGetTimeStampWC()
Definition: CH_Timer.H:369
char * m_mutex
Definition: CH_Timer.H:352
Definition: CH_Timer.H:340
~AutoStart()
Definition: CH_Timer.H:360
int m_thread_id
Definition: CH_Timer.H:297
void leafStart()
Definition: CH_Timer.H:384
int rank() const
Definition: CH_Timer.H:254
Definition: CH_Timer.H:321
TraceTimer * m_timer
Definition: CH_Timer.H:337
const char * name(const FArrayBox &a_dummySpecializationArg)
Definition: CH_HDF5.H:741
unsigned long long int m_accumulated_WCtime
Definition: CH_Timer.H:294
int m_rank
Definition: CH_Timer.H:296
static const char * currentTimer()
Definition: CH_Timer.H:276
static TraceTimer * s_peakTimer
Definition: CH_Timer.H:287
static std::vector< TraceTimer * > s_roots
Definition: CH_Timer.H:283
static void Error(const char *const a_msg=m_nullString, int m_exitCode=CH_DEFAULT_ERROR_CODE)
Print out message to cerr and exit with the specified exit code.
long long int m_memory
Definition: CH_Timer.H:298
long long int m_last_Memory_Stamp
Definition: CH_Timer.H:299
void start(char *mutex)
AutoStart(TraceTimer *a_timer, char *mutex)
Definition: CH_Timer.H:346
bool isPruned() const
Definition: CH_Timer.H:264
TraceTimer * m_timer
Definition: CH_Timer.H:354
void leafStop()
Definition: CH_Timer.H:391
TraceTimer * m_parent
Definition: CH_Timer.H:290
Definition: CH_Timer.H:236
~AutoStartLeaf()
Definition: CH_Timer.H:329
AutoStartLeaf(TraceTimer *a_timer)
Definition: CH_Timer.H:324
static std::vector< TraceTimer * > s_currentTimer
Definition: CH_Timer.H:284
static long long int s_peak
Definition: CH_Timer.H:286
const char * m_name
Definition: CH_Timer.H:292
unsigned long long int m_last_WCtime_stamp
Definition: CH_Timer.H:295
long long int m_count
Definition: CH_Timer.H:293
long long int count() const
Definition: CH_Timer.H:258