Chombo + EB + MF  3.2
CH_Timer.H
Go to the documentation of this file.
1 #ifdef CH_LANG_CC
2 /*
3  * _______ __
4  * / ___/ / ___ __ _ / / ___
5  * / /__/ _ \/ _ \/ V \/ _ \/ _ \
6  * \___/_//_/\___/_/_/_/_.__/\___/
7  * Please refer to Copyright.txt, in Chombo's root directory.
8  */
9 #endif
10 
11 #ifndef _CH_TIMER_H_
12 #define _CH_TIMER_H_
13 
14 #include <cstdio>
15 #include "REAL.H"
16 #include "MayDay.H"
17 #include "Vector.H"
18 #include "ClockTicks.H"
19 #include "CH_Counters.H"
20 
21 
22 #ifdef CH_MPI
23 #include "mpi.h"
24 #endif
25 
26 #ifdef _OPENMP
27 #include "CH_Thread.H"
28 #endif
29 
30 
31 #include <list>
32 #include <string>
33 #include "List.H"
34 
35 #include <string>
36 #include <iostream>
37 
38 #include "BaseNamespaceHeader.H"
39 
40 using namespace std;
41 
42 #ifdef CH_NTIMER
43 
44 #define CH_TIMERS_ON() false
45 #define CH_TIMER(name, tpointer) (void)0
46 #define CH_TIME(name) (void)0
47 #define CH_TIMELEAF(name) (void)0
48 #define CH_TIMERS(name) (void)0
49 #define CH_START(tpointer) (void)0
50 #define CH_STOP(tpointer) (void)0
51 //#define CH_STOPV(tpointer, val) (void)0
52 #define CH_TIMER_REPORT() (void)0
53 #define CH_TIMER_REPORTNAME(stream, name) (void)0
54 #define CH_TIMER_RESET() (void)0
55 #define CH_TIMER_PRUNE(threshold) (void)0
56 
57 #else // CH_NTIMER
58 
59 extern std::fstream LBFILE;
60 
61 #define CH_TIMERS_ON() TraceTimer::timersOn()
62 
63 #define CH_TIMER(name, tpointer) \
64  const char* TimerTag_##tpointer = name ; \
65  CH_XD::TraceTimer* tpointer = NULL ;\
66  if(TIMERS_tid==0) \
67  { \
68  tpointer = CH_XD::TraceTimer::getTimer(TimerTag_##tpointer) ; \
69  }
70 #ifdef _OPENMP
71 #define CH_TIME(name) \
72  const char* TimerTagA = name ; \
73  char CH_TimermutexA = 0; \
74  CH_XD::TraceTimer* ch_tpointer = NULL; \
75  int tid=omp_get_thread_num(); \
76  if(tid == 0) \
77  { \
78  ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
79  } \
80  CH_XD::AutoStart autostart(ch_tpointer, &CH_TimermutexA)
81 #else
82 #define CH_TIME(name) \
83  const char* TimerTagA = name ; \
84  char CH_TimermutexA = 0; \
85  CH_XD::TraceTimer* ch_tpointer = NULL; \
86  ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
87  CH_XD::AutoStart autostart(ch_tpointer, &CH_TimermutexA)
88 #endif
89 
90 #ifdef _OPENMP
91 #define CH_TIMELEAF(name) \
92  const char* TimerTagA = name ; \
93  CH_XD::TraceTimer* ch_tpointer = NULL; \
94  int tid = omp_get_thread_num(); if(tid==0) \
95  { \
96  ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
97  } \
98  CH_XD::AutoStartLeaf autostart(ch_tpointer)
99 #else
100 #define CH_TIMELEAF(name) \
101  const char* TimerTagA = name ; \
102  CH_XD::TraceTimer* ch_tpointer = NULL; \
103  ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
104  CH_XD::AutoStartLeaf autostart(ch_tpointer)
105 #endif
106 
107 #ifdef _OPENMP
108 #define CH_TIMERS(name) \
109  const char* TimerTagA = name ; \
110  char CH_TimermutexA = 0; \
111  char CH_Timermutex = 0; \
112  CH_XD::TraceTimer* ch_tpointer = NULL; \
113  int TIMERS_tid = omp_get_thread_num(); if(TIMERS_tid==0) \
114  { \
115  ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
116  } \
117  CH_XD::AutoStart autostart(ch_tpointer, &CH_TimermutexA, &CH_Timermutex)
118 
119 
120 #define CH_START(tpointer) \
121  if(TIMERS_tid==0) \
122  { \
123  tpointer->start(&CH_Timermutex); \
124  }
125 
126 #define CH_STOP(tpointer) \
127  if(TIMERS_tid==0) \
128  { \
129  tpointer->stop(&CH_Timermutex); \
130  }
131 
132 #else
133 #define CH_TIMERS(name) \
134  const char* TimerTagA = name ; \
135  char CH_TimermutexA = 0; \
136  char CH_Timermutex = 0; \
137  CH_XD::TraceTimer* ch_tpointer = NULL; \
138  int TIMERS_tid=0; \
139  { \
140  ch_tpointer = CH_XD::TraceTimer::getTimer(TimerTagA); \
141  } \
142  CH_XD::AutoStart autostart(ch_tpointer, &CH_TimermutexA, &CH_Timermutex)
143 
144 
145 #define CH_START(tpointer) \
146  { \
147  tpointer->start(&CH_Timermutex); \
148  }
149 
150 #define CH_STOP(tpointer) \
151  { \
152  tpointer->stop(&CH_Timermutex); \
153  }
154 
155 #endif
156 //#define CH_STOPV(tpointer, val )
157 
158 #define CH_TIMER_REPORT() CH_XD::TraceTimer::report()
159 
160 #define CH_TIMER_REPORTNAME(stream, name) CH_XD::TraceTimer::reportName(stream, name);
161 #define CH_TIMER_RESET() CH_XD::TraceTimer::reset()
162 
163 #define CH_TIMER_PRUNE(threshold) CH_XD::TraceTimer::PruneTimersParentChildPercent(threshold)
164 
165  /** TraceTimer class is a self-tracing code instrumentation system
166 
167  TraceTimer class is a self-tracing code instrumentation system
168  for Chombo (or any other package really). The user interface is specified
169  by a small set of macros. The usage model is that you just leave these
170  timers in the code, for good. Initially, your application will have 'main'
171  and a few hewavy functions instrumented, and the lower level Chombo library
172  instrumentation. As your tool or application matures, it
173  will garner a larger set of instrumentation giving clear views of your
174  code performance. After a routine has been
175  cleverly and lovingly optimized, you leave in the timers, to spot when
176  some later bug fix or *improvement* undoes your previous labors.
177 
178  \note
179  You should never need to use or interact with the the classes TraceTimer or
180  AutoStart. Use the macros. They call the right functions and classes for you.
181 
182  The first macro is what people will use the most:
183 \code
184  CH_TIME("label");
185 \endcode
186 
187  This is the simplest interface for timers. you place this macro call in a function
188  you wish to be timed. It handles making the timer, calling 'start' when you
189  enter the function, and calling 'stop' when you leave the function. A good
190  idea is to use a 'label' specific enough to be unambiguous without being
191  overwhelming. for instance:
192 
193 \code
194  void AMRLevelPolytropicGas::define(AMRLevel* a_coarserLevelPtr,
195  const ProblemDomain& a_problemDomain,
196  int a_level,
197  int a_refRatio)
198  {
199  CH_TIME("AMRLevelPolytropicGas::define");
200  .
201  .
202  }
203 \endcode
204 
205  In this case, we have a class with many constructors and define functions that
206  all funnel into a single general function. We can just call this 'define' and
207  not worry about naming/instrumenting all the different overloaded instances. If
208  you slip up and use the same label twice, that is not a real problem, the two
209  locations will be timed and tracked properly (even if one is a sibling or parent
210  of the other). The only place it will make things a little harder is in the output
211  where you might have the same name show up and look confusing.
212  <br><br>
213  In serial, you will see a file called <em>time.table</em> (in parallel, you will get a
214  <em>time.table.n</em> (where n is the rank number) files). If
215  you want fewer files, you can do
216  setenv CH_OUTPUT_INTERVAL nproc
217  and it will only output every nproc processors time.table.n files
218  (where n%nproc == 0). I won't go into this file
219  format. It is kind of gprof-ish, with what I consider improvements. The real
220  benefit here is profiling that understands our Chombo context, a smaller information
221  set to observe, and the fact that, so far in my testing, the timers have negligible
222  impact on the run time or memory use of the code.
223  <br><br>
224  By default, Chombo compiles in the instructions for the timers wherever the macros
225  appear. If the compiler macro <b>CH_NTIMER</b> is defined, then all the CH_TIME* macros
226  evaluate to empty expressions at compile time.
227 
228  \par So, you put some CH_TIME calls in your code and ran it, and nothing happened:
229  Chombo looks for the environment variable <b>CH_TIMER</b>. If it is set to anything (even
230  if it is set to 'false' or 'no' or whatever) then the timers will be active and
231  reporting will happen. If this environment variable is not set, then all the timers
232  check a bool and return after doing nothing.
233  \par
234  One point of interest with using the environment variable: In parallel jobs using
235  mpich, only processor 0 inherits the environment variables from the shell where
236  you invoke 'mpirun', the rest read your .cshrc (.bashrc, etc.) file to get their
237  environment. To time all your processes, you need to make sure the <b>CH_TIMER</b>
238  environment variable gets to all your processes.
239 
240  \par Auto hierarchy:
241  The timers automatically figure out their parent/child relationships. They
242  also can be placed in template code. This has some consequences. First,
243  if you have a low level function instrumented that has no timers near it in
244  the code call stack, you will see it show up as a child of a high level timer.
245  the root timer "main" will catch all orphaned timers. So, even though you
246  might make no call to, say, 'exchange' in your 'main' function, you might
247  very well call a function, that calls a function, that calls 'exchange'. Since
248  no code in between was instrumented, this exchange is accounted for at 'main'.
249  This might look strange, but it should prove very powerful. An expensive orphan
250  is exactly where you should consider some more timers, or reconsidering code
251  design.
252 
253  \par
254  For performance reasons, child timers have only one parent. As a consequence
255  each CH_TIME("label") label can show up at multiple places in your output. Each
256  instance has it's own timer. So, each path through the call graph that arrives
257  at a low-level function has a unique lineage, with it's own counter and time.
258  Thus, I can instrument LevelData::copyTo once, but copyTo can appear in many
259  places in the time.table file.
260 
261  The next level up in complexity is the set of *four* macros for when you want
262  sub-function resolution in your timers. For instance, in a really huge function
263  that you have not figured out how to re-factor, or built with lots of bad cut n paste
264  code 're-use'.
265 \code
266  CH_TIMERS("parent");
267  CH_TIMER("child1", t1);
268  CH_TIMER("child2", t2);
269  CH_START(t1);
270  CH_STOP(t1);
271  CH_START(t2);
272  CH_STOP(t2);
273  CH_START(t1);
274  CH_STOP(t1);
275 \endcode
276 
277  CH_TIMERS has the same semantic as CH_TIME, except that you can declare an
278  arbitrary number of children after it in the same function scope. The
279  children here do not autostart and autostop, you have to tell them where to
280  start and stop timing. The children can themselves be parents for timers
281  in called functions, of course. The children obey a set of mutual exclusions. The
282  following generate run time errors:
283  - double start called
284  - double stop called
285  - start called when another child is also started
286  - you leave the function with a child not stopped
287 
288  the following will generate compile time errors:
289  - more than one CH_TIME macro in a function
290  - invoking CH_TIMER("child", t) without having first invoked CH_TIMERS
291  - re-using the timer handle ie. CH_TIMER("bobby", t1); CH_TIMER("sally", t1)
292  - mixing CH_TIME macro with CH_TIMER
293  - mixing CH_TIME macro with CH_TIMERS
294 
295  You do not have to put any calls in your main routine to activate the clocks
296  or generate a report at completion, this is handled with static iniitalization
297  and an atexit function.
298  <br><br>
299  There is a larger argument of manual instrumentation being counter to good development.
300  Profiling the code is supposed to tell you where to expend your optimization effort.
301  Manual instrumentation opens the door to people wasting time *assuming* what parts of the
302  code are going to take up lots of time and instrumenting them, before seeing any real
303  performance data. Good judgement is needed. We have a body of knowledge about Chombo
304  that will inform us about a good minimal first set of functions to instrument.
305  */
307  {
308  public:
309  virtual ~TraceTimer();
310  void start(char* mutex);
311  unsigned long long int stop(char* mutex);
312  static void report(bool a_closeAfter=false);
313  static void reportName(std::ostream& out, const char* name);
314  static void reset();
315 
316  void leafStart();
317  void leafStop();
318 
319  unsigned long long int time() const
320  {
321  return m_accumulated_WCtime;
322  }
323 
324  int rank() const
325  {
326  return m_rank;
327  }
328  long long int count() const
329  {
330  return m_count;
331  }
332 
333  void prune();
334  bool isPruned() const
335  {
336  return m_pruned;
337  }
338 
339  static int initializer(); // don't use
340  static TraceTimer* getTimer(const char* name); // don't use
341  const std::vector<TraceTimer*>& children() const ;//don't use.
342 
343  static void PruneTimersParentChildPercent(double percent);
344  static void sampleMemUsage(const char* a_name) ;
345  static void sampleMemUsage2() ;
346  static void reportPeak(FILE* out);
347 
348  static const char* currentTimer()
349  {
350  return s_currentTimer[0]->m_name;
351  }
352 
353  static bool timersOn();
354 
355  private:
356  TraceTimer(const char* a_name, TraceTimer* parent, int thread_id);
357  static std::vector<TraceTimer*> s_roots;
358  static std::vector<TraceTimer*> s_currentTimer;
359 
360  static long long int s_peak;
362 
363  bool m_pruned;
365  std::vector<TraceTimer*> m_children;
366  const char* m_name;
367  long long int m_count;
368  unsigned long long int m_accumulated_WCtime;
369  unsigned long long int m_last_WCtime_stamp;
370  mutable int m_rank;
372  static bool s_memorySampling;
373  unsigned int m_memoryMin, m_memoryMax;
374  unsigned long long int m_flopEnter, m_flops;
375 
376  static bool s_tracing;
377 
378  static bool find(std::list<const TraceTimer*>& a_trace, const TraceTimer* a_target);
379  void reportTree(FILE* out, const TraceTimer& node, int depth);
380  const TraceTimer* activeChild() const;
381 
382  void macroTest();
383  void macroTest2();
384 
385  void currentize() const ;
386  int computeRank() const;
387  static void reportFullTree(FILE* out, const TraceTimer& timer,
388  unsigned long long int totalTime, int depth, TraceTimer** stack_bottom);
389  static void reportOneTree(FILE* out, const TraceTimer& timer);
390  static void updateMemory(TraceTimer& a_timer);
391 
392  //static void reportMemoryOneTree(FILE* out, const TraceTimer& timer);
393  static void subReport(FILE* out, const char* header, unsigned long long int totalTime);
394  static void reset(TraceTimer& timer);
395  static void PruneTimersParentChildPercent(double threshold, TraceTimer* parent);
396 
397  };
398 
400  {
401  public:
402  AutoStartLeaf(TraceTimer* a_timer):m_timer(a_timer)
403  {
404  if(a_timer)a_timer->leafStart();
405  }
406 
408  {
409  if(m_timer)m_timer->leafStop();
410  }
411 
412  bool active();
413 
414  private:
416  };
417 
418  class AutoStart
419  {
420  public:
421  AutoStart(TraceTimer* a_timer, char* mutex, char* childMutex)
422  :m_mutex(mutex), m_childMutex(childMutex),m_timer(a_timer)
423  {if(a_timer)a_timer->start(mutex);}
424  AutoStart(TraceTimer* a_timer, char* mutex)
425  :m_mutex(mutex), m_childMutex(&AutoStart::ok),m_timer(a_timer)
426  {if(a_timer)a_timer->start(mutex);}
428  { if(m_timer)m_timer->stop(m_mutex); }
429 
430  bool active();
431  private:
432  char* m_mutex;
435  static char ok;
436 
437  };
438 
439 
440 
441 
443 {
444  if (m_pruned) return;
445  m_flopEnter=ch_flops();
446  ++m_count;
447  m_last_WCtime_stamp = ch_ticks();
448 }
449 
450 inline void TraceTimer::leafStop()
451 {
452  if (m_pruned) return;
453  m_accumulated_WCtime += ch_ticks() - m_last_WCtime_stamp;
454  m_flops+=ch_flops()-m_flopEnter;
455  m_last_WCtime_stamp=0;
456 }
457 // Pruning options
458 //#endif
459 #endif // CH_NTIMER
460 #include "BaseNamespaceFooter.H"
461 
462 #endif // CH_TIMER_H
static char ok
Definition: CH_Timer.H:435
std::vector< TraceTimer * > m_children
Definition: CH_Timer.H:365
char * m_childMutex
Definition: CH_Timer.H:433
unsigned int m_memoryMin
Definition: CH_Timer.H:373
bool m_pruned
Definition: CH_Timer.H:363
Definition: IntVect.H:719
unsigned long long int ch_ticks()
Definition: ClockTicks.H:64
AutoStart(TraceTimer *a_timer, char *mutex, char *childMutex)
Definition: CH_Timer.H:421
char * m_mutex
Definition: CH_Timer.H:432
Definition: CH_Timer.H:418
~AutoStart()
Definition: CH_Timer.H:427
int m_thread_id
Definition: CH_Timer.H:371
void leafStart()
Definition: CH_Timer.H:442
Definition: CH_Timer.H:399
int rank() const
Definition: CH_Timer.H:324
unsigned long long int m_flops
Definition: CH_Timer.H:374
TraceTimer * m_timer
Definition: CH_Timer.H:415
static bool s_tracing
Definition: CH_Timer.H:376
const char * name(const FArrayBox &a_dummySpecializationArg)
Definition: CH_HDF5.H:907
unsigned long long int m_accumulated_WCtime
Definition: CH_Timer.H:368
int m_rank
Definition: CH_Timer.H:370
bool isPruned() const
Definition: CH_Timer.H:334
static const char * currentTimer()
Definition: CH_Timer.H:348
static TraceTimer * s_peakTimer
Definition: CH_Timer.H:361
long long int count() const
Definition: CH_Timer.H:328
std::fstream LBFILE
static std::vector< TraceTimer * > s_roots
Definition: CH_Timer.H:357
void start(char *mutex)
AutoStart(TraceTimer *a_timer, char *mutex)
Definition: CH_Timer.H:424
TraceTimer * m_timer
Definition: CH_Timer.H:434
void leafStop()
Definition: CH_Timer.H:450
TraceTimer * m_parent
Definition: CH_Timer.H:364
Definition: CH_Timer.H:306
unsigned long long int time() const
Definition: CH_Timer.H:319
~AutoStartLeaf()
Definition: CH_Timer.H:407
long long int ch_flops
Definition: CH_Counters.H:30
AutoStartLeaf(TraceTimer *a_timer)
Definition: CH_Timer.H:402
static std::vector< TraceTimer * > s_currentTimer
Definition: CH_Timer.H:358
static bool s_memorySampling
Definition: CH_Timer.H:372
static long long int s_peak
Definition: CH_Timer.H:360
const char * m_name
Definition: CH_Timer.H:366
unsigned long long int m_last_WCtime_stamp
Definition: CH_Timer.H:369
long long int m_count
Definition: CH_Timer.H:367