Data Structures
class	AutoStart

class	AutoStartLeaf

class	Box
	An interval in DIM dimensional space. More...

class	BoxData
	Multidimensional Rectangular Array. More...

struct	boxdataIndexer

class	BoxIterator
	Iterator for Boxes. More...

class	BufferEntry

class	CInterval
	Component-Space Interval. More...

class	Copier
	Abstract Generic Parallel Copier. More...

class	CopierIterator

class	DataIndex

class	DataIterator

class	DBLInternals

class	describeInfo

class	DisjointBoxLayout
	Disjoint Box Layout. More...

struct	emptyIndexer

struct	getMemType

struct	getMemType< BoxData< T, C, MEMTYPE, D, E > >

struct	getMemType< Var< T, C, MEMTYPE, D, E > >

class	HDF5Handler

struct	indexer

struct	indexer_i

struct	indexer_p

class	InterpStencil
	Interpolation Stencil. More...

class	LazyStencil
	An Unevaluated Stencil Operation. More...

class	LevelBoxData
	Level Box Data. More...

class	LevelCopier
	Level Copier. More...

class	LevelCopierOp
	Level Copier Operator. More...

class	LevelExchangeCopier
	Exchange Copier. More...

class	MayDay
	Error-reporting Functions. More...

class	memInfo

class	MotionItem

class	MotionItemSorter

class	NeighborIterator

struct	opKernel

class	Point
	Integer Valued Vector. More...

class	ProblemDomain
	Represents a rectangular domain over which a problem can be defined, including periodic images. More...

class	Reduction

class	RK4
	Generic Explicit RK4 Algorithm. More...

struct	scalarIndexer

class	Shift
	Stencil Shift. More...

class	Side
	Encapsulation of high or low side. More...

class	SideIterator
	Iterator for low and high side. More...

struct	sIndexer3

class	Stencil
	A Linear Stencil Operation. More...

struct	structIndexer

class	traceInfo

class	TraceTimer

class	Var
	Pointwise Variable. More...

Functions
std::ostream &	operator<< (std::ostream &a_os, const Box &a_box)
	OStream Operator.

std::ostream &	operator<< (std::ostream &a_os, const CInterval &a_int)
	CInterval IOStream Operator.

template<typename T , unsigned int C, MemType MEMTYPE, unsigned char D, unsigned char E>
BoxData< T, 1, MEMTYPE >	slice (const BoxData< T, C, MEMTYPE, D, E > &a_src, unsigned int a_c, unsigned int a_d, unsigned int a_e)
	Slice Arbitrary Component (Non-Const) More...

std::ostream &	operator<< (std::ostream &os, const DisjointBoxLayout &a_dbl)
	Stream output for DBL.

std::ostream &	pout ()
	Use this in place of std::cout for program output. More...

void	setPoutBaseName (const std::string &a_Name)
	Changes the base part of the filename for pout() files. More...

const std::string &	poutFileName ()
	Accesses the filename for the local pout() file. More...

template<unsigned int P>
int	ipow (int M)
	Template Based Integer Exponentiation.

std::ostream &	operator<< (std::ostream &os, const ProblemDomain &a_pd)
	Stream output for ProblemDomain.

int	procID ()
	local process ID More...

unsigned int	numProc ()
	number of parallel processes More...

void	barrier ()
	all ranks wait here to sync-up More...

double	fineCoef (Point p, Point s, double h, int refRatio)

const char *	WriteBoxData (BoxData< double > &a_array, double dx=1.0)
	Write Scalar BoxData. More...

void	WriteBoxData (const char *a_filename, BoxData< double > &a_array, double dx=1.0)
	Write Scalar BoxData with Name. More...

template<class T , unsigned int C, MemType MEMTYPE, unsigned char D, unsigned char E>
void	WriteBoxData (const char *a_filename, BoxData< T, C, MEMTYPE, D, E > &a_array, const char *const a_varnames[C *D *E], const double *a_corner, const double &a_h)
	Write Non-Scalar BoxData with Name. More...

template<class T , unsigned int C, MemType MEMTYPE, unsigned char D, unsigned char E>
void	WriteBoxData (FILE *a_fp, BoxData< T, C, MEMTYPE, D, E > &a_array, const char *const a_varnames[C *D *E], const double *a_corner, const double &a_h)
	Write Non-Scalar BoxData to Existing File. More...

Alias and Slice Operators
The alias and slice operations facilitate BoxData operations while avoiding unnecessary copies. See the sample code below for an explanation of the syntax. Example `alias` usage: Box srcBox = Box::Cube(4); BoxData<double, 1, 2, 3> Src(srcBox); Src.setVal(17); // Alias is identical to Src and points to the same data. Changing alias will change Src. auto Alias = alias(Src); // shiftedAlias points to the same buffer as Src, but the domain is shifted by (1,...,1); // (e.g. shiftedAlias[Point::Ones()] == Src[Point::Zeros()] will return true.) auto shiftedAlias = alias(Src, Point::Ones()); //shiftedAlias points to the same data, but the associated domain is shifted by (1,...,1). Example `slice` usage: Box srcBox = Box::Cube(4); BoxData<double, 1, 2, 3> Src(srcBox); Src.setVal(17); // Create an alias to the {1,1,0} component of Src // Slice and Src are sharing data. slice[srcBox.low(),0,0,0] == Src[srcBox.low(),1,1,0] returns true; auto Slice = slice(Src, 0, 1);
template<class T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT>
BoxData< T, C, MEMTYPE, D, E >	alias (BoxData< T, C, MEMTYPE, D, E > &a_original, const Point &shift=Point::Zeros())
	Alias (Non-Const) More...

template<class T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT>
const BoxData< T, C, MEMTYPE, D, E >	alias (const BoxData< T, C, MEMTYPE, D, E > &a_original, const Point &shift=Point::Zeros())
	Alias (Const) More...

template<typename T , unsigned int C, MemType MEMTYPE = MEMTYPE_DEFAULT, unsigned char D = 1, unsigned char E = 1>
BoxData< T, 1, MEMTYPE, 1, 1 >	slice (const BoxData< T, C, MEMTYPE, D, E > &a_src, unsigned int a_c, unsigned int a_d=0, unsigned int a_e=0)
	Slice Arbitrary Component (Non-Const) More...

template<typename T , unsigned int C, unsigned char CC, MemType MEMTYPE = MEMTYPE_DEFAULT>
BoxData< T, CC, MEMTYPE, 1, 1 >	slice (const BoxData< T, C, MEMTYPE, 1, 1 > &a_src, unsigned int a_nstart)
	Slice Arbitrary Component Range (Non-Const) More...

Pointwise Operators
The suite of forall functions facilitate writing functions that operate pointwise on BoxData. To this end, the user must write a function with one of the following structures: PROTO_KERNEL_START void F_temp(Var<T,C,MEMTYPE,D,E>&, Args...) { ... } PROTO_KERNEL_END(F_temp, F) // OR PROTO_KERNEL_START void F_p_temp(Point&, Var<T,C,MEMTYPE,D,E>&, Args...) { ... } PROTO_KERNEL_END(F_p_temp, F_p) PROTO_KERNEL_START and PROTO_KERNEL_END are required for cross-platform (e.g. GPU compatible) code. The "#_temp" symbols are temporaries; the actual function symbol is the one without "_temp" The template arguments of the first Var argument must match the output BoxData The Point argument in the second signature corresponds to the Point of function application Args... may include any number of Var& or read-only scalars. The elements of Args... may have arbitrary tensor structure and const-ness non-const objects in Args... have input-output semantics The order and template arguments of the Vars in Args... must match the BoxData inputs of forall If F is a member function of a class F MUST BE DECLARED STATIC F or F_p may be an anonymous (lambda) function defined using the PROTO_LAMBDA macro Refer to the following code snippet for some sample valid forall input functions: // Valid function inputs to forall may be STATIC members of classes: namespace Operator { // Pointwise function with no point dependence PROTO_KERNEL_START // necessary for use with GPU devices static void foo_temp(Var<double, 3, 2>& arg_0, double arg_1, // plain-old-data can be passed by value Var<bool>& arg_2) // any number of Var objects with different types / structures can be passed by reference { // if arg_2 == true at this Point... 
if (arg_2(0)) { arg_0(1,1) = arg_1; // Access the (1,1,0) component at each point and set it to arg_1 } else { arg_0(1,1) = -arg_1; // Access the (1,1,0) component at each point and set it to -arg_1 } } PROTO_KERNEL_END(foo_temp, foo) // Pointwise function with point dependence PROTO_KERNEL_START static void foo_p_temp(Point& a_p, // If the function depends on the point of evaluation, the Point must be the first argument Var<double, 3, 2>& arg_0, Var<bool>& arg_1) { if (arg_1(0)) { for (int ii = 0; ii < DIM; ii++) { arg_0(1,1) += a_p[ii]; // Set the (1,1,0) component of arg_0 equal to the sum of the components of this Point } } } PROTO_KERNEL_END(foo_p_temp, foo_p) } // globally defined functions are also valid: PROTO_KERNEL_START void bar_temp(Var<double>& arg_0, int arg_1) { arg_0(0) = arg_1; } PROTO_KERNEL_END(bar_temp, bar) // globally defined functions are also valid: PROTO_KERNEL_START void bar_p_temp(Point& a_p, Var<double>& arg_0, int arg_1) { arg_0(0) = a_p[0]*arg_1; } PROTO_KERNEL_END(bar_p_temp, bar_p)
template<typename T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT, typename Func , typename... Srcs>
BoxData< T, C, MEMTYPE, D, E >	forall (const Func &a_F, Srcs &&... a_srcs)
	Pointwise Operator. More...

template<typename T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT, typename Func , typename... Srcs>
BoxData< T, C, MEMTYPE, D, E >	forallOp (unsigned long long int a_num_flops_point, const char *a_timername, const Func &a_F, Srcs &&... a_srcs)
	same idea, but with flop counts and a timer name

template<typename T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT, typename Func , typename... Srcs>
BoxData< T, C, MEMTYPE, D, E >	forall (const Func &a_F, Box a_box, Srcs &&... a_srcs)
	Pointwise Operator: Overload with Box Argument. More...

template<typename T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT, typename Func , typename... Srcs>
BoxData< T, C, MEMTYPE, D, E >	forallOp (unsigned long long int a_num_flops_point, const char *a_timername, const Func &a_F, Box a_box, Srcs &&... a_srcs)
	same idea, but with flop counts and a timer name

template<typename T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT, typename Func , typename... Srcs>
BoxData< T, C, MEMTYPE, D, E >	forall_p (const Func &a_F, Srcs &&... a_srcs)
	Pointwise Operator with Point Dependence. More...

template<typename T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT, typename Func , typename... Srcs>
BoxData< T, C, MEMTYPE, D, E >	forallOp_p (unsigned long long int a_num_flops_point, const char *a_timername, const Func &a_F, Srcs &&... a_srcs)
	same idea, but with flop counts and a timer name

template<typename T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT, typename Func , typename... Srcs>
BoxData< T, C, MEMTYPE, D, E >	forall_p (const Func &a_F, Box a_box, Srcs &&... a_srcs)
	Pointwise Operator with Point Dependence: Overload with const Box Argument. More...

template<typename T , unsigned int C = 1, unsigned char D = 1, unsigned char E = 1, MemType MEMTYPE = MEMTYPE_DEFAULT, typename Func , typename... Srcs>
BoxData< T, C, MEMTYPE, D, E >	forallOp_p (unsigned long long int a_num_flops_point, const char *a_timername, const Func &a_F, Box a_box, Srcs &&... a_srcs)
	same idea, but with flop counts and a timer name

template<typename Func , typename... Srcs>
void	forallInPlace (const Func &a_F, Srcs &&... a_srcs)
	In-Place Pointwise Operator. More...

template<typename Func , typename... Srcs>
void	forallInPlaceOp (unsigned long long int a_num_flops_point, const char *a_timername, const Func &a_F, Srcs &&... a_srcs)
	same idea, but with flop counts and a timer name

template<typename Func , typename... Srcs>
void	forallInPlace (const Func &a_F, Box a_box, Srcs &&... a_srcs)
	In-Place Pointwise Operator on Prescribed Box. More...

template<typename Func , typename... Srcs>
void	forallInPlaceOp (unsigned long long int a_num_flops_point, const char *a_timername, const Func &a_F, Box a_box, Srcs &&... a_srcs)
	same idea, but with flop counts and a timer name

template<typename Func , typename... Srcs>
void	forallInPlace_p (const Func &a_F, Srcs &&... a_srcs)
	In-Place Pointwise Operator with Point Dependence. More...

template<typename Func , typename... Srcs>
void	forallInPlaceOp_p (unsigned long long int a_num_flops_point, const char *a_timername, const Func &a_F, Srcs &&... a_srcs)
	same idea, but with flop counts and a timer name

template<typename Func , typename... Srcs>
void	forallInPlace_p (const Func &a_F, Box a_box, Srcs &&... a_srcs)
	In-Place Pointwise Operator with Point Dependence and Prescribed Box. More...

template<typename Func , typename... Srcs>
void	forallInPlaceOp_p (unsigned long long int a_num_flops_point, const char *a_timername, const Func &a_F, Box a_box, Srcs &&... a_srcs)
	same idea, but with flop counts and a timer name

External Operators
std::ostream &	operator<< (std::ostream &a_os, const Point &a_pt)
	Stream Operator.

Point	operator* (int a_scale, Point a_pt)
	Premultiplication by scalar.

Point	operator- (Point a_pt)
	Unary Negation.

Non-Member Functions
template<typename T >
Stencil< T >	operator* (T a_coef, Shift a_shift)
	Coefficient Shift Product "Constructor". More...

template<typename T >
Stencil< T >	operator* (T a_coef, const Stencil< T > a_stencil)
	Scalar Multiplication of Stencil Coefficients. More...

template<typename T , unsigned int C, MemType MEMTYPE, unsigned char D, unsigned char E>
BoxData< T, C, MEMTYPE, D, E > &	operator|= (BoxData< T, C, MEMTYPE, D, E > &a_dest, LazyStencil< T, C, MEMTYPE, D, E > &&a_op)
	Application by Replacement. More...

template<class T , unsigned int C, MemType MEMTYPE, unsigned char D, unsigned char E>
BoxData< T, C, MEMTYPE, D, E > &	operator+= (BoxData< T, C, MEMTYPE, D, E > &a_dest, LazyStencil< T, C, MEMTYPE, D, E > &&a_op)
	Application by Increment. More...

Detailed Description

TraceTimer class is a self-tracing code instrumentation system

TraceTimer class is a self-tracing code instrumentation system for Chombo (or any other package really). The user interface is specified by a small set of macros. The usage model is that you just leave these timers in the code, for good. Initially, your application will have 'main' and a few heavy functions instrumented, and the lower level Chombo library instrumentation. As your tool or application matures, it will garner a larger set of instrumentation giving clear views of your code performance. After a routine has been cleverly and lovingly optimized, you leave in the timers, to spot when some later bug fix or improvement undoes your previous labors.

Note: You should never need to use or interact with the classes TraceTimer or AutoStart. Use the macros. They call the right functions and classes for you.

The first macro is what people will use the most:

PR_TIME("label");

This is the simplest interface for timers. You place this macro call in a function you wish to be timed. It handles making the timer, calling 'start' when you enter the function, and calling 'stop' when you leave the function. A good idea is to use a 'label' specific enough to be unambiguous without being overwhelming. For instance:

void AMRLevelPolytropicGas::define(AMRLevel*            a_coarserLevelPtr,
const ProblemDomain& a_problemDomain,
int                  a_level,
int                  a_refRatio)
{
PR_TIME("AMRLevelPolytropicGas::define");
.
.
}

In this case, we have a class with many constructors and define functions that all funnel into a single general function. We can just call this 'define' and not worry about naming/instrumenting all the different overloaded instances. If you slip up and use the same label twice, that is not a real problem, the two locations will be timed and tracked properly (even if one is a sibling or parent of the other). The only place it will make things a little harder is in the output where you might have the same name show up and look confusing.

In serial, you will see a file called time.table (in parallel, you will get time.table.n files, where n is the rank number). If you want fewer files, you can do setenv PR_OUTPUT_INTERVAL nproc and it will only output the time.table.n files for every nproc-th processor (where n % nproc == 0). I won't go into this file format. It is kind of gprof-ish, with what I consider improvements. The real benefit here is profiling that understands our Chombo context, a smaller information set to observe, and the fact that, so far in my testing, the timers have negligible impact on the run time or memory use of the code.

By default, Chombo compiles in the instructions for the timers wherever the macros appear. If the compiler macro PR_NTIMER is defined, then all the PR_TIME* macros evaluate to empty expressions at compile time.

So, you put some PR_TIME calls in your code and ran it, and nothing happened: Chombo looks for the environment variable PR_TIMER. If it is set to anything (even if it is set to 'false' or 'no' or whatever) then the timers will be active and reporting will happen. If this environment variable is not set, then all the timers check a bool and return after doing nothing.

One point of interest with using the environment variable: In parallel jobs using mpich, only processor 0 inherits the environment variables from the shell where you invoke 'mpirun'; the rest read your .cshrc (.bashrc, etc.) file to get their environment. To time all your processes, you need to make sure the PR_TIMER environment variable gets to all your processes.

Auto hierarchy: The timers automatically figure out their parent/child relationships. They also can be placed in template code. This has some consequences. First, if you have a low level function instrumented that has no timers near it in the code call stack, you will see it show up as a child of a high level timer. The root timer "main" will catch all orphaned timers. So, even though you might make no call to, say, 'exchange' in your 'main' function, you might very well call a function, that calls a function, that calls 'exchange'. Since no code in between was instrumented, this exchange is accounted for at 'main'. This might look strange, but it should prove very powerful. An expensive orphan is exactly where you should consider some more timers, or reconsidering code design.

For performance reasons, child timers have only one parent. As a consequence each PR_TIME("label") label can show up at multiple places in your output. Each instance has its own timer. So, each path through the call graph that arrives at a low-level function has a unique lineage, with its own counter and time. Thus, I can instrument LevelData::copyTo once, but copyTo can appear in many places in the time.table file.

The next level up in complexity is the set of four macros for when you want sub-function resolution in your timers. For instance, in a really huge function that you have not figured out how to re-factor, or built with lots of bad cut n paste code 're-use'.

PR_TIMERS("parent");
PR_TIMER("child1", t1);
PR_TIMER("child2", t2);
PR_START(t1);
PR_STOP(t1);
PR_START(t2);
PR_STOP(t2);
PR_START(t1);
PR_STOP(t1);

PR_TIMERS has the same semantics as PR_TIME, except that you can declare an arbitrary number of children after it in the same function scope. The children here do not autostart and autostop; you have to tell them where to start and stop timing. The children can themselves be parents for timers in called functions, of course. The children obey a set of mutual exclusions. The following generate run time errors:

double start called
double stop called
start called when another child is also started
you leave the function with a child not stopped

the following will generate compile time errors:

more than one PR_TIME macro in a function
invoking PR_TIMER("child", t) without having first invoked PR_TIMERS
re-using the timer handle, i.e. PR_TIMER("bobby", t1); PR_TIMER("sally", t1)
mixing PR_TIME macro with PR_TIMER
mixing PR_TIME macro with PR_TIMERS

You do not have to put any calls in your main routine to activate the clocks or generate a report at completion; this is handled with static initialization and an atexit function.

There is a larger argument of manual instrumentation being counter to good development. Profiling the code is supposed to tell you where to expend your optimization effort. Manual instrumentation opens the door to people wasting time assuming what parts of the code are going to take up lots of time and instrumenting them, before seeing any real performance data. Good judgement is needed. We have a body of knowledge about Chombo that will inform us about a good minimal first set of functions to instrument.

Function Documentation

◆ barrier()

void Proto::barrier ( )

inline

all ranks wait here to sync-up

All MPI ranks wait here to sync-up. Calls MPI_Barrier(comm). This is a no-op in the non-MPI/serial case.

◆ numProc()

unsigned int Proto::numProc ( )

inline

number of parallel processes

Returns the number of parallel processes running. Always returns at least 1.

◆ pout()

std::ostream & Proto::pout ( )

inline

Use this in place of std::cout for program output.

the stream that all output except error msgs should use

Replaces std::cout in most of the Chombo code. In serial this just returns std::cout. In parallel, this creates a separate file for each proc called <basename>.n where n is the procID and <basename> defaults to "pout" but can be set by calling setPoutBaseName(). Output is then directed to these files. This keeps the output from different processors from getting all jumbled up. If you want fewer files, you can do setenv CH_OUTPUT_INTERVAL nproc and it will only output the pout.n files for every nproc-th processor (where n % nproc == 0).

In serial this is the standard output, in parallel it is a different file on each proc (see setPoutBaseName()).

◆ poutFileName()

const std::string & Proto::poutFileName ( )

inline

Accesses the filename for the local pout() file.

return the current filename as used by pout()

Returns the name used for the local pout() file. In parallel this is "<pout_basename>.<procID>", where <pout_basename> defaults to "pout" and can be modified by calling setPoutBaseName(), and <procID> is the local proc number. In serial, this always returns the string "cout". It is an error (exit code 111) to call this in parallel before MPI_Init().

in serial, just return the string "cout"; abort if MPI is not initialized.

◆ procID()

int Proto::procID ( )

inline

local process ID

Returns the ID of the locally running process in the range 0 <= procID() < numProc(). This has no relation to the operating system pid. There is always a procID() == 0.

◆ setPoutBaseName()

void Proto::setPoutBaseName ( const std::string & a_Name )

inline

Changes the base part of the filename for pout() files.

Set the base name for the parallel output files used by pout().

When in parallel, changes the base name of the pout() files. If pout() has already been called, it closes the current output file and opens a new one (unless the name is the same, in which case it does nothing). In serial, ignores the argument and does nothing.

If the file has already been used and this is a different name, close the current file and open a new one.

Data Structures

Functions

Detailed Description

Function Documentation

◆ barrier()

◆ numProc()

◆ pout()

◆ poutFileName()

◆ procID()

◆ setPoutBaseName()