Chombo + EB  3.0
sort_histI.H
Go to the documentation of this file.
1 #ifdef CH_LANG_CC
2 /*
3  * _______ __
4  * / ___/ / ___ __ _ / / ___
5  * / /__/ _ \/ _ \/ V \/ _ \/ _ \
6  * \___/_//_/\___/_/_/_/_.__/\___/
7  * Please refer to Copyright.txt, in Chombo's root directory.
8  */
9 #endif
10 
11 #ifndef _SORT_HISTI_H_
12 #define _SORT_HISTI_H_
13 
14 #include <iterator>
15 
16 #include "CH_Timer.H"
17 
18 ////////////////////////////////////////////////////////////////////////
19 // Histogram::Histogram
20 ////////////////////////////////////////////////////////////////////////
22  : m_min_key(min), m_max_key(max), m_num_pes(env->m_numpes),
23  m_my_pe(env->m_mype), m_ncalls(0)
24 {
25  m_probe_size = m_num_pes - 1; // probe size
26  m_probe_size_max = 2*m_probe_size; // ?????
29 }
30 
32 {
34 }
35 
36 ////////////////////////////////////////////////////////////////////////
37 // Histogram::Init
38 ////////////////////////////////////////////////////////////////////////
39 void Histogram::Init( MPI_Comm a_comm,
40  const int a_nloc,
41  float a_margin )
42 {
43  //set up initial probe (evenly spaced guesses in data range)
44  index_type nl = a_nloc;
45  MPI_Allreduce( &nl, &m_num_total_keys, 1, MPI_INDEX_TYPE, MPI_SUM, a_comm );
46  int goal = (int)(m_num_total_keys/m_num_pes); CH_assert(goal>0);
47  CH_assert(a_margin>0.0 && a_margin < 1.0);
48  m_margin_error = (int)( (float)goal*a_margin ) + 1;
49 
50  //set up splitters and some variables
51  m_num_steps = 0;
52  m_num_achv = 0;
53 
54  //initial probe divides the range into even chunks
55  if ( m_ncalls++ == 0 ) // init init
56  {
57  for ( key_type i = 0; i < m_probe_size; i++ )
58  {
60  }
61  }
62  else
63  {
64  for (int i = 0; i < m_probe_size ; i++ )
65  {
67  }
68  }
69 
70  // init splitters
71  for (int i = 0; i < m_num_pes-1; i++)
72  {
74  m_splitters[i].upperb_count = -1;
77  m_splitters[i].goal = -1; //need to know total number of keys to determine this
78  m_splitters[i].achieved = false;
79  m_splitters[i].broadcasted = false;
80  }
81 }
82 
83 ////////////////////////////////////////////////////////////////////////
84 // Histogram::mws_sdispl
85 // return the number of elements needed from every processor
86 ////////////////////////////////////////////////////////////////////////
88  int a_nlocal,
89  MPI_Comm a_comm,
90  float a_margin )
91 {
92  Init( a_comm, a_nlocal, a_margin );
93 
94  splitter *splits = mws_private( a_keys,
95  a_nlocal,
96  a_comm );
97 
98  // return number or elements needed on each processor, set 'a_nlocal'
99  int *sdisp = new int[m_num_pes+1];
100  for ( int p=0, ki=0 ; ki < a_nlocal ; /*void*/)
101  {
102  while ( a_keys[ki] < splits[p].lowerb_key ) ki++;
103  sdisp[p++] = ki;
104  }
105  sdisp[m_num_pes] = a_nlocal;
106 
107  return sdisp;
108 }
109 
110 ////////////////////////////////////////////////////////////////////////
111 // Histogram::mws_private
112 ////////////////////////////////////////////////////////////////////////
114  int a_nlocal,
115  MPI_Comm a_comm )
116 {
117  const int root = 0;
118 
119 #define FACT 10000000000
120 
121  while (m_num_achv == m_num_pes)
122  {
123  m_num_steps++;
124  // create local histogram, could use 'int' but be safe
125  std::vector<index_type> histogram(m_probe_size+1+1); // +1: use for prefix sum
126  {
127  std::vector<index_type> loc_histo(m_probe_size+1, 0);
128  for (int i=0; i<a_nlocal ; i++)
129  {
130  key_type key = a_keys[i], tkey1;
131  int k = m_probe_size/2, inc = m_probe_size/4;
132 CH_assert(k<m_probe_size-1 && k >= 0);
133  if (inc==0) inc=1;
135  while ( (tkey1=m_last_probe[k]) > key || m_last_probe[k+1] <= key )
136  {
137  if ( inc < 4 ) // linear search
138  {
139  while ( m_last_probe[k] > key ) k--;
140  while ( k < m_probe_size && m_last_probe[k+1] <= key ) k++;
141 if (!(k<=m_probe_size && k>=0))std::cout << "[" << m_my_pe << "] ERROR: k=" << k << " key=" << key << " max key = " << m_max_key << std::endl;
142  CH_assert(k<=m_probe_size && k >= 0);
143 CH_assert(m_last_probe[k] <= key && m_last_probe[k+1] > key);
144  break;
145  }
146  else
147  {
148  if ( tkey1 > key ) k -= inc;
149  else k += inc;
150  inc /= 2;
151 CH_assert(k<m_probe_size-1 && k >= 0);
152  }
153  }
154 CH_assert( m_last_probe[k] <= key && m_last_probe[k+1] > key);
155  loc_histo[k]++;
156  }
157  MPI_Reduce(&loc_histo[0], &histogram[1], m_probe_size, MPI_INDEX_TYPE, MPI_SUM, root, a_comm);
158  histogram[0] = 0; // room for prefix sum
159  }
160 
161  if (m_my_pe == root)
162  {
163 // std::cout << "[0] " << m_num_steps << ") margin=" << m_margin_error << ", min key=" << m_min_key << ", max key=" << m_max_key
164 // << ", max HISTO ="
165 // << *std::max_element(histogram.begin(), histogram.end())
166 // << std::endl << "\thistogram: ";
167 // copy(histogram.begin(), histogram.end(), ostream_iterator<index_type>(cout, " "));
168 // std::cout << std::endl << "m_last_probe: " ;
169 // copy(m_last_probe.begin(), m_last_probe.end(), ostream_iterator<index_type>(cout, " "));
170 // std::cout << std::endl << "prefix sum: ";
171  // form prefix sum
172  for (int hi=0; hi<m_probe_size+1 ; hi++) histogram[hi+1] += histogram[hi];
173  histogram[0] = 0;
174  copy(histogram.begin(), histogram.end(), ostream_iterator<index_type>(cout, " "));
175  std::cout << std::endl;
176 
177  if (m_num_steps==2) exit(13);
178 
179  // create new probes
180  RefineProbes( histogram );
181 
182  // broadcast new probes
183 // m_probe_size = new_probes.size();
184 // int tt[2] = {m_probe_size, m_num_achv }; CH_assert(sizeof(index_type) == sizeof(key_type));
185 // m_last_probe.clear();
186 // m_last_probe.reserve( m_probe_size );
187 // while ( !new_probes.empty() )
188 // {
189 // key_type probe = new_probes.front(); new_probes.pop_front();
190 // m_last_probe.push_back( probe );
191 // }
192 // MPI_Bcast ( tt, 2, MPI_INT, root, a_comm );
193 // MPI_Bcast ( &m_last_probe[0], m_last_probe_size, MPI_INDEX_TYPE, root, a_comm );
194 
195 // cout << "[0]sent " << m_probe_size << " new probes, NUM_ACHV *** " << m_num_achv << " probes: ";
196 // copy(m_last_probe.begin(), m_last_probe.end(), ostream_iterator<index_type>(cout, " "));
197 // std::cout << std::endl;
198  }
199  else // receive: done?, size of probe, probe
200  {
201  int tt[2];
202  MPI_Bcast ( tt, 2, MPI_INT, root, a_comm );
203  m_probe_size = tt[0]; m_num_achv = tt[1];
204  //m_last_probe.resize( m_probe_size );
205  MPI_Bcast ( &m_last_probe[0], m_probe_size, MPI_INDEX_TYPE, root, a_comm );
206  }
207  }
208 
209  MPI_Barrier(a_comm);
210  if (m_my_pe==m_num_pes-1) std::cout << "[" << m_num_steps << "] done splits" << std::endl;
211  exit(11);
212 
213  return m_splitters;
214 }
215 
216 ////////////////////////////////////////////////////////////////////////
217 // Histogram::RefineProbes
218 ////////////////////////////////////////////////////////////////////////
219 void Histogram::RefineProbes( std::vector<index_type> &histogram )
220 {
221  //uint64_t* hist_counts = (uint64_t*)msg->getData();
222  int lenhist = histogram.size();
223 
224  ///if first step, initialize total length and splitter goals
225  if (m_num_steps == 1)
226  {
227  m_num_total_keys = histogram[lenhist-1];
229  index_type offset = 0;
230  for (int i = 0; i < m_num_pes - 1; i++)
231  {
232  offset += hop;
233  m_splitters[i].goal = offset;
235  }
236  }
237 
238  int idxExp = 0;
239  int idxReal = 0;
240  int splitterCount = 0;
241  key_type histval;
242  bool borderChanged;
243  int justachv = 0;
244 
245  std::vector< splitter* > active_splitters;
246  std::vector<int> toBroadcast;
247  index_type sumhist = histogram[0];
248  index_type diffval;
249  ///logic for determining which splitters have been achieved and bounding the rest by the closest counts and splitter-guesses
250  ///this code needs at least some comments or something
251  while (idxReal < m_num_pes-1)
252  {
253  if (m_splitters[idxReal].achieved)
254  {
255  //if this splitter has been determined
256  if (!m_splitters[idxReal].broadcasted && (idxReal == 0 || m_splitters[idxReal-1].achieved))
257  {
258  //if previous splitter has now been determined, broadcast this range
259  toBroadcast.push_back(idxReal);
260  m_splitters[idxReal].broadcasted = true;
261  }
262  //don't need to analyze determined splitter, so move on
263  idxReal++;
264  continue;
265  }
266  //figure out the current count and current guess from last probe
267  if (idxExp >= lenhist-1)
268  {
269  if (idxExp >= lenhist) sumhist = m_num_total_keys;
270  else sumhist = histogram[idxExp];
271  histval = m_max_key;
272  }
273  else
274  {
275  sumhist = histogram[idxExp];
276  histval = m_last_probe[idxExp];
277  }
278 
279  //find how far the current splitter is from the guess
280  if (sumhist > m_splitters[idxReal].goal)
281  diffval = sumhist - m_splitters[idxReal].goal;
282  else
283  diffval = m_splitters[idxReal].goal - sumhist;
284 
285  if (diffval < m_margin_error)
286  {
287  //if we achieved the splitter within the threshold
288  //cement the key and the count
289  m_splitters[idxReal].upperb_key = histval;
290  m_splitters[idxReal].lowerb_key = histval;
291  m_splitters[idxReal].achieved = true;
292  justachv++;
293  m_splitters[idxReal].upperb_count = sumhist;
294  m_splitters[idxReal].lowerb_count = sumhist;
295 
296  //broadcast the range if the previous one has been achieved also
297  if (idxReal == 0 || m_splitters[idxReal-1].achieved)
298  {
299  toBroadcast.push_back(idxReal);
300  m_splitters[idxReal].broadcasted = true;
301  }
302  if (idxReal == m_num_pes-2)
303  {
304  //if this is the last splitter broadcast the last range as well
305  toBroadcast.push_back(m_num_pes-1);
306  }
307  idxExp++;
308  idxReal++;
309  }
310  else if (m_splitters[idxReal].goal < sumhist)
311  { //if we did not hit the splitter within the threshold
312  borderChanged = false;
313  //check if this guess produced better bounds and update the bounds
314 
315  if (idxExp > 0)
316  {
317  if (histogram[idxExp-1] > m_splitters[idxReal].lowerb_count || (histogram[idxExp-1] == m_splitters[idxReal].lowerb_count && m_last_probe[idxExp-1] > m_splitters[idxReal].lowerb_key))
318  {
319  borderChanged = true;
320  m_splitters[idxReal].lowerb_count = histogram[idxExp-1];
321  m_splitters[idxReal].lowerb_key = m_last_probe[idxExp-1];
322  }
323  }
324  if ((sumhist < m_splitters[idxReal].upperb_count) || (sumhist == m_splitters[idxReal].upperb_count && histval < m_splitters[idxReal].upperb_key))
325  {
326  borderChanged = true;
327  m_splitters[idxReal].upperb_count = sumhist;
328  m_splitters[idxReal].upperb_key = histval;
329  }
330  //if the bounds did not tighten, ideally, this should never happen
331  if (!borderChanged)
332  {
333 #ifdef VERBOSE
334  if (lenhist >= m_num_pes) CkPrintf("borderChanged should never be false %d %d %llu %llu %llu %llu %llu!!!\n",
335  idxExp, idxReal, m_splitters[idxReal].goal, sumhist-histogram[idxExp-1], sumhist,
336  m_splitters[idxReal].lowerb_count, m_splitters[idxReal].upperb_count);
337  CkPrintf("The distributions is probably too dense to handle the given margin of error\n");
338 #endif
339  //if there was some guess and it did not tighten bounds
341  {
342  //just declare the splitter converged, currently we cannot do any better
343  m_splitters[idxReal].achieved = true;
344  if (m_splitters[idxReal].goal - m_splitters[idxReal].lowerb_count <= m_splitters[idxReal].upperb_count - m_splitters[idxReal].goal)
345  {
346  m_splitters[idxReal].upperb_key = m_splitters[idxReal].lowerb_key;
347  m_splitters[idxReal].upperb_count = m_splitters[idxReal].lowerb_count;
348  }
349  else
350  {
351  m_splitters[idxReal].lowerb_key = m_splitters[idxReal].upperb_key;
352  m_splitters[idxReal].lowerb_count = m_splitters[idxReal].upperb_count;
353  }
354  //if (m_splitters[idxReal].lowerb_count != 0 && m_splitters[idxReal-1].achieved){
355  if (idxReal == 0 || m_splitters[idxReal-1].achieved)
356  {
357  toBroadcast.push_back(idxReal);
358  m_splitters[idxReal].broadcasted = true;
359  }
360  if (idxReal == m_num_pes-2)
361  {
362  toBroadcast.push_back(m_num_pes-1);
363  }
364  idxReal++;
365  continue;
366  }
367  }
368  //continue to next splitter
369  active_splitters.push_back(&m_splitters[idxReal]);
370  idxReal++;
371  }
372  else
373  {
374  sumhist = histogram[idxExp];
375  idxExp++;
376  }
377  }
378  m_num_achv+=justachv;
379  splitterCount = active_splitters.size();
380 
381 
382  //if we have successfully not determined close enough values for all the splitters, define new probe
383  probe *new_probe;
384  int num_guesses = 0;
385  key_type *guesses;
386  if (splitterCount > 0)
387  {
388  guesses = get_splitter_guesses(&num_guesses, active_splitters);
389  //new_probe = new (num_guesses, toBroadcast.size()) probe;
390  new_probe = new probe[num_guesses];
391  memcpy(new_probe->splitter_guesses, guesses, num_guesses*sizeof(key_type));
392  }
393  else
394  new_probe = new probe[num_guesses]; // (num_guesses, toBroadcast.size())
395 
396  new_probe->num_keys = num_guesses;
397  new_probe->num_ranges = toBroadcast.size();
398 
399  ///send the resolved ranges
400  for (int i = 0; i < toBroadcast.size(); i++)
401  {
402  if (toBroadcast[i] == 0)
403  {
404  new_probe->send_info[i].lowerb = m_min_key;
405  new_probe->send_info[i].upperb = m_splitters[toBroadcast[i]].upperb_key;
406  new_probe->send_info[i].total = m_splitters[0].upperb_count;
407  }
408  else if (toBroadcast[i] == m_num_pes-1)
409  {
410  new_probe->send_info[i].lowerb = m_splitters[toBroadcast[i]-1].upperb_key;
411  new_probe->send_info[i].upperb = m_max_key;
412  new_probe->send_info[i].total = m_num_total_keys - m_splitters[toBroadcast[i]-1].upperb_count;
413  }
414  else
415  {
416  new_probe->send_info[i].lowerb = m_splitters[toBroadcast[i]-1].upperb_key;
417  new_probe->send_info[i].upperb = m_splitters[toBroadcast[i]].upperb_key;
418  new_probe->send_info[i].total = m_splitters[toBroadcast[i]].upperb_count - m_splitters[toBroadcast[i]-1].upperb_count;
419  }
420  new_probe->send_info[i].chare_dest = toBroadcast[i];
421  }
422 #ifdef DEBUG
423  for (int i = 0; i < new_probe->num_keys; i++)
424  CkPrintf("probe[%d] = %llu\n", i, new_probe->splitter_guesses[i]);
425 #endif
426  //CkSetQueueing(new_probe, CK_QUEUEING_LIFO);
427  if (splitterCount == 0)
428  {
429  //if all splitters have been converged to
430  //c2 = CmiWallTimer();
431  //buckets.SendAll(new_probe);
432  //first_use = false;
433  }
434  else
435  {
436  //if we still need to convergeto some splitters
437  //buckets.CountProbe(new_probe);
438  }
439  active_splitters.clear();
440  toBroadcast.clear();
441 
442  delete [] m_last_probe;
443  //delete msg;
444  if (splitterCount > 0)
445  {
446  m_last_probe = guesses;
447  }
448 }
449 
450 
451 ////////////////////////////////////////////////////////////////////////
452 // Histogram::get_splitter_guesses
453 ////////////////////////////////////////////////////////////////////////
454 key_type* Histogram::get_splitter_guesses( int *ptr_num_guesses, std::vector< splitter* > active_splitters )
455 {
456  int num_guesses;// = *ptr_num_guesses;
457  int splitterCount = active_splitters.size();
458 
459  ///figure out how much oversplitting we can do
460  int virtSent = m_probe_size;
461  if (m_probe_size > splitterCount)
462  {
463  int numVirt = m_probe_size / splitterCount;
464  int extraVirt = m_probe_size % splitterCount;
465  for (int i = 0; i < splitterCount; i++)
466  {
467  if (i < extraVirt)
468  active_splitters[i]->virt = numVirt+1;
469  else
470  active_splitters[i]->virt = numVirt;
471  }
472  }
473  else
474  {
475  virtSent = splitterCount+1;
476  for (int i = 0; i < splitterCount; i++)
477  {
478  active_splitters[i]->virt = 1;
479  }
480  }
481 
482  ///define a new probe
483  key_type *newsps = new key_type[m_probe_size];
484  num_guesses = 0;
485  int idx = 0;
486  int divisors;
487  key_type divjump = 0;
488  int accvirt = 0;
489  int firstidx = 0;
490  ///Create guesses out of unresolved splitters
491  while (idx < splitterCount)
492  {
493  if (accvirt == 0)
494  firstidx = idx;
495  accvirt += active_splitters[idx]->virt;
496  //place multiple splitters guesses into ranges that bound multiple unresolved splitters
497  if (idx == splitterCount-1 || active_splitters[idx]->upperb_key != active_splitters[idx+1]->upperb_key)
498  {
499  divisors = std::min(m_probe_size_max, accvirt);
500  if (divisors > 0)
501  divjump = (key_type)(active_splitters[idx]->upperb_key - active_splitters[idx]->lowerb_key)/(divisors+1);
502  if (divjump == 0)
503  {
504  divjump = 1;
505  }
506  ///we first figure out how many guesses to place in the range, now we place them
507  for (int i = 0; i < divisors; i++)
508  {
509  if (divjump == 1 && (i+1)*divjump + active_splitters[idx]->lowerb_key > active_splitters[idx]->upperb_key)
510  newsps[num_guesses] = active_splitters[idx]->upperb_key;
511  else
512  newsps[num_guesses] = (i+1)*divjump + active_splitters[idx]->lowerb_key;
513  num_guesses++;
514  }
515  accvirt = 0;
516  }
517  idx++;
518  }
519  *ptr_num_guesses = num_guesses;
520  return newsps;
521 }
522 
523 #endif
Definition: sort_hist.H:26
index_type upperb_count
Definition: sort_hist.H:37
#define CH_assert(cond)
Definition: CHArray.H:37
key_type * splitter_guesses
Definition: sort_hist.H:29
index_type goal
Definition: sort_hist.H:41
key_type * get_splitter_guesses(int *ptr_num_guesses, std::vector< splitter * > active_splitters)
Definition: sort_histI.H:454
const int m_num_pes
Definition: sort_hist.H:55
key_type upperb
Definition: sort_hist.H:21
int m_probe_size_max
Definition: sort_hist.H:61
index_type m_num_total_keys
Definition: sort_hist.H:64
IndexTM< T, N > min(const IndexTM< T, N > &a_p1, const IndexTM< T, N > &a_p2)
Definition: IndexTMI.H:396
int chare_dest
Definition: sort_hist.H:22
Definition: sort_hist.H:35
int * mws_sdispl(key_type *, int nloc, MPI_Comm c, float margin)
Definition: sort_histI.H:87
void RefineProbes(std::vector< index_type > &)
Definition: sort_histI.H:219
void Init(MPI_Comm a_comm, const int a_nloc, float)
Definition: sort_histI.H:39
int total
Definition: sort_hist.H:23
unsigned long long key_type
Definition: sort_hist.H:15
Histogram(SortComm *env, key_type min, key_type max)
Definition: sort_histI.H:21
int num_keys
Definition: sort_hist.H:30
const int m_my_pe
Definition: sort_hist.H:56
key_type upperb_key
Definition: sort_hist.H:38
index_type lowerb_count
Definition: sort_hist.H:39
int m_num_steps
Definition: sort_hist.H:57
IndexTM< T, N > max(const IndexTM< T, N > &a_p1, const IndexTM< T, N > &a_p2)
Definition: IndexTMI.H:403
int m_margin_error
Definition: sort_hist.H:62
bool achieved
Definition: sort_hist.H:43
int m_probe_size
Definition: sort_hist.H:60
key_type m_min_key
Definition: sort_hist.H:54
key_type * m_last_probe
Definition: sort_hist.H:65
int num_ranges
Definition: sort_hist.H:32
splitter * m_splitters
Definition: sort_hist.H:63
long long index_type
Definition: sort_utils.H:22
bool broadcasted
Definition: sort_hist.H:44
key_type lowerb
Definition: sort_hist.H:20
void set_limits(key_type min, key_type max)
Definition: sort_histI.H:31
int m_num_achv
Definition: sort_hist.H:59
key_type lowerb_key
Definition: sort_hist.H:40
splitter * mws_private(key_type *, int, MPI_Comm)
Definition: sort_histI.H:113
key_type m_max_key
Definition: sort_hist.H:54
resolved_range * send_info
Definition: sort_hist.H:31
int m_ncalls
Definition: sort_hist.H:58