GCC Code Coverage Report

Directory:	./
File:	src/util/numerical/natBreaks.cc
Date:	2025-09-01 06:19:01

	Total	Coverage
Lines:	169	0.0%
Functions:	16	0.0%
Branches:	58	0.0%

  
      Line
      Branch
      Exec
      Source
    
      # include "na64util/numerical/natBreaks.hh"
    
      # include <cassert>
    
      # include <algorithm>
    
      namespace na64dp {
    
      namespace util {
    
      namespace jenks {
    
      const SizeT BUFFER_SIZE = 1024;
    
      // helper funcs
    
      ✗
      template <typename T> T Min(T a, T b) { return (a<b) ? a : b; }
    
      SizeT
    
      ✗
      GetTotalCount( const ValueCountPairContainer & vcpc ) {
    
      ✗
          SizeT sum = 0;
    
          ValueCountPairContainer::const_iterator
    
      ✗
              i = vcpc.begin(),
    
      ✗
              e = vcpc.end();
    
      ✗
          for(sum = 0; i!=e; ++i)
    
      ✗
              sum += (*i).second;
    
      ✗
          return sum;
    
      }
    
      // helper struct JenksFisher
    
      // captures the intermediate data and methods for the calculation of Natural
    
      // Class Breaks.
    
      ✗
      JenksFisher::JenksFisher( const ValueCountPairContainer & vcpc
    
      ✗
                 , SizeT k )
    
      ✗
          : m_M(vcpc.size())
    
      ✗
          , m_K(k)
    
      ✗
          , m_BufSize(vcpc.size()-(k-1))
    
      ✗
          , m_PrevSSM(m_BufSize)
    
      ✗
          , m_CurrSSM(m_BufSize)
    
      ✗
          , m_CB(m_BufSize * (m_K-1))
    
      ✗
          , m_CBPtr() {
    
      ✗
          m_CumulValues.reserve(vcpc.size());
    
      ✗
          double cwv=0;
    
      ✗
          CountType cw = 0, w;
    
      ✗
          for(SizeT i=0; i!=m_M; ++i) {
    
              // PRECONDITION: the value sequence must be strictly increasing
    
      ✗
              assert(!i || vcpc[i].first > vcpc[i-1].first);
    
      ✗
              w   = vcpc[i].second;
    
              // PRECONDITION: all weights must be positive
    
      ✗
              assert(w > 0);
    
      ✗
              cw  += w;
    
              // No overflow? No loss of precision?
    
      ✗
              assert(cw > w || !i);
    
      ✗
              cwv += w * vcpc[i].first;
    
      ✗
              m_CumulValues.push_back(ValueCountPair(cwv, cw));
    
      ✗
              if (i < m_BufSize) {
    
                  // prepare SSM for first class. Last (k-1) values are omitted
    
      ✗
                  m_PrevSSM[i] = cwv * cwv / cw;
    
              }
    
          }
    
      }
    
      double
    
      ✗
      JenksFisher::GetW(SizeT b, SizeT e) {
    
      ✗
          assert(b);    // First element always belongs to class 0, thus queries should never include it.
    
      ✗
          assert(b<=e);
    
      ✗
          assert(e<m_M);
    
      ✗
          double res = m_CumulValues[e].second;
    
      ✗
          res -= m_CumulValues[b-1].second;
    
      ✗
          return res;
    
      }
    
      double
    
      ✗
      JenksFisher::GetWV(SizeT b, SizeT e) {
    
      ✗
          assert(b);
    
      ✗
          assert(b<=e);
    
      ✗
          assert(e<m_M);
    
      ✗
          double res = m_CumulValues[e].first;
    
      ✗
          res -= m_CumulValues[b-1].first;
    
      ✗
          return res;
    
      }
    
      double
    
      ✗
      JenksFisher::GetSSM(SizeT b, SizeT e) {
    
      ✗
          double res = GetWV(b,e);
    
      ✗
          return res * res / GetW(b,e);
    
      }
    
      SizeT
    
      ✗
      JenksFisher::FindMaxBreakIndex(SizeT i, SizeT bp, SizeT ep) {
    
      ✗
          assert(bp < ep);
    
      ✗
          assert(bp <= i);
    
      ✗
          assert(ep <= i+1);
    
      ✗
          assert(i  <  m_BufSize);
    
      ✗
          assert(ep <= m_BufSize);
    
      ✗
          double minSSM = m_PrevSSM[bp] + GetSSM(bp+m_NrCompletedRows, i+m_NrCompletedRows);
    
      ✗
          SizeT foundP = bp;
    
      ✗
          while (++bp < ep)
    
          {
    
      ✗
              double currSSM = m_PrevSSM[bp] + GetSSM(bp+m_NrCompletedRows, i+m_NrCompletedRows);
    
      ✗
              if (currSSM > minSSM)
    
              {
    
      ✗
                  minSSM = currSSM;
    
      ✗
                  foundP = bp;
    
              }
    
          }
    
      ✗
          m_CurrSSM[i] = minSSM;
    
      ✗
          return foundP;
    
      }
    
      void
    
      ✗
      JenksFisher::CalcRange(SizeT bi, SizeT ei, SizeT bp, SizeT ep) {
    
      ✗
          assert(bi <= ei);
    
      ✗
          assert(ep <= ei);
    
      ✗
          assert(bp <= bi);
    
      ✗
          if (bi == ei)
    
      ✗
              return;
    
      ✗
          assert(bp < ep);
    
      ✗
          SizeT mi = (bi + ei)/2;
    
      ✗
          SizeT mp = FindMaxBreakIndex(mi, bp, Min<SizeT>(ep, mi+1));
    
      ✗
          assert(bp <= mp);
    
      ✗
          assert(mp <  ep);
    
      ✗
          assert(mp <= mi);
    
          // solve first half of the sub-problems with lower 'half' of possible outcomes
    
      ✗
          CalcRange(bi, mi, bp, Min<SizeT>(mi, mp+1));
    
      ✗
          m_CBPtr[ mi ] = mp; // store result for the middle element.
    
          // solve second half of the sub-problems with upper 'half' of possible outcomes
    
      ✗
          CalcRange(mi+1, ei, mp, ep);
    
      }
    
      void
    
      ✗
      JenksFisher::CalcAll() {
    
      ✗
          if (m_K>=2) {
    
      ✗
              m_CBPtr = m_CB.begin();
    
      ✗
              for (m_NrCompletedRows=1; m_NrCompletedRows<m_K-1; ++m_NrCompletedRows) {
    
      ✗
                  CalcRange(0, m_BufSize, 0, m_BufSize); // complexity: O(m*log(m))
    
      ✗
                  m_PrevSSM.swap(m_CurrSSM);
    
      ✗
                  m_CBPtr += m_BufSize;
    
              }
    
          }
    
      }
    
      void
    
      ✗
      GetCountsDirect( ValueCountPairContainer & vcpc
    
                     , const double * values
    
                     , SizeT size ) {
    
      ✗
          assert(size <= BUFFER_SIZE);
    
      ✗
          assert(size > 0);
    
      ✗
          assert(vcpc.empty());
    
          double buffer[BUFFER_SIZE];
    
      ✗
          std::copy(values, values+size, buffer);
    
      ✗
          std::sort(buffer, buffer+size);
    
      ✗
          double currValue = buffer[0];
    
      ✗
          SizeT     currCount = 1;
    
      ✗
          for (SizeT index = 1; index != size; ++index) {
    
      ✗
              if (currValue < buffer[index]) {
    
      ✗
                  vcpc.push_back(ValueCountPair(currValue, currCount));
    
      ✗
                  currValue = buffer[index];
    
      ✗
                  currCount = 1;
    
              }
    
              else
    
      ✗
                  ++currCount;
    
          }
    
      ✗
          vcpc.push_back(ValueCountPair(currValue, currCount));
    
      }
    
      struct CompareFirst {
    
      ✗
          bool operator () (const ValueCountPair& lhs, const ValueCountPair& rhs) {
    
      ✗
              return lhs.first < rhs.first;
    
          }
    
      };
    
      static void
    
      ✗
      MergeToLeft( ValueCountPairContainer & vcpcLeft
    
                      , const ValueCountPairContainer & vcpcRight
    
                      , ValueCountPairContainer& vcpcDummy ) {
    
      ✗
          assert(vcpcDummy.empty());
    
      ✗
          vcpcDummy.swap(vcpcLeft);
    
      ✗
          vcpcLeft.resize(vcpcRight.size() + vcpcDummy.size());
    
      ✗
          std::merge( vcpcRight.begin(), vcpcRight.end()
    
                    , vcpcDummy.begin(), vcpcDummy.end()
    
                    , vcpcLeft.begin(), CompareFirst() );
    
          ValueCountPairContainer::iterator
    
      ✗
              currPair = vcpcLeft.begin(),
    
      ✗
              lastPair = vcpcLeft.end();
    
      ✗
          ValueCountPairContainer::iterator index = currPair+1;
    
      ✗
          while (index != lastPair && currPair->first < index->first) {
    
      ✗
              currPair = index;
    
      ✗
              ++index;
    
          }
    
      ✗
          double currValue = currPair->first;
    
      ✗
          SizeT     currCount = currPair->second;
    
      ✗
          for (; index != lastPair;++index) {
    
      ✗
              if (currValue < index->first) {
    
      ✗
                  *currPair++ = ValueCountPair(currValue, currCount);
    
      ✗
                  currValue = index->first;
    
      ✗
                  currCount = index->second;
    
              }
    
              else
    
      ✗
                  currCount += index->second;
    
          }
    
      ✗
          *currPair++ = ValueCountPair(currValue, currCount);
    
      ✗
          vcpcLeft.erase(currPair, lastPair);
    
      ✗
          vcpcDummy.clear();
    
      }
    
      void
    
      ✗
      ValueCountPairContainerArray::resize(SizeT k) {
    
      ✗
          assert(capacity() >= k);
    
      ✗
          while (size() < k) {
    
      ✗
              push_back(ValueCountPairContainer());
    
      ✗
              back().reserve(BUFFER_SIZE);
    
          }
    
      }
    
      void
    
      ✗
      ValueCountPairContainerArray::GetValueCountPairs( ValueCountPairContainer & vcpc
    
                             , const double * values
    
                             , SizeT size
    
                             , unsigned int nrUsedContainers ) {
    
      ✗
          assert(vcpc.empty());
    
      ✗
          if (size <= BUFFER_SIZE) {
    
      ✗
              GetCountsDirect(vcpc, values, size);
    
          } else {
    
      ✗
              resize(nrUsedContainers+2);
    
      ✗
              unsigned int m = size/2;
    
      ✗
              GetValueCountPairs( vcpc, values, m, nrUsedContainers );
    
      ✗
              GetValueCountPairs( begin()[nrUsedContainers]
    
      ✗
                                , values + m
    
      ✗
                                , size - m
    
                                , nrUsedContainers+1 );
    
      ✗
              MergeToLeft( vcpc
    
      ✗
                         , begin()[nrUsedContainers]
    
      ✗
                         , begin()[nrUsedContainers+1]
    
                         );
    
      ✗
              begin()[nrUsedContainers].clear();
    
          }
    
      ✗
          assert(GetTotalCount(vcpc) == size);
    
      }
    
      void
    
      ✗
      GetValueCountPairs( ValueCountPairContainer & vcpc
    
                        , const double * values
    
                        , SizeT n ) {
    
      ✗
          vcpc.clear();
    
      ✗
          if( n ) {
    
      ✗
              ValueCountPairContainerArray vcpca;
    
              // max nr halving is log2(max cardinality / BUFFER_SIZE); max cardinality is SizeT(-1)
    
      ✗
              vcpca.reserve(3 + 8*sizeof(SizeT) - 10);
    
      ✗
              vcpca.GetValueCountPairs(vcpc, values, n, 0);
    
      ✗
              assert(vcpc.size());
    
          }
    
      }
    
      void
    
      ✗
      ClassifyJenksFisherFromValueCountPairs( LimitsContainer & breaksArray
    
                                            , SizeT k
    
                                            , const ValueCountPairContainer& vcpc ) {
    
      ✗
          breaksArray.resize(k);
    
      ✗
          SizeT m  = vcpc.size();
    
      ✗
          assert(k <= m); // PRECONDITION
    
      ✗
          if (!k)
    
      ✗
              return;
    
      ✗
          JenksFisher jf(vcpc,  k);
    
      ✗
          if (k > 1) {
    
      ✗
              jf.CalcAll();
    
      ✗
              SizeT lastClassBreakIndex = jf.FindMaxBreakIndex(jf.m_BufSize-1, 0, jf.m_BufSize);
    
      ✗
              while (--k) {
    
      ✗
                  breaksArray[k] = vcpc[lastClassBreakIndex+k].first;
    
      ✗
                  assert(lastClassBreakIndex < jf.m_BufSize);
    
      ✗
                  if (k > 1)
    
                  {
    
      ✗
                      jf.m_CBPtr -= jf.m_BufSize;
    
      ✗
                      lastClassBreakIndex = jf.m_CBPtr[lastClassBreakIndex];
    
                  }
    
              }
    
      ✗
              assert(jf.m_CBPtr == jf.m_CB.begin());
    
          }
    
      ✗
          assert( k == 0 );
    
      ✗
          breaksArray[0] = vcpc[0].first; // break for the first class is the minimum of the dataset.
    
      }
    
      }  // namespace jenks
    
      }  // util
    
      }
    
      # ifdef TEST_BUILD
    
      // test code
    
      #include "boost/random.hpp"
    
      int
    
      main(int c, char** argv) {
    
          const double rangeMin = 0.0;
    
          const double rangeMax = 10.0;
    
          typedef boost::uniform_real<double> NumberDistribution;
    
          typedef boost::mt19937 RandomNumberGenerator;
    
          typedef boost::variate_generator<RandomNumberGenerator&, NumberDistribution> Generator;
    
          NumberDistribution distribution(rangeMin, rangeMax);
    
          RandomNumberGenerator generator;
    
          generator.seed(0); // seed with the current time 
    
          Generator numberGenerator(generator, distribution);
    
          const int n = 1000000;
    
          const int k = 10;
    
          std::cout << "Generating random numbers..." << std::endl;
    
          std::vector<double> values;
    
          values.reserve(n);
    
          // populate a distribuiton slightly more interesting than uniform,
    
          // with a lower density at higher values.
    
          for( int i=0; i!=n; ++i ) {
    
              double v = numberGenerator();
    
              values.push_back(v*v);
    
          }
    
          assert(values.size() == n);
    
          std::cout << "Generating sortedUniqueValueCounts ..." << std::endl;
    
          ValueCountPairContainer sortedUniqueValueCounts;
    
          GetValueCountPairs( sortedUniqueValueCounts, &values[0], n );
    
          std::cout << sortedUniqueValueCounts.size() << " unique values generated." << std::endl;
    
          //for( auto p : sortedUniqueValueCounts )
    
          std::cout << "Finding Jenks ClassBreaks..." << std::endl;
    
          LimitsContainer resultingbreaksArray;
    
          ClassifyJenksFisherFromValueCountPairs( resultingbreaksArray, k, sortedUniqueValueCounts );
    
          std::cout << "Reporting results..." << std::endl;
    
          for( double breakValue : resultingbreaksArray ) {
    
              std::cout << breakValue << std::endl;
    
          }
    
      } // main
    
      # endif  // TEST_BUILD

Line	Exec	Source
1		# include "na64util/numerical/natBreaks.hh"
2
3		# include <cassert>
4		# include <algorithm>
5
6		namespace na64dp {
7		namespace util {
8		namespace jenks {
9
10		const SizeT BUFFER_SIZE = 1024;
11
12		// helper funcs
13	✗	template <typename T> T Min(T a, T b) { return (a<b) ? a : b; }
14
15		SizeT
16	✗	GetTotalCount( const ValueCountPairContainer & vcpc ) {
17	✗	SizeT sum = 0;
18		ValueCountPairContainer::const_iterator
19	✗	i = vcpc.begin(),
20	✗	e = vcpc.end();
21	✗	for(sum = 0; i!=e; ++i)
22	✗	sum += (*i).second;
23	✗	return sum;
24		}
25
26		// helper struct JenksFisher
27
28		// captures the intermediate data and methods for the calculation of Natural
29		// Class Breaks.
30	✗	JenksFisher::JenksFisher( const ValueCountPairContainer & vcpc
31	✗	, SizeT k )
32	✗	: m_M(vcpc.size())
33	✗	, m_K(k)
34	✗	, m_BufSize(vcpc.size()-(k-1))
35	✗	, m_PrevSSM(m_BufSize)
36	✗	, m_CurrSSM(m_BufSize)
37	✗	, m_CB(m_BufSize * (m_K-1))
38	✗	, m_CBPtr() {
39	✗	m_CumulValues.reserve(vcpc.size());
40	✗	double cwv=0;
41	✗	CountType cw = 0, w;
42
43	✗	for(SizeT i=0; i!=m_M; ++i) {
44		// PRECONDITION: the value sequence must be strictly increasing
45	✗	assert(!i \|\| vcpc[i].first > vcpc[i-1].first);
46
47	✗	w = vcpc[i].second;
48		// PRECONDITION: all weights must be positive
49	✗	assert(w > 0);
50
51	✗	cw += w;
52		// No overflow? No loss of precision?
53	✗	assert(cw > w \|\| !i);
54
55	✗	cwv += w * vcpc[i].first;
56	✗	m_CumulValues.push_back(ValueCountPair(cwv, cw));
57	✗	if (i < m_BufSize) {
58		// prepare SSM for first class. Last (k-1) values are omitted
59	✗	m_PrevSSM[i] = cwv * cwv / cw;
60		}
61		}
62		}
63
64		double
65	✗	JenksFisher::GetW(SizeT b, SizeT e) {
66	✗	assert(b); // First element always belongs to class 0, thus queries should never include it.
67	✗	assert(b<=e);
68	✗	assert(e<m_M);
69
70	✗	double res = m_CumulValues[e].second;
71	✗	res -= m_CumulValues[b-1].second;
72	✗	return res;
73		}
74
75		double
76	✗	JenksFisher::GetWV(SizeT b, SizeT e) {
77	✗	assert(b);
78	✗	assert(b<=e);
79	✗	assert(e<m_M);
80
81	✗	double res = m_CumulValues[e].first;
82	✗	res -= m_CumulValues[b-1].first;
83	✗	return res;
84		}
85
86		double
87	✗	JenksFisher::GetSSM(SizeT b, SizeT e) {
88	✗	double res = GetWV(b,e);
89	✗	return res * res / GetW(b,e);
90		}
91
92		SizeT
93	✗	JenksFisher::FindMaxBreakIndex(SizeT i, SizeT bp, SizeT ep) {
94	✗	assert(bp < ep);
95	✗	assert(bp <= i);
96	✗	assert(ep <= i+1);
97	✗	assert(i < m_BufSize);
98	✗	assert(ep <= m_BufSize);
99
100	✗	double minSSM = m_PrevSSM[bp] + GetSSM(bp+m_NrCompletedRows, i+m_NrCompletedRows);
101	✗	SizeT foundP = bp;
102	✗	while (++bp < ep)
103		{
104	✗	double currSSM = m_PrevSSM[bp] + GetSSM(bp+m_NrCompletedRows, i+m_NrCompletedRows);
105	✗	if (currSSM > minSSM)
106		{
107	✗	minSSM = currSSM;
108	✗	foundP = bp;
109		}
110		}
111	✗	m_CurrSSM[i] = minSSM;
112	✗	return foundP;
113		}
114
115		void
116	✗	JenksFisher::CalcRange(SizeT bi, SizeT ei, SizeT bp, SizeT ep) {
117	✗	assert(bi <= ei);
118
119	✗	assert(ep <= ei);
120	✗	assert(bp <= bi);
121
122	✗	if (bi == ei)
123	✗	return;
124	✗	assert(bp < ep);
125
126	✗	SizeT mi = (bi + ei)/2;
127	✗	SizeT mp = FindMaxBreakIndex(mi, bp, Min<SizeT>(ep, mi+1));
128
129	✗	assert(bp <= mp);
130	✗	assert(mp < ep);
131	✗	assert(mp <= mi);
132
133		// solve first half of the sub-problems with lower 'half' of possible outcomes
134	✗	CalcRange(bi, mi, bp, Min<SizeT>(mi, mp+1));
135
136	✗	m_CBPtr[ mi ] = mp; // store result for the middle element.
137
138		// solve second half of the sub-problems with upper 'half' of possible outcomes
139	✗	CalcRange(mi+1, ei, mp, ep);
140		}
141
142		void
143	✗	JenksFisher::CalcAll() {
144	✗	if (m_K>=2) {
145	✗	m_CBPtr = m_CB.begin();
146	✗	for (m_NrCompletedRows=1; m_NrCompletedRows<m_K-1; ++m_NrCompletedRows) {
147	✗	CalcRange(0, m_BufSize, 0, m_BufSize); // complexity: O(m*log(m))
148
149	✗	m_PrevSSM.swap(m_CurrSSM);
150	✗	m_CBPtr += m_BufSize;
151		}
152		}
153		}
154
155
156		void
157	✗	GetCountsDirect( ValueCountPairContainer & vcpc
158		, const double * values
159		, SizeT size ) {
160	✗	assert(size <= BUFFER_SIZE);
161	✗	assert(size > 0);
162	✗	assert(vcpc.empty());
163
164		double buffer[BUFFER_SIZE];
165
166	✗	std::copy(values, values+size, buffer);
167	✗	std::sort(buffer, buffer+size);
168
169	✗	double currValue = buffer[0];
170	✗	SizeT currCount = 1;
171	✗	for (SizeT index = 1; index != size; ++index) {
172	✗	if (currValue < buffer[index]) {
173	✗	vcpc.push_back(ValueCountPair(currValue, currCount));
174	✗	currValue = buffer[index];
175	✗	currCount = 1;
176		}
177		else
178	✗	++currCount;
179		}
180	✗	vcpc.push_back(ValueCountPair(currValue, currCount));
181		}
182
183		struct CompareFirst {
184	✗	bool operator () (const ValueCountPair& lhs, const ValueCountPair& rhs) {
185	✗	return lhs.first < rhs.first;
186		}
187		};
188
189		static void
190	✗	MergeToLeft( ValueCountPairContainer & vcpcLeft
191		, const ValueCountPairContainer & vcpcRight
192		, ValueCountPairContainer& vcpcDummy ) {
193	✗	assert(vcpcDummy.empty());
194	✗	vcpcDummy.swap(vcpcLeft);
195	✗	vcpcLeft.resize(vcpcRight.size() + vcpcDummy.size());
196
197	✗	std::merge( vcpcRight.begin(), vcpcRight.end()
198		, vcpcDummy.begin(), vcpcDummy.end()
199		, vcpcLeft.begin(), CompareFirst() );
200
201		ValueCountPairContainer::iterator
202	✗	currPair = vcpcLeft.begin(),
203	✗	lastPair = vcpcLeft.end();
204
205	✗	ValueCountPairContainer::iterator index = currPair+1;
206	✗	while (index != lastPair && currPair->first < index->first) {
207	✗	currPair = index;
208	✗	++index;
209		}
210
211	✗	double currValue = currPair->first;
212	✗	SizeT currCount = currPair->second;
213	✗	for (; index != lastPair;++index) {
214	✗	if (currValue < index->first) {
215	✗	*currPair++ = ValueCountPair(currValue, currCount);
216	✗	currValue = index->first;
217	✗	currCount = index->second;
218		}
219		else
220	✗	currCount += index->second;
221		}
222	✗	*currPair++ = ValueCountPair(currValue, currCount);
223	✗	vcpcLeft.erase(currPair, lastPair);
224
225	✗	vcpcDummy.clear();
226		}
227
228
229		void
230	✗	ValueCountPairContainerArray::resize(SizeT k) {
231	✗	assert(capacity() >= k);
232	✗	while (size() < k) {
233	✗	push_back(ValueCountPairContainer());
234	✗	back().reserve(BUFFER_SIZE);
235		}
236		}
237
238		void
239	✗	ValueCountPairContainerArray::GetValueCountPairs( ValueCountPairContainer & vcpc
240		, const double * values
241		, SizeT size
242		, unsigned int nrUsedContainers ) {
243	✗	assert(vcpc.empty());
244	✗	if (size <= BUFFER_SIZE) {
245	✗	GetCountsDirect(vcpc, values, size);
246		} else {
247	✗	resize(nrUsedContainers+2);
248
249	✗	unsigned int m = size/2;
250
251	✗	GetValueCountPairs( vcpc, values, m, nrUsedContainers );
252	✗	GetValueCountPairs( begin()[nrUsedContainers]
253	✗	, values + m
254	✗	, size - m
255		, nrUsedContainers+1 );
256
257	✗	MergeToLeft( vcpc
258	✗	, begin()[nrUsedContainers]
259	✗	, begin()[nrUsedContainers+1]
260		);
261	✗	begin()[nrUsedContainers].clear();
262		}
263	✗	assert(GetTotalCount(vcpc) == size);
264		}
265
266
267		void
268	✗	GetValueCountPairs( ValueCountPairContainer & vcpc
269		, const double * values
270		, SizeT n ) {
271	✗	vcpc.clear();
272	✗	if( n ) {
273	✗	ValueCountPairContainerArray vcpca;
274		// max nr halving is log2(max cardinality / BUFFER_SIZE); max cardinality is SizeT(-1)
275	✗	vcpca.reserve(3 + 8*sizeof(SizeT) - 10);
276	✗	vcpca.GetValueCountPairs(vcpc, values, n, 0);
277
278	✗	assert(vcpc.size());
279		}
280		}
281
282		void
283	✗	ClassifyJenksFisherFromValueCountPairs( LimitsContainer & breaksArray
284		, SizeT k
285		, const ValueCountPairContainer& vcpc ) {
286	✗	breaksArray.resize(k);
287	✗	SizeT m = vcpc.size();
288
289	✗	assert(k <= m); // PRECONDITION
290
291	✗	if (!k)
292	✗	return;
293
294	✗	JenksFisher jf(vcpc, k);
295
296	✗	if (k > 1) {
297	✗	jf.CalcAll();
298
299	✗	SizeT lastClassBreakIndex = jf.FindMaxBreakIndex(jf.m_BufSize-1, 0, jf.m_BufSize);
300
301	✗	while (--k) {
302	✗	breaksArray[k] = vcpc[lastClassBreakIndex+k].first;
303	✗	assert(lastClassBreakIndex < jf.m_BufSize);
304	✗	if (k > 1)
305		{
306	✗	jf.m_CBPtr -= jf.m_BufSize;
307	✗	lastClassBreakIndex = jf.m_CBPtr[lastClassBreakIndex];
308		}
309		}
310	✗	assert(jf.m_CBPtr == jf.m_CB.begin());
311		}
312	✗	assert( k == 0 );
313	✗	breaksArray[0] = vcpc[0].first; // break for the first class is the minimum of the dataset.
314		}
315
316		} // namespace jenks
317		} // util
318		}
319
320
321		# ifdef TEST_BUILD
322		// test code
323		#include "boost/random.hpp"
324
325		int
326		main(int c, char** argv) {
327		const double rangeMin = 0.0;
328		const double rangeMax = 10.0;
329		typedef boost::uniform_real<double> NumberDistribution;
330		typedef boost::mt19937 RandomNumberGenerator;
331		typedef boost::variate_generator<RandomNumberGenerator&, NumberDistribution> Generator;
332
333		NumberDistribution distribution(rangeMin, rangeMax);
334		RandomNumberGenerator generator;
335		generator.seed(0); // seed with the current time
336		Generator numberGenerator(generator, distribution);
337
338		const int n = 1000000;
339		const int k = 10;
340
341		std::cout << "Generating random numbers..." << std::endl;
342
343		std::vector<double> values;
344		values.reserve(n);
345		// populate a distribuiton slightly more interesting than uniform,
346		// with a lower density at higher values.
347		for( int i=0; i!=n; ++i ) {
348		double v = numberGenerator();
349		values.push_back(v*v);
350		}
351		assert(values.size() == n);
352
353		std::cout << "Generating sortedUniqueValueCounts ..." << std::endl;
354		ValueCountPairContainer sortedUniqueValueCounts;
355		GetValueCountPairs( sortedUniqueValueCounts, &values[0], n );
356
357		std::cout << sortedUniqueValueCounts.size() << " unique values generated." << std::endl;
358		//for( auto p : sortedUniqueValueCounts )
359
360		std::cout << "Finding Jenks ClassBreaks..." << std::endl;
361		LimitsContainer resultingbreaksArray;
362		ClassifyJenksFisherFromValueCountPairs( resultingbreaksArray, k, sortedUniqueValueCounts );
363
364		std::cout << "Reporting results..." << std::endl;
365		for( double breakValue : resultingbreaksArray ) {
366		std::cout << breakValue << std::endl;
367		}
368		} // main
369		# endif // TEST_BUILD
370