GCC Code Coverage Report


Directory: ./
File: include/na64util/numerical/online.hh
Date: 2025-09-01 06:19:01
Exec Total Coverage
Lines: 86 93 92.5%
Functions: 30 32 93.8%
Branches: 21 32 65.6%

Line Branch Exec Source
1 #pragma once
2
3 /**\file
4 * \brief Routines for online statistics
5 *
6 * Contains various application classes implementing windowed (sliding,
7 * rolling) algorithms.
8 *
9 * \ingroup numerical-utils
10 */
11
12 ///\defgroup numerical-utils-online-stats Numerical utils for online statistics operations
13
14 #include "na64util/numerical/sums.h"
15
16 #include <deque>
17 #include <vector>
18
19 #include <stddef.h> // size_t
20
21 namespace na64dp {
22 namespace numerical {
23
24 ///\brief C++ interface over C implementation of Klein's scorer
25 ///
26 ///\see na64dp_KleinScorer
27 ///\ingroup numerical-utils-online-stats
28 class KleinScorer : protected na64dp_KleinScorer_t {
29 public:
30 /// Constructs initialized scorer instance
31 3 KleinScorer() {
32 3 na64dp_sum_klein_init(this);
33 3 }
34 /// Adds the value to sum
35 4157399 void add(double v) {
36 4157399 na64dp_sum_klein_add(this, v);
37 4157399 }
38 /// Adds the value to sum (operator shortcut)
39 4157399 KleinScorer & operator+=(double v) {
40 4157399 add(v);
41 4157399 return *this;
42 }
43 /// Returns computed sum
44 39201 double result() const {
45 39201 return na64dp_sum_klein_result(this);
46 }
47 /// Returns computed sum (operator shortcut)
48 39201 operator double() const {
49 39201 return result();
50 }
51 /// Drops the scorer value to 0.
52 19386 void reset() { na64dp_sum_klein_init(this); }
53 };
54
55 ///\brief A C++ wrapper around `na64dp_Covariance`
56 ///
57 ///\ingroup numerical-utils-online-stats
58 class CorrelationScorer : protected na64dp_Covariance {
59 public:
60 /// Initializes and constructs covariance scorer instance
61 1 CorrelationScorer() {
62 1 na64dp_cov_init(this);
63 1 }
64 /// Accounts pair of values
65 7 void account( double x, double y ) {
66 7 na64dp_cov_account( this, x, y );
67 7 }
68 /// Returns unbiased covariance value
69 6 double covariance() const {
70 6 return na64dp_covariance_get(this);
71 }
72
73 /// Returns Pearson's correlation coefficient
74 6 double pearson_correlation() const {
75 6 return na64dp_covariance_get(this)/sqrt(variance_x()*variance_y());
76 }
77
78 /// Returns number of accounted pairs
79 unsigned long n_accounted() const { return n; }
80 /// Returns mean X value
81 7 double mean_x() const { return na64dp_sum_klein_result(&xMean); }
82 /// Returns mean Y value
83 7 double mean_y() const { return na64dp_sum_klein_result(&yMean); }
84 /// Returns sum of squared deviations of X value
85 12 double sq_dev_x() const { return na64dp_sum_klein_result(&d2x); }
86 /// Returns variance of X
87 12 double variance_x() const { return sq_dev_x()/(n-1); }
88 /// Returns sum of squared deviations of Y value
89 12 double sq_dev_y() const { return na64dp_sum_klein_result(&d2y); }
90 /// Returns variance of Y
91 12 double variance_y() const { return sq_dev_y()/(n-1); }
92 };
93
94 /**\brief Covariance matrix wrapper around `na64dp_CovarianceMatrix` C struct
95 *
96 * This implementation is a bit more efficient than multiple
97 * `CorrelationScorer` instances though it explicitly requires that none of the
98 * considered vector components may be NaN.
99 *
100 *\ingroup numerical-utils-online-stats
101 * */
102 class CovarianceMatrix : protected na64dp_CovarianceMatrix_t {
103 public:
104 /// Creates pairwise covariance scorer for vector of `d`
105 1 CovarianceMatrix( unsigned short v ) {
106 1 na64dp_mcov_init( this, v );
107 1 }
108 /// Frees covariance scorer (heap used)
109 1 ~CovarianceMatrix() {
110 1 na64dp_mcov_free(this);
111 1 }
112 /// Takes into account given sample vector
113 7 void account( const double * v ) {
114 7 na64dp_mcov_account(this, v);
115 7 }
116 /// Returns i-th mean value
117 double mean(int i) const { return na64dp_sum_klein_result(means + i); }
118 /// Returns number of accounted vectors
119 unsigned long n_accounted() const { return n; }
120 /// Returns dimensionality
121 unsigned short n_dimensions() const { return d; }
122 /// Returns i-th mean value
123 35 double mean(int i) { return na64dp_sum_klein_result(means + i); } // sqds, covs
124 /// Returns i-th squared deviation value
125 210 double sq_dev(int i) { return na64dp_sum_klein_result(sqds + i); }
126 /// Returns variance value of i-th component
127 210 double variance(int i) { return sq_dev(i)/(n-1); }
128 /// Returns covariance value of i-th and j-th component
129 180 double covariance(int i, int j) { return na64dp_mcov( this, i, j ); }
130 /// Returns Pearson's correlation value of i-th and j-th component
131 90 double pearson_correlation(int i, int j) {
132 90 return covariance(i, j)/sqrt(variance(i)*variance(j));
133 }
134 };
135
136 ///\brief Utility interface class representing windowed (sliding) statistics.
137 ///
138 ///\ingroup numerical-utils-online-stats
139 class iMovStats {
140 public:
141 virtual bool is_outlier(double, double threshold=3.) const = 0;
142 virtual double mean() const = 0;
143 virtual void account(double) = 0;
144 virtual std::vector<double> data_copy() const = 0;
145 virtual size_t n_samples() const = 0;
146 };
147
148 namespace aux {
149
150 template<typename ScorerT> void reset_summation_scorer(ScorerT &);
151
152 template<> void reset_summation_scorer<double>(double &);
153 template<> void reset_summation_scorer<KleinScorer>(KleinScorer &);
154
155 }
156
157 /** Implements moving statistics using strightforward approach
158 *
159 * Maintains a deque of N samples, dynamically recalculating mean/stddev values
160 * each time new sample is accounted.
161 *
162 *\ingroup numerical-utils-online-stats
163 */
164 template<typename ScorerT>
165 class MovingStats : public iMovStats
166 , public std::deque<double> {
167 private:
168 /// Numbor ef entries since last recaching
169 size_t _recacheCounter;
170 /// Threshold number of entries for recaching
171 const size_t _recacheCounterThreshold;
172 protected:
173 /// Threshold (in units of \f$\sigma\f$) of the values to be omitted
174 double _excludeOutliersThreshold;
175 /// Threshold (in units of \f$\sigma\f$) of the values to cause recache
176 double _recacheOnOutliersThreshold;
177 /// \f$\sum\limits_{i=1}^N x_i\f$
178 ScorerT _runningSum;
179 /// \f$\sum\limits_{i=1}^N x_i^2\f$
180 ScorerT _runningSqSum;
181 const size_t _windowSize;
182
183 /// Re-calculates the sums
184 9693 void _recache() {
185 9693 aux::reset_summation_scorer<ScorerT>(_runningSum);
186 9693 aux::reset_summation_scorer<ScorerT>(_runningSqSum);
187
2/2
✓ Branch 5 taken 1918700 times.
✓ Branch 6 taken 9693 times.
1928393 for( auto v : *this ) {
188
1/1
✓ Branch 1 taken 1918700 times.
1918700 _runningSum += v;
189
1/1
✓ Branch 1 taken 1918700 times.
1918700 _runningSqSum += v*v;
190 }
191 9693 _recacheCounter = 0;
192 9693 }
193 public:
194 /**\brief Constructs empty sliding scorer
195 *
196 * Used to create new instances of the scorer in cases when no initial data
197 * is available. Various estimations derived by this instance (like mean
198 * and stdev) at first `N` events will not be properly conditioned because
199 * of lack of the data, so wherever is possible an alternative form of
200 * constructor has to be used.
201 *
202 * \param N size of sliding window
203 * \param excludeOutliers \f$k\f$ in \f$k \times \sigma\f$ to classify outliers
204 * \param recacheOnOutliers \f$k\f$ in \f$k \times \sigma\f$ to recache
205 * \param recachingFrac fraction of events to force sums recache
206 * */
207 1 MovingStats( size_t N
208 , double excludeOutliers=6.
209 , double recacheOnOutliers=6.
210 , double recachingFrac=1.
211 1 ) : _recacheCounter(0)
212 1 , _recacheCounterThreshold(N*recachingFrac)
213 1 , _excludeOutliersThreshold(excludeOutliers)
214 1 , _recacheOnOutliersThreshold(recacheOnOutliers)
215
2/2
✓ Branch 3 taken 1 times.
✓ Branch 6 taken 1 times.
1 , _windowSize(N) {}
216
217 /**\brief Constructs empty sliding scorer
218 *
219 * Used to create new instances of the scorer in cases when no initial data
220 * is available. Various estimations derived by this instance (like mean
221 * and stdev) at first `N` events will not be properly conditioned because
222 * of lack of the data, so wherever is possible an alternative form of
223 * constructor has to be used.
224 *
225 * \param initial shall contain at least `N` samples for initial state of accumulator
226 * \param N size of sliding window
227 * \param excludeOutliers \f$k\f$ in \f$k \times \sigma\f$ to classify outliers
228 * \param recacheOnOutliers \f$k\f$ in \f$k \times \sigma\f$ to recache
229 * \param recachingFrac fraction of events to force sums recache
230 * */
231 MovingStats( double * initial
232 , size_t N
233 , double excludeOutliers=6.
234 , double recacheOnOutliers=6.
235 , double recachingFrac=1.
236 ) : std::deque<double>(initial, initial + N)
237 , _recacheCounter(0)
238 , _recacheCounterThreshold(N*recachingFrac)
239 , _excludeOutliersThreshold(excludeOutliers)
240 , _recacheOnOutliersThreshold(recacheOnOutliers)
241 , _windowSize(N) {}
242
243 /// Returns true if given sample is beyond $6 \f$\sigma\f$
244 9800 virtual bool is_outlier( double sample, double threshold=3.) const override {
245
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 9800 times.
9800 if( empty() ) return false;
246 9800 return fabs(sample - mean()) > sqrt(sigma_sq())*threshold;
247 }
248
249 /// Returns mean value \f$(1/N) \sum\limits_{i=1}^N x_i\f$
250 29400 virtual double mean() const override {
251
1/2
✓ Branch 1 taken 29400 times.
✗ Branch 2 not taken.
29400 if( !empty() )
252 29400 return _runningSum/size();
253 return std::nan("0");
254 }
255
256 /// Takes into account sample \f$x_i\f$
257 10000 virtual void account(double sample) override {
258 20000 if( _excludeOutliersThreshold
259
2/5
✗ Branch 0 not taken.
✓ Branch 1 taken 10000 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 10000 times.
10000 && is_outlier(sample, _excludeOutliersThreshold) ) return;
260
2/5
✗ Branch 0 not taken.
✓ Branch 1 taken 10000 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 10000 times.
10000 if( _recacheCounterThreshold && ++_recacheCounter > _recacheCounterThreshold ) {
261 // Recache sums to avoid (mitigate) catstrophic cancellation
262 _recache();
263 }
264
2/2
✓ Branch 1 taken 200 times.
✓ Branch 2 taken 9800 times.
10000 if( size() < _windowSize ) {
265 200 _runningSum += sample;
266 200 _runningSqSum += sample*sample;
267 200 push_back( sample );
268
1/2
✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
200 if( !_recacheCounterThreshold ) _recache();
269 200 return;
270 }
271 9800 const double prevFirst = *begin();
272 9800 _runningSum += sample - prevFirst;
273 9800 _runningSqSum += sample*sample - prevFirst*prevFirst;
274 9800 pop_front();
275 9800 push_back( sample );
276 19600 if( _recacheOnOutliersThreshold
277
5/6
✓ Branch 0 taken 9800 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 9493 times.
✓ Branch 4 taken 307 times.
✓ Branch 5 taken 9493 times.
✓ Branch 6 taken 307 times.
9800 && is_outlier(sample, _recacheOnOutliersThreshold) ) {
278 9493 _recache();
279 }
280 }
281
282 /// Returns (unbiased, sliding) \f$ \sigma^2 \f$
283 9800 double sigma_sq() const {
284
1/2
✓ Branch 1 taken 9800 times.
✗ Branch 2 not taken.
9800 if( size() > 1 )
285 9800 return (_runningSqSum/size() - mean()*mean())/(size()-1);
286 return std::nan("0");
287 }
288
289 /// Copies entries from deque to vector
290 virtual std::vector<double> data_copy() const override {
291 return std::vector<double>( this->begin(), this->end() );
292 }
293
294 virtual size_t n_samples() const {
295 return size();
296 }
297 };
298
299 }
300 }
301
302