| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "na64util/numerical/online.hh" | ||
| 2 | |||
| 3 | #include <cmath> | ||
| 4 | #include <gsl/gsl_statistics_double.h> | ||
| 5 | #include <gsl/gsl_vector.h> | ||
| 6 | #define delete delete_sample | ||
| 7 | #include <gsl/gsl_movstat.h> | ||
| 8 | #undef delete | ||
| 9 | |||
| 10 | #include <fstream> | ||
| 11 | #include <gtest/gtest.h> | ||
| 12 | |||
| 13 | namespace na64dp { | ||
| 14 | namespace test { | ||
| 15 | |||
| 16 | using namespace numerical; | ||
| 17 | |||
| 18 | // Checks basic validity of Klein scorer implementation; uses series with known | ||
| 19 | // sum to validate summation convergence. | ||
| 20 | // TODO: currently this UT takes 10ms and is one of the slowest UTs. Consider | ||
| 21 | // using series with faster convergence and attenuation instead | ||
| 22 | // of Basel's problem/Apéry's constant calculus | ||
| 23 | 8 | TEST(Sums, KleinScorer) { | |
| 24 | // Basel's problem is used to approximate the convergence limit of the sum; | ||
| 25 | // unfortunately, convergence is too slow to be used in unit tests | ||
| 26 | #if 0 | ||
| 27 | const double sumLimit = M_PI*M_PI/6.; | ||
| 28 | KleinScorer kleinS; | ||
| 29 | double directS = 0; | ||
| 30 | for( long int i = 1; i < 1e7; ++i ) { | ||
| 31 | const double v = 1./(i*i); | ||
| 32 | directS += v; | ||
| 33 | kleinS.add(v); | ||
| 34 | } | ||
| 35 | const double directError = fabs( sumLimit - directS ) | ||
| 36 | , kleinError = fabs( sumLimit - kleinS.result() ) | ||
| 37 | ; | ||
| 38 | std::cout << "xxx direct=" << directS << " (" << directError << ")" | ||
| 39 | << ", klein=" << kleinS.result() << " (" << kleinError << ")" | ||
| 40 | << ", real=" << sumLimit << std::endl; | ||
| 41 | #else | ||
| 42 | // Apéry's constant as cubic series converges faster | ||
| 43 | 2 | const double sumLimit = 1.202056903159594285399738161511449990764986292; | |
| 44 |
1/1✓ Branch 1 taken 1 times.
|
2 | KleinScorer kleinS; |
| 45 | 2 | double directS = 0; | |
| 46 | // Note: number of iterations needed to reveal the error may vary depending | ||
| 47 | // on hardware/platform/double type precision, etc. | ||
| 48 |
2/2✓ Branch 0 taken 299999 times.
✓ Branch 1 taken 1 times.
|
600000 | for( long int i = 1; i < 3e5; ++i ) { |
| 49 | 599998 | const double v = 1./(i*i*i); | |
| 50 | 599998 | directS += v; | |
| 51 |
1/1✓ Branch 1 taken 299999 times.
|
599998 | kleinS += v; |
| 52 | } | ||
| 53 | 2 | const double directError = fabs( sumLimit - directS ) | |
| 54 |
1/1✓ Branch 1 taken 1 times.
|
2 | , kleinError = fabs( sumLimit - kleinS ) |
| 55 | ; | ||
| 56 | #endif | ||
| 57 | // In case of errors, check this output: | ||
| 58 | //std::cout << "xxx direct=" << directS << " (" << directError << ")" | ||
| 59 | // << ", klein=" << kleinS.result() << " (" << kleinError << ")" | ||
| 60 | // << ", real=" << sumLimit << std::endl; | ||
| 61 |
2/3✓ Branch 1 taken 1 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
|
2 | EXPECT_GT( directError, kleinError ); |
| 62 | 2 | } | |
| 63 | |||
| 64 | // Uses GSL routines to validate the online covariance calculus on each step | ||
| 65 | 8 | TEST(Sums, Correlation) { | |
| 66 | 2 | double testSamples[][2] = { | |
| 67 | {3.45, 1.18}, | ||
| 68 | {7.62, 2.33}, | ||
| 69 | {5.56, 1.76}, | ||
| 70 | {.44, 0.12}, | ||
| 71 | {7.89, 5.45}, | ||
| 72 | {-1.34, 0.1}, | ||
| 73 | {123.37, 80}, | ||
| 74 | {std::nan("0"), 0.} | ||
| 75 | }; | ||
| 76 | double dataX[sizeof(testSamples)/(2*sizeof(double))] | ||
| 77 | , dataY[sizeof(testSamples)/(2*sizeof(double))] | ||
| 78 | ; | ||
| 79 |
1/1✓ Branch 1 taken 1 times.
|
2 | CorrelationScorer C; |
| 80 | 2 | size_t n = 0; | |
| 81 | 16 | for( double (* sample) [2] = testSamples | |
| 82 |
2/2✓ Branch 1 taken 7 times.
✓ Branch 2 taken 1 times.
|
16 | ; !std::isnan((*sample)[0]) |
| 83 | ; ++sample ) { | ||
| 84 | //std::cout << "xxx accounting " << (*sample)[0] | ||
| 85 | // << ", " << (*sample)[1] << std::endl; | ||
| 86 |
1/1✓ Branch 1 taken 7 times.
|
14 | C.account( (*sample)[0], (*sample)[1] ); |
| 87 | 14 | dataX[n] = (*sample)[0]; | |
| 88 | 14 | dataY[n] = (*sample)[1]; | |
| 89 | |||
| 90 | 14 | ++n; | |
| 91 |
1/1✓ Branch 1 taken 7 times.
|
14 | double rMeanX = gsl_stats_mean( dataX, 1, n ) |
| 92 |
1/1✓ Branch 1 taken 7 times.
|
14 | , rMeanY = gsl_stats_mean( dataY, 1, n ) |
| 93 | ; | ||
| 94 |
4/6✓ Branch 1 taken 7 times.
✓ Branch 4 taken 7 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 7 times.
✓ Branch 22 taken 7 times.
✗ Branch 23 not taken.
|
14 | ASSERT_NEAR( rMeanX, C.mean_x(), 1e-12 ); |
| 95 |
4/6✓ Branch 1 taken 7 times.
✓ Branch 4 taken 7 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 7 times.
✓ Branch 22 taken 7 times.
✗ Branch 23 not taken.
|
14 | ASSERT_NEAR( rMeanY, C.mean_y(), 1e-12 ); |
| 96 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
|
14 | if( n > 1 ) { |
| 97 |
1/1✓ Branch 1 taken 6 times.
|
12 | const double rVarX = gsl_stats_variance_m( dataX, 1, n, rMeanX ) |
| 98 |
1/1✓ Branch 1 taken 6 times.
|
12 | , rVarY = gsl_stats_variance_m( dataY, 1, n, rMeanY ) |
| 99 |
1/1✓ Branch 1 taken 6 times.
|
12 | , rCov = gsl_stats_covariance_m( dataX, 1 |
| 100 | , dataY, 1 | ||
| 101 | , n, rMeanX, rMeanY ) | ||
| 102 |
1/1✓ Branch 1 taken 6 times.
|
12 | , rCorr = gsl_stats_correlation( dataX, 1 |
| 103 | , dataY, 1 | ||
| 104 | , n ); | ||
| 105 | ; | ||
| 106 |
4/6✓ Branch 1 taken 6 times.
✓ Branch 4 taken 6 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 6 times.
✓ Branch 22 taken 6 times.
✗ Branch 23 not taken.
|
12 | ASSERT_NEAR( rVarX, C.variance_x(), 1e-12 ); |
| 107 |
4/6✓ Branch 1 taken 6 times.
✓ Branch 4 taken 6 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 6 times.
✓ Branch 22 taken 6 times.
✗ Branch 23 not taken.
|
12 | ASSERT_NEAR( rVarY, C.variance_y(), 1e-12 ); |
| 108 |
4/6✓ Branch 1 taken 6 times.
✓ Branch 4 taken 6 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 6 times.
✓ Branch 22 taken 6 times.
✗ Branch 23 not taken.
|
12 | ASSERT_NEAR( rCov, C.covariance(), 1e-12 ); |
| 109 |
4/6✓ Branch 1 taken 6 times.
✓ Branch 4 taken 6 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 6 times.
✓ Branch 22 taken 6 times.
✗ Branch 23 not taken.
|
12 | ASSERT_NEAR( rCorr, C.pearson_correlation(), 1e-12 ); |
| 110 | } | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | // Uses GSL routines to validate the online covariance matrix calculus on | ||
| 115 | // each step | ||
| 116 | 8 | TEST(Sums, CorrelationMatrix) { | |
| 117 | 2 | double testSamples[][5] = { | |
| 118 | {3.45, 1.18, -3, 0, 0.11}, | ||
| 119 | {7.62, 2.33, -7, 1, 0.87}, | ||
| 120 | {5.56, 1.76, -11, 0, 0.53}, | ||
| 121 | {.44, 0.12, -1, 0, 0.21}, | ||
| 122 | {7.89, 5.45, 786, 0, 0.44}, | ||
| 123 | {-1.34, 0.1, -99, 0, 0.42}, | ||
| 124 | {123.37, 80, 11, 1, 0. }, | ||
| 125 | {std::nan("0"), 0, 0, 0, 0} | ||
| 126 | }; | ||
| 127 |
1/1✓ Branch 1 taken 1 times.
|
2 | CovarianceMatrix m(5); |
| 128 | double means[5], rVar[5], rCov[5][5], rCorr[5][5]; | ||
| 129 | 2 | size_t n = 0; | |
| 130 | 16 | for( double (* sample) [5] = testSamples | |
| 131 |
2/2✓ Branch 1 taken 7 times.
✓ Branch 2 taken 1 times.
|
16 | ; !std::isnan((*sample)[0]) |
| 132 | ; ++sample ) { | ||
| 133 |
1/1✓ Branch 1 taken 7 times.
|
14 | m.account(*sample); |
| 134 | 14 | ++n; | |
| 135 |
2/2✓ Branch 0 taken 35 times.
✓ Branch 1 taken 7 times.
|
84 | for( int i = 0; i < 5; ++i ) { |
| 136 |
1/1✓ Branch 1 taken 35 times.
|
70 | means[i] = gsl_stats_mean( (*testSamples) + i, 5, n ); |
| 137 |
3/4✓ Branch 1 taken 35 times.
✓ Branch 4 taken 35 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 35 times.
|
70 | EXPECT_NEAR( means[i], m.mean(i), 1e-10 ); |
| 138 | } | ||
| 139 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 6 times.
|
14 | if( n < 2 ) continue; |
| 140 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 6 times.
|
72 | for( int i = 0; i < 5; ++i ) { |
| 141 |
1/1✓ Branch 1 taken 30 times.
|
60 | rVar[i] = gsl_stats_variance_m( (*testSamples) + i, 5, n, means[i] ); |
| 142 |
3/4✓ Branch 1 taken 30 times.
✓ Branch 4 taken 30 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 30 times.
|
60 | EXPECT_NEAR( rVar[i], m.variance(i), 1e-10 ); |
| 143 |
2/2✓ Branch 0 taken 90 times.
✓ Branch 1 taken 30 times.
|
240 | for( int j = i; j < 5; ++j ) { |
| 144 | 360 | rCov[i][j] = gsl_stats_covariance_m( (*testSamples) + i, 5 | |
| 145 |
1/1✓ Branch 1 taken 90 times.
|
180 | , (*testSamples) + j, 5 |
| 146 | , n, means[i], means[j] ); | ||
| 147 | 360 | rCorr[i][j] = gsl_stats_correlation( (*testSamples) + i, 5 | |
| 148 |
1/1✓ Branch 1 taken 90 times.
|
180 | , (*testSamples) + j, 5 |
| 149 | , n ); | ||
| 150 | //std::cout << "xxx #" << n << " " | ||
| 151 | // << i << ", " << j | ||
| 152 | // << ", vars=" << rVar[i] << "/" << rVar[j] | ||
| 153 | // << ", real cov=" << rCov[i][j] | ||
| 154 | // << ", m cov=" << m.covariance(i, j) | ||
| 155 | // << ", real cor=" << rCorr[i][j] | ||
| 156 | // << ", m corr=" << m.pearson_correlation(i, j) | ||
| 157 | // << std::endl; // XXX | ||
| 158 |
3/4✓ Branch 1 taken 90 times.
✓ Branch 4 taken 90 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 90 times.
|
180 | EXPECT_NEAR( rCov[i][j], m.covariance(i, j), 1e-10 ); |
| 159 |
4/6✓ Branch 1 taken 90 times.
✓ Branch 4 taken 90 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 90 times.
✓ Branch 22 taken 90 times.
✗ Branch 23 not taken.
|
180 | ASSERT_NEAR( rCorr[i][j], m.pearson_correlation(i, j), 1e-10 ); |
| 160 | } | ||
| 161 | } | ||
| 162 | } | ||
| 163 | 2 | } | |
| 164 | |||
| 165 | // uncomment to write the output file | ||
| 166 | //#define WRITE_SUM_COMPARISON "./sums.2move/sliding-sum-10000x200-norecacheO.klein.dat" | ||
| 167 | |||
| 168 | 8 | TEST(Sums, MovingStats) { | |
| 169 | #ifdef WRITE_SUM_COMPARISON | ||
| 170 | std::ofstream ofs(WRITE_SUM_COMPARISON); | ||
| 171 | #endif | ||
| 172 | 2 | const size_t nSamples = 10000 | |
| 173 | 2 | , winSize = 200 | |
| 174 | ; | ||
| 175 | double args[nSamples]; | ||
| 176 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_vector * inVec = gsl_vector_alloc(nSamples); |
| 177 |
1/1✓ Branch 1 taken 1 times.
|
2 | MovingStats<KleinScorer> ms(winSize, false, true, 0.); |
| 178 | // Generate samples -- a tangent curve with logarithmic sampling | ||
| 179 |
2/2✓ Branch 0 taken 10000 times.
✓ Branch 1 taken 1 times.
|
20002 | for( size_t i = 0; i < nSamples; ++i ) { |
| 180 | 20000 | double arg_ = pow((i+1)/double(nSamples), 8); | |
| 181 | 20000 | args[i] = arg_*2*M_PI; | |
| 182 |
1/1✓ Branch 1 taken 10000 times.
|
20000 | gsl_vector_set( inVec, i, tan( args[i] ) ); |
| 183 | } | ||
| 184 | |||
| 185 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_movstat_workspace * wsPtr = gsl_movstat_alloc2(winSize - 1, 0); |
| 186 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_vector * meanVec = gsl_vector_alloc(nSamples) |
| 187 |
1/1✓ Branch 1 taken 1 times.
|
2 | , * varVec = gsl_vector_alloc(nSamples) |
| 188 |
1/1✓ Branch 1 taken 1 times.
|
2 | , * stdDevVec = gsl_vector_alloc(nSamples) |
| 189 | ; | ||
| 190 | |||
| 191 | // Compute window mean and standard deviation over all the input values | ||
| 192 | 2 | auto endType = GSL_MOVSTAT_END_TRUNCATE; | |
| 193 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_movstat_mean( endType, inVec, meanVec, wsPtr ); |
| 194 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_movstat_variance( endType, inVec, varVec, wsPtr ); |
| 195 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_movstat_sd( endType, inVec, stdDevVec, wsPtr ); |
| 196 | |||
| 197 | #ifdef WRITE_SUM_COMPARISON | ||
| 198 | ofs << "#" | ||
| 199 | << std::setw(14) << "arg " // 1 | ||
| 200 | << std::setw(15) << "sample " // 2 | ||
| 201 | << std::setw(15) << "m.direct " // 3 | ||
| 202 | << std::setw(15) << "m.mov " // 4 | ||
| 203 | << std::setw(15) << "m.gsl " // 5 | ||
| 204 | << std::setw(15) << "m.d.vs.gsl " // 6 | ||
| 205 | << std::setw(15) << "m.d.vs.mov " // 7 | ||
| 206 | //<< std::setw(14) << "" | ||
| 207 | << std::endl; | ||
| 208 | #endif | ||
| 209 |
2/2✓ Branch 0 taken 10000 times.
✓ Branch 1 taken 1 times.
|
20002 | for( ssize_t i = 0; i < (ssize_t) nSamples; ++i ) { |
| 210 |
2/2✓ Branch 1 taken 10000 times.
✓ Branch 4 taken 10000 times.
|
20000 | ms.account( gsl_vector_get(inVec, i) ); |
| 211 | //if( (ms.mean() - gsl_vector_get( meanVec, i ))/ms.mean() < 1e-2 ) continue; | ||
| 212 | 20000 | ssize_t k = 0; | |
| 213 | 20000 | double directMean = 0.; | |
| 214 |
4/4✓ Branch 0 taken 1980299 times.
✓ Branch 1 taken 9801 times.
✓ Branch 2 taken 1980100 times.
✓ Branch 3 taken 199 times.
|
3980200 | for( k = 0; k < (ssize_t) winSize && i - k >= 0; ++k ) { |
| 215 |
1/1✓ Branch 1 taken 1980100 times.
|
3960200 | double sample = gsl_vector_get( inVec, i - k ); |
| 216 | 3960200 | directMean += sample; | |
| 217 | } | ||
| 218 | 20000 | directMean /= k; | |
| 219 | #ifdef WRITE_SUM_COMPARISON | ||
| 220 | ofs << std::scientific | ||
| 221 | << std::setw(14) << args[i] << " " | ||
| 222 | << std::setw(14) << gsl_vector_get( inVec, i ) << " " | ||
| 223 | << std::setw(14) << directMean << " " | ||
| 224 | << std::setw(14) << ms.mean() << " " | ||
| 225 | << std::setw(14) << gsl_vector_get( meanVec, i ) << " " | ||
| 226 | << std::setw(14) << fabs(directMean - gsl_vector_get( meanVec, i )) << " " | ||
| 227 | << std::setw(14) << fabs(directMean - ms.mean()) << " " | ||
| 228 | //<< std::setw(14) << gsl_vector_get( varVec, i ) << " " | ||
| 229 | //<< std::setw(14) << gsl_vector_get( stdDevVec, i ) | ||
| 230 | << std::endl; | ||
| 231 | #endif | ||
| 232 | } | ||
| 233 | |||
| 234 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_vector_free(inVec); |
| 235 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_vector_free(meanVec); |
| 236 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_vector_free(varVec); |
| 237 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_vector_free(stdDevVec); |
| 238 |
1/1✓ Branch 1 taken 1 times.
|
2 | gsl_movstat_free(wsPtr); |
| 239 | 2 | } | |
| 240 | |||
| 241 | } | ||
| 242 | } | ||
| 243 |