Loading...
Searching...
No Matches
Histogram.h
1/*
2
3MIT License
4
5Copyright (c) 2017 FMI Open Development / Markus Peura, first.last@fmi.fi
6
7Permission is hereby granted, free of charge, to any person obtaining a copy
8of this software and associated documentation files (the "Software"), to deal
9in the Software without restriction, including without limitation the rights
10to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11copies of the Software, and to permit persons to whom the Software is
12furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice shall be included in all
15copies or substantial portions of the Software.
16
17THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23SOFTWARE.
24
25*/
26/*
27Part of Rack development has been done in the BALTRAD projects part-financed
28by the European Union (European Regional Development Fund and European
29Neighbourhood Partnership Instrument, Baltic Sea Region Programme 2007-2013)
30*/
31#ifndef HISTOGRAM_H_
32#define HISTOGRAM_H_
33
34//
35#include <drain/Log.h>
36#include <drain/TypeUtils.h>
37#include <typeinfo>
38#include <cmath>
39
40#include <vector>
41#include <iostream>
42#include <stdexcept>
43#include <map>
44
45#include "ValueScaling.h"
46
47
48
49namespace drain
50{
51
52// // // using namespace std;
53
54//std::cerr;
55
57
61class Histogram : protected std::vector<unsigned long> {
62public:
63
64 typedef unsigned long count_t;
65 typedef std::vector<count_t> vect_t;
66
67 Histogram(size_t size=256);
68
69 Histogram(const Histogram & histogram);
70
71 virtual ~Histogram(){};
72
73 ValueScaling scaling;
74
76 void setSize(size_t s);
77
78 static
79 std::size_t recommendSizeByType(const std::type_info & type, std::size_t defaultValue = 256);
80
81 inline
82 int getSize() const { return size(); };
83
84 inline
85 int autoSize(const std::type_info & type){
86 if (empty()){
87 resize(recommendSizeByType(type, 256));
88 }
89 return size();
90 };
91
93 inline
94 void clearBins(){
95 std::fill(begin(), end(), 0);
96 }
97
99
105 template <class T>
106 void compute(const T & src, const std::type_info & type = typeid(double), const UniTuple<double,2> & scaling = {1.0, 0.0});
107
109 //void clearBins();
110
111
113 void setSampleCount(long int n){
114 //sampleCount = n;
115 sampleCount = n;
116 sampleCountMedian = static_cast<size_t>(weight * static_cast<double>(n));
117 }
118
119 size_t getSampleCount() const {
120 return sampleCount;
121 }
122
123
125 void setRange(double dataMin, double dataMax);
126
128 inline
129 void setRange(const Range<double> & range){
130 setRange(range.min, range.max);
131 }
132
133 inline
134 void deriveScaling(const ValueScaling & s, const Type & type){
135 setRange(s.fwd(0.0), s.fwd(1 << (8*drain::Type::call<drain::sizeGetter>(type))));
136 }
137
138 inline
139 void setScale(const ValueScaling & scaling){
140 drain::Logger mout(__FILE__, __FUNCTION__);
141 mout.discouraged("use setRange instead (scaling=", scaling, ")");
142 mout.advice("range perhaps: [", this->scaling.getPhysicalRange());
143 this->scaling.assignSequence(scaling);
144 //int s = 1 << (8*drain::Type::call<drain::sizeGetter>(type));
145 }
146
148 /*
149 inline
150 void setScale(const ValueScaling & scaling){
151 //scaling.setRange(0.0, bins-1.0, dataMin, dataMax);
152 };
153 */
154
155 inline
156 int getInMin() const { return 0; };
157
159 inline
160 int getUpperBoundIn() const { return size(); };
161
162
163 inline
164 int getOutMin() const { return scaling.fwd(0); };
165
167 inline
168 int getUpperBoundOut() const { return scaling.fwd(size()); };
169
170
171
173
176 inline
177 void setMedianPosition(double pos){
178 weight = pos;
179 sampleCountMedian = static_cast<size_t>(weight * static_cast<double>(sampleCount));
180 };
181
182 /*
183 template <class T>
184 inline
185 bool withinLimits(T i) const {
186 return ((i >= inMin) && (i <= inMax)); // check if ok inMax is too much
187 }
188 */
189
190 template <class T>
191 inline
192 void increment(T i){
193 //++(*this)[((i-inMin)*bins)/inSpan];
194 ++(*this)[scaling.inv(i)];
195 ++sampleCount; // TODO: slow?
196 }
197
198 template <class T>
199 inline
200 void increment(T i, int count){
201 (*this)[scaling.inv(i)] += count;
202 //(*this)[((i-inMin)*bins)/inSpan] += count;
203 sampleCount += count;
204 }
205
206 template <class T>
207 inline
208 void incrementRaw(T i){
209 ++(*this)[i];
210 ++sampleCount; // TODO: slow?
211 }
212
213 template <class T>
214 inline
215 void incrementRaw(T i, int count){
216 (*this)[i] += count;
217 sampleCount += count;
218 }
219
220 template <class T>
221 inline
222 void decrement(T i){
223 --(*this)[scaling.inv(i)]; // [((i-inMin)*bins)/inSpan];
224 --sampleCount;
225 }
226
227 template <class T>
228 inline
229 void decrement(T i, int count){
230 (*this)[scaling.inv(i)] -= count; //[((i-inMin)*bins)/inSpan] -= count;
231 sampleCount -= count;
232 }
233
234 template <class T>
235 inline
236 void decrementRaw(T i){
237 --(*this)[i];
238 --sampleCount;
239 }
240
241 template <class T>
242 inline
243 void decrementRaw(T i, int count){
244 (*this)[i] -= count;
245 sampleCount -= count;
246 }
247
248
249 template <class T>
250 inline
251 T scaleOut(size_type i) const {
252 //return static_cast<T>(outMin + (i*outSpan)/bins);
253 return static_cast<T>(scaling.fwd(i));
254 }
255
256
257
258
260
262 // @param p applies to weighted median; for standard median, p = 0.5.
263 template <class T>
264 inline
265 T getMax() const {
266 for (size_type i = size()-1; i > 0; --i)
267 if ((*this)[i] > 0)
268 return scaleOut<T>(i);
269 // cerr << "Warning: Histogram empty.\n";
270 return getUpperBoundOut(); //static_cast<T>(outMax);
271 }
272
273
274 template <class T>
275 inline
276 T getMin() const {
277 for (size_type i = 0; i < size(); ++i)
278 if ((*this)[i] > 0)
279 return scaleOut<T>(i);
280 //cerr << "Warning: Histogram empty.\n";
281 return getUpperBoundOut(); //static_cast<T>(outMax);
282 }
283
285
288 template <class T>
289 //inline
290 T getSum() const {
291 double sum = 0;
292 //std::cout << __FUNCTION__ << ':' << sum << std::endl;
293 for (size_type i = 0; i < size(); i++){
294 sum += ((*this)[i] * scaleOut<T>(i)); // count * nominal (should be scaled to middle)
295 //if ((*this)[i])
296 // std::cout << i << '\t' << (*this)[i] << '\t' << scaleOut<T>(i) << '\t' << sum << '\n';
297 }
298 return sum;
299 }
300
301
303 template <class T>
304 inline
305 T getMean() const {
306 //return scaleOut(getSum()/sampleCount);
307 if (sampleCount > 0)
308 return getSum<T>()/sampleCount;
309 else
310 return 0;
311 }
312
313
314 template <class T>
315 inline
316 T getMedian() const {
317 size_type sum = 0;
318 //const
319 //size_type limit = static_cast<size_t>(weight*sampleCount);
320
321 for (size_type i = 0; i < size(); ++i){
322 sum += (*this)[i];
323 if (sum >= sampleCountMedian){ // limit){
324 //if (sum >= limit){
325 return scaleOut<T>(i);
326 }
327 }
328 //return (245*sum)/sampleCount;
329
330 //return ::rand(); //static_cast<T>(outMax);
331 return getUpperBoundOut(); //static_cast<T>(outMax);
332 }
333
334
335 template <class T>
336 inline
337 T getWeightedMedian(float p) const {
338 size_type _sum;
339 if ((p < 0.0) || (p>1.0)){
340 throw std::runtime_error("Histogram<T>::getMedian: median point <0 or >1.0 .");
341 }
342 const size_t limit = sampleCountMedian; // static_cast<size_t>(p * sampleCount);
343 _sum = 0;
344 for (size_type i = 0; i < size(); i++){
345 _sum += (*this)[i];
346 if (_sum >= limit){
347 return static_cast<T>(i);
348 }
349 }
350 return static_cast<T>(size());
351 }
352
353
355 // NOTE: could be pipelined.
356 template <class T>
357 inline
358 T getVariance() const {
359 int n;
360 long sum;
361 long sum2;
362 T f;
363 T sumT;
364
365 sum = 0;
366 sum2 = 0;
367 for (size_type i = 0; i < size(); i++){
368 f = scaleOut<T>(i);
369 n = (*this)[i];
370 sum += n * f;
371 sum2 += n * (f*f);
372 }
373 sumT = static_cast<T>(sum)/sampleCount;
374 return static_cast<T>(sum2)/sampleCount - sumT*sumT;
375 }
376
377
378 template <class T>
379 inline
380 T getStdDeviation() const {
381 return static_cast<T>(sqrt(getVariance<double>()));
382 }
383
384
385
386
387
388
389 inline
390 const vect_t getVector() const {
391 return *this;
392 };
393
394 std::ostream & toStream(std::ostream & ostr) const;
395
396 void dump(std::ostream & ostr = std::cout);
397
398 std::string delimiter;
399
400 inline
401 double getValue(){
402 return (this->*statisticPtr)();
403 }
404
406
410 inline
411 double getValue(char c){
412 return (this->*getStatisticPtr(c))();
413 }
414
415
416 inline
417 void setValueFunc(char c){
418 statisticPtr = getStatisticPtr(c);
419 }
420
421
422protected:
423
424 void initialize();
425
426 typedef double (Histogram::*stat_ptr_t)() const;
427
428 double (Histogram::*statisticPtr)() const;
429
430 stat_ptr_t getStatisticPtr(char c);
431
432
433 //double (Histogram::*)getPtr const;
434
435private:
437 // size_t bins;
438
440 //size_type sampleCount;
441
443 size_type sampleCount = 0;
444
445 // Half of sampleCount.
446 size_type sampleCountMedian = 0;
447
448 // weight for weighted median;
449 float weight = 0.5;
450
452 static inline
453 bool isSmallInt(const std::type_info & type){
454 return (type == typeid(unsigned char)) || (type == typeid(unsigned short int));
455 }
456
458
461 static inline
462 short getBitShift(unsigned int value){
463 short i = 0;
464 while ((value = (value>>1)) > 0){
465 ++i;
466 }
467 return i;
468 }
469
470
471};
472
473
474template <class T>
475void Histogram::compute(const T & dst, const std::type_info & type, const UniTuple<double,2> & scaling){
476
477 drain::Logger mout(__FILE__, __FUNCTION__);
478
479 const int size = autoSize(type);
480
481 if (size <= 0){
482 mout.error(size, " bins, something went wrong");
483 return;
484 }
485
486 if (size > 0x10000){
487 mout.error(size, " bins exceeds Histogram size limit: ", 0x10000);
488 return;
489 }
490
491
492 initialize();
493
494 ValueScaling inputScaling(scaling);
495
496 const bool SAME_SCALING = (inputScaling == this->scaling);
497
498 mout.attention("scalings: ", this->scaling, ", ", inputScaling, " same? ", SAME_SCALING);
499 if (inputScaling == this->scaling){
500
501 }
502
503 if (isSmallInt(type)){
504 const unsigned short histBits = getBitShift(size);
505 if (size == (1<<histBits)){ //
506 const signed short dataBits = (8*drain::Type::call<drain::sizeGetter>(type));
507 const short int bitShift = dataBits - histBits;
508
509 mout.note("Small int (", dataBits, "b) data, histogram[", size,"] ", histBits, " b, computing with bit shift ", bitShift);
510 // setRange(inputScaling.fwd(0), inputScaling.fwd(size -1));
511 mout.info("Physical range of the histogram: ", this->scaling.getPhysicalRange());
512 // mout.warn("Histogram size ", size, "=2^N, using bitShift=", bitShift, ", computing raw values.");
513 for (typename T::const_iterator it = dst.begin(); it != dst.end(); ++it){
514 this->incrementRaw(static_cast<unsigned short int>(*it) >> bitShift);
515 }
516 }
517 else {
518 mout.advice("Consider histogram size of 2^N, e.g.", (1<<histBits));
519 }
520 return;
521 }
522
523 //mout.warn("Data range: ", range, ", mapped (physical) Range: ", rangeOut);
524 //mout.warn("Size ", s, " != 2^N (slow mode)");
525 //mout.advice("Consider histogram size of 2^N, e.g.", (1<<histBits));
526 mout.error("Skipping...");
527
528
529}
530
531inline
532std::ostream & operator<<(std::ostream &ostr,const Histogram &h){
533 return h.toStream(ostr);
534}
535
536}
537
538//std::ostream & operator<<(std::ostream &ostr,const drain::Histogram &h);
539
540//template <class T>
541
542
543#endif /*HISTOGRAM_H_*/
544
545// Drain
Class for computing a histogram and some statistics: average, min, max, mean, std....
Definition Histogram.h:61
void clearBins()
Does not change the size of the histogram.
Definition Histogram.h:94
double getValue(char c)
Return requested statistic.
Definition Histogram.h:411
void setRange(double dataMin, double dataMax)
Set range of original (physical) values to be mapped on the limited number of bins....
Definition Histogram.cpp:76
void setMedianPosition(double pos)
Set location of the median, if not in the middle (50%).
Definition Histogram.h:177
int getUpperBoundIn() const
Returns the upperLimit (exclusive)
Definition Histogram.h:160
int getInMin() const
Set range of original (physical) values to be mapped on the limited number of bins....
Definition Histogram.h:156
T getSum() const
Sum of the samples.
Definition Histogram.h:290
T getMean() const
Unscaled mean.
Definition Histogram.h:305
void setSampleCount(long int n)
Does not change the size of the histogram.
Definition Histogram.h:113
int getUpperBoundOut() const
Returns the upperLimit (exclusive)
Definition Histogram.h:168
void compute(const T &src, const std::type_info &type=typeid(double), const UniTuple< double, 2 > &scaling={1.0, 0.0})
Collect distribution.
Definition Histogram.h:475
T getVariance() const
Computes variance of the values inside the window.
Definition Histogram.h:358
T getMax() const
Statistics.
Definition Histogram.h:265
void setRange(const Range< double > &range)
Set range of original (physical) values to be mapped on the limited number of bins....
Definition Histogram.h:129
void setSize(size_t s)
Sets the number of bins; the resolution of the histogram.
Definition Histogram.cpp:62
LogSourc e is the means for a function or any program segment to "connect" to a Log.
Definition Log.h:312
Logger & note(const TT &... args)
For top-level information.
Definition Log.h:489
Logger & attention(const TT &... args)
Possible error, but execution can continue. Special type of Logger::warn().
Definition Log.h:476
Logger & error(const TT &... args)
Echoes.
Definition Log.h:416
Definition Range.h:52
Utilities related to std::type_info.
Definition Type.h:51
Tuple of N elements of type T.
Definition UniTuple.h:65
Linear scaling and physical range for image intensities.
Definition ValueScaling.h:64
double fwd(double x) const
Forward scaling: given encoded value x, returns corresponding value (possibly physically meaningful).
Definition ValueScaling.h:295
Definition DataSelector.cpp:1277