Histogram.h
1 /*
2 
3 MIT License
4 
5 Copyright (c) 2017 FMI Open Development / Markus Peura, first.last@fmi.fi
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in all
15 copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 
25 */
26 /*
27 Part of Rack development has been done in the BALTRAD projects part-financed
28 by the European Union (European Regional Development Fund and European
29 Neighbourhood Partnership Instrument, Baltic Sea Region Programme 2007-2013)
30 */
31 #ifndef HISTOGRAM_H_
32 #define HISTOGRAM_H_
33 
34 //
35 #include <drain/Log.h>
36 #include <drain/TypeUtils.h>
37 #include <typeinfo>
38 #include <cmath>
39 
40 #include <vector>
41 #include <iostream>
42 #include <stdexcept>
43 #include <map>
44 
45 #include "ValueScaling.h"
46 
47 
48 
49 namespace drain
50 {
51 
52 // // // using namespace std;
53 
54 //std::cerr;
55 
57 
61 class Histogram : protected std::vector<unsigned long> {
62 public:
63 
64  typedef unsigned long count_t;
65  typedef std::vector<count_t> vect_t;
66 
67  Histogram(size_t size=256);
68 
69  Histogram(const Histogram & histogram);
70 
71  virtual ~Histogram(){};
72 
73  ValueScaling scaling;
74 
76  void setSize(size_t s);
77 
78  static
79  std::size_t recommendSizeByType(const std::type_info & type, std::size_t defaultValue = 256);
80 
81  inline
82  int getSize() const { return size(); };
83 
84  inline
85  int autoSize(const std::type_info & type){
86  if (empty()){
87  resize(recommendSizeByType(type, 256));
88  }
89  return size();
90  };
91 
93  inline
94  void clearBins(){
95  std::fill(begin(), end(), 0);
96  }
97 
99 
105  template <class T>
106  void compute(const T & src, const std::type_info & type = typeid(double), const UniTuple<double,2> & scaling = {1.0, 0.0});
107 
109  //void clearBins();
110 
111 
113  void setSampleCount(long int n){
114  //sampleCount = n;
115  sampleCount = n;
116  sampleCountMedian = static_cast<size_t>(weight * static_cast<double>(n));
117  }
118 
119  size_t getSampleCount() const {
120  return sampleCount;
121  }
122 
123 
125  void setRange(double dataMin, double dataMax);
126 
128  inline
129  void setRange(const Range<double> & range){
130  setRange(range.min, range.max);
131  }
132 
133  inline
134  void deriveScaling(const ValueScaling & s, const Type & type){
135  setRange(s.fwd(0.0), s.fwd(1 << (8*drain::Type::call<drain::sizeGetter>(type))));
136  }
137 
138  inline
139  void setScale(const ValueScaling & scaling){
140  drain::Logger mout(__FILE__, __FUNCTION__);
141  mout.discouraged("use setRange instead (scaling=", scaling, ")");
142  mout.advice("range perhaps: [", this->scaling.getPhysicalRange());
143  this->scaling.assignSequence(scaling);
144  //int s = 1 << (8*drain::Type::call<drain::sizeGetter>(type));
145  }
146 
148  /*
149  inline
150  void setScale(const ValueScaling & scaling){
151  //scaling.setRange(0.0, bins-1.0, dataMin, dataMax);
152  };
153  */
154 
155  inline
156  int getInMin() const { return 0; };
157 
159  inline
160  int getUpperBoundIn() const { return size(); };
161 
162 
163  inline
164  int getOutMin() const { return scaling.fwd(0); };
165 
167  inline
168  int getUpperBoundOut() const { return scaling.fwd(size()); };
169 
170 
171 
173 
176  inline
177  void setMedianPosition(double pos){
178  weight = pos;
179  sampleCountMedian = static_cast<size_t>(weight * static_cast<double>(sampleCount));
180  };
181 
182  /*
183  template <class T>
184  inline
185  bool withinLimits(T i) const {
186  return ((i >= inMin) && (i <= inMax)); // check if ok inMax is too much
187  }
188  */
189 
190  template <class T>
191  inline
192  void increment(T i){
193  //++(*this)[((i-inMin)*bins)/inSpan];
194  ++(*this)[scaling.inv(i)];
195  ++sampleCount; // TODO: slow?
196  }
197 
198  template <class T>
199  inline
200  void increment(T i, int count){
201  (*this)[scaling.inv(i)] += count;
202  //(*this)[((i-inMin)*bins)/inSpan] += count;
203  sampleCount += count;
204  }
205 
206  template <class T>
207  inline
208  void incrementRaw(T i){
209  ++(*this)[i];
210  ++sampleCount; // TODO: slow?
211  }
212 
213  template <class T>
214  inline
215  void incrementRaw(T i, int count){
216  (*this)[i] += count;
217  sampleCount += count;
218  }
219 
220  template <class T>
221  inline
222  void decrement(T i){
223  --(*this)[scaling.inv(i)]; // [((i-inMin)*bins)/inSpan];
224  --sampleCount;
225  }
226 
227  template <class T>
228  inline
229  void decrement(T i, int count){
230  (*this)[scaling.inv(i)] -= count; //[((i-inMin)*bins)/inSpan] -= count;
231  sampleCount -= count;
232  }
233 
234  template <class T>
235  inline
236  void decrementRaw(T i){
237  --(*this)[i];
238  --sampleCount;
239  }
240 
241  template <class T>
242  inline
243  void decrementRaw(T i, int count){
244  (*this)[i] -= count;
245  sampleCount -= count;
246  }
247 
248 
249  template <class T>
250  inline
251  T scaleOut(size_type i) const {
252  //return static_cast<T>(outMin + (i*outSpan)/bins);
253  return static_cast<T>(scaling.fwd(i));
254  }
255 
256 
257 
258 
260 
262  // @param p applies to weighted median; for standard median, p = 0.5.
263  template <class T>
264  inline
265  T getMax() const {
266  for (size_type i = size()-1; i > 0; --i)
267  if ((*this)[i] > 0)
268  return scaleOut<T>(i);
269  // cerr << "Warning: Histogram empty.\n";
270  return getUpperBoundOut(); //static_cast<T>(outMax);
271  }
272 
273 
274  template <class T>
275  inline
276  T getMin() const {
277  for (size_type i = 0; i < size(); ++i)
278  if ((*this)[i] > 0)
279  return scaleOut<T>(i);
280  //cerr << "Warning: Histogram empty.\n";
281  return getUpperBoundOut(); //static_cast<T>(outMax);
282  }
283 
285 
288  template <class T>
289  //inline
290  T getSum() const {
291  double sum = 0;
292  //std::cout << __FUNCTION__ << ':' << sum << std::endl;
293  for (size_type i = 0; i < size(); i++){
294  sum += ((*this)[i] * scaleOut<T>(i)); // count * nominal (should be scaled to middle)
295  //if ((*this)[i])
296  // std::cout << i << '\t' << (*this)[i] << '\t' << scaleOut<T>(i) << '\t' << sum << '\n';
297  }
298  return sum;
299  }
300 
301 
303  template <class T>
304  inline
305  T getMean() const {
306  //return scaleOut(getSum()/sampleCount);
307  if (sampleCount > 0)
308  return getSum<T>()/sampleCount;
309  else
310  return 0;
311  }
312 
313 
314  template <class T>
315  inline
316  T getMedian() const {
317  size_type sum = 0;
318  //const
319  //size_type limit = static_cast<size_t>(weight*sampleCount);
320 
321  for (size_type i = 0; i < size(); ++i){
322  sum += (*this)[i];
323  if (sum >= sampleCountMedian){ // limit){
324  //if (sum >= limit){
325  return scaleOut<T>(i);
326  }
327  }
328  //return (245*sum)/sampleCount;
329 
330  //return ::rand(); //static_cast<T>(outMax);
331  return getUpperBoundOut(); //static_cast<T>(outMax);
332  }
333 
334 
335  template <class T>
336  inline
337  T getWeightedMedian(float p) const {
338  size_type _sum;
339  if ((p < 0.0) || (p>1.0)){
340  throw std::runtime_error("Histogram<T>::getMedian: median point <0 or >1.0 .");
341  }
342  const size_t limit = sampleCountMedian; // static_cast<size_t>(p * sampleCount);
343  _sum = 0;
344  for (size_type i = 0; i < size(); i++){
345  _sum += (*this)[i];
346  if (_sum >= limit){
347  return static_cast<T>(i);
348  }
349  }
350  return static_cast<T>(size());
351  }
352 
353 
355  // NOTE: could be pipelined.
356  template <class T>
357  inline
358  T getVariance() const {
359  int n;
360  long sum;
361  long sum2;
362  T f;
363  T sumT;
364 
365  sum = 0;
366  sum2 = 0;
367  for (size_type i = 0; i < size(); i++){
368  f = scaleOut<T>(i);
369  n = (*this)[i];
370  sum += n * f;
371  sum2 += n * (f*f);
372  }
373  sumT = static_cast<T>(sum)/sampleCount;
374  return static_cast<T>(sum2)/sampleCount - sumT*sumT;
375  }
376 
377 
378  template <class T>
379  inline
380  T getStdDeviation() const {
381  return static_cast<T>(sqrt(getVariance<double>()));
382  }
383 
384 
385 
386 
387 
388 
389  inline
390  const vect_t getVector() const {
391  return *this;
392  };
393 
394  std::ostream & toStream(std::ostream & ostr) const;
395 
396  void dump(std::ostream & ostr = std::cout);
397 
398  std::string delimiter;
399 
400  inline
401  double getValue(){
402  return (this->*statisticPtr)();
403  }
404 
406 
410  inline
411  double getValue(char c){
412  return (this->*getStatisticPtr(c))();
413  }
414 
415 
416  inline
417  void setValueFunc(char c){
418  statisticPtr = getStatisticPtr(c);
419  }
420 
421 
422 protected:
423 
424  void initialize();
425 
426  typedef double (Histogram::*stat_ptr_t)() const;
427 
428  double (Histogram::*statisticPtr)() const;
429 
430  stat_ptr_t getStatisticPtr(char c);
431 
432 
433  //double (Histogram::*)getPtr const;
434 
435 private:
437  // size_t bins;
438 
440  //size_type sampleCount;
441 
443  size_type sampleCount = 0;
444 
445  // Half of sampleCount.
446  size_type sampleCountMedian = 0;
447 
448  // weight for weighted median;
449  float weight = 0.5;
450 
452  static inline
453  bool isSmallInt(const std::type_info & type){
454  return (type == typeid(unsigned char)) || (type == typeid(unsigned short int));
455  }
456 
458 
461  static inline
462  short getBitShift(unsigned int value){
463  short i = 0;
464  while ((value = (value>>1)) > 0){
465  ++i;
466  }
467  return i;
468  }
469 
470 
471 };
472 
473 
474 template <class T>
475 void Histogram::compute(const T & dst, const std::type_info & type, const UniTuple<double,2> & scaling){
476 
477  drain::Logger mout(__FILE__, __FUNCTION__);
478 
479  const int size = autoSize(type);
480 
481  if (size <= 0){
482  mout.error(size, " bins, something went wrong");
483  return;
484  }
485 
486  if (size > 0x10000){
487  mout.error(size, " bins exceeds Histogram size limit: ", 0x10000);
488  return;
489  }
490 
491 
492  initialize();
493 
494  ValueScaling inputScaling(scaling);
495 
496  const bool SAME_SCALING = (inputScaling == this->scaling);
497 
498  mout.attention("scalings: ", this->scaling, ", ", inputScaling, " same? ", SAME_SCALING);
499  if (inputScaling == this->scaling){
500 
501  }
502 
503  if (isSmallInt(type)){
504  const unsigned short histBits = getBitShift(size);
505  if (size == (1<<histBits)){ //
506  const signed short dataBits = (8*drain::Type::call<drain::sizeGetter>(type));
507  const short int bitShift = dataBits - histBits;
508 
509  mout.note("Small int (", dataBits, "b) data, histogram[", size,"] ", histBits, " b, computing with bit shift ", bitShift);
510  // setRange(inputScaling.fwd(0), inputScaling.fwd(size -1));
511  mout.info("Physical range of the histogram: ", this->scaling.getPhysicalRange());
512  // mout.warn("Histogram size ", size, "=2^N, using bitShift=", bitShift, ", computing raw values.");
513  for (typename T::const_iterator it = dst.begin(); it != dst.end(); ++it){
514  this->incrementRaw(static_cast<unsigned short int>(*it) >> bitShift);
515  }
516  }
517  else {
518  mout.advice("Consider histogram size of 2^N, e.g.", (1<<histBits));
519  }
520  return;
521  }
522 
523  //mout.warn("Data range: ", range, ", mapped (physical) Range: ", rangeOut);
524  //mout.warn("Size ", s, " != 2^N (slow mode)");
525  //mout.advice("Consider histogram size of 2^N, e.g.", (1<<histBits));
526  mout.error("Skipping...");
527 
528 
529 }
530 
531 inline
532 std::ostream & operator<<(std::ostream &ostr,const Histogram &h){
533  return h.toStream(ostr);
534 }
535 
536 }
537 
538 //std::ostream & operator<<(std::ostream &ostr,const drain::Histogram &h);
539 
540 //template <class T>
541 
542 
543 #endif /*HISTOGRAM_H_*/
544 
545 // Drain
Class for computing a histogram and some statistics: average, min, max, mean, std....
Definition: Histogram.h:61
void clearBins()
Does not change the size of the histogram.
Definition: Histogram.h:94
double getValue(char c)
Return requested statistic.
Definition: Histogram.h:411
void setRange(double dataMin, double dataMax)
Set range of original (physical) values to be mapped on the limited number of bins....
Definition: Histogram.cpp:76
void setMedianPosition(double pos)
Set location of the median, if not in the middle (50%).
Definition: Histogram.h:177
int getUpperBoundIn() const
Returns the upperLimit (exclusive)
Definition: Histogram.h:160
int getInMin() const
Set range of original (physical) values to be mapped on the limited number of bins....
Definition: Histogram.h:156
T getSum() const
Sum of the samples.
Definition: Histogram.h:290
T getMean() const
Unscaled mean.
Definition: Histogram.h:305
void setSampleCount(long int n)
Does not change the size of the histogram.
Definition: Histogram.h:113
int getUpperBoundOut() const
Returns the upperLimit (exclusive)
Definition: Histogram.h:168
void compute(const T &src, const std::type_info &type=typeid(double), const UniTuple< double, 2 > &scaling={1.0, 0.0})
Collect distribution.
Definition: Histogram.h:475
T getVariance() const
Computes variance of the values inside the window.
Definition: Histogram.h:358
T getMax() const
Statistics.
Definition: Histogram.h:265
void setRange(const Range< double > &range)
Set range of original (physical) values to be mapped on the limited number of bins....
Definition: Histogram.h:129
void setSize(size_t s)
Sets the number of bins; the resolution of the histogram.
Definition: Histogram.cpp:62
LogSourc e is the means for a function or any program segment to "connect" to a Log.
Definition: Log.h:308
Logger & error(const TT &... args)
Echoes.
Definition: Log.h:412
Logger & attention(const TT &... args)
Possible error, but execution can continue. Special type of Logger::warn().
Definition: Log.h:472
Logger & note(const TT &... args)
For top-level information.
Definition: Log.h:485
Utilities related to std::type_info.
Definition: Type.h:51
Linear scaling and physical range for image intensities.
Definition: ValueScaling.h:64
double fwd(double x) const
Forward scaling: given encoded value x, returns corresponding value (possibly physically meaningful).
Definition: ValueScaling.h:295
Definition: DataSelector.cpp:1277