Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
statistc.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: statistc.h (Formerly stats.h)
3  * Description: Class description for STATS class.
4  * Author: Ray Smith
5  * Created: Mon Feb 04 16:19:07 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef TESSERACT_CCSTRUCT_STATISTC_H_
21 #define TESSERACT_CCSTRUCT_STATISTC_H_
22 
23 #include <stdio.h>
24 #include "host.h"
25 #include "scrollview.h"
26 
27 // Simple histogram-based statistics for integer values in a known
28 // range, such that the range is small compared to the number of samples.
29 class STATS {
30  public:
31  // The histogram buckets are in the range
32  // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.
33  // [min_bucket_value, max_bucket_value].
34  // Any data under min_bucket value is silently mapped to min_bucket_value,
35  // and likewise, any data over max_bucket_value is silently mapped to
36  // max_bucket_value.
37  // In the internal array, min_bucket_value maps to 0 and
38  // max_bucket_value_plus_1 - min_bucket_value to the array size.
39  // TODO(rays) This is ugly. Convert the second argument to
40  // max_bucket_value and all the code that uses it.
41  STATS(inT32 min_bucket_value, inT32 max_bucket_value_plus_1);
42  STATS(); // empty for arrays
43 
44  ~STATS();
45 
46  // (Re)Sets the range and clears the counts.
47  // See the constructor for info on max and min values.
48  bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1);
49 
50  void clear(); // empty buckets
51 
52  void add(inT32 value, inT32 count);
53 
54  // "Accessors" return various statistics on the data.
55  inT32 mode() const; // get mode of samples
56  double mean() const; // get mean of samples
57  double sd() const; // standard deviation
58  // Returns the fractile value such that frac fraction (in [0,1]) of samples
59  // has a value less than the return value.
60  double ile(double frac) const;
61  // Returns the minimum used entry in the histogram (ie the minimum of the
62  // data, NOT the minimum of the supplied range, nor is it an index.)
63  // Would normally be called min(), but that is a reserved word in VC++.
64  inT32 min_bucket() const; // Find min
65  // Returns the maximum used entry in the histogram (ie the maximum of the
66  // data, NOT the maximum of the supplied range, nor is it an index.)
67  inT32 max_bucket() const; // Find max
68  // Finds a more useful estimate of median than ile(0.5).
69  // Overcomes a problem with ile() - if the samples are, for example,
70  // 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
71  // between 6 and 13 = 9.5
72  double median() const; // get median of samples
73  // Returns the count of the given value.
74  inT32 pile_count(inT32 value ) const {
75  if (value <= rangemin_)
76  return buckets_[0];
77  if (value >= rangemax_ - 1)
78  return buckets_[rangemax_ - rangemin_ - 1];
79  return buckets_[value - rangemin_];
80  }
81  // Returns the total count of all buckets.
82  inT32 get_total() const {
83  return total_count_; // total of all piles
84  }
85  // Returns true if x is a local min.
86  bool local_min(inT32 x) const;
87 
88  // Apply a triangular smoothing filter to the stats.
89  // This makes the modes a bit more useful.
90  // The factor gives the height of the triangle, i.e. the weight of the
91  // centre.
92  void smooth(inT32 factor);
93 
94  // Cluster the samples into max_cluster clusters.
95  // Each call runs one iteration. The array of clusters must be
96  // max_clusters+1 in size as cluster 0 is used to indicate which samples
97  // have been used.
98  // The return value is the current number of clusters.
99  inT32 cluster(float lower, // thresholds
100  float upper,
101  float multiple, // distance threshold
102  inT32 max_clusters, // max no to make
103  STATS *clusters); // array of clusters
104 
105 
106  // Prints a summary and table of the histogram.
107  void print() const;
108  // Prints summary stats only of the histogram.
109  void print_summary() const;
110 
111  #ifndef GRAPHICS_DISABLED
112  // Draws the histogram as a series of rectangles.
113  void plot(ScrollView* window, // window to draw in
114  float xorigin, // origin of histo
115  float yorigin, // gram
116  float xscale, // size of one unit
117  float yscale, // size of one uint
118  ScrollView::Color colour) const; // colour to draw in
119 
120  // Draws a line graph of the histogram.
121  void plotline(ScrollView* window, // window to draw in
122  float xorigin, // origin of histo
123  float yorigin, // gram
124  float xscale, // size of one unit
125  float yscale, // size of one uint
126  ScrollView::Color colour) const; // colour to draw in
127  #endif // GRAPHICS_DISABLED
128 
129  private:
130  inT32 rangemin_; // min of range
131  // rangemax_ is not well named as it is really one past the max.
132  inT32 rangemax_; // max of range
133  inT32 total_count_; // no of samples
134  inT32* buckets_; // array of cells
135 };
136 
137 // Returns the nth ordered item from the array, as if they were
138 // ordered, but without ordering them, in linear time.
139 // The array does get shuffled!
140 inT32 choose_nth_item(inT32 index, // index to choose
141  float *array, // array of items
142  inT32 count); // no of items
143 // Generic version uses a defined comparator (with qsort semantics).
144 inT32 choose_nth_item(inT32 index, // index to choose
145  void *array, // array of items
146  inT32 count, // no of items
147  size_t size, // element size
148  int (*compar)(const void*, const void*)); // comparator
149 // Swaps 2 entries in an array in-place.
150 void swap_entries(void *array, // array of entries
151  size_t size, // size of entry
152  inT32 index1, // entries to swap
153  inT32 index2);
154 
155 #endif // TESSERACT_CCSTRUCT_STATISTC_H_