Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
detlinefit.h
Go to the documentation of this file.
1 
2 // File: detlinefit.h
3 // Description: Deterministic least upper-quartile squares line fitting.
4 // Author: Ray Smith
5 // Created: Thu Feb 28 14:35:01 PDT 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
21 #define TESSERACT_CCSTRUCT_DETLINEFIT_H_
22 
23 #include "points.h"
24 
25 namespace tesseract {
26 
27 // This class fits a line to a set of ICOORD points.
28 // There is no restriction on the direction of the line, as it
29 // uses a vector method, ie no concern over infinite gradients.
30 // The fitted line has the least upper quartile of squares of perpendicular
31 // distances of all source points from the line, subject to the constraint
32 // that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
33 // i.e. the 9 combinations of one of the first 3 and last 3 points.
34 // A fundamental assumption of this algorithm is that one of the first 3 and
35 // one of the last 3 points are near the best line fit.
36 // The points must be Added in line order for the algorithm to work properly.
37 // No floating point calculations are needed* to make an accurate fit,
38 // and no random numbers are needed** so the algorithm is deterministic,
39 // architecture-stable, and compiler-stable as well as stable to minor
40 // changes in the input.
41 // *A single floating point division is used to compute each line's distance.
42 // This is unlikely to result in choice of a different line, but if it does,
43 // it would be easy to replace with a 64 bit integer calculation.
44 // **Random numbers are used in the nth_item function, but the worst
45 // non-determinism that can result is picking a different result among equals,
46 // and that wouldn't make any difference to the end-result distance, so the
47 // randomness does not affect the determinism of the algorithm. The random
48 // numbers are only there to guarantee average linear time.
49 // Fitting time is linear, but with a high constant, as it tries 9 different
50 // lines and computes the distance of all points each time.
51 // This class is aimed at replacing the LLSQ (linear least squares) and
52 // LMS (least median of squares) classes that are currently used for most
53 // of the line fitting in Tesseract.
54 class DetLineFit {
55  public:
56  DetLineFit();
57  ~DetLineFit();
58 
59  // Delete all Added points.
60  void Clear();
61 
62  // Add a new point. Takes a copy - the pt doesn't need to stay in scope.
63  // Add must be called on points in sequence along the line.
64  void Add(const ICOORD& pt);
65 
66  // Fit a line to the points, returning the fitted line as a pair of
67  // points, and the upper quartile error.
68  double Fit(ICOORD* pt1, ICOORD* pt2);
69 
70  // Backwards compatible fit returning a gradient and constant.
71  // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
72  // function in preference to the LMS class.
73  double Fit(float* m, float* c);
74 
75  // Backwards compatible constrained fit with a supplied gradient.
76  double ConstrainedFit(double m, float* c);
77 
78  private:
79  double ComputeErrors(const ICOORD start, const ICOORD end, int* distances);
80 
81  ICOORDELT_LIST pt_list_; // All the added points.
82 };
83 
84 } // namespace tesseract.
85 
86 #endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_
87 
88