Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
detlinefit.cpp
Go to the documentation of this file.
1 
2 // File: detlinefit.cpp
3 // Description: Deterministic least median squares line fitting.
4 // Author: Ray Smith
5 // Created: Thu Feb 28 14:45:01 PDT 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "detlinefit.h"
21 #include "statistc.h"
22 #include "ndminx.h"
23 
24 namespace tesseract {
25 
26 // The number of points to consider at each end.
27 const int kNumEndPoints = 3;
28 
30 }
31 
33 }
34 
35 // Delete all Added points.
37  pt_list_.clear();
38 }
39 
40 // Add a new point. Takes a copy - the pt doesn't need to stay in scope.
41 void DetLineFit::Add(const ICOORD& pt) {
42  ICOORDELT_IT it = &pt_list_;
43  ICOORDELT* new_pt = new ICOORDELT(pt);
44  it.add_to_end(new_pt);
45 }
46 
47 // Fit a line to the points, returning the fitted line as a pair of
48 // points, and the upper quartile error.
49 double DetLineFit::Fit(ICOORD* pt1, ICOORD* pt2) {
50  ICOORDELT_IT it(&pt_list_);
51  // Do something sensible with no points.
52  if (pt_list_.empty()) {
53  pt1->set_x(0);
54  pt1->set_y(0);
55  *pt2 = *pt1;
56  return 0.0;
57  }
58  // Count the points and find the first and last kNumEndPoints.
59  ICOORD* starts[kNumEndPoints];
60  ICOORD* ends[kNumEndPoints];
61  int pt_count = 0;
62  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
63  if (pt_count < kNumEndPoints) {
64  starts[pt_count] = it.data();
65  ends[pt_count] = starts[pt_count];
66  } else {
67  for (int i = 1; i < kNumEndPoints; ++i)
68  ends[i - 1] = ends[i];
69  ends[kNumEndPoints - 1] = it.data();
70  }
71  ++pt_count;
72  }
73  // 1 or 2 points need special treatment.
74  if (pt_count <= 2) {
75  *pt1 = *starts[0];
76  if (pt_count > 1)
77  *pt2 = *starts[1];
78  else
79  *pt2 = *pt1;
80  return 0.0;
81  }
82  int end_count = MIN(pt_count, kNumEndPoints);
83  int* distances = new int[pt_count];
84  double best_uq = -1.0;
85  // Iterate each pair of points and find the best fitting line.
86  for (int i = 0; i < end_count; ++i) {
87  ICOORD* start = starts[i];
88  for (int j = 0; j < end_count; ++j) {
89  ICOORD* end = ends[j];
90  if (start != end) {
91  // Compute the upper quartile error from the line.
92  double dist = ComputeErrors(*start, *end, distances);
93  if (dist < best_uq || best_uq < 0.0) {
94  best_uq = dist;
95  *pt1 = *start;
96  *pt2 = *end;
97  }
98  }
99  }
100  }
101  delete [] distances;
102  // Finally compute the square root to return the true distance.
103  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
104 }
105 
106 // Backwards compatible fit returning a gradient and constant.
107 // Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
108 // function in preference to the LMS class.
109 double DetLineFit::Fit(float* m, float* c) {
110  ICOORD start, end;
111  double error = Fit(&start, &end);
112  if (end.x() != start.x()) {
113  *m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
114  *c = start.y() - *m * start.x();
115  } else {
116  *m = 0.0f;
117  *c = 0.0f;
118  }
119  return error;
120 }
121 
122 // Helper function to compute a fictitious end point that is on a line
123 // of a given gradient through the given start.
124 ICOORD ComputeEndFromGradient(const ICOORD& start, double m) {
125  if (m > 1.0 || m < -1.0) {
126  // dy dominates. Force it to have the opposite sign of start.y() and
127  // compute dx based on dy being as large as possible
128  int dx = static_cast<int>(floor(MAX_INT16 / m));
129  if (dx < 0) ++dx; // Truncate towards 0.
130  if (start.y() > 0) dx = - dx; // Force dy to be opposite to start.y().
131  // Constrain dx so the result fits in an inT16.
132  while (start.x() + dx > MAX_INT16 || start.x() + dx < -MAX_INT16)
133  dx /= 2;
134  if (-1 <= dx && dx <= 1) {
135  return ICOORD(start.x(), start.y() + 1); // Too steep for anything else.
136  }
137  int y = start.y() + static_cast<int>(floor(dx * m + 0.5));
138  ASSERT_HOST(-MAX_INT16 <= y && y <= MAX_INT16);
139  return ICOORD(start.x() + dx, y);
140  } else {
141  // dx dominates. Force it to have the opposite sign of start.x() and
142  // compute dy based on dx being as large as possible.
143  int dy = static_cast<int>(floor(MAX_INT16 * m));
144  if (dy < 0) ++dy; // Truncate towards 0.
145  if (start.x() > 0) dy = - dy; // Force dx to be opposite to start.x().
146  // Constrain dy so the result fits in an inT16.
147  while (start.y() + dy > MAX_INT16 || start.y() + dy < -MAX_INT16)
148  dy /= 2;
149  if (-1 <= dy && dy <= 1) {
150  return ICOORD(start.x() + 1, start.y()); // Too flat for anything else.
151  }
152  int x = start.x() + static_cast<int>(floor(dy / m + 0.5));
153  ASSERT_HOST(-MAX_INT16 <= x && x <= MAX_INT16);
154  return ICOORD(x, start.y() + dy);
155  }
156 }
157 
158 // Backwards compatible constrained fit with a supplied gradient.
159 double DetLineFit::ConstrainedFit(double m, float* c) {
160  ICOORDELT_IT it(&pt_list_);
161  // Do something sensible with no points.
162  if (pt_list_.empty()) {
163  *c = 0.0f;
164  return 0.0;
165  }
166  // Count the points and find the first and last kNumEndPoints.
167  // Put the ends in a single array to make their use easier later.
168  ICOORD* pts[kNumEndPoints * 2];
169  int pt_count = 0;
170  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
171  if (pt_count < kNumEndPoints) {
172  pts[pt_count] = it.data();
173  pts[kNumEndPoints + pt_count] = pts[pt_count];
174  } else {
175  for (int i = 1; i < kNumEndPoints; ++i)
176  pts[kNumEndPoints + i - 1] = pts[kNumEndPoints + i];
177  pts[kNumEndPoints * 2 - 1] = it.data();
178  }
179  ++pt_count;
180  }
181  while (pt_count < kNumEndPoints) {
182  pts[pt_count] = NULL;
183  pts[kNumEndPoints + pt_count++] = NULL;
184  }
185  int* distances = new int[pt_count];
186  double best_uq = -1.0;
187  // Iterate each pair of points and find the best fitting line.
188  for (int i = 0; i < kNumEndPoints * 2; ++i) {
189  ICOORD* start = pts[i];
190  if (start == NULL) continue;
191  ICOORD end = ComputeEndFromGradient(*start, m);
192  // Compute the upper quartile error from the line.
193  double dist = ComputeErrors(*start, end, distances);
194  if (dist < best_uq || best_uq < 0.0) {
195  best_uq = dist;
196  *c = start->y() - start->x() * m;
197  }
198  }
199  delete [] distances;
200  // Finally compute the square root to return the true distance.
201  return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
202 }
203 
204 // Comparator function used by the nth_item funtion.
205 static int CompareInts(const void *p1, const void *p2) {
206  const int* i1 = reinterpret_cast<const int*>(p1);
207  const int* i2 = reinterpret_cast<const int*>(p2);
208 
209  return *i1 - *i2;
210 }
211 
212 // Compute all the cross product distances of the points from the line
213 // and return the true squared upper quartile distance.
214 double DetLineFit::ComputeErrors(const ICOORD start, const ICOORD end,
215  int* distances) {
216  ICOORDELT_IT it(&pt_list_);
217  ICOORD line_vector = end;
218  line_vector -= start;
219  // Compute the distance of each point from the line.
220  int pt_index = 0;
221  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
222  ICOORD pt_vector = *it.data();
223  pt_vector -= start;
224  // Compute |line_vector||pt_vector|sin(angle between)
225  int dist = line_vector * pt_vector;
226  if (dist < 0)
227  dist = -dist;
228  distances[pt_index++] = dist;
229  }
230  // Now get the upper quartile distance.
231  int index = choose_nth_item(3 * pt_index / 4, distances, pt_index,
232  sizeof(distances[0]), CompareInts);
233  double dist = distances[index];
234  // The true distance is the square root of the dist squared / the
235  // squared length of line_vector (which is the dot product with itself)
236  // Don't bother with the square root. Just return the square distance.
237  return dist * dist / (line_vector % line_vector);
238 }
239 
240 } // namespace tesseract.
241 
242