Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
underlin.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: underlin.cpp (Formerly undrline.c)
3  * Description: Code to chop blobs apart from underlines.
4  * Author: Ray Smith
5  * Created: Mon Aug 8 11:14:00 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "mfcpch.h"
21 #ifdef __UNIX__
22 #include <assert.h>
23 #endif
24 #include "underlin.h"
25 
26 #define PROJECTION_MARGIN 10 //arbitrary
27 #define EXTERN
28 
29 EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");
31 "Chop underlines & put back");
32 
33 /**********************************************************************
34  * restore_underlined_blobs
35  *
36  * Find underlined blobs and put them back in the row.
37  **********************************************************************/
38 
39 void restore_underlined_blobs( //get chop points
40  TO_BLOCK *block //block to do
41  ) {
42  inT16 chop_coord; //chop boundary
43  TBOX blob_box; //of underline
44  BLOBNBOX *u_line; //underline bit
45  TO_ROW *row; //best row for blob
46  ICOORDELT_LIST chop_cells; //blobs to cut out
47  //real underlines
48  BLOBNBOX_LIST residual_underlines;
49  C_OUTLINE_LIST left_coutlines;
50  C_OUTLINE_LIST right_coutlines;
51  ICOORDELT_IT cell_it = &chop_cells;
52  //under lines
53  BLOBNBOX_IT under_it = &block->underlines;
54  BLOBNBOX_IT ru_it = &residual_underlines;
55 
56  if (block->get_rows()->empty())
57  return; // Don't crash if there are no rows.
58  for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
59  under_it.forward ()) {
60  u_line = under_it.extract ();
61  blob_box = u_line->bounding_box ();
62  row = most_overlapping_row (block->get_rows (), u_line);
63  find_underlined_blobs (u_line, &row->baseline, row->xheight,
65  &chop_cells);
66  cell_it.set_to_list (&chop_cells);
67  for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
68  cell_it.forward ()) {
69  chop_coord = cell_it.data ()->x ();
70  if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
71  split_to_blob (u_line, chop_coord,
73  &left_coutlines,
74  &right_coutlines);
75  if (!left_coutlines.empty()) {
76  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
77  }
78  chop_coord = cell_it.data ()->y ();
79  split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5,
80  &left_coutlines, &right_coutlines);
81  if (!left_coutlines.empty()) {
82  row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
83  } else {
84  fprintf(stderr,
85  "Error:no outlines after chopping from %d to %d from (%d,%d)->(%d,%d)\n",
86  cell_it.data ()->x (), cell_it.data ()->y (),
87  blob_box.left (), blob_box.bottom (),
88  blob_box.right (), blob_box.top ());
90  }
91  u_line = NULL; //no more blobs to add
92  }
93  delete cell_it.extract();
94  }
95  if (!right_coutlines.empty ()) {
96  split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5,
97  &left_coutlines, &right_coutlines);
98  if (!left_coutlines.empty())
99  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
100  }
101  if (u_line != NULL) {
102  if (u_line->cblob() != NULL)
103  delete u_line->cblob();
104  delete u_line;
105  }
106  }
107  if (!ru_it.empty()) {
108  ru_it.move_to_first();
109  for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
110  under_it.add_after_then_move(ru_it.extract());
111  }
112  }
113 }
114 
115 
116 /**********************************************************************
117  * most_overlapping_row
118  *
119  * Return the row which most overlaps the blob.
120  **********************************************************************/
121 
122 TO_ROW *most_overlapping_row( //find best row
123  TO_ROW_LIST *rows, //list of rows
124  BLOBNBOX *blob //blob to place
125  ) {
126  inT16 x = (blob->bounding_box ().left ()
127  + blob->bounding_box ().right ()) / 2;
128  TO_ROW_IT row_it = rows; //row iterator
129  TO_ROW *row; //current row
130  TO_ROW *best_row; //output row
131  float overlap; //of blob & row
132  float bestover; //best overlap
133 
134  best_row = NULL;
135  bestover = (float) -MAX_INT32;
136  if (row_it.empty ())
137  return NULL;
138  row = row_it.data ();
139  row_it.mark_cycle_pt ();
140  while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
141  && !row_it.cycled_list ()) {
142  best_row = row;
143  bestover =
144  blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
145  row_it.forward ();
146  row = row_it.data ();
147  }
148  while (row->baseline.y (x) + row->xheight + row->ascrise
149  >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
150  overlap = row->baseline.y (x) + row->xheight + row->ascrise;
151  if (blob->bounding_box ().top () < overlap)
152  overlap = blob->bounding_box ().top ();
153  if (blob->bounding_box ().bottom () >
154  row->baseline.y (x) + row->descdrop)
155  overlap -= blob->bounding_box ().bottom ();
156  else
157  overlap -= row->baseline.y (x) + row->descdrop;
158  if (overlap > bestover) {
159  bestover = overlap;
160  best_row = row;
161  }
162  row_it.forward ();
163  row = row_it.data ();
164  }
165  if (bestover < 0
166  && row->baseline.y (x) + row->xheight + row->ascrise
167  - blob->bounding_box ().bottom () > bestover)
168  best_row = row;
169  return best_row;
170 }
171 
172 
173 /**********************************************************************
174  * find_underlined_blobs
175  *
176  * Find the start and end coords of blobs in the underline.
177  **********************************************************************/
178 
179 void find_underlined_blobs( //get chop points
180  BLOBNBOX *u_line, //underlined unit
181  QSPLINE *baseline, //actual baseline
182  float xheight, //height of line
183  float baseline_offset, //amount to shrinke it
184  ICOORDELT_LIST *chop_cells //places to chop
185  ) {
186  inT16 x, y; //sides of blob
187  ICOORD blob_chop; //sides of blob
188  TBOX blob_box = u_line->bounding_box ();
189  //cell iterator
190  ICOORDELT_IT cell_it = chop_cells;
191  STATS upper_proj (blob_box.left (), blob_box.right () + 1);
192  STATS middle_proj (blob_box.left (), blob_box.right () + 1);
193  STATS lower_proj (blob_box.left (), blob_box.right () + 1);
194  C_OUTLINE_IT out_it; //outlines of blob
195 
196  ASSERT_HOST (u_line->cblob () != NULL);
197 
198  out_it.set_to_list (u_line->cblob ()->out_list ());
199  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
200  vertical_cunderline_projection (out_it.data (),
201  baseline, xheight, baseline_offset,
202  &lower_proj, &middle_proj, &upper_proj);
203  }
204 
205  for (x = blob_box.left (); x < blob_box.right (); x++) {
206  if (middle_proj.pile_count (x) > 0) {
207  for (y = x + 1;
208  y < blob_box.right () && middle_proj.pile_count (y) > 0; y++);
209  blob_chop = ICOORD (x, y);
210  cell_it.add_after_then_move (new ICOORDELT (blob_chop));
211  x = y;
212  }
213  }
214 }
215 
216 
217 /**********************************************************************
218  * vertical_cunderline_projection
219  *
220  * Compute the vertical projection of a outline from its outlines
221  * and add to the given STATS.
222  **********************************************************************/
223 
224 void vertical_cunderline_projection( //project outlines
225  C_OUTLINE *outline, //outline to project
226  QSPLINE *baseline, //actual baseline
227  float xheight, //height of line
228  float baseline_offset, //amount to shrinke it
229  STATS *lower_proj, //below baseline
230  STATS *middle_proj, //centre region
231  STATS *upper_proj //top region
232  ) {
233  ICOORD pos; //current point
234  ICOORD step; //edge step
235  inT16 lower_y, upper_y; //region limits
236  inT32 length; //of outline
237  inT16 stepindex; //current step
238  C_OUTLINE_IT out_it = outline->child ();
239 
240  pos = outline->start_pos ();
241  length = outline->pathlength ();
242  for (stepindex = 0; stepindex < length; stepindex++) {
243  step = outline->step (stepindex);
244  if (step.x () > 0) {
245  lower_y =
246  (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 0.5);
247  upper_y =
248  (inT16) floor (baseline->y (pos.x ()) + baseline_offset +
249  xheight + 0.5);
250  if (pos.y () >= lower_y) {
251  lower_proj->add (pos.x (), -lower_y);
252  if (pos.y () >= upper_y) {
253  middle_proj->add (pos.x (), lower_y - upper_y);
254  upper_proj->add (pos.x (), upper_y - pos.y ());
255  }
256  else
257  middle_proj->add (pos.x (), lower_y - pos.y ());
258  }
259  else
260  lower_proj->add (pos.x (), -pos.y ());
261  }
262  else if (step.x () < 0) {
263  lower_y =
264  (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
265  0.5);
266  upper_y =
267  (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
268  xheight + 0.5);
269  if (pos.y () >= lower_y) {
270  lower_proj->add (pos.x () - 1, lower_y);
271  if (pos.y () >= upper_y) {
272  middle_proj->add (pos.x () - 1, upper_y - lower_y);
273  upper_proj->add (pos.x () - 1, pos.y () - upper_y);
274  }
275  else
276  middle_proj->add (pos.x () - 1, pos.y () - lower_y);
277  }
278  else
279  lower_proj->add (pos.x () - 1, pos.y ());
280  }
281  pos += step;
282  }
283 
284  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
285  vertical_cunderline_projection (out_it.data (),
286  baseline, xheight, baseline_offset,
287  lower_proj, middle_proj, upper_proj);
288  }
289 }