Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
makerow.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: makerow.h (Formerly makerows.h)
3  * Description: Code to arrange blobs into rows of text.
4  * Author: Ray Smith
5  * Created: Mon Sep 21 14:34:48 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef MAKEROW_H
21 #define MAKEROW_H
22 
23 #include "params.h"
24 #include "ocrblock.h"
25 #include "blobs.h"
26 #include "blobbox.h"
27 #include "statistc.h"
28 #include "notdll.h"
29 
31 {
32  ASSIGN, //assign it to row
33  REJECT, //reject it - dual overlap
35 };
36 
42 };
43 
45 "Display row accumulation");
47 "Display page correlated rows");
49 "Display rows after expanding");
51 "Display rows after final fitting");
53 "Display blob bounds after pre-ass");
54 extern BOOL_VAR_H (textord_test_landscape, FALSE, "Tests refer to land/port");
56 "Force parallel baselines");
58 "Force straight baselines");
// Extern declarations of boolean baseline / x-height switches.
// NOTE(review): BOOL_VAR_H is not defined in this file (presumably it comes
// from params.h); its arguments look like (name, default, help text) - confirm.
59 extern BOOL_VAR_H (textord_quadratic_baselines, FALSE,
60 "Use quadratic splines");
61 extern BOOL_VAR_H (textord_old_baselines, TRUE, "Use old baseline algorithm");
62 extern BOOL_VAR_H (textord_old_xheight, TRUE, "Use old xheight algorithm");
// NOTE(review): the help text below talks about spline baselines while the
// flag is named textord_fix_xheight_bug - looks copy-pasted; verify against
// the definition before relying on either.
63 extern BOOL_VAR_H (textord_fix_xheight_bug, TRUE, "Use spline baseline");
65 "Prevent multiple baselines");
// Extern declarations of two boolean switches followed by the two integer
// test-point parameters (both default to 0 in these declarations).
66 extern BOOL_VAR_H (textord_cblob_blockocc, TRUE,
67 "Use new projection for underlines");
68 extern BOOL_VAR_H (textord_debug_xheights, FALSE, "Test xheight algorithms");
// The x and y test-point parameters share one identical help string.
69 extern INT_VAR_H (textord_test_x, 0, "coord of test pt");
70 extern INT_VAR_H (textord_test_y, 0, "coord of test pt");
72 "Min blobs before gradient counted");
74 "Min blobs in each spline segment");
76 "Size of window for spline segmentation");
77 extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
79 "Fraction of line spacing for quad");
81 "Fraction of line spacing for outlier");
82 extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
83 extern double_VAR_H (textord_skew_lag, 0.75,
84 "Lag for skew on row accumulation");
86 "Max iqr/median for linespace");
88 "Max width of blobs to make rows");
89 extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
90 extern double_VAR_H (textord_minxh, 0.25,
91 "fraction of linesize for min xheight");
93 "* blob height for initial linesize");
95 "New row made if blob makes row this big");
97 "Fraction of neighbourhood");
99 "Multiple of line_size for underline");
101 "Min blob height/top to include blob top into xheight stats");
103 "Min pile height to make xheight");
105 "Min pile height to make ascheight");
// Extern declarations of the ascender/descender-to-xheight ratio limits, the
// accepted xheight variation, and the line-fit trial count.
// within_error_margin() in this header takes a margin argument of the same
// kind as textord_xheight_error_margin; whether callers pass this parameter
// is not visible here.
106 extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
107 extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
108 extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
109 extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
110 extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
// NOTE(review): the word linew in the help text below is probably a typo for
// line; it is a runtime string, so it is deliberately left unchanged here.
111 extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do");
113 "Use test xheight mechanism");
114 
115 inline void get_min_max_xheight(int block_linesize,
116  int *min_height, int *max_height) {
117  *min_height = static_cast<inT32>(floor(block_linesize * textord_minxh));
118  if (*min_height < textord_min_xheight) *min_height = textord_min_xheight;
119  *max_height = static_cast<inT32>(ceil(block_linesize * 3.0));
120 }
121 
123  if (row->xheight <= 0) return ROW_INVALID;
124  return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND :
125  (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;
126 }
127 
// Reports whether `test` lies inside the closed interval
// [num * (1 - margin), num * (1 + margin)].
// When `num` is negative and `margin` positive the two bounds cross, so no
// value of `test` is accepted.
inline bool within_error_margin(float test, float num, float margin) {
  const float lowest = num * (1 - margin);
  const float highest = num * (1 + margin);
  // Written as a negated >= so that a NaN operand rejects, exactly like the
  // plain conjunction of the two comparisons would.
  if (!(test >= lowest)) {
    return false;
  }
  return test <= highest;
}
131 
// Declaration only; the definition is not part of this header.
// Takes a row, a gradient, an inclusive-or-exclusive height range
// (min_height, max_height - bounds semantics not visible here) and two STATS
// objects passed by pointer.
// NOTE(review): presumably accumulates blob heights of `row` into `heights`
// and `floating_heights` - confirm against the definition.
132 void fill_heights(TO_ROW *row, float gradient, int min_height,
133  int max_height, STATS *heights, STATS *floating_heights);
134 
// Row-construction entry points. Only prototypes are visible here; the
// behaviour notes below are limited to what the signatures and their
// trailing comments state.

// Takes the page top-right corner, one block and a block list; returns float.
// NOTE(review): the meaning of the returned float is not visible here
// (presumably a gradient/skew, as for make_rows) - confirm.
135 float make_single_row(ICOORD page_tr, TO_BLOCK* block,
136  TO_BLOCK_LIST* blocks);
// Same shape as make_single_row but works from a list of blocks only.
137 float make_rows(ICOORD page_tr, // top right
138  TO_BLOCK_LIST *port_blocks);
139 void make_initial_textrows(ICOORD page_tr,
140  TO_BLOCK *block, // block to do
141  FCOORD rotation, // for drawing
142  BOOL8 testing_on); // correct orientation
// Operates on a single row. NOTE(review): the name suggests an LMS
// (least-median-of-squares) line fit - confirm against the definition.
143 void fit_lms_line(TO_ROW *row);
// page_m and page_err are output references (average gradient and average
// error according to the trailing comments).
144 void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks
145  float &page_m, // average gradient
146  float &page_err); // average error
147 void cleanup_rows_making(ICOORD page_tr, // top right
148  TO_BLOCK *block, // block to do
149  float gradient, // gradient to fit
150  FCOORD rotation, // for drawing
151  inT32 block_edge, // edge of block
152  BOOL8 testing_on); // correct orientation
// Prototypes for dropout-row detection and the line-occupation projection it
// works on. Definitions are not part of this header.
153 void delete_non_dropout_rows( //find lines
154  TO_BLOCK *block, //block to do
155  float gradient, //global skew
156  FCOORD rotation, //deskew vector
157  inT32 block_edge, //left edge
158  BOOL8 testing_on //correct orientation
159  );
// Returns BOOL8. row_it is passed by pointer (the iterator's current
// position according to the trailing comment).
160 BOOL8 find_best_dropout_row( //find neighbours
161  TO_ROW *row, //row to test
162  inT32 distance, //dropout dist
163  float dist_limit, //threshold distance
164  inT32 line_index, //index of row
165  TO_ROW_IT *row_it, //current position
166  BOOL8 testing_on //correct orientation
167  );
// Returns a TBOX by value for the given block and skew.
168 TBOX deskew_block_coords( //block box
169  TO_BLOCK *block, //block to do
170  float gradient //global skew
171  );
// occupation and deltas are caller-supplied output arrays; their required
// length is not stated in this declaration (presumably derived from
// min_y..max_y - TODO confirm).
172 void compute_line_occupation( //project blobs
173  TO_BLOCK *block, //block to do
174  float gradient, //global skew
175  inT32 min_y, //min coord in block
176  inT32 max_y, //in block
177  inT32 *occupation, //output projection
178  inT32 *deltas //derivative
179  );
// Reads occupation and writes thresholds; line_count is the size of both
// arrays according to the trailing comment.
180 void compute_occupation_threshold( //compute thresholds
181  inT32 low_window, //below result point
182  inT32 high_window, //above result point
183  inT32 line_count, //array sizes
184  inT32 *occupation, //input projection
185  inT32 *thresholds //output thresholds
186  );
187 void compute_dropout_distances( //dropout distances
188  inT32 *occupation, //input projection
189  inT32 *thresholds, //output thresholds
190  inT32 line_count //array sizes
191  );
// Prototypes for row expansion, limit tidying and per-block row statistics.
// Definitions are not part of this header.
// expand_rows has the same parameter list as cleanup_rows_making.
192 void expand_rows( //find lines
193  ICOORD page_tr, //top right
194  TO_BLOCK *block, //block to do
195  float gradient, //gradient to fit
196  FCOORD rotation, //for drawing
197  inT32 block_edge, //edge of block
198  BOOL8 testing_on //correct orientation
199  );
200 void adjust_row_limits( //tidy limits
201  TO_BLOCK *block //block to do
202  );
203 void compute_row_stats( //row statistics
204  TO_BLOCK *block, //block to do
205  BOOL8 testing_on //correct orientation
206  );
// Returns a float for the block. NOTE(review): by its name this is the
// median xheight of the block - confirm against the definition.
207 float median_block_xheight( //block xheight
208  TO_BLOCK *block, //block to do
209  float gradient //global skew
210  );
211 
213  STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
214  int max_height, float *xheight, float *ascrise);
215 
// Prototypes for descender-drop and height-mode computation plus the xheight
// correction step. Definitions are not part of this header.

// Returns inT32; reads from (or possibly updates - not visible here) the
// STATS object passed as heights.
216 inT32 compute_row_descdrop(TO_ROW *row, // row to do
217  float gradient, // global skew
218  int xheight_blob_count,
219  STATS *heights);
// modes is a caller-supplied output array holding at most maxmodes entries.
// NOTE(review): the inT32 return is presumably the number of modes written -
// confirm against the definition.
220 inT32 compute_height_modes(STATS *heights, // stats to search
221  inT32 min_height, // bottom of range
222  inT32 max_height, // top of range
223  inT32 *modes, // output array
224  inT32 maxmodes); // size of modes
// xheight, ascrise and descdrop are the average values the row is corrected
// against (per the trailing comment on xheight).
225 void correct_row_xheight(TO_ROW *row, // row to fix
226  float xheight, // average values
227  float ascrise,
228  float descdrop);
// Prototypes for underline separation, blob pre-association and baseline
// fitting (parallel fits and splines). Definitions are not part of this
// header.
229 void separate_underlines(TO_BLOCK *block, // block to do
230  float gradient, // skew angle
231  FCOORD rotation, // inverse landscape
232  BOOL8 testing_on); // correct orientation
233 void pre_associate_blobs( ICOORD page_tr, // top right
234  TO_BLOCK *block, // block to do
235  FCOORD rotation, // inverse landscape
236  BOOL8 testing_on); // correct orientation
237 void fit_parallel_rows(TO_BLOCK *block, // block to do
238  float gradient, // gradient to fit
239  FCOORD rotation, // for drawing
240  inT32 block_edge, // edge of block
241  BOOL8 testing_on); // correct orientation
242 void fit_parallel_lms(float gradient, // forced gradient
243  TO_ROW *row); // row to fit
244 void make_baseline_spline(TO_ROW *row, // row to fit
245  TO_BLOCK *block); // block it came from
// segments is an output reference (number of segments) and xstarts a
// caller-supplied array of segment coordinates; the required capacity of
// xstarts is not stated here.
246 BOOL8 segment_baseline ( //split baseline
247 TO_ROW * row, //row to fit
248 TO_BLOCK * block, //block it came from
249 inT32 & segments, //number of segments
250 inT32 xstarts[] //coords of segments
251 );
// Same parameter list as segment_baseline but returns a double pointer.
// NOTE(review): ownership of the returned array is not visible from this
// declaration - check the definition before freeing or retaining it.
252 double *linear_spline_baseline ( //split baseline
253 TO_ROW * row, //row to fit
254 TO_BLOCK * block, //block it came from
255 inT32 & segments, //number of segments
256 inT32 xstarts[] //coords of segments
257 );
// Prototypes for assigning blobs to rows and for picking the row that best
// overlaps a blob. Definitions are not part of this header.

// gradient is passed by pointer (block skew). NOTE(review): whether a null
// gradient is accepted is not visible here - confirm.
258 void assign_blobs_to_rows( //find lines
259  TO_BLOCK *block, //block to do
260  float *gradient, //block skew
261  int pass, //identification
262  BOOL8 reject_misses, //chuck big ones out
263  BOOL8 make_new_rows, //add rows for unmatched
264  BOOL8 drawing_skew //draw smoothed skew
265  );
// Returns an OVERLAP_STATE and reports the chosen row through the best_row
// output reference.
266  //find best row
267 OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, //iterator
268  TO_ROW *&best_row, //output row
269  float top, //top of blob
270  float bottom, //bottom of blob
271  float rowsize, //max row size
272  BOOL8 testing_blob //test stuff
273  );
// Three comparison callbacks sharing one signature: two untyped const
// pointers in, int out.
// NOTE(review): the signature matches a qsort-style comparator; the concrete
// pointee types and the sign convention are not visible here - confirm.
274 int blob_x_order( //sort function
275  const void *item1, //items to compare
276  const void *item2);
277 int row_y_order( //sort function
278  const void *item1, //items to compare
279  const void *item2);
280 int row_spacing_order( //sort function
281  const void *item1, //items to compare
282  const void *item2);
283 
// Declaration only; operates on a single row passed by pointer.
// NOTE(review): by its name this flags repeated characters within the row -
// confirm against the definition.
284 void mark_repeated_chars(TO_ROW *row);
285 #endif