22 #include "config_auto.h"
35 : ccstruct_(ccstruct), use_cjk_fp_model_(false),
38 "Script has no xheight, so use a single mode",
41 BOOL_MEMBER(tosp_old_to_method, false,
"Space stats use prechopping?",
44 "Constrain relative values of inter and intra-word gaps for "
48 "Block stats to use fixed pitch rows?",
51 "Force word breaks on punct to break long lines in non-space "
55 "Space stats use prechopping?",
57 BOOL_MEMBER(tosp_old_to_bug_fix, false,
"Fix suspected bug in old code",
60 "Only stat OBVIOUS spaces",
62 BOOL_MEMBER(tosp_row_use_cert_spaces, true,
"Only stat OBVIOUS spaces",
65 "Only stat OBVIOUS spaces",
67 BOOL_MEMBER(tosp_row_use_cert_spaces1, true,
"Only stat OBVIOUS spaces",
70 "Use row alone when inadequate cert spaces",
72 BOOL_MEMBER(tosp_only_small_gaps_for_kern, false,
"Better guess",
74 BOOL_MEMBER(tosp_all_flips_fuzzy, false,
"Pass ANY flip to context?",
77 "Dont restrict kn->sp fuzzy limit to tables",
80 "Use within xht gap for wd breaks",
82 BOOL_MEMBER(tosp_use_xht_gaps, true,
"Use within xht gap for wd breaks",
85 "Only use within xht gap for wd breaks",
88 "Dont chng kn to space next to punct",
90 BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true,
"Default flip",
92 BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true,
"Default flip",
94 BOOL_MEMBER(tosp_improve_thresh, false,
"Enable improvement heuristic",
98 INT_MEMBER(tosp_enough_space_samples_for_median, 3,
99 "or should we use mean",
100 ccstruct_->params()),
102 "No.samples reqd to reestimate for row",
103 ccstruct_->params()),
105 "No.gaps reqd with 1 large gap to treat as a table",
106 ccstruct_->params()),
108 "No.gaps reqd with few cert spaces to use certs",
109 ccstruct_->params()),
110 INT_MEMBER(tosp_sanity_method, 1,
"How to avoid being silly",
111 ccstruct_->params()),
113 "Factor for defining space threshold in terms of space and "
115 ccstruct_->params()),
117 "how far between kern and space?",
118 ccstruct_->params()),
120 "how far between kern and space?",
121 ccstruct_->params()),
122 double_MEMBER(tosp_narrow_fraction, 0.3,
"Fract of xheight for narrow",
123 ccstruct_->params()),
125 "narrow if w/h less than this",
126 ccstruct_->params()),
127 double_MEMBER(tosp_wide_fraction, 0.52,
"Fract of xheight for wide",
128 ccstruct_->params()),
129 double_MEMBER(tosp_wide_aspect_ratio, 0.0,
"wide if w/h less than this",
130 ccstruct_->params()),
132 "Fract of xheight for fuzz sp",
133 ccstruct_->params()),
135 "Fract of xheight for fuzz sp",
136 ccstruct_->params()),
138 "Fract of xheight for fuzz sp",
139 ccstruct_->params()),
140 double_MEMBER(tosp_gap_factor, 0.83,
"gap ratio to flip sp->kern",
141 ccstruct_->params()),
142 double_MEMBER(tosp_kern_gap_factor1, 2.0,
"gap ratio to flip kern->sp",
143 ccstruct_->params()),
144 double_MEMBER(tosp_kern_gap_factor2, 1.3,
"gap ratio to flip kern->sp",
145 ccstruct_->params()),
146 double_MEMBER(tosp_kern_gap_factor3, 2.5,
"gap ratio to flip kern->sp",
147 ccstruct_->params()),
149 ccstruct_->params()),
150 double_MEMBER(tosp_ignore_very_big_gaps, 3.5,
"xht multiplier",
151 ccstruct_->params()),
152 double_MEMBER(tosp_rep_space, 1.6,
"rep gap multiplier for space",
153 ccstruct_->params()),
155 "Fract of kerns reqd for isolated row stats",
156 ccstruct_->params()),
158 "Min difference of kn & sp in table",
159 ccstruct_->params()),
161 "Expect spaces bigger than this",
162 ccstruct_->params()),
164 "Fuzzy if less than this",
165 ccstruct_->params()),
166 double_MEMBER(tosp_fuzzy_kn_fraction, 0.5,
"New fuzzy kn alg",
167 ccstruct_->params()),
168 double_MEMBER(tosp_fuzzy_sp_fraction, 0.5,
"New fuzzy sp alg",
169 ccstruct_->params()),
171 "Dont trust spaces less than this time kn",
172 ccstruct_->params()),
174 "Thresh guess - mult kn by this",
175 ccstruct_->params()),
177 "Thresh guess - mult xht by this",
178 ccstruct_->params()),
180 "Multiplier on kn to limit thresh",
181 ccstruct_->params()),
183 "Dont autoflip kn to sp when large separation",
184 ccstruct_->params()),
186 "Limit use of xht gap with large kns",
187 ccstruct_->params()),
189 "Limit use of xht gap with odd small kns",
190 ccstruct_->params()),
192 "Dont reduce box if the top left is non blank",
193 ccstruct_->params()),
195 "Dont let sp minus kn get too small",
196 ccstruct_->params()),
198 "How wide fuzzies need context",
199 ccstruct_->params()),
201 BOOL_MEMBER(textord_no_rejects, false,
"Don't remove noise blobs",
202 ccstruct_->params()),
203 BOOL_MEMBER(textord_show_blobs, false,
"Display unsorted blobs",
204 ccstruct_->params()),
205 BOOL_MEMBER(textord_show_boxes, false,
"Display unsorted blobs",
206 ccstruct_->params()),
207 INT_MEMBER(textord_max_noise_size, 7,
"Pixel size of noise",
208 ccstruct_->params()),
209 double_MEMBER(textord_blob_size_bigile, 95,
"Percentile for large blobs",
210 ccstruct_->params()),
212 "Fraction of bounding box for noise",
213 ccstruct_->params()),
215 "Percentile for small blobs",
216 ccstruct_->params()),
218 "Ile of sizes for xheight guess",
219 ccstruct_->params()),
221 "Ile of sizes for xheight guess",
222 ccstruct_->params()),
224 "Fraction of size for maxima",
225 ccstruct_->params()),
227 "Fraction of x for big t count",
228 ccstruct_->params()),
229 INT_MEMBER(textord_noise_translimit, 16,
"Transitions for normal blob",
230 ccstruct_->params()),
232 "Dot to norm ratio for deletion",
233 ccstruct_->params()),
234 BOOL_MEMBER(textord_noise_rejwords, true,
"Reject noise-like words",
235 ccstruct_->params()),
236 BOOL_MEMBER(textord_noise_rejrows, true,
"Reject noise-like rows",
237 ccstruct_->params()),
239 "xh fract height error for norm blobs",
240 ccstruct_->params()),
242 "xh fract width error for norm blobs",
243 ccstruct_->params()),
245 "Height fraction to discard outlines as speckle noise",
246 ccstruct_->params()),
247 INT_MEMBER(textord_noise_sncount, 1,
"super norm blobs to save row",
248 ccstruct_->params()),
250 "Dot to norm ratio for deletion",
251 ccstruct_->params()),
252 BOOL_MEMBER(textord_noise_debug, false,
"Debug row garbage detector",
253 ccstruct_->params()),
254 double_MEMBER(textord_blshift_maxshift, 0.00,
"Max baseline shift",
255 ccstruct_->params()),
257 "Min size of baseline shift",
258 ccstruct_->params()) {
266 int width,
int height, Pix* pix,
267 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
268 page_tr_.
set_x(width);
269 page_tr_.
set_y(height);
270 if (to_blocks->empty()) {
281 const FCOORD anticlockwise90(0.0
f, 1.0
f);
282 const FCOORD clockwise90(0.0
f, -1.0
f);
283 TO_BLOCK_IT it(to_blocks);
284 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
291 to_block->
rotate(anticlockwise90);
299 TO_BLOCK_IT to_block_it(to_blocks);
300 TO_BLOCK* to_block = to_block_it.data();
305 gradient =
make_rows(page_tr_, to_blocks);
311 fit_rows(gradient, page_tr_, to_blocks);
315 make_words(
this, page_tr_, gradient, blocks, to_blocks);
320 TO_BLOCK* to_block = to_block_it.data();
324 cleanup_blocks(blocks);
328 BLOCK_IT b_it(blocks);
329 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
330 b_it.data()->compute_row_margins();
332 #ifndef GRAPHICS_DISABLED
345 float row_total_conf = 0.0f;
346 int row_word_count = 0;
348 float best_conf = 0.0f;
354 row_total_conf /= row_word_count;
355 if (best_row ==
NULL || best_conf < row_total_conf) {
357 best_conf = row_total_conf;
359 row_total_conf = 0.0f;
365 if (it.
row() != best_row)