35 #define PERFECT_WERDS 999
36 #define MAXSPACING 128
52 BLOCK_RES_IT block_res_it;
53 ROW_RES_IT row_res_it;
54 WERD_RES_IT word_res_it_from;
55 WERD_RES_IT word_res_it_to;
57 WERD_RES_LIST fuzzy_space_words;
59 BOOL8 prevent_null_wd_fixsp;
64 for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list();
65 block_res_it.forward()) {
66 row_res_it.set_to_list(&block_res_it.data()->row_res_list);
67 for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
68 row_res_it.forward()) {
69 word_res_it_from.set_to_list(&row_res_it.data()->word_res_list);
70 while (!word_res_it_from.at_last()) {
71 word_res = word_res_it_from.data();
72 while (!word_res_it_from.at_last() &&
74 word_res_it_from.data_relative(1)->word->flag(
W_FUZZY_NON) ||
75 word_res_it_from.data_relative(1)->word->flag(
W_FUZZY_SP))) {
77 block_res_it.data()->block);
78 word_res = word_res_it_from.forward();
80 if (monitor !=
NULL) {
82 monitor->
progress = 90 + 5 * word_index / word_count;
90 if (!word_res_it_from.at_last()) {
91 word_res_it_to = word_res_it_from;
92 prevent_null_wd_fixsp =
96 word_res_it_to.forward();
98 if (monitor !=
NULL) {
100 monitor->
progress = 90 + 5 * word_index / word_count;
106 while (!word_res_it_to.at_last () &&
107 (word_res_it_to.data_relative(1)->word->flag(
W_FUZZY_NON) ||
108 word_res_it_to.data_relative(1)->word->flag(
W_FUZZY_SP))) {
112 prevent_null_wd_fixsp =
TRUE;
113 word_res = word_res_it_to.forward();
118 prevent_null_wd_fixsp =
TRUE;
119 if (prevent_null_wd_fixsp) {
120 word_res_it_from = word_res_it_to;
122 fuzzy_space_words.assign_to_sublist(&word_res_it_from,
125 row_res_it.data()->row,
126 block_res_it.data()->block);
127 new_length = fuzzy_space_words.length();
128 word_res_it_from.add_list_before(&fuzzy_space_words);
130 !word_res_it_from.at_last() && new_length > 0;
132 word_res_it_from.forward();
139 block_res_it.data()->block);
150 WERD_RES_LIST current_perm;
155 dump_words(best_perm, best_score, 1, improved);
160 while ((best_score !=
PERFECT_WERDS) && !current_perm.empty()) {
163 dump_words(current_perm, current_score, 2, improved);
164 if (current_score > best_score) {
167 best_score = current_score;
173 dump_words(best_perm, best_score, 3, improved);
179 WERD_RES_IT src_it(&src_list);
180 WERD_RES_IT new_it(&new_list);
184 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
185 src_wd = src_it.data();
190 new_it.add_after_then_move(new_wd);
199 WERD_RES_IT word_it(&words);
204 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
205 word = word_it.data();
241 WERD_RES_IT word_res_it(&word_res_list);
242 inT16 total_score = 0;
243 inT16 word_count = 0;
244 inT16 done_word_count = 0;
249 inT16 prev_word_score = 0;
254 BOOL8 current_word_ok_so_far;
255 STRING punct_chars =
"!\"`',.:;";
261 word = word_res_it.data();
265 total_score += prev_word_score;
270 prev_char_digit =
FALSE;
271 prev_word_done =
FALSE;
279 current_word_ok_so_far =
FALSE;
281 (prev_char_digit && (
287 total_score += prev_word_score;
290 current_word_ok_so_far = word_done;
293 if (current_word_ok_so_far) {
294 prev_word_done =
TRUE;
295 prev_word_score = word_len;
297 prev_word_done =
FALSE;
303 for (i = 0, prev_char_1 =
FALSE; i < word_len; i++) {
305 if (prev_char_1 || (current_char_1 && (i > 0)))
307 prev_char_1 = current_char_1;
313 for (i = 0, offset = 0, prev_char_punct =
FALSE; i < word_len;
317 if (prev_char_punct || (current_char_punct && i > 0))
319 prev_char_punct = current_char_punct;
323 for (i = 0, offset = 0; i < word_len - 1;
332 word_res_it.forward();
333 }
while (word_res_it.data()->part_of_combo);
334 }
while (!word_res_it.at_first());
335 total_score += prev_word_score;
338 if (done_word_count == word_count)
348 for (i = 0, offset = 0; i < char_position;
374 WERD_RES_IT word_it(&words);
375 WERD_RES_IT prev_word_it(&words);
385 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
386 word = word_it.data();
390 gap = box.
left() - prev_right;
394 prev_right = box.
right();
399 word_it.set_to_list(&words);
401 for (; (prev_right == -
MAX_INT16) || !word_it.at_first();
403 word = word_it.data();
407 gap = box.
left() - prev_right;
408 if (gap <= min_gap) {
409 prev_word = prev_word_it.data();
415 copy_word =
new WERD;
416 *copy_word = *(prev_word->
word);
422 prev_word_it.add_before_then_move(combo);
429 delete word_it.extract();
438 prev_word_it = word_it;
441 prev_right = box.
right();
452 WERD_RES_IT word_res_it(&perm);
457 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
458 word_res_it.forward()) {
459 if (!word_res_it.data()->part_of_combo) {
461 word_res_it.data()->best_choice->unichar_string();
471 tprintf(
"EXTRACTED (%d): \"", score);
474 tprintf(
"TESTED (%d): \"", score);
477 tprintf(
"RETURNED (%d): \"", score);
481 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
482 word_res_it.forward()) {
483 if (!word_res_it.data()->part_of_combo) {
485 word_res_it.data()->best_choice->unichar_string().string(),
486 (int)word_res_it.data()->best_choice->permuter());
490 }
else if (improved) {
492 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
493 word_res_it.forward()) {
494 if (!word_res_it.data()->part_of_combo) {
496 word_res_it.data()->best_choice->unichar_string().string(),
497 (int)word_res_it.data()->best_choice->permuter());
520 inT16 max_gap_count = 0;
525 float normalised_max_nonspace;
528 STRING punct_chars =
"\"`',.:;";
532 box = blob->bounding_box();
539 gap = box.
left() - prev_right;
541 gap_stats.
add(gap, 1);
542 }
else if (gap == max_gap) {
545 if (max_gap_count > 0)
546 gap_stats.
add(max_gap, max_gap_count);
551 prev_right = box.
right();
555 max_non_space = (row->
space() + 3 * row->
kern()) / 4;
560 max_gap <= normalised_max_nonspace ||
562 (gap_stats.
get_total() <= 2 && max_gap <= 2 * gap_stats.
mean()));
567 "ACCEPT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d "
568 "total=%d mean=%f median=%f\n",
570 max_gap, max_gap_count, gap_stats.
get_total(), gap_stats.
mean(),
574 "REJECT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d "
575 "total=%d mean=%f median=%f\n",
577 max_gap, max_gap_count, gap_stats.
get_total(), gap_stats.
mean(),
621 WERD_RES_LIST sub_word_list;
622 WERD_RES_IT sub_word_list_it(&sub_word_list);
627 word_res = word_res_it.data();
639 tprintf(
"FP fixspace working on \"%s\"\n",
643 sub_word_list_it.add_after_stay_put(word_res_it.extract());
645 new_length = sub_word_list.length();
646 word_res_it.add_list_before(&sub_word_list);
647 for (; !word_res_it.at_last() && new_length > 1; new_length--) {
648 word_res_it.forward();
655 WERD_RES_IT best_perm_it(&best_perm);
656 WERD_RES_LIST current_perm;
657 WERD_RES_IT current_perm_it(¤t_perm);
665 dump_words(best_perm, best_score, 1, improved);
668 old_word_res = best_perm_it.data();
670 *new_word_res = *old_word_res;
672 current_perm_it.add_to_end(new_word_res);
676 while (best_score !=
PERFECT_WERDS && !current_perm.empty()) {
679 dump_words(current_perm, current_score, 2, improved);
680 if (current_score > best_score) {
683 best_score = current_score;
690 dump_words(best_perm, best_score, 3, improved);
700 WERD_RES_IT word_it(&words);
701 WERD_RES_IT worst_word_it;
702 float worst_noise_score = 9999;
703 int worst_blob_index = -1;
708 C_BLOB_IT rej_cblob_it;
709 C_BLOB_LIST new_blob_list;
710 C_BLOB_IT new_blob_it;
711 C_BLOB_IT new_rej_cblob_it;
713 inT16 start_of_noise_blob;
716 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
718 if (blob_index > -1 && worst_noise_score > noise_score) {
719 worst_noise_score = noise_score;
720 worst_blob_index = blob_index;
721 worst_word_it = word_it;
724 if (worst_blob_index < 0) {
731 word_res = worst_word_it.data();
735 new_blob_it.set_to_list(&new_blob_list);
737 for (i = 0; i < worst_blob_index; i++, blob_it.forward()) {
738 new_blob_it.add_after_then_move(blob_it.extract());
740 start_of_noise_blob = blob_it.data()->bounding_box().left();
741 delete blob_it.extract();
743 new_word =
new WERD(&new_blob_list, word_res->
word);
751 (!rej_cblob_it.empty() &&
752 (rej_cblob_it.data()->bounding_box().left() < start_of_noise_blob));
753 rej_cblob_it.forward()) {
754 new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract());
759 worst_word_it.add_before_then_move(new_word_res);
765 float *worst_noise_score) {
766 float noise_score[512];
789 tprintf(
"FP fixspace Noise metrics for \"%s\": ",
793 for (i = 0; i < blob_count && blob !=
NULL; i++, blob = blob->
next) {
795 noise_score[i] = non_noise_limit;
800 tprintf(
"%1.1f ", noise_score[i]);
809 if (noise_score[i] >= non_noise_limit) {
813 if (non_noise_count < fixsp_non_noise_limit)
821 if (noise_score[i] >= non_noise_limit) {
825 if (non_noise_count < fixsp_non_noise_limit)
830 if (min_noise_blob > max_noise_blob)
833 *worst_noise_score = small_limit;
834 worst_noise_blob = -1;
835 for (i = min_noise_blob; i <= max_noise_blob; i++) {
836 if (noise_score[i] < *worst_noise_score) {
837 worst_noise_blob = i;
838 *worst_noise_score = noise_score[i];
846 inT16 outline_count = 0;
848 inT16 largest_outline_dimension = 0;
852 box = ol->bounding_box();
854 max_dimension = box.
height();
856 max_dimension = box.
width();
859 if (largest_outline_dimension < max_dimension)
860 largest_outline_dimension = max_dimension;
863 if (outline_count > 5) {
865 largest_outline_dimension *= 2;
872 largest_outline_dimension /= 2;
875 return largest_outline_dimension;
886 tprintf(
"Blob count: %d (word); %d/%d (rebuild word)\n",
892 if (show_map_detail) {
901 tprintf(
"Done flag: %s\n\n", word->
done ?
"TRUE" :
"FALSE");
915 WERD_RES_IT word_it(&word_res_list);
922 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
923 word = word_it.data();
936 ++i, blob = blob->
next) {