54 #include "config_auto.h"
75 while (srcpt != start);
84 for (outline = srcline; outline !=
NULL; outline = outline->
next) {
104 if (srcpt->
flags[1] == 2)
108 while (srcpt != start);
111 if (srcpt->
flags[1] == 0) {
123 while (srcpt != real_start);
132 for (outline = srcline; outline !=
NULL; outline = outline->
next) {
147 bool italic_blob,
SEAMS seam_list) {
154 other_blob =
new TBLOB;
157 blob->
next = other_blob;
180 cprintf (
"\n** no seam picked *** \n");
183 apply_seam(blob, other_blob, italic_blob, seam);
186 if ((seam ==
NULL) ||
196 blob->
next = next_blob;
200 #ifndef GRAPHICS_DISABLED
204 cprintf (
"\n** seam being removed ** \n");
220 bool italic_blob,
SEAMS seam_list) {
225 for (x = 0; x < blob_number; x++)
229 italic_blob, seam_list);
235 bool italic_blob,
SEAMS seam_list) {
241 while (blob !=
NULL) {
248 TPOINT original_topleft, original_botright;
252 TBOX original_box =
TBOX(original_topleft.
x, original_botright.
y,
253 original_botright.
x, original_topleft.
y);
255 bool almost_equal_box =
false;
257 for (
int i = 0; i < boxes.
size(); i++) {
261 almost_equal_box =
true;
266 (!almost_equal_box && num_overlap > 1)) {
268 italic_blob, seam_list);
273 *blob_number = *blob_number + 1;
294 for (index = 0; index < length; index++)
311 edgept = outline->
loop;
315 edgept = edgept->
next;
317 while (edgept != outline->
loop);
337 bool split_next_to_fragment,
343 BLOB_CHOICE_LIST *answer;
344 BLOB_CHOICE_IT answer_it;
349 bool split_point_from_dict = (*blob_number != -1);
350 if (split_point_from_dict) {
354 split_next_to_fragment);
357 cprintf(
"blob_number = %d\n", *blob_number);
358 if (*blob_number == -1)
366 answer = char_choices->
get(*blob_number);
369 answer_it.set_to_list(answer);
370 if (!split_point_from_dict) {
372 rating_ceiling = answer_it.data()->rating();
376 for (blob = word->
blobs; x < *blob_number; x++) {
383 delete char_choices->
get(*blob_number);
387 char_choices->
insert(answer, *blob_number);
391 char_choices->
set(answer, *blob_number + 1);
405 char chop_index_string[2];
406 if (chop_index <= 9) {
407 snprintf(chop_index_string,
sizeof(chop_index_string),
"%d", chop_index);
409 chop_index_string[0] =
static_cast<char>(
'A' - 10 + chop_index);
410 chop_index_string[1] =
'\0';
413 if (unichar_id == INVALID_UNICHAR_ID) {
417 BLOB_CHOICE_IT answer_it(answer);
420 answer_it.data()->rating(),
421 answer_it.data()->certainty(),
422 answer_it.data()->fontinfo_id(),
423 answer_it.data()->fontinfo_id2(),
424 answer_it.data()->script_id(),
425 answer_it.data()->min_xheight(),
426 answer_it.data()->max_xheight(),
427 answer_it.data()->adapted());
429 answer_it.set_to_list(answer);
430 answer_it.add_after_then_move(modified_blob);
445 int *right_chop_index) {
449 BLOB_CHOICE_LIST *answer;
450 BLOB_CHOICE_IT answer_it;
453 int left_chop_index = 0;
458 cprintf(
"blob_number = %d\n", *blob_number);
459 if (*blob_number == -1)
465 answer = char_choices->
get(*blob_number);
468 answer_it.set_to_list(answer);
469 rating_ceiling = answer_it.data()->rating();
472 for (blob = word->
blobs; x < *blob_number; x++) {
481 *seam_list =
insert_seam(*seam_list, *blob_number, seam, blob, word->
blobs);
483 answer = char_choices->
get(*blob_number);
484 answer_it.set_to_list(answer);
485 unichar_id = answer_it.data()->unichar_id();
486 float rating = answer_it.data()->rating() / exp(1.0);
489 delete char_choices->
get(*blob_number);
493 char_choices->
insert(answer, *blob_number);
497 char_choices->
set(answer, *blob_number + 1);
525 *seam_list =
insert_seam(*seam_list, blob_number, seam, blob,
547 found_em[0] = found_em[1] = found_em[2] =
FALSE;
549 for (outline = blob->
outlines; outline; outline = outline->
next) {
565 last_outline = outline;
568 if (!found_em[0] || !found_em[1] || !found_em[2])
588 BLOB_CHOICE_LIST *match_result;
598 blob !=
NULL; blob = blob->
next, index++) {
601 if (match_result ==
NULL)
602 cprintf(
"Null classifier output!\n");
603 *char_choices += match_result;
605 bit_count = index - 1;
607 bool acceptable =
false;
608 bool replaced =
false;
609 bool best_choice_updated =
612 if (best_choice_updated &&
623 bool best_choice_acceptable =
false;
630 &best_choice_acceptable);
667 BLOB_CHOICE_IT blob_choice_it(best_char_choices->
get(i));
670 for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
671 blob_choice_it.forward()) {
672 if (!(
getDict().getUnicharset().get_fragment(
673 blob_choice_it.data()->unichar_id()))) {
674 first_choice = blob_choice_it.data();
687 debug =
"Best choice is: incorrect, top choice, dictionary word";
688 debug +=
" with permuter ";
691 debug =
"Classifier/Old LM tradeoff is to blame";
700 if (ratings ==
NULL) {
706 if (ratings !=
NULL) {
708 tprintf(
"Final Ratings Matrix:\n");
727 return best_char_choices;
746 bool *best_choice_acceptable) {
749 bool updated_best_choice =
false;
759 updated_best_choice =
779 bool replaced =
false;
780 if ((updated_best_choice &&
781 (*best_choice_acceptable =
789 if (updated_best_choice)
CopyCharChoices(*char_choices, best_char_choices);
802 float rating_ceiling,
803 bool split_next_to_fragment) {
804 BLOB_CHOICE_IT blob_choice_it;
806 BLOB_CHOICE_IT temp_it;
809 int worst_index = -1;
811 int worst_index_near_fragment = -1;
816 cprintf(
"rating_ceiling = %8.4f\n", rating_ceiling);
818 cprintf(
"rating_ceiling = No Limit\n");
821 if (split_next_to_fragment && char_choices.
length() > 0) {
823 if (char_choices.
get(0) !=
NULL) {
824 temp_it.set_to_list(char_choices.
get(0));
826 temp_it.data()->unichar_id());
832 for (x = 0; x < char_choices.
length(); ++x) {
833 if (char_choices.
get(x) ==
NULL) {
834 if (fragments !=
NULL) {
839 blob_choice_it.set_to_list(char_choices.
get(x));
840 blob_choice = blob_choice_it.data();
842 if (split_next_to_fragment && x+1 < char_choices.
length()) {
843 if (char_choices.
get(x+1) !=
NULL) {
844 temp_it.set_to_list(char_choices.
get(x+1));
846 temp_it.data()->unichar_id());
848 fragments[x+1] =
NULL;
851 if (blob_choice->
rating() < rating_ceiling &&
854 if (blob_choice->
rating() > worst) {
856 worst = blob_choice->
rating();
858 if (split_next_to_fragment) {
860 bool expand_following_fragment =
861 (x + 1 < char_choices.
length() &&
863 bool expand_preceding_fragment =
864 (x > 0 && fragments[x-1] !=
NULL && !fragments[x-1]->
is_ending());
865 if ((expand_following_fragment || expand_preceding_fragment) &&
866 blob_choice->
rating() > worst_near_fragment) {
867 worst_index_near_fragment = x;
868 worst_near_fragment = blob_choice->
rating();
870 cprintf(
"worst_index_near_fragment=%d"
871 " expand_following_fragment=%d"
872 " expand_preceding_fragment=%d\n",
873 worst_index_near_fragment,
874 expand_following_fragment,
875 expand_preceding_fragment);
882 if (fragments !=
NULL) {
887 return worst_index_near_fragment != -1 ?
888 worst_index_near_fragment : worst_index;
901 for (
int i = 0; i < fixpt->
size(); i++) {
902 if ((*fixpt)[i].begin == (*fixpt)[i].end &&
903 (*fixpt)[i].dangerous &&
904 (*fixpt)[i].correct_is_ngram) {
905 return (*fixpt)[i].begin;
919 assert(blamer_bundle !=
NULL);
925 bool missing_chop =
false;
929 while (b < blamer_bundle->truth_word.length() && curr_blob !=
NULL) {
933 curr_blob = curr_blob->
next;
940 curr_blob = curr_blob->
next;
944 if (missing_chop || b < blamer_bundle->norm_truth_word.length()) {
946 char debug_buffer[256];
948 sprintf(debug_buffer,
"Detected missing chop (tolerance=%d) at ",
950 debug += debug_buffer;
952 debug.
add_str_int(
"\nNo chop for truth at x=", truth_x);
956 debug +=
" truth box(es)";
958 debug +=
"\nMaximally chopped word boxes:\n";
960 curr_blob = curr_blob->
next) {
962 sprintf(debug_buffer,
"(%d,%d)->(%d,%d)\n",
964 debug += debug_buffer;
966 debug +=
"Truth bounding boxes:\n";
969 sprintf(debug_buffer,
"(%d,%d)->(%d,%d)\n",
971 debug += debug_buffer;
994 BLOB_CHOICE_IT blob_choice_it;
1000 chunks_record.
chunks = blobs;
1006 for (x = 0; x < num_chunks; x++) {
1011 blob_choice_it.set_to_list(choices);
1013 if (blob_choice_it.data()->certainty() == 0) {
1017 -(
inT16) (10 * blob_choice_it.data()->rating() /
1018 blob_choice_it.data()->certainty());
1021 chunks_record.
weights = blob_weights;
1026 if (!only_create_ratings_matrix) {
1033 state, fixpt, best_state);