49 STRING output_fname = fname;
50 const char *lastdot = strrchr(output_fname.
string(),
'.');
51 if (lastdot !=
NULL) output_fname[lastdot - output_fname.
string()] =
'\0';
52 output_fname +=
".txt";
73 if (tbox->
left() < 0) {
93 const char *lastdot = strrchr(box_fname.
string(),
'.');
94 if (lastdot !=
NULL) box_fname[lastdot - box_fname.
string()] =
'\0';
101 page_res_it.restart_page();
109 int examined_words = 0;
111 keep_going =
read_t(&page_res_it, &tbox);
118 read_t(&page_res_it, &tbox) :
123 keep_going = (bbox.
left() > tbox.
left()) ?
read_t(&page_res_it, &tbox) :
131 page_res_it.prev_row(),
132 page_res_it.prev_block(),
133 label.
string(), output_file);
136 }
while (keep_going);
143 for (page_res_it.restart_page(); page_res_it.block() !=
NULL;
144 page_res_it.forward()) {
145 if (page_res_it.word()) {
146 if (page_res_it.word()->uch_set ==
NULL)
151 if (examined_words < 0.85 * total_words) {
152 tprintf(
"TODO(antonova): clean up recog_training_segmented; "
153 " It examined only a small fraction of the ambigs image.\n");
155 tprintf(
"recog_training_segmented: examined %d / %d words.\n",
156 examined_words, total_words);
177 int label_num_unichars = 0;
179 for (offset = 0; label[offset] !=
'\0' && step > 0;
181 offset += step, ++label_num_unichars);
183 tprintf(
"Not outputting illegal unichar %s\n", label);
188 if (label_num_unichars == 1 && best_choice->
blob_choices()->length() == 1) {
189 BLOB_CHOICE_LIST_C_IT outer_blob_choice_it;
190 outer_blob_choice_it.set_to_list(best_choice->
blob_choices());
191 BLOB_CHOICE_IT blob_choice_it;
192 blob_choice_it.set_to_list(outer_blob_choice_it.data());
193 for (blob_choice_it.mark_cycle_pt();
194 !blob_choice_it.cycled_list();
195 blob_choice_it.forward()) {
197 if (blob_choice->
unichar_id() != INVALID_UNICHAR_ID) {
198 fprintf(output_file,
"%s\t%s\t%.4f\t%.4f\n",