21 #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__
22 #define TESSERACT_CCMAIN_TESSERACTCLASS_H__
24 #include "allheaders.h"
47 class BLOB_CHOICE_LIST_CLIST;
98 class CubeRecoContext;
101 class TesseractCubeCombiner;
171 pixDestroy(&pix_grey_);
172 pix_grey_ = grey_pix;
182 return pix_grey_ !=
NULL ? pix_grey_ : pix_binary_;
185 return source_resolution_;
188 source_resolution_ = ppi;
191 return pixGetWidth(pix_binary_);
194 return pixGetHeight(pix_binary_);
197 return scaled_color_;
200 return scaled_factor_;
203 scaled_factor_ = factor;
204 scaled_color_ = color;
214 return right_to_left_;
217 return sub_langs_.
size();
220 return sub_langs_[index];
242 int AutoPageSeg(
bool single_column,
bool osd,
bool only_osd,
243 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks,
246 bool single_column,
bool osd,
bool only_osd,
248 TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix);
252 const char* word_config,
int pass);
255 const TBOX* target_word_box,
256 const char* word_config,
260 const TBOX* target_word_box,
261 const char* word_config);
272 TBOX &selection_box);
279 const char *lengths);
293 BLOB_CHOICE_LIST_CLIST *blob_choices);
318 const char* cube_best_str,
321 Boxa** char_boxes,
CharSamp*** char_samples);
334 const char *lengths);
344 const char *textbase,
345 const char *language,
351 bool set_only_init_params);
353 const char *language,
375 const char *textbase,
376 const char *language,
382 bool set_only_init_params);
389 const char *textbase,
390 const char *language);
396 const char *textbase,
397 const char *language,
403 bool set_only_init_params);
411 #ifndef GRAPHICS_DISABLED
413 #endif // GRAPHICS_DISABLED
435 BLOB_CHOICE_LIST_CLIST *blob_choices,
441 const char *word_lengths);
443 const char *word_lengths);
445 const char *word_lengths);
447 const char *word_lengths);
476 BLOB_CHOICE_LIST_CLIST *blob_choices);
478 BLOB_CHOICE_LIST_CLIST *blob_choices);
480 BLOB_CHOICE_LIST_CLIST *blob_choices);
488 WERD_RES_LIST &best_perm,
513 BOOL8 good_quality_doc);
515 BOOL8 good_quality_doc);
520 inT16 *accepted_match_count);
533 TBOX & selection_box,
542 BLOB_CHOICE_LIST_CLIST *blob_choices);
544 BLOB_CHOICE_LIST_CLIST *blob_choices);
571 BLOCK_LIST *block_list);
576 BLOCK_LIST *block_list);
591 const TBOX& box,
const TBOX& next_box,
592 const char* correct_text);
600 const TBOX& box,
const TBOX& next_box,
601 const char* correct_text);
624 int choices_pos,
int choices_length,
636 const char *err_msg);
654 "Take segmentation and labeling from box file");
656 "Conversion of word/line box file to char box file");
658 "Generate training data from boxed chars");
660 "Generate more boxes from boxed chars");
662 "Dump intermediate images made during page segmentation");
664 "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
665 " 5=line, 6=word, 7=char"
666 " (Values from PageSegMode enum in publictypes.h)");
668 "Which OCR engine(s) to run (Tesseract, Cube, both). Defaults"
669 " to loading and running only Tesseract (no Cube, no combiner)."
670 " (Values from OcrEngineMode enum in tesseractclass.h)");
672 "Blacklist of chars not to recognize");
674 "Whitelist of chars to recognize");
676 "Perform training for ambiguities");
679 "Whether to use the top-line splitting process for Devanagari "
680 "documents while performing page-segmentation.");
683 "Whether to use the top-line splitting process for Devanagari "
684 "documents while performing ocr.");
686 "Write all parameters to the given file.");
688 "Adapt to words that contain "
689 " a character composed form fragments");
691 "Generate and print debug information for adaption");
696 "Exposure value follows this pattern in the image"
697 " filename. The name of the image files are expected"
698 " to be in the form [lang].[fontname].exp[num].tif");
700 "Learn both character fragments (as is done in the"
701 " special low exposure mode) as well as unfragmented"
704 "Each bounding box is assumed to contain ngrams. Only"
705 " learn the ngrams whose outlines overlap horizontally.");
710 "Try to improve fuzzy spaces");
712 "Dont bother with word plausibility");
716 "Add words to the document dictionary");
720 "Enable correction based on the word bigram dictionary.");
731 "good_quality_doc lte outline error limit");
735 "Use reject map to control Tesseract adaption");
737 "Adaptation decision algorithm for tess");
739 "Do minimal rejection on pass 1 output");
743 "Adaptation decision algorithm for tess");
745 "Save the results of the recognition step"
746 " (blob_choices) within the corresponding WERD_CHOICE");
755 "Allow outline errs in unrejection?");
757 "Reduce rejection on good docs");
760 "%rej allowed before rej whole doc");
762 "%rej allowed before rej whole block");
764 "%rej allowed before rej whole row");
766 "Number of row rejects in whole word rejects"
767 "which prevents whole row rejection");
769 "Only rej partially rejected words in block rejection");
771 "Only rej partially rejected words in row rejection");
773 "Use word segmentation quality metric");
775 "Use word segmentation quality metric");
777 "Only preserve wds longer than this");
779 "Apply row rejection to good docs");
781 "rej good doc wd if more than this fraction rejected");
783 "Reject all bad quality wds");
786 "Output data to debug file");
789 "good_quality_doc gte good char limit");
791 "Mark v.bad words for tilde crunch");
797 "crunch garbage cert lt this");
808 "Del if word gt xht x this above bl");
816 "Dont pot crunch sensible strings");
819 "Dont crunch words with long lower case strings");
821 "Dont crunch words with long lower case strings");
825 "How many non-noise blbs either side?");
831 "Punct. chs expected WITHIN numbers");
833 "Max allowed deviation of blob top outside of font data");
836 "Write block separators in output");
838 "Write repetition char code");
842 "Output char for unidentified blobs");
845 "Min suspect level for rejecting spaces");
847 "Dont Suspect dict wds longer than this");
854 "Make output have exactly one word per WERD");
856 "Dont reject ANYTHING AT ALL");
863 "Aspect ratio dot/hyphen test");
865 "Aspect ratio dot/hyphen test");
877 "Allow NN to unrej");
882 "-1 -> All pages, else specifc page to process");
888 "Debug level for TessdataManager functions.");
890 "List of languages to load with this one");
894 "Min acceptable orientation margin");
898 "Only initialize with the config file. Useful if the instance is "
899 "not going to be used for OCR but say only for layout analysis.");
920 const char* backup_config_file_;
932 int source_resolution_;
951 int font_table_size_;
962 #endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__