Tesseract
3.02
|
#include <tesseractclass.h>
Public Member Functions | ||||||||||
Tesseract () | ||||||||||
~Tesseract () | ||||||||||
void | Clear () | |||||||||
void | ResetAdaptiveClassifier () | |||||||||
void | ResetDocumentDictionary () | |||||||||
void | SetEquationDetect (EquationDetect *detector) | |||||||||
const FCOORD & | reskew () const | |||||||||
Pix ** | mutable_pix_binary () | |||||||||
Pix * | pix_binary () const | |||||||||
Pix * | pix_grey () const | |||||||||
void | set_pix_grey (Pix *grey_pix) | |||||||||
Pix * | BestPix () const | |||||||||
int | source_resolution () const | |||||||||
void | set_source_resolution (int ppi) | |||||||||
int | ImageWidth () const | |||||||||
int | ImageHeight () const | |||||||||
Pix * | scaled_color () const | |||||||||
int | scaled_factor () const | |||||||||
void | SetScaledColor (int factor, Pix *color) | |||||||||
const Textord & | textord () const | |||||||||
Textord * | mutable_textord () | |||||||||
bool | right_to_left () const | |||||||||
int | num_sub_langs () const | |||||||||
Tesseract * | get_sub_lang (int index) const | |||||||||
void | SetBlackAndWhitelist () | |||||||||
void | PrepareForPageseg () | |||||||||
void | PrepareForTessOCR (BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr) | |||||||||
int | SegmentPage (const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr) | |||||||||
void | SetupWordScripts (BLOCK_LIST *blocks) | |||||||||
int | AutoPageSeg (bool single_column, bool osd, bool only_osd, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, Tesseract *osd_tess, OSResults *osr) | |||||||||
ColumnFinder * | SetupPageSegAndDetectOrientation (bool single_column, bool osd, bool only_osd, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix) | |||||||||
bool | ProcessTargetWord (const TBOX &word_box, const TBOX &target_word_box, const char *word_config, int pass) | |||||||||
bool | recog_all_words (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses) | |||||||||
void | rejection_passes (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config) | |||||||||
void | bigram_correction_pass (PAGE_RES *page_res) | |||||||||
void | blamer_pass (PAGE_RES *page_res) | |||||||||
bool | RetryWithLanguage (WERD_RES *word, BLOCK *block, ROW *row, WordRecognizer recognizer) | |||||||||
void | classify_word_and_language (WordRecognizer recognizer, BLOCK *block, ROW *row, WERD_RES *word) | |||||||||
void | classify_word_pass1 (BLOCK *block, ROW *row, WERD_RES *word) | |||||||||
void | recog_pseudo_word (PAGE_RES *page_res, TBOX &selection_box) | |||||||||
void | fix_rep_char (PAGE_RES_IT *page_res_it) | |||||||||
void | ExplodeRepeatedWord (BLOB_CHOICE *best_choice, PAGE_RES_IT *page_res_it) | |||||||||
ACCEPTABLE_WERD_TYPE | acceptable_word_string (const UNICHARSET &char_set, const char *s, const char *lengths) | |||||||||
void | match_word_pass2 (WERD_RES *word, ROW *row, BLOCK *block) | |||||||||
void | classify_word_pass2 (BLOCK *block, ROW *row, WERD_RES *word) | |||||||||
void | ReportXhtFixResult (bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word) | |||||||||
bool | RunOldFixXht (WERD_RES *word, BLOCK *block, ROW *row) | |||||||||
bool | TrainedXheightFix (WERD_RES *word, BLOCK *block, ROW *row) | |||||||||
BOOL8 | recog_interactive (BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
void | set_word_fonts (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) | |||||||||
void | font_recognition_pass (PAGE_RES *page_res) | |||||||||
BOOL8 | check_debug_pt (WERD_RES *word, int location) | |||||||||
bool | init_cube_objects (bool load_combiner, TessdataManager *tessdata_manager) | |||||||||
void | run_cube_combiner (PAGE_RES *page_res) | |||||||||
void | cube_word_pass1 (BLOCK *block, ROW *row, WERD_RES *word) | |||||||||
CubeObject * | cube_recognize_word (BLOCK *block, WERD_RES *word) | |||||||||
void | cube_combine_word (CubeObject *cube_obj, WERD_RES *cube_word, WERD_RES *tess_word) | |||||||||
bool | cube_recognize (CubeObject *cube_obj, BLOCK *block, WERD_RES *word) | |||||||||
void | fill_werd_res (const BoxWord &cube_box_word, WERD_CHOICE *cube_werd_choice, const char *cube_best_str, WERD_RES *tess_werd_res) | |||||||||
bool | extract_cube_state (CubeObject *cube_obj, int *num_chars, Boxa **char_boxes, CharSamp ***char_samples) | |||||||||
bool | create_cube_box_word (Boxa *char_boxes, int num_chars, TBOX word_box, BoxWord *box_word) | |||||||||
void | output_pass (PAGE_RES_IT &page_res_it, const TBOX *target_word_box) | |||||||||
void | write_results (PAGE_RES_IT &page_res_it, char newline_type, BOOL8 force_eol) | |||||||||
void | set_unlv_suspects (WERD_RES *word) | |||||||||
UNICHAR_ID | get_rep_char (WERD_RES *word) | |||||||||
BOOL8 | acceptable_number_string (const char *s, const char *lengths) | |||||||||
inT16 | count_alphanums (const WERD_CHOICE &word) | |||||||||
inT16 | count_alphas (const WERD_CHOICE &word) | |||||||||
void | read_config_file (const char *filename, SetParamConstraint constraint) | |||||||||
int | init_tesseract (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params) | |||||||||
int | init_tesseract (const char *datapath, const char *language, OcrEngineMode oem) | |||||||||
int | init_tesseract_internal (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params) | |||||||||
void | SetupUniversalFontIds () | |||||||||
int | init_tesseract_lm (const char *arg0, const char *textbase, const char *language) | |||||||||
void | recognize_page (STRING &image_name) | |||||||||
void | end_tesseract () | |||||||||
bool | init_tesseract_lang_data (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params) | |||||||||
void | ParseLanguageString (const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load) | |||||||||
SVMenuNode * | build_menu_new () | |||||||||
void | pgeditor_main (int width, int height, PAGE_RES *page_res) | |||||||||
void | process_image_event (const SVEvent &event) | |||||||||
BOOL8 | process_cmd_win_event (inT32 cmd_event, char *new_value) | |||||||||
void | debug_word (PAGE_RES *page_res, const TBOX &selection_box) | |||||||||
void | do_re_display (BOOL8(tesseract::Tesseract::*word_painter)(BLOCK *block, ROW *row, WERD_RES *word_res)) | |||||||||
BOOL8 | word_display (BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
BOOL8 | word_bln_display (BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
BOOL8 | word_blank_and_set_display (BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
BOOL8 | word_set_display (BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
BOOL8 | word_dumper (BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
void | make_reject_map (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices, ROW *row, inT16 pass) | |||||||||
BOOL8 | one_ell_conflict (WERD_RES *word_res, BOOL8 update_map) | |||||||||
inT16 | first_alphanum_index (const char *word, const char *word_lengths) | |||||||||
inT16 | first_alphanum_offset (const char *word, const char *word_lengths) | |||||||||
inT16 | alpha_count (const char *word, const char *word_lengths) | |||||||||
BOOL8 | word_contains_non_1_digit (const char *word, const char *word_lengths) | |||||||||
void | dont_allow_1Il (WERD_RES *word) | |||||||||
inT16 | count_alphanums (WERD_RES *word) | |||||||||
void | flip_0O (WERD_RES *word) | |||||||||
BOOL8 | non_0_digit (const UNICHARSET &ch_set, UNICHAR_ID unichar_id) | |||||||||
BOOL8 | non_O_upper (const UNICHARSET &ch_set, UNICHAR_ID unichar_id) | |||||||||
BOOL8 | repeated_nonalphanum_wd (WERD_RES *word, ROW *row) | |||||||||
void | nn_match_word (WERD_RES *word, ROW *row) | |||||||||
void | nn_recover_rejects (WERD_RES *word, ROW *row) | |||||||||
BOOL8 | test_ambig_word (WERD_RES *word) | |||||||||
void | set_done (WERD_RES *word, inT16 pass) | |||||||||
inT16 | safe_dict_word (const WERD_RES *werd_res) | |||||||||
void | flip_hyphens (WERD_RES *word) | |||||||||
void | reject_I_1_L (WERD_RES *word) | |||||||||
void | reject_edge_blobs (WERD_RES *word) | |||||||||
void | reject_mostly_rejects (WERD_RES *word) | |||||||||
BOOL8 | word_adaptable (WERD_RES *word, uinT16 mode) | |||||||||
void | recog_word_recursive (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) | |||||||||
void | recog_word (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) | |||||||||
void | split_and_recog_word (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) | |||||||||
void | match_current_words (WERD_RES_LIST &words, ROW *row, BLOCK *block) | |||||||||
inT16 | fp_eval_word_spacing (WERD_RES_LIST &word_res_list) | |||||||||
void | dump_words (WERD_RES_LIST &perm, inT16 score, inT16 mode, BOOL8 improved) | |||||||||
GARBAGE_LEVEL | garbage_word (WERD_RES *word, BOOL8 ok_dict_word) | |||||||||
BOOL8 | potential_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level, BOOL8 ok_dict_word) | |||||||||
void | tilde_crunch (PAGE_RES_IT &page_res_it) | |||||||||
void | unrej_good_quality_words (PAGE_RES_IT &page_res_it) | |||||||||
void | doc_and_block_rejection (PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc) | |||||||||
void | quality_based_rejection (PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc) | |||||||||
void | convert_bad_unlv_chs (WERD_RES *word_res) | |||||||||
void | tilde_delete (PAGE_RES_IT &page_res_it) | |||||||||
inT16 | word_blob_quality (WERD_RES *word, ROW *row) | |||||||||
void | word_char_quality (WERD_RES *word, ROW *row, inT16 *match_count, inT16 *accepted_match_count) | |||||||||
void | unrej_good_chs (WERD_RES *word, ROW *row) | |||||||||
inT16 | count_outline_errs (char c, inT16 outline_count) | |||||||||
inT16 | word_outline_errs (WERD_RES *word) | |||||||||
BOOL8 | terrible_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level) | |||||||||
CRUNCH_MODE | word_deletable (WERD_RES *word, inT16 &delete_mode) | |||||||||
inT16 | failure_count (WERD_RES *word) | |||||||||
BOOL8 | noise_outlines (TWERD *word) | |||||||||
void | process_selected_words (PAGE_RES *page_res, TBOX &selection_box, BOOL8(tesseract::Tesseract::*word_processor)(BLOCK *block, ROW *row, WERD_RES *word_res)) | |||||||||
void | tess_segment_pass1 (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) | |||||||||
PAGE_RES * | ApplyBoxes (const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list) | |||||||||
PAGE_RES * | SetupApplyBoxes (const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list) | |||||||||
void | MaximallyChopWord (const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res) | |||||||||
bool | ResegmentCharBox (PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box, const TBOX &next_box, const char *correct_text) | |||||||||
bool | ResegmentWordBox (BLOCK_LIST *block_list, const TBOX &box, const TBOX &next_box, const char *correct_text) | |||||||||
void | ReSegmentByClassification (PAGE_RES *page_res) | |||||||||
bool | ConvertStringToUnichars (const char *utf8, GenericVector< UNICHAR_ID > *class_ids) | |||||||||
bool | FindSegmentation (const GenericVector< UNICHAR_ID > &target_text, WERD_RES *word_res) | |||||||||
void | SearchForText (const GenericVector< BLOB_CHOICE_LIST * > *choices, int choices_pos, int choices_length, const GenericVector< UNICHAR_ID > &target_text, int text_index, float rating, GenericVector< int > *segmentation, float *best_rating, GenericVector< int > *best_segmentation) | |||||||||
void | TidyUp (PAGE_RES *page_res) | |||||||||
void | ReportFailedBox (int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg) | |||||||||
void | CorrectClassifyWords (PAGE_RES *page_res) | |||||||||
void | ApplyBoxTraining (const STRING &filename, PAGE_RES *page_res) | |||||||||
int | CountMisfitTops (WERD_RES *word_res) | |||||||||
float | ComputeCompatibleXheight (WERD_RES *word_res) | |||||||||
FILE * | init_recog_training (const STRING &fname) | |||||||||
void | recog_training_segmented (const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file) | |||||||||
void | ambigs_classify_and_output (WERD_RES *werd_res, ROW_RES *row_res, BLOCK_RES *block_res, const char *label, FILE *output_file) | |||||||||
CubeRecoContext * | GetCubeRecoContext () | |||||||||
eval_word_spacing() | ||||||||||
The basic measure is the number of characters in contextually confirmed words (i.e. the word is done). If all words are contextually confirmed, the evaluation is deemed perfect. Some fiddles are done to handle "1"s, as these are VERY frequent causes of fuzzy spaces. The problem with the basic measure is that "561 63" would score the same as "56163", though given our knowledge that the space is fuzzy, and that there is a "1" next to the fuzzy space, we need to ensure that "56163" is preferred. The solution is to NOT COUNT the score of any word which has a digit at one end and a "1Il" as the character on the other side of the space. Conversely, any character next to a "1" within a word is counted as a positive score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of the "1" joined). "56163" would score 7: all chars in a numeric word + 2 sides of a "1" joined. The joined 1 rule is applied to any word REGARDLESS of contextual confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually confirmed; the only score is from the joined 1). "PS7a713/7a" scores 2. | ||||||||||
BOOL8 | digit_or_numeric_punct (WERD_RES *word, int char_position) | |||||||||
inT16 | eval_word_spacing (WERD_RES_LIST &word_res_list) | |||||||||
fix_sp_fp_word() | ||||||||||
Test the current word to see if it can be split by deleting noise blobs. If so, perform the split. Return with the iterator pointing to the same place if the word is unchanged, or to the last of the replacement words otherwise. | ||||||||||
void | fix_noisy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) | |||||||||
void | fix_sp_fp_word (WERD_RES_IT &word_res_it, ROW *row, BLOCK *block) | |||||||||
inT16 | worst_noise_blob (WERD_RES *word_res, float *worst_noise_score) | |||||||||
float | blob_noise_score (TBLOB *blob) | |||||||||
void | break_noisiest_blob_word (WERD_RES_LIST &words) | |||||||||
fix_fuzzy_spaces() | ||||||||||
Walk over the page finding sequences of words joined by fuzzy spaces. Extract them as a sublist, process the sublist to find the optimal arrangement of spaces, then replace the sublist in the ROW_RES.
| ||||||||||
void | fix_fuzzy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) | |||||||||
void | fix_fuzzy_spaces (ETEXT_DESC *monitor, inT32 word_count, PAGE_RES *page_res) | |||||||||
uniformly_spaced() | ||||||||||
Return true if one of the following is true:
| ||||||||||
BOOL8 | uniformly_spaced (WERD_RES *word) | |||||||||
BOOL8 | fixspace_thinks_word_done (WERD_RES *word) | |||||||||
tess_add_doc_word | ||||||||||
Add the given word to the document dictionary | ||||||||||
void | tess_add_doc_word (WERD_CHOICE *word_choice) | |||||||||
tess_segment_pass2 | ||||||||||
Segment a word using the pass2 conditions of the tess segmenter.
| ||||||||||
void | tess_segment_pass2 (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) | |||||||||
tess_acceptable_word | ||||||||||
| ||||||||||
BOOL8 | tess_acceptable_word (WERD_CHOICE *word_choice, WERD_CHOICE *raw_choice) | |||||||||
![]() | ||||||||||
Wordrec () | ||||||||||
virtual | ~Wordrec () | |||||||||
void | CopyCharChoices (const BLOB_CHOICE_LIST_VECTOR &from, BLOB_CHOICE_LIST_VECTOR *to) | |||||||||
bool | ChoiceIsCorrect (const UNICHARSET &uni_set, const WERD_CHOICE *choice, const GenericVector< STRING > &truth_text) | |||||||||
void | SaveAltChoices (const LIST &best_choices, WERD_RES *word) | |||||||||
void | FillLattice (const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | |||||||||
void | CallFillLattice (const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) | |||||||||
void | update_ratings (const BLOB_CHOICE_LIST_VECTOR &new_choices, const CHUNKS_RECORD *chunks_record, const SEARCH_STATE search_state) | |||||||||
void | SegSearch (CHUNKS_RECORD *chunks_record, WERD_CHOICE *best_choice, BLOB_CHOICE_LIST_VECTOR *best_char_choices, WERD_CHOICE *raw_choice, STATE *output_best_state, BlamerBundle *blamer_bundle) | |||||||||
SEAM * | attempt_blob_chop (TWERD *word, TBLOB *blob, inT32 blob_number, bool italic_blob, SEAMS seam_list) | |||||||||
SEAM * | chop_numbered_blob (TWERD *word, inT32 blob_number, bool italic_blob, SEAMS seam_list) | |||||||||
SEAM * | chop_overlapping_blob (const GenericVector< TBOX > &boxes, WERD_RES *word_res, inT32 *blob_number, bool italic_blob, SEAMS seam_list) | |||||||||
void | junk_worst_seam (SEAM_QUEUE seams, SEAM *new_seam, float new_priority) | |||||||||
void | choose_best_seam (SEAM_QUEUE seam_queue, SEAM_PILE *seam_pile, SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob) | |||||||||
void | combine_seam (SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam) | |||||||||
inT16 | constrained_split (SPLIT *split, TBLOB *blob) | |||||||||
void | delete_seam_pile (SEAM_PILE seam_pile) | |||||||||
SEAM * | pick_good_seam (TBLOB *blob) | |||||||||
PRIORITY | seam_priority (SEAM *seam, inT16 xmin, inT16 xmax) | |||||||||
void | try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, SEAM_QUEUE seam_queue, SEAM_PILE *seam_pile, SEAM **seam, TBLOB *blob) | |||||||||
void | try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, EDGEPT_CLIST *new_points, SEAM_QUEUE seam_queue, SEAM_PILE *seam_pile, SEAM **seam, TBLOB *blob) | |||||||||
PRIORITY | full_split_priority (SPLIT *split, inT16 xmin, inT16 xmax) | |||||||||
PRIORITY | grade_center_of_blob (register BOUNDS_RECT rect) | |||||||||
PRIORITY | grade_overlap (register BOUNDS_RECT rect) | |||||||||
PRIORITY | grade_split_length (register SPLIT *split) | |||||||||
PRIORITY | grade_sharpness (register SPLIT *split) | |||||||||
PRIORITY | grade_width_change (register BOUNDS_RECT rect) | |||||||||
void | set_outline_bounds (register EDGEPT *point1, register EDGEPT *point2, BOUNDS_RECT rect) | |||||||||
int | crosses_outline (EDGEPT *p0, EDGEPT *p1, EDGEPT *outline) | |||||||||
int | is_crossed (TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) | |||||||||
int | is_same_edgept (EDGEPT *p1, EDGEPT *p2) | |||||||||
bool | near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt) | |||||||||
void | reverse_outline (EDGEPT *outline) | |||||||||
virtual BLOB_CHOICE_LIST * | classify_piece (TBLOB *pieces, const DENORM &denorm, SEAMS seams, inT16 start, inT16 end, BlamerBundle *blamer_bundle) | |||||||||
void | merge_fragments (MATRIX *ratings, inT16 num_blobs) | |||||||||
void | get_fragment_lists (inT16 current_frag, inT16 current_row, inT16 start, inT16 num_frag_parts, inT16 num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists) | |||||||||
void | merge_and_put_fragment_lists (inT16 row, inT16 column, inT16 num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) | |||||||||
void | fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) | |||||||||
BLOB_CHOICE_LIST * | get_piece_rating (MATRIX *ratings, TBLOB *blobs, const DENORM &denorm, SEAMS seams, inT16 start, inT16 end, BlamerBundle *blamer_bundle) | |||||||||
TBOX * | record_blob_bounds (TBLOB *blobs) | |||||||||
MATRIX * | record_piece_ratings (TBLOB *blobs) | |||||||||
WIDTH_RECORD * | state_char_widths (WIDTH_RECORD *chunk_widths, STATE *state, int num_joints) | |||||||||
FLOAT32 | get_width_variance (WIDTH_RECORD *wrec, float norm_height) | |||||||||
FLOAT32 | get_gap_variance (WIDTH_RECORD *wrec, float norm_height) | |||||||||
FLOAT32 | prioritize_state (CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) | |||||||||
FLOAT32 | width_priority (CHUNKS_RECORD *chunks_record, STATE *state, int num_joints) | |||||||||
FLOAT32 | seamcut_priority (SEAMS seams, STATE *state, int num_joints) | |||||||||
FLOAT32 | rating_priority (CHUNKS_RECORD *chunks_record, STATE *state, int num_joints) | |||||||||
void | program_editup (const char *textbase, bool init_classifier, bool init_permute) | |||||||||
BLOB_CHOICE_LIST_VECTOR * | cc_recog (WERD_RES *word) | |||||||||
void | program_editdown (inT32 elasped_time) | |||||||||
void | set_pass1 () | |||||||||
void | set_pass2 () | |||||||||
int | end_recog () | |||||||||
BLOB_CHOICE_LIST * | call_matcher (const DENORM *denorm, TBLOB *blob) | |||||||||
int | dict_word (const WERD_CHOICE &word) | |||||||||
BLOB_CHOICE_LIST * | classify_blob (TBLOB *blob, const DENORM &denorm, const char *string, C_COL color, BlamerBundle *blamer_bundle) | |||||||||
BLOB_CHOICE_LIST * | fake_classify_blob (UNICHAR_ID class_id, float rating, float certainty) | |||||||||
void | update_blob_classifications (TWERD *word, const BLOB_CHOICE_LIST_VECTOR &choices) | |||||||||
BLOB_CHOICE_LIST_VECTOR * | evaluate_chunks (CHUNKS_RECORD *chunks_record, SEARCH_STATE search_state, BlamerBundle *blamer_bundle) | |||||||||
void | best_first_search (CHUNKS_RECORD *chunks_record, BLOB_CHOICE_LIST_VECTOR *best_char_choices, WERD_RES *word, STATE *state, DANGERR *fixpt, STATE *best_state) | |||||||||
void | delete_search (SEARCH_RECORD *the_search) | |||||||||
inT16 | evaluate_state (CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search, DANGERR *fixpt, BlamerBundle *blamer_bundle) | |||||||||
BLOB_CHOICE_LIST_VECTOR * | rebuild_current_state (WERD_RES *word, STATE *state, BLOB_CHOICE_LIST_VECTOR *char_choices, MATRIX *ratings) | |||||||||
SEARCH_RECORD * | new_search (CHUNKS_RECORD *chunks_record, int num_joints, BLOB_CHOICE_LIST_VECTOR *best_char_choices, WERD_CHOICE *best_choice, WERD_CHOICE *raw_choice, STATE *state) | |||||||||
void | expand_node (FLOAT32 worst_priority, CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) | |||||||||
void | replace_char_widths (CHUNKS_RECORD *chunks_record, SEARCH_STATE state) | |||||||||
BLOB_CHOICE * | rebuild_fragments (const char *unichar, const char *expanded_fragment_lengths, int choice_index, BLOB_CHOICE_LIST_VECTOR *old_choices) | |||||||||
BLOB_CHOICE_LIST * | join_blobs_and_classify (WERD_RES *word, int x, int y, int choice_index, MATRIX *ratings, BLOB_CHOICE_LIST_VECTOR *old_choices) | |||||||||
STATE * | pop_queue (HEAP *queue) | |||||||||
void | push_queue (HEAP *queue, STATE *state, FLOAT32 worst_priority, FLOAT32 priority, bool debug) | |||||||||
PRIORITY | point_priority (EDGEPT *point) | |||||||||
void | add_point_to_list (POINT_GROUP point_list, EDGEPT *point) | |||||||||
int | angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) | |||||||||
int | is_little_chunk (EDGEPT *point1, EDGEPT *point2) | |||||||||
int | is_small_area (EDGEPT *point1, EDGEPT *point2) | |||||||||
EDGEPT * | pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) | |||||||||
void | prioritize_points (TESSLINE *outline, POINT_GROUP points) | |||||||||
void | new_min_point (EDGEPT *local_min, POINT_GROUP points) | |||||||||
void | new_max_point (EDGEPT *local_max, POINT_GROUP points) | |||||||||
void | vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points) | |||||||||
bool | improve_one_blob (WERD_RES *word_res, BLOB_CHOICE_LIST_VECTOR *char_choices, inT32 *blob_number, SEAMS *seam_list, DANGERR *fixpt, bool split_next_to_fragment, BlamerBundle *blamer_bundle) | |||||||||
void | modify_blob_choice (BLOB_CHOICE_LIST *answer, int chop_index) | |||||||||
bool | chop_one_blob (TWERD *word, BLOB_CHOICE_LIST_VECTOR *char_choices, inT32 *blob_number, SEAMS *seam_list, int *right_chop_index) | |||||||||
bool | chop_one_blob2 (const GenericVector< TBOX > &boxes, WERD_RES *word_res, SEAMS *seam_list) | |||||||||
BLOB_CHOICE_LIST_VECTOR * | chop_word_main (WERD_RES *word) | |||||||||
void | improve_by_chopping (WERD_RES *word, BLOB_CHOICE_LIST_VECTOR *char_choices, STATE *best_state, BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, bool *updated_best_choice) | |||||||||
MATRIX * | word_associator (bool only_create_ratings_matrtix, WERD_RES *word, STATE *state, BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, STATE *best_state) | |||||||||
inT16 | select_blob_to_split (const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_ceiling, bool split_next_to_fragment) | |||||||||
inT16 | select_blob_to_split_from_fixpt (DANGERR *fixpt) | |||||||||
void | set_chopper_blame (WERD_RES *word) | |||||||||
![]() | ||||||||||
Classify () | ||||||||||
virtual | ~Classify () | |||||||||
Dict & | getDict () | |||||||||
const ShapeTable * | shape_table () const | |||||||||
ADAPT_TEMPLATES | NewAdaptedTemplates (bool InitFromUnicharset) | |||||||||
int | GetFontinfoId (ADAPT_CLASS Class, uinT8 ConfigId) | |||||||||
int | PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, CP_RESULT_STRUCT *results) | |||||||||
void | ReadNewCutoffs (FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs) | |||||||||
void | PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) | |||||||||
void | WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) | |||||||||
ADAPT_TEMPLATES | ReadAdaptedTemplates (FILE *File) | |||||||||
FLOAT32 | ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch) | |||||||||
void | FreeNormProtos () | |||||||||
NORM_PROTOS * | ReadNormProtos (FILE *File, inT64 end_offset) | |||||||||
void | ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class) | |||||||||
INT_TEMPLATES | CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset) | |||||||||
void | LearnWord (const char *filename, const char *rejmap, WERD_RES *word) | |||||||||
void | LearnPieces (const char *filename, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word) | |||||||||
void | InitAdaptiveClassifier (bool load_pre_trained_templates) | |||||||||
void | InitAdaptedClass (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) | |||||||||
void | AdaptToPunc (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold) | |||||||||
void | AmbigClassifier (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results) | |||||||||
void | MasterMatcher (INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int num_classes, const TBOX &blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS *final_results) | |||||||||
void | ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, const uinT8 *cn_factors, INT_RESULT_STRUCT &int_result, ADAPT_RESULTS *final_results) | |||||||||
double | ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, const uinT8 *cn_factors) | |||||||||
void | ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) | |||||||||
void | AddNewResult (ADAPT_RESULTS *results, CLASS_ID class_id, int shape_id, FLOAT32 rating, bool adapted, int config, int fontinfo_id, int fontinfo_id2) | |||||||||
int | GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) | |||||||||
void | DebugAdaptiveClassifier (TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results) | |||||||||
void | GetAdaptThresholds (TWERD *Word, const DENORM &denorm, const WERD_CHOICE &BestChoice, const WERD_CHOICE &BestRawChoice, FLOAT32 Thresholds[]) | |||||||||
PROTO_ID | MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) | |||||||||
int | MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) | |||||||||
void | MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, const DENORM &denorm, TBLOB *Blob) | |||||||||
void | PrintAdaptiveMatchResults (FILE *File, ADAPT_RESULTS *Results) | |||||||||
void | RemoveExtraPuncs (ADAPT_RESULTS *Results) | |||||||||
void | RemoveBadMatches (ADAPT_RESULTS *Results) | |||||||||
void | SetAdaptiveThreshold (FLOAT32 Threshold) | |||||||||
void | ShowBestMatchFor (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int shape_id, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn, ADAPT_RESULTS *Results) | |||||||||
STRING | ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const | |||||||||
int | ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const | |||||||||
int | ShapeIDToClassID (int shape_id) const | |||||||||
UNICHAR_ID * | BaselineClassifier (TBLOB *Blob, const DENORM &denorm, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) | |||||||||
int | CharNormClassifier (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results) | |||||||||
int | CharNormTrainingSample (bool pruner_only, const TrainingSample &sample, GenericVector< ShapeRating > *results) | |||||||||
UNICHAR_ID * | GetAmbiguities (TBLOB *Blob, const DENORM &denorm, CLASS_ID CorrectClass) | |||||||||
void | DoAdaptiveMatch (TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results) | |||||||||
void | AdaptToChar (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold) | |||||||||
void | DisplayAdaptedChar (TBLOB *blob, const DENORM &denorm, INT_CLASS_STRUCT *int_class) | |||||||||
int | AdaptableWord (TWERD *Word, const WERD_CHOICE &BestChoiceWord, const WERD_CHOICE &RawChoiceWord) | |||||||||
void | EndAdaptiveClassifier () | |||||||||
void | PrintAdaptiveStatistics (FILE *File) | |||||||||
void | SettupPass1 () | |||||||||
void | SettupPass2 () | |||||||||
void | AdaptiveClassifier (TBLOB *Blob, const DENORM &denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results) | |||||||||
void | ClassifyAsNoise (ADAPT_RESULTS *Results) | |||||||||
void | ResetAdaptiveClassifierInternal () | |||||||||
int | GetBaselineFeatures (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *CharNormArray, inT32 *BlobLength) | |||||||||
int | GetCharNormFeatures (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *PrunerNormArray, uinT8 *CharNormArray, inT32 *BlobLength, inT32 *FeatureOutlineIndex) | |||||||||
void | ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array) | |||||||||
bool | TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config) | |||||||||
void | UpdateAmbigsGroup (CLASS_ID class_id, const DENORM &denorm, TBLOB *Blob) | |||||||||
void | ResetFeaturesHaveBeenExtracted () | |||||||||
bool | AdaptiveClassifierIsFull () | |||||||||
bool | LooksLikeGarbage (const DENORM &denorm, TBLOB *blob) | |||||||||
void | RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox) | |||||||||
void | ClearCharNormArray (uinT8 *char_norm_array) | |||||||||
void | ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array) | |||||||||
void | ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) | |||||||||
INT_TEMPLATES | ReadIntTemplates (FILE *File) | |||||||||
void | WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset) | |||||||||
CLASS_ID | GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id) | |||||||||
void | ShowMatchDisplay () | |||||||||
UnicityTable< FontInfo > & | get_fontinfo_table () | |||||||||
UnicityTable< FontSet > & | get_fontset_table () | |||||||||
void | NormalizeOutlines (LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale) | |||||||||
FEATURE_SET | ExtractOutlineFeatures (TBLOB *Blob) | |||||||||
FEATURE_SET | ExtractPicoFeatures (TBLOB *Blob) | |||||||||
void | ReadClassFile () | |||||||||
![]() | ||||||||||
CCStruct () | ||||||||||
~CCStruct () | ||||||||||
![]() | ||||||||||
CUtil () | ||||||||||
~CUtil () | ||||||||||
void | read_variables (const char *filename, bool global_only) | |||||||||
![]() | ||||||||||
CCUtil () | ||||||||||
virtual | ~CCUtil () | |||||||||
void | main_setup (const char *argv0, const char *basename) | |||||||||
ParamsVectors * | params () |
Additional Inherited Members | |
![]() | |
bool | SegSearchDone (int num_futile_classifications) |
void | UpdateSegSearchNodes (int starting_col, SEG_SEARCH_PENDING_LIST *pending[], BestPathByColumn *best_path_by_column[], CHUNKS_RECORD *chunks_record, HEAP *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) |
void | ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const WERD_CHOICE *best_choice, SEG_SEARCH_PENDING_LIST *pending[], CHUNKS_RECORD *chunks_record, HEAP *pain_points, BlamerBundle *blamer_bundle) |
void | InitBlamerForSegSearch (const WERD_CHOICE *best_choice, CHUNKS_RECORD *chunks_record, HEAP *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug) |
void | FinishBlamerForSegSearch (const WERD_CHOICE *best_choice, BlamerBundle *blamer_bundle, STRING *blamer_debug) |
Definition at line 139 of file tesseractclass.h.
tesseract::Tesseract::Tesseract | ( | ) |
Definition at line 37 of file tesseractclass.cpp.
tesseract::Tesseract::~Tesseract | ( | ) |
Definition at line 398 of file tesseractclass.cpp.
BOOL8 tesseract::Tesseract::acceptable_number_string | ( | const char * | s, |
const char * | lengths | ||
) |
Definition at line 485 of file output.cpp.
ACCEPTABLE_WERD_TYPE tesseract::Tesseract::acceptable_word_string | ( | const UNICHARSET & | char_set, |
const char * | s, | ||
const char * | lengths | ||
) |
Definition at line 1284 of file control.cpp.
inT16 tesseract::Tesseract::alpha_count | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 659 of file reject.cpp.
void tesseract::Tesseract::ambigs_classify_and_output | ( | WERD_RES * | werd_res, |
ROW_RES * | row_res, | ||
BLOCK_RES * | block_res, | ||
const char * | label, | ||
FILE * | output_file | ||
) |
Definition at line 163 of file recogtraining.cpp.
PAGE_RES * tesseract::Tesseract::ApplyBoxes | ( | const STRING & | fname, |
bool | find_segmentation, | ||
BLOCK_LIST * | block_list | ||
) |
Definition at line 111 of file applybox.cpp.
Definition at line 786 of file applybox.cpp.
int tesseract::Tesseract::AutoPageSeg | ( | bool | single_column, |
bool | osd, | ||
bool | only_osd, | ||
BLOCK_LIST * | blocks, | ||
TO_BLOCK_LIST * | to_blocks, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Auto page segmentation. Divide the page image into blocks of uniform text linespacing and images.
Resolution (in ppi) is derived from the input image.
The output goes in the blocks list with corresponding TO_BLOCKs in the to_blocks list.
If single_column is true, then no attempt is made to divide the image into columns, but multiple blocks are still made if the text is of non-uniform linespacing.
If osd (orientation and script detection) is true then that is performed as well. If only_osd is true, then only orientation and script detection is performed. If osd is desired (osd or only_osd), then osd_tess must be another Tesseract that was initialized especially for osd, and the results will be output into osr (orientation and script result).
Definition at line 218 of file pagesegmain.cpp.
|
inline |
Definition at line 181 of file tesseractclass.h.
void tesseract::Tesseract::bigram_correction_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 419 of file control.cpp.
void tesseract::Tesseract::blamer_pass | ( | PAGE_RES * | page_res | ) |
Definition at line 684 of file control.cpp.
float tesseract::Tesseract::blob_noise_score | ( | TBLOB * | blob | ) |
Definition at line 844 of file fixspace.cpp.
void tesseract::Tesseract::break_noisiest_blob_word | ( | WERD_RES_LIST & | words | ) |
break_noisiest_blob_word() Find the word with the blob which looks like the worst noise. Break the word into two, deleting the noise blob.
Definition at line 699 of file fixspace.cpp.
SVMenuNode * tesseract::Tesseract::build_menu_new | ( | ) |
Definition at line 256 of file pgedit.cpp.
Definition at line 1388 of file control.cpp.
void tesseract::Tesseract::classify_word_and_language | ( | WordRecognizer | recognizer, |
BLOCK * | block, | ||
ROW * | row, | ||
WERD_RES * | word | ||
) |
Definition at line 795 of file control.cpp.
classify_word_pass1
Baseline normalize the word and pass it to Tess.
Definition at line 860 of file control.cpp.
classify_word_pass2
Control what to do with the word in pass 2
Definition at line 1026 of file control.cpp.
void tesseract::Tesseract::Clear | ( | ) |
Definition at line 413 of file tesseractclass.cpp.
float tesseract::Tesseract::ComputeCompatibleXheight | ( | WERD_RES * | word_res | ) |
Definition at line 96 of file fixxht.cpp.
void tesseract::Tesseract::convert_bad_unlv_chs | ( | WERD_RES * | word_res | ) |
Definition at line 666 of file docqual.cpp.
bool tesseract::Tesseract::ConvertStringToUnichars | ( | const char * | utf8, |
GenericVector< UNICHAR_ID > * | class_ids | ||
) |
Definition at line 536 of file applybox.cpp.
void tesseract::Tesseract::CorrectClassifyWords | ( | PAGE_RES * | page_res | ) |
Definition at line 764 of file applybox.cpp.
inT16 tesseract::Tesseract::count_alphanums | ( | const WERD_CHOICE & | word | ) |
Definition at line 474 of file output.cpp.
Definition at line 737 of file reject.cpp.
inT16 tesseract::Tesseract::count_alphas | ( | const WERD_CHOICE & | word | ) |
Definition at line 464 of file output.cpp.
Definition at line 131 of file docqual.cpp.
int tesseract::Tesseract::CountMisfitTops | ( | WERD_RES * | word_res | ) |
Definition at line 64 of file fixxht.cpp.
bool tesseract::Tesseract::create_cube_box_word | ( | Boxa * | char_boxes, |
int | num_chars, | ||
TBOX | word_box, | ||
BoxWord * | box_word | ||
) |
Definition at line 116 of file cube_control.cpp.
void tesseract::Tesseract::cube_combine_word | ( | CubeObject * | cube_obj, |
WERD_RES * | cube_word, | ||
WERD_RES * | tess_word | ||
) |
Definition at line 323 of file cube_control.cpp.
bool tesseract::Tesseract::cube_recognize | ( | CubeObject * | cube_obj, |
BLOCK * | block, | ||
WERD_RES * | word | ||
) |
Definition at line 366 of file cube_control.cpp.
CubeObject * tesseract::Tesseract::cube_recognize_word | ( | BLOCK * | block, |
WERD_RES * | word | ||
) |
Definition at line 286 of file cube_control.cpp.
Definition at line 275 of file cube_control.cpp.
debug_word
Process the whole image, but load word_config_ for the selected word(s).
Definition at line 636 of file pgedit.cpp.
Definition at line 344 of file fixspace.cpp.
void tesseract::Tesseract::do_re_display | ( | BOOL8(tesseract::Tesseract::*)(BLOCK *block, ROW *row, WERD_RES *word_res) | word_painter | ) |
Redisplay page
Definition at line 306 of file pgedit.cpp.
void tesseract::Tesseract::doc_and_block_rejection | ( | PAGE_RES_IT & | page_res_it, |
BOOL8 | good_quality_doc | ||
) |
Definition at line 238 of file docqual.cpp.
void tesseract::Tesseract::dont_allow_1Il | ( | WERD_RES * | word | ) |
Definition at line 705 of file reject.cpp.
void tesseract::Tesseract::dump_words | ( | WERD_RES_LIST & | perm, |
inT16 | score, | ||
inT16 | mode, | ||
BOOL8 | improved | ||
) |
Definition at line 450 of file fixspace.cpp.
void tesseract::Tesseract::end_tesseract | ( | ) |
Definition at line 431 of file tessedit.cpp.
inT16 tesseract::Tesseract::eval_word_spacing | ( | WERD_RES_LIST & | word_res_list | ) |
Definition at line 240 of file fixspace.cpp.
void tesseract::Tesseract::ExplodeRepeatedWord | ( | BLOB_CHOICE * | best_choice, |
PAGE_RES_IT * | page_res_it | ||
) |
Definition at line 1252 of file control.cpp.
bool tesseract::Tesseract::extract_cube_state | ( | CubeObject * | cube_obj, |
int * | num_chars, | ||
Boxa ** | char_boxes, | ||
CharSamp *** | char_samples | ||
) |
Definition at line 65 of file cube_control.cpp.
Definition at line 975 of file docqual.cpp.
void tesseract::Tesseract::fill_werd_res | ( | const BoxWord & | cube_box_word, |
WERD_CHOICE * | cube_werd_choice, | ||
const char * | cube_best_str, | ||
WERD_RES * | tess_werd_res | ||
) |
Definition at line 454 of file cube_control.cpp.
bool tesseract::Tesseract::FindSegmentation | ( | const GenericVector< UNICHAR_ID > & | target_text, |
WERD_RES * | word_res | ||
) |
Definition at line 560 of file applybox.cpp.
inT16 tesseract::Tesseract::first_alphanum_index | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 633 of file reject.cpp.
inT16 tesseract::Tesseract::first_alphanum_offset | ( | const char * | word, |
const char * | word_lengths | ||
) |
Definition at line 646 of file reject.cpp.
void tesseract::Tesseract::fix_fuzzy_space_list | ( | WERD_RES_LIST & | best_perm, |
ROW * | row, | ||
BLOCK * | block | ||
) |
Definition at line 146 of file fixspace.cpp.
void tesseract::Tesseract::fix_fuzzy_spaces | ( | ETEXT_DESC * | monitor, |
inT32 | word_count, | ||
PAGE_RES * | page_res | ||
) |
Definition at line 49 of file fixspace.cpp.
void tesseract::Tesseract::fix_noisy_space_list | ( | WERD_RES_LIST & | best_perm, |
ROW * | row, | ||
BLOCK * | block | ||
) |
Definition at line 652 of file fixspace.cpp.
void tesseract::Tesseract::fix_rep_char | ( | PAGE_RES_IT * | page_res_it | ) |
fix_rep_char() The word is a repeated char. (Leader.) Find the repeated character. Create the appropriate single-word or multi-word sequence according to the size of spaces in between blobs, and correct the classifications where some of the characters disagree with the majority.
Definition at line 1200 of file control.cpp.
Definition at line 618 of file fixspace.cpp.
Definition at line 586 of file fixspace.cpp.
void tesseract::Tesseract::flip_0O | ( | WERD_RES * | word | ) |
Definition at line 856 of file reject.cpp.
void tesseract::Tesseract::flip_hyphens | ( | WERD_RES * | word | ) |
Definition at line 796 of file reject.cpp.
void tesseract::Tesseract::font_recognition_pass | ( | PAGE_RES * | page_res | ) |
font_recognition_pass
Smooth the fonts for the document.
Definition at line 1590 of file control.cpp.
inT16 tesseract::Tesseract::fp_eval_word_spacing | ( | WERD_RES_LIST & | word_res_list | ) |
Definition at line 914 of file fixspace.cpp.
GARBAGE_LEVEL tesseract::Tesseract::garbage_word | ( | WERD_RES * | word, |
BOOL8 | ok_dict_word | ||
) |
Definition at line 689 of file docqual.cpp.
UNICHAR_ID tesseract::Tesseract::get_rep_char | ( | WERD_RES * | word | ) |
Definition at line 349 of file output.cpp.
|
inline |
Definition at line 219 of file tesseractclass.h.
|
inline |
Definition at line 914 of file tesseractclass.h.
|
inline |
Definition at line 193 of file tesseractclass.h.
|
inline |
Definition at line 190 of file tesseractclass.h.
bool tesseract::Tesseract::init_cube_objects | ( | bool | load_combiner, |
TessdataManager * | tessdata_manager | ||
) |
Definition at line 202 of file cube_control.cpp.
FILE * tesseract::Tesseract::init_recog_training | ( | const STRING & | fname | ) |
Definition at line 37 of file recogtraining.cpp.
int tesseract::Tesseract::init_tesseract | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params | ||
) |
Definition at line 270 of file tessedit.cpp.
|
inline |
Definition at line 352 of file tesseractclass.h.
int tesseract::Tesseract::init_tesseract_internal | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params | ||
) |
Definition at line 349 of file tessedit.cpp.
bool tesseract::Tesseract::init_tesseract_lang_data | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language, | ||
OcrEngineMode | oem, | ||
char ** | configs, | ||
int | configs_size, | ||
const GenericVector< STRING > * | vars_vec, | ||
const GenericVector< STRING > * | vars_values, | ||
bool | set_only_init_params | ||
) |
Definition at line 98 of file tessedit.cpp.
int tesseract::Tesseract::init_tesseract_lm | ( | const char * | arg0, |
const char * | textbase, | ||
const char * | language | ||
) |
Definition at line 420 of file tessedit.cpp.
void tesseract::Tesseract::make_reject_map | ( | WERD_RES * | word, |
BLOB_CHOICE_LIST_CLIST * | blob_choices, | ||
ROW * | row, | ||
inT16 | pass | ||
) |
Definition at line 197 of file fixspace.cpp.
match_word_pass2
Baseline normalize the word and pass it to Tess.
Definition at line 1098 of file control.cpp.
void tesseract::Tesseract::MaximallyChopWord | ( | const GenericVector< TBOX > & | boxes, |
BLOCK * | block, | ||
ROW * | row, | ||
WERD_RES * | word_res | ||
) |
Definition at line 257 of file applybox.cpp.
|
inline |
Definition at line 160 of file tesseractclass.h.
|
inline |
Definition at line 209 of file tesseractclass.h.
Definition at line 987 of file docqual.cpp.
BOOL8 tesseract::Tesseract::non_0_digit | ( | const UNICHARSET & | ch_set, |
UNICHAR_ID | unichar_id | ||
) |
Definition at line 981 of file reject.cpp.
BOOL8 tesseract::Tesseract::non_O_upper | ( | const UNICHARSET & | ch_set, |
UNICHAR_ID | unichar_id | ||
) |
Definition at line 977 of file reject.cpp.
|
inline |
Definition at line 216 of file tesseractclass.h.
Definition at line 456 of file reject.cpp.
void tesseract::Tesseract::output_pass | ( | PAGE_RES_IT & | page_res_it, |
const TBOX * | target_word_box | ||
) |
Definition at line 72 of file output.cpp.
void tesseract::Tesseract::ParseLanguageString | ( | const char * | lang_str, |
GenericVector< STRING > * | to_load, | ||
GenericVector< STRING > * | not_to_load | ||
) |
Definition at line 234 of file tessedit.cpp.
void tesseract::Tesseract::pgeditor_main | ( | int | width, |
int | height, | ||
PAGE_RES * | page_res | ||
) |
Top-level editor operation: set up a new window and a corresponding event handler.
Definition at line 336 of file pgedit.cpp.
|
inline |
Definition at line 164 of file tesseractclass.h.
|
inline |
Definition at line 167 of file tesseractclass.h.
BOOL8 tesseract::Tesseract::potential_word_crunch | ( | WERD_RES * | word, |
GARBAGE_LEVEL | garbage_level, | ||
BOOL8 | ok_dict_word | ||
) |
Definition at line 548 of file docqual.cpp.
void tesseract::Tesseract::PrepareForPageseg | ( | ) |
Definition at line 461 of file tesseractclass.cpp.
void tesseract::Tesseract::PrepareForTessOCR | ( | BLOCK_LIST * | block_list, |
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Definition at line 497 of file tesseractclass.cpp.
Definition at line 396 of file pgedit.cpp.
void tesseract::Tesseract::process_image_event | ( | const SVEvent & | event | ) |
User has done something in the image window - mouse down or up. Work out what it is and do something with it. If DOWN - just remember where it was. If UP - for each word in the selected area do the operation defined by the current mode.
Definition at line 563 of file pgedit.cpp.
void tesseract::Tesseract::process_selected_words | ( | PAGE_RES * | page_res, |
TBOX & | selection_box, | ||
BOOL8(tesseract::Tesseract::*)(BLOCK *block, ROW *row, WERD_RES *word_res) | word_processor | ||
) |
Definition at line 31 of file pagewalk.cpp.
bool tesseract::Tesseract::ProcessTargetWord | ( | const TBOX & | word_box, |
const TBOX & | target_word_box, | ||
const char * | word_config, | ||
int | pass | ||
) |
Definition at line 128 of file control.cpp.
void tesseract::Tesseract::quality_based_rejection | ( | PAGE_RES_IT & | page_res_it, |
BOOL8 | good_quality_doc | ||
) |
Definition at line 143 of file docqual.cpp.
void tesseract::Tesseract::read_config_file | ( | const char * | filename, |
SetParamConstraint | constraint | ||
) |
bool tesseract::Tesseract::recog_all_words | ( | PAGE_RES * | page_res, |
ETEXT_DESC * | monitor, | ||
const TBOX * | target_word_box, | ||
const char * | word_config, | ||
int | dopasses | ||
) |
Walk the page_res, recognizing all the words. If monitor is not null, it is used as a progress monitor/timeout/cancel. If dopasses is 0, all recognition passes are run, 1 just pass 1, 2 passes 2 and higher. If target_word_box is not null, special things are done to words that overlap the target_word_box: if word_config is not null, the word config file is read for just the target word(s), otherwise, on pass 2 and beyond ONLY the target words are processed (Jetsoft modification.) Returns false if we cancelled prematurely.
page_res | page structure |
monitor | progress monitor |
word_config | word_config file |
target_word_box | specifies just to extract a rectangle |
dopasses | 0 - all, 1 just pass 1, 2 passes 2 and higher |
Definition at line 178 of file control.cpp.
recog_interactive
Recognize a single word in interactive mode.
block | block |
row | row of word |
word_res | word to recognise |
Definition at line 97 of file control.cpp.
Definition at line 72 of file control.cpp.
void tesseract::Tesseract::recog_training_segmented | ( | const STRING & | fname, |
PAGE_RES * | page_res, | ||
volatile ETEXT_DESC * | monitor, | ||
FILE * | output_file | ||
) |
Definition at line 88 of file recogtraining.cpp.
void tesseract::Tesseract::recog_word | ( | WERD_RES * | word, |
BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
) |
Definition at line 54 of file tfacepp.cpp.
void tesseract::Tesseract::recog_word_recursive | ( | WERD_RES * | word, |
BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
) |
Definition at line 109 of file tfacepp.cpp.
void tesseract::Tesseract::recognize_page | ( | STRING & | image_name | ) |
void tesseract::Tesseract::reject_edge_blobs | ( | WERD_RES * | word | ) |
Definition at line 427 of file reject.cpp.
void tesseract::Tesseract::reject_I_1_L | ( | WERD_RES * | word | ) |
Definition at line 303 of file reject.cpp.
void tesseract::Tesseract::reject_mostly_rejects | ( | WERD_RES * | word | ) |
Definition at line 752 of file reject.cpp.
void tesseract::Tesseract::rejection_passes | ( | PAGE_RES * | page_res, |
ETEXT_DESC * | monitor, | ||
const TBOX * | target_word_box, | ||
const char * | word_config | ||
) |
Definition at line 588 of file control.cpp.
Definition at line 761 of file reject.cpp.
void tesseract::Tesseract::ReportFailedBox | ( | int | boxfile_lineno, |
TBOX | box, | ||
const char * | box_ch, | ||
const char * | err_msg | ||
) |
void tesseract::Tesseract::ReportXhtFixResult | ( | bool | accept_new_word, |
float | new_x_ht, | ||
WERD_RES * | word, | ||
WERD_RES * | new_word | ||
) |
Definition at line 955 of file control.cpp.
void tesseract::Tesseract::ReSegmentByClassification | ( | PAGE_RES * | page_res | ) |
Definition at line 510 of file applybox.cpp.
bool tesseract::Tesseract::ResegmentCharBox | ( | PAGE_RES * | page_res, |
const TBOX * | prev_box, | ||
const TBOX & | box, | ||
const TBOX & | next_box, | ||
const char * | correct_text | ||
) |
Definition at line 341 of file applybox.cpp.
bool tesseract::Tesseract::ResegmentWordBox | ( | BLOCK_LIST * | block_list, |
const TBOX & | box, | ||
const TBOX & | next_box, | ||
const char * | correct_text | ||
) |
Definition at line 439 of file applybox.cpp.
void tesseract::Tesseract::ResetAdaptiveClassifier | ( | ) |
Definition at line 433 of file tesseractclass.cpp.
void tesseract::Tesseract::ResetDocumentDictionary | ( | ) |
Definition at line 441 of file tesseractclass.cpp.
|
inline |
Definition at line 156 of file tesseractclass.h.
bool tesseract::Tesseract::RetryWithLanguage | ( | WERD_RES * | word, |
BLOCK * | block, | ||
ROW * | row, | ||
WordRecognizer | recognizer | ||
) |
Definition at line 756 of file control.cpp.
|
inline |
Definition at line 213 of file tesseractclass.h.
void tesseract::Tesseract::run_cube_combiner | ( | PAGE_RES * | page_res | ) |
Definition at line 241 of file cube_control.cpp.
Definition at line 786 of file reject.cpp.
|
inline |
Definition at line 196 of file tesseractclass.h.
|
inline |
Definition at line 199 of file tesseractclass.h.
void tesseract::Tesseract::SearchForText | ( | const GenericVector< BLOB_CHOICE_LIST * > * | choices, |
int | choices_pos, | ||
int | choices_length, | ||
const GenericVector< UNICHAR_ID > & | target_text, | ||
int | text_index, | ||
float | rating, | ||
GenericVector< int > * | segmentation, | ||
float * | best_rating, | ||
GenericVector< int > * | best_segmentation | ||
) |
Definition at line 625 of file applybox.cpp.
int tesseract::Tesseract::SegmentPage | ( | const STRING * | input_file, |
BLOCK_LIST * | blocks, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr | ||
) |
Segment the page according to the current value of tessedit_pageseg_mode. pix_binary_ is used as the source image and should not be NULL. On return the blocks list owns all the constructed page layout.
Definition at line 107 of file pagesegmain.cpp.
|
inline |
Definition at line 170 of file tesseractclass.h.
|
inline |
Definition at line 187 of file tesseractclass.h.
void tesseract::Tesseract::set_unlv_suspects | ( | WERD_RES * | word | ) |
Definition at line 371 of file output.cpp.
void tesseract::Tesseract::set_word_fonts | ( | WERD_RES * | word, |
BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
) |
set_word_fonts
Get the fonts for the word.
Definition at line 1500 of file control.cpp.
void tesseract::Tesseract::SetBlackAndWhitelist | ( | ) |
Definition at line 448 of file tesseractclass.cpp.
void tesseract::Tesseract::SetEquationDetect | ( | EquationDetect * | detector | ) |
Definition at line 427 of file tesseractclass.cpp.
|
inline |
Definition at line 202 of file tesseractclass.h.
PAGE_RES * tesseract::Tesseract::SetupApplyBoxes | ( | const GenericVector< TBOX > & | boxes, |
BLOCK_LIST * | block_list | ||
) |
Definition at line 197 of file applybox.cpp.
ColumnFinder * tesseract::Tesseract::SetupPageSegAndDetectOrientation | ( | bool | single_column, |
bool | osd, | ||
bool | only_osd, | ||
BLOCK_LIST * | blocks, | ||
Tesseract * | osd_tess, | ||
OSResults * | osr, | ||
TO_BLOCK_LIST * | to_blocks, | ||
Pix ** | photo_mask_pix, | ||
Pix ** | music_mask_pix | ||
) |
Sets up auto page segmentation, determines the orientation, and corrects it. Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to facilitate testing. photo_mask_pix is a pointer to a NULL pointer that will be filled on return with the leptonica photo mask, which must be pixDestroyed by the caller. to_blocks is an empty list that will be filled with (usually a single) block that is used during layout analysis. This ugly API is required because of the possibility of a UNLV zone file. TODO(rays) clean this up. See AutoPageSeg for other arguments. The returned ColumnFinder must be deleted after use.
Definition at line 281 of file pagesegmain.cpp.
void tesseract::Tesseract::SetupUniversalFontIds | ( | ) |
Definition at line 399 of file tessedit.cpp.
void tesseract::Tesseract::SetupWordScripts | ( | BLOCK_LIST * | blocks | ) |
|
inline |
Definition at line 184 of file tesseractclass.h.
void tesseract::Tesseract::split_and_recog_word | ( | WERD_RES * | word, |
BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
) |
Definition at line 177 of file tfacepp.cpp.
BOOL8 tesseract::Tesseract::terrible_word_crunch | ( | WERD_RES * | word, |
GARBAGE_LEVEL | garbage_level | ||
) |
Definition at line 510 of file docqual.cpp.
BOOL8 tesseract::Tesseract::tess_acceptable_word | ( | WERD_CHOICE * | word_choice, |
WERD_CHOICE * | raw_choice | ||
) |
Definition at line 102 of file tessbox.cpp.
void tesseract::Tesseract::tess_add_doc_word | ( | WERD_CHOICE * | word_choice | ) |
Definition at line 114 of file tessbox.cpp.
void tesseract::Tesseract::tess_segment_pass1 | ( | WERD_RES * | word, |
BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
) |
Definition at line 42 of file tessbox.cpp.
void tesseract::Tesseract::tess_segment_pass2 | ( | WERD_RES * | word, |
BLOB_CHOICE_LIST_CLIST * | blob_choices | ||
) |
Definition at line 73 of file tessbox.cpp.
Definition at line 687 of file reject.cpp.
|
inline |
Definition at line 206 of file tesseractclass.h.
void tesseract::Tesseract::TidyUp | ( | PAGE_RES * | page_res | ) |
Definition at line 702 of file applybox.cpp.
void tesseract::Tesseract::tilde_crunch | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 424 of file docqual.cpp.
void tesseract::Tesseract::tilde_delete | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 596 of file docqual.cpp.
Definition at line 976 of file control.cpp.
Definition at line 515 of file fixspace.cpp.
Definition at line 120 of file docqual.cpp.
void tesseract::Tesseract::unrej_good_quality_words | ( | PAGE_RES_IT & | page_res_it | ) |
Definition at line 166 of file docqual.cpp.
Definition at line 50 of file adaptions.cpp.
BOOL8 tesseract::Tesseract::word_blank_and_set_display | ( | BLOCK * | block, |
ROW * | row, | ||
WERD_RES * | word_res | ||
) |
Definition at line 711 of file pgedit.cpp.
Normalize word and display in word window
Definition at line 724 of file pgedit.cpp.
Definition at line 68 of file docqual.cpp.
void tesseract::Tesseract::word_char_quality | ( | WERD_RES * | word, |
ROW * | row, | ||
inT16 * | match_count, | ||
inT16 * | accepted_match_count | ||
) |
Definition at line 100 of file docqual.cpp.
BOOL8 tesseract::Tesseract::word_contains_non_1_digit | ( | const char * | word, |
const char * | word_lengths | ||
) |
CRUNCH_MODE tesseract::Tesseract::word_deletable | ( | WERD_RES * | word, |
inT16 & | delete_mode | ||
) |
Definition at line 904 of file docqual.cpp.
word_display() Word Processor
Display a word according to its display modes
Definition at line 747 of file pgedit.cpp.
Definition at line 80 of file docqual.cpp.
word_set_display() Word processor
Display word according to current display mode settings
Definition at line 931 of file pgedit.cpp.
Definition at line 764 of file fixspace.cpp.
void tesseract::Tesseract::write_results | ( | PAGE_RES_IT & | page_res_it, |
char | newline_type, | ||
BOOL8 | force_eol | ||
) |
Definition at line 138 of file output.cpp.
int tesseract::Tesseract::applybox_debug = 1 |
"Debug level"
Definition at line 693 of file tesseractclass.h.
char* tesseract::Tesseract::applybox_exposure_pattern = ".exp" |
"Exposure value follows this pattern in the image" " filename. The name of the image files are expected" " to be in the form [lang].[fontname].exp[num].tif"
Definition at line 698 of file tesseractclass.h.
bool tesseract::Tesseract::applybox_learn_chars_and_char_frags_mode = false |
"Learn both character fragments (as is done in the" " special low exposure mode) as well as unfragmented" " characters."
Definition at line 702 of file tesseractclass.h.
bool tesseract::Tesseract::applybox_learn_ngrams_mode = false |
"Each bounding box is assumed to contain ngrams. Only" " learn the ngrams whose outlines overlap horizontally."
Definition at line 705 of file tesseractclass.h.
int tesseract::Tesseract::applybox_page = 0 |
"Page number to apply boxes from"
Definition at line 694 of file tesseractclass.h.
int tesseract::Tesseract::bidi_debug = 0 |
"Debug level for BiDi"
Definition at line 692 of file tesseractclass.h.
bool tesseract::Tesseract::bland_unrej = false |
"unrej potential with no chekcs"
Definition at line 787 of file tesseractclass.h.
char* tesseract::Tesseract::chs_leading_punct = "('`\"" |
"Leading punctuation"
Definition at line 725 of file tesseractclass.h.
char* tesseract::Tesseract::chs_trailing_punct1 = ").,;:?!" |
"1st Trailing punctuation"
Definition at line 726 of file tesseractclass.h.
char* tesseract::Tesseract::chs_trailing_punct2 = ")'`\"" |
"2nd Trailing punctuation"
Definition at line 727 of file tesseractclass.h.
char* tesseract::Tesseract::conflict_set_I_l_1 = "Il1[]" |
"Il1 conflict set"
Definition at line 878 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_accept_ok = true |
"Use acceptability in okstring"
Definition at line 814 of file tesseractclass.h.
int tesseract::Tesseract::crunch_debug = 0 |
"As it says"
Definition at line 823 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_cert = -10.0 |
"POTENTIAL crunch cert lt this"
Definition at line 803 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_high_word = 1.5 |
"Del if word gt xht x this above bl"
Definition at line 808 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_low_word = 0.5 |
"Del if word gt xht x this below bl"
Definition at line 809 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_max_ht = 3.0 |
"Del if word ht gt xht x this"
Definition at line 805 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_min_ht = 0.7 |
"Del if word ht lt xht x this"
Definition at line 804 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_min_width = 3.0 |
"Del if word width lt xht x this"
Definition at line 806 of file tesseractclass.h.
double tesseract::Tesseract::crunch_del_rating = 60 |
"POTENTIAL crunch rating lt this"
Definition at line 802 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_early_convert_bad_unlv_chs = false |
"Take out ~^ early?"
Definition at line 793 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_early_merge_tess_fails = true |
"Before word crunch?"
Definition at line 792 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_include_numerals = false |
"Fiddle alpha figures"
Definition at line 817 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_leave_accept_strings = false |
"Dont pot crunch sensible strings"
Definition at line 816 of file tesseractclass.h.
int tesseract::Tesseract::crunch_leave_lc_strings = 4 |
"Dont crunch words with long lower case strings"
Definition at line 819 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_leave_ok_strings = true |
"Dont touch sensible strings"
Definition at line 813 of file tesseractclass.h.
int tesseract::Tesseract::crunch_leave_uc_strings = 4 |
"Dont crunch words with long lower case strings"
Definition at line 821 of file tesseractclass.h.
int tesseract::Tesseract::crunch_long_repetitions = 3 |
"Crunch words with long repetitions"
Definition at line 822 of file tesseractclass.h.
double tesseract::Tesseract::crunch_poor_garbage_cert = -9.0 |
"crunch garbage cert lt this"
Definition at line 797 of file tesseractclass.h.
double tesseract::Tesseract::crunch_poor_garbage_rate = 60 |
"crunch garbage rating lt this"
Definition at line 798 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_pot_garbage = true |
"POTENTIAL crunch garbage"
Definition at line 801 of file tesseractclass.h.
int tesseract::Tesseract::crunch_pot_indicators = 1 |
"How many potential indicators needed"
Definition at line 812 of file tesseractclass.h.
double tesseract::Tesseract::crunch_pot_poor_cert = -8.0 |
"POTENTIAL crunch cert lt this"
Definition at line 800 of file tesseractclass.h.
double tesseract::Tesseract::crunch_pot_poor_rate = 40 |
"POTENTIAL crunch rating lt this"
Definition at line 799 of file tesseractclass.h.
int tesseract::Tesseract::crunch_rating_max = 10 |
"For adj length in rating per ch"
Definition at line 811 of file tesseractclass.h.
double tesseract::Tesseract::crunch_small_outlines_size = 0.6 |
"Small if lt xht x this"
Definition at line 810 of file tesseractclass.h.
bool tesseract::Tesseract::crunch_terrible_garbage = true |
"As it says"
Definition at line 795 of file tesseractclass.h.
double tesseract::Tesseract::crunch_terrible_rating = 80.0 |
"crunch rating lt this"
Definition at line 794 of file tesseractclass.h.
int tesseract::Tesseract::cube_debug_level = 1 |
"Print cube debug info."
Definition at line 751 of file tesseractclass.h.
bool tesseract::Tesseract::debug_acceptable_wds = false |
"Dump word pass/fail chk"
Definition at line 724 of file tesseractclass.h.
int tesseract::Tesseract::debug_fix_space_level = 0 |
"Contextual fixspace debug"
Definition at line 829 of file tesseractclass.h.
int tesseract::Tesseract::debug_x_ht_level = 0 |
"Reestimate debug"
Definition at line 723 of file tesseractclass.h.
bool tesseract::Tesseract::docqual_excuse_outline_errs = false |
"Allow outline errs in unrejection?"
Definition at line 755 of file tesseractclass.h.
char* tesseract::Tesseract::file_type = ".tif" |
"Filename extension"
Definition at line 885 of file tesseractclass.h.
int tesseract::Tesseract::fixsp_done_mode = 1 |
"What constitutes done for spacing"
Definition at line 828 of file tesseractclass.h.
int tesseract::Tesseract::fixsp_non_noise_limit = 1 |
"How many non-noise blbs either side?"
Definition at line 825 of file tesseractclass.h.
double tesseract::Tesseract::fixsp_small_outlines_size = 0.28 |
"Small if lt xht x this"
Definition at line 826 of file tesseractclass.h.
bool tesseract::Tesseract::interactive_display_mode = false |
"Run interactively?"
Definition at line 884 of file tesseractclass.h.
double tesseract::Tesseract::min_orientation_margin = 7.0 |
"Min acceptable orientation margin"
Definition at line 894 of file tesseractclass.h.
int tesseract::Tesseract::min_sane_x_ht_pixels = 8 |
"Reject any x-ht lt or eq than this"
Definition at line 879 of file tesseractclass.h.
char* tesseract::Tesseract::numeric_punctuation = ".," |
"Punct. chs expected WITHIN numbers"
Definition at line 831 of file tesseractclass.h.
int tesseract::Tesseract::ocr_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT |
"Whether to use the top-line splitting process for Devanagari " "documents while performing ocr."
Definition at line 684 of file tesseractclass.h.
char* tesseract::Tesseract::ok_repeated_ch_non_alphanum_wds = "-?*\075" |
"Allow NN to unrej"
Definition at line 877 of file tesseractclass.h.
char* tesseract::Tesseract::outlines_2 = "ij!?%\":;" |
"Non standard number of outlines"
Definition at line 753 of file tesseractclass.h.
char* tesseract::Tesseract::outlines_odd = "%| " |
"Non standard number of outlines"
Definition at line 752 of file tesseractclass.h.
int tesseract::Tesseract::pageseg_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT |
"Whether to use the top-line splitting process for Devanagari " "documents while performing page-segmentation."
Definition at line 680 of file tesseractclass.h.
int tesseract::Tesseract::paragraph_debug_level = 0 |
"Print paragraph debug info."
Definition at line 750 of file tesseractclass.h.
double tesseract::Tesseract::quality_blob_pc = 0.0 |
"good_quality_doc gte good blobs limit"
Definition at line 729 of file tesseractclass.h.
double tesseract::Tesseract::quality_char_pc = 0.95 |
"good_quality_doc gte good char limit"
Definition at line 732 of file tesseractclass.h.
int tesseract::Tesseract::quality_min_initial_alphas_reqd = 2 |
"alphas in a good word"
Definition at line 733 of file tesseractclass.h.
double tesseract::Tesseract::quality_outline_pc = 1.0 |
"good_quality_doc lte outline error limit"
Definition at line 731 of file tesseractclass.h.
double tesseract::Tesseract::quality_rej_pc = 0.08 |
"good_quality_doc lte rejection limit"
Definition at line 728 of file tesseractclass.h.
double tesseract::Tesseract::quality_rowrej_pc = 1.1 |
"good_quality_doc gte good char limit"
Definition at line 789 of file tesseractclass.h.
bool tesseract::Tesseract::rej_1Il_trust_permuter_type = true |
"Dont double check"
Definition at line 868 of file tesseractclass.h.
bool tesseract::Tesseract::rej_1Il_use_dict_word = false |
"Use dictword test"
Definition at line 867 of file tesseractclass.h.
bool tesseract::Tesseract::rej_alphas_in_number_perm = false |
"Extend permuter check"
Definition at line 873 of file tesseractclass.h.
bool tesseract::Tesseract::rej_trust_doc_dawg = false |
"Use DOC dawg in 11l conf. detector"
Definition at line 866 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_good_perm = true |
"Individual rejection control"
Definition at line 871 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_sensible_wd = false |
"Extend permuter check"
Definition at line 872 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_tess_accepted = true |
"Individual rejection control"
Definition at line 869 of file tesseractclass.h.
bool tesseract::Tesseract::rej_use_tess_blanks = true |
"Individual rejection control"
Definition at line 870 of file tesseractclass.h.
double tesseract::Tesseract::rej_whole_of_mostly_reject_word_fract = 0.85 |
"if >this fract"
Definition at line 874 of file tesseractclass.h.
bool tesseract::Tesseract::save_blob_choices = false |
"Save the results of the recognition step" " (blob_choices) within the corresponding WERD_CHOICE"
Definition at line 746 of file tesseractclass.h.
double tesseract::Tesseract::suspect_accept_rating = -999.9 |
"Accept good rating limit"
Definition at line 850 of file tesseractclass.h.
bool tesseract::Tesseract::suspect_constrain_1Il = false |
"UNLV keep 1Il chars rejected"
Definition at line 848 of file tesseractclass.h.
int tesseract::Tesseract::suspect_level = 99 |
"Suspect marker level"
Definition at line 843 of file tesseractclass.h.
double tesseract::Tesseract::suspect_rating_per_ch = 999.9 |
"Dont touch bad rating limit"
Definition at line 849 of file tesseractclass.h.
int tesseract::Tesseract::suspect_short_words = 2 |
"Dont Suspect dict wds longer than this"
Definition at line 847 of file tesseractclass.h.
int tesseract::Tesseract::suspect_space_level = 100 |
"Min suspect level for rejecting spaces"
Definition at line 845 of file tesseractclass.h.
int tesseract::Tesseract::tessdata_manager_debug_level = 0 |
"Debug level for TessdataManager functions."
Definition at line 888 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_adapt_to_char_fragments = true |
"Adapt to words that contain " " a character composed from fragments"
Definition at line 689 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_adaption_debug = false |
"Generate and print debug information for adaption"
Definition at line 691 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_ambigs_training = false |
"Perform training for ambiguities"
Definition at line 676 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_bigram_debug = 0 |
"Amount of debug output for bigram " "correction."
Definition at line 722 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_char_blacklist = "" |
"Blacklist of chars not to recognize"
Definition at line 672 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_char_whitelist = "" |
"Whitelist of chars to recognize"
Definition at line 674 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_consistent_reps = true |
"Force all rep chars the same"
Definition at line 857 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_boxfile = false |
"Output text with boxes"
Definition at line 880 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_create_hocr = false |
"Write .html hOCR output file"
Definition at line 840 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_block_rejection = false |
"Block and Row stats"
Definition at line 718 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_doc_rejection = false |
"Page stats"
Definition at line 784 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_fonts = false |
"Output font info per char"
Definition at line 717 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_debug_quality_metrics = false |
"Output data to debug file"
Definition at line 786 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_display_outwords = false |
"Draw output words"
Definition at line 706 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dont_blkrej_good_wds = false |
"Use word segmentation quality metric"
Definition at line 773 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dont_rowrej_good_wds = false |
"Use word segmentation quality metric"
Definition at line 775 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dump_choices = false |
"Dump char choices"
Definition at line 708 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_dump_pageseg_images = false |
"Dump intermediate images made during page segmentation"
Definition at line 662 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_enable_bigram_correction = false |
"Enable correction based on the word bigram dictionary."
Definition at line 720 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_enable_doc_dict = true |
"Add words to the document dictionary"
Definition at line 716 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_fix_fuzzy_spaces = true |
"Try to improve fuzzy spaces"
Definition at line 710 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_fix_hyphens = true |
"Crunch double hyphens?"
Definition at line 713 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_flip_0O = true |
"Contextual 0O O0 flips"
Definition at line 861 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_good_doc_still_rowrej_wd = 1.1 |
"rej good doc wd if more than this fraction rejected"
Definition at line 781 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_good_quality_unrej = true |
"Reduce rejection on good docs"
Definition at line 757 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_image_border = 2 |
"Rej blbs near image edge limit"
Definition at line 875 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_init_config_only = false |
"Only initialize with the config file. Useful if the instance is " "not going to be used for OCR but say only for layout analysis."
Definition at line 899 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_load_sublangs = "" |
"List of languages to load with this one"
Definition at line 890 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_lower_flip_hyphen = 1.5 |
"Aspect ratio dot/hyphen test"
Definition at line 863 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_make_boxes_from_boxes = false |
"Generate more boxes from boxed chars"
Definition at line 660 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_matcher_log = false |
"Log matcher activity"
Definition at line 741 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_minimal_rej_pass1 = false |
"Do minimal rejection on pass 1 output"
Definition at line 739 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_minimal_rejection = false |
"Only reject tess failures"
Definition at line 851 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_ocr_engine_mode = tesseract::OEM_TESSERACT_ONLY |
"Which OCR engine(s) to run (Tesseract, Cube, both). Defaults" " to loading and running only Tesseract (no Cube, no combiner)." " (Values from OcrEngineMode enum in tesseractclass.h)"
Definition at line 670 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_ok_mode = 5 |
"Acceptance decision algorithm"
Definition at line 859 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_override_permuter = true |
"According to dict_word"
Definition at line 886 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_page_number = -1 |
"-1 -> All pages, else specific page to process"
Definition at line 882 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_pageseg_mode = PSM_SINGLE_BLOCK |
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," " 5=line, 6=word, 7=char" " (Values from PageSegMode enum in publictypes.h)"
Definition at line 666 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_prefer_joined_punct = false |
"Reward punctuation joins"
Definition at line 827 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_preserve_blk_rej_perfect_wds = true |
"Only rej partially rejected words in block rejection"
Definition at line 769 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_preserve_min_wd_len = 2 |
"Only preserve wds longer than this"
Definition at line 777 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_preserve_row_rej_perfect_wds = true |
"Only rej partially rejected words in row rejection"
Definition at line 771 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_redo_xheight = true |
"Check/Correct x-height"
Definition at line 714 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_reject_bad_qual_wds = true |
"Reject all bad quality wds"
Definition at line 783 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_block_percent = 45.00 |
"%rej allowed before rej whole block"
Definition at line 762 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_doc_percent = 65.00 |
"%rej allowed before rej whole doc"
Definition at line 760 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_reject_mode = 0 |
"Rejection algorithm"
Definition at line 858 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_reject_row_percent = 40.00 |
"%rej allowed before rej whole row"
Definition at line 764 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_rejection_debug = false |
"Rejection debug"
Definition at line 860 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_resegment_from_boxes = false |
"Take segmentation and labeling from box file"
Definition at line 654 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_resegment_from_line_boxes = false |
"Conversion of word/line box file to char box file"
Definition at line 656 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_row_rej_good_docs = true |
"Apply row rejection to good docs"
Definition at line 779 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_tess_adapt_to_rejmap = false |
"Use reject map to control Tesseract adaption"
Definition at line 735 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_tess_adaption_mode = 0x27 |
"Adaptation decision algorithm for tess"
Definition at line 737 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_test_adaption = false |
"Test adaption criteria"
Definition at line 740 of file tesseractclass.h.
int tesseract::Tesseract::tessedit_test_adaption_mode = 3 |
"Adaptation decision algorithm for tess"
Definition at line 743 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_train_from_boxes = false |
"Generate training data from boxed chars"
Definition at line 658 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_training_tess = false |
"Call Tess to learn blobs"
Definition at line 707 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_unrej_any_wd = false |
"Dont bother with word plausibility"
Definition at line 712 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_upper_flip_hyphen = 1.8 |
"Aspect ratio dot/hyphen test"
Definition at line 865 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_use_reject_spaces = true |
"Reject spaces?"
Definition at line 758 of file tesseractclass.h.
double tesseract::Tesseract::tessedit_whole_wd_rej_row_percent = 70.00 |
"Number of row rejects in whole word rejects " "which prevents whole row rejection"
Definition at line 767 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_word_for_word = false |
"Make output have exactly one word per WERD"
Definition at line 854 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_block_separators = false |
"Write block separators in output"
Definition at line 836 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_images = false |
"Capture the image from the IPE"
Definition at line 883 of file tesseractclass.h.
char* tesseract::Tesseract::tessedit_write_params_to_file = "" |
"Write all parameters to the given file."
Definition at line 686 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_rep_codes = false |
"Write repetition char code"
Definition at line 838 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_write_unlv = false |
"Write .unlv output file"
Definition at line 839 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_zero_kelvin_rejection = false |
"Dont reject ANYTHING AT ALL"
Definition at line 856 of file tesseractclass.h.
bool tesseract::Tesseract::tessedit_zero_rejection = false |
"Dont reject ANYTHING"
Definition at line 852 of file tesseractclass.h.
bool tesseract::Tesseract::test_pt = false |
"Test for point"
Definition at line 747 of file tesseractclass.h.
double tesseract::Tesseract::test_pt_x = 99999.99 |
"xcoord"
Definition at line 748 of file tesseractclass.h.
double tesseract::Tesseract::test_pt_y = 99999.99 |
"ycoord"
Definition at line 749 of file tesseractclass.h.
bool tesseract::Tesseract::textord_equation_detect = false |
"Turn on equation detector"
Definition at line 900 of file tesseractclass.h.
bool tesseract::Tesseract::textord_tabfind_show_vlines = false |
"Debug line finding"
Definition at line 895 of file tesseractclass.h.
bool tesseract::Tesseract::textord_use_cjk_fp_model = FALSE |
"Use CJK fixed pitch model"
Definition at line 896 of file tesseractclass.h.
bool tesseract::Tesseract::unlv_tilde_crunching = true |
"Mark v.bad words for tilde crunch"
Definition at line 791 of file tesseractclass.h.
char* tesseract::Tesseract::unrecognised_char = "|" |
"Output char for unidentified blobs"
Definition at line 842 of file tesseractclass.h.
int tesseract::Tesseract::x_ht_acceptance_tolerance = 8 |
"Max allowed deviation of blob top outside of font data"
Definition at line 833 of file tesseractclass.h.
int tesseract::Tesseract::x_ht_min_change = 8 |
"Min change in xht before actually trying it"
Definition at line 834 of file tesseractclass.h.