Tesseract
3.02
|
Typedefs | |
typedef int(Dict::* | DictFunc )(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const |
typedef double(Dict::* | ProbabilityInContextFunc )(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
typedef void(Wordrec::* | FillLatticeFunc )(const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
typedef TessCallback3< const UNICHARSET &, int, PAGE_RES * > | TruthCallback |
typedef GenericVectorEqEq < const ParagraphModel * > | SetOfModels |
typedef void(Tesseract::* | WordRecognizer )(BLOCK *block, ROW *row, WERD_RES *word) |
typedef GenericVector < ParamsTrainingHypothesis > | ParamsTrainingHypothesisList |
typedef GenericVector< UNICHAR_ID > | UnicharIdVector |
typedef GenericVector < AmbigSpec_LIST * > | UnicharAmbigsVector |
typedef signed int | char_32 |
typedef basic_string< char_32 > | string_32 |
typedef GenericVector< NodeChild > | NodeChildVector |
typedef GenericVector< int > | SuccessorList |
typedef GenericVector < SuccessorList * > | SuccessorListsVector |
typedef GenericVector< Dawg * > | DawgVector |
typedef GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > | BlobGridSearch |
typedef GridSearch < ColPartition, ColPartition_CLIST, ColPartition_C_IT > | ColPartitionGridSearch |
typedef GenericVector < ColPartitionSet * > | PartSetVector |
typedef TessResultCallback1 < bool, int > | WidthCallback |
typedef BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT > | ColSegmentGrid |
typedef GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT > | ColSegmentGridSearch |
typedef unsigned char | LanguageModelFlagsType |
Functions | |
int | CubeAPITest (Boxa *boxa_blocks, Pixa *pixa_blocks, Boxa *boxa_words, Pixa *pixa_words, const FCOORD &reskew, Pix *page_pix, PAGE_RES *page_res) |
TBLOB * | make_tesseract_blob (float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix) |
TBOX | char_box_to_tbox (Box *char_box, TBOX word_box, int x_offset) |
bool | IsTextOrEquationType (PolyBlockType type) |
bool | IsLeftIndented (const EquationDetect::IndentType type) |
bool | IsRightIndented (const EquationDetect::IndentType type) |
template<typename T > | |
void | SimpleSwap (T &a, T &b) |
STRING | RtlEmbed (const STRING &word, bool rtlify) |
bool | IsLatinLetter (int ch) |
bool | IsDigitLike (int ch) |
bool | IsOpeningPunct (int ch) |
bool | IsTerminalPunct (int ch) |
const char * | SkipChars (const char *str, const char *toskip) |
const char * | SkipChars (const char *str, bool(*skip)(int)) |
const char * | SkipOne (const char *str, const char *toskip) |
bool | LikelyListNumeral (const STRING &word) |
bool | LikelyListMark (const STRING &word) |
bool | AsciiLikelyListItem (const STRING &word) |
int | UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos) |
bool | LikelyListMarkUnicode (int ch) |
bool | UniLikelyListItem (const UNICHARSET *u, const WERD_CHOICE *werd) |
void | LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea) |
void | RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea) |
int | ClosestCluster (const GenericVector< Cluster > &clusters, int value) |
void | CalculateTabStops (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, int tolerance, GenericVector< Cluster > *left_tabs, GenericVector< Cluster > *right_tabs) |
void | MarkRowsWithModel (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, const ParagraphModel *model, bool ltr, int eop_threshold) |
void | GeometricClassifyThreeTabStopTextBlock (int debug_level, GeometricClassifierState &s, ParagraphTheory *theory) |
void | GeometricClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
bool | ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model) |
bool | ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model) |
bool | CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model) |
void | DiscardUnusedModels (const GenericVector< RowScratchRegisters > &rows, ParagraphTheory *theory) |
void | DowngradeWeakestToCrowns (int debug_level, ParagraphTheory *theory, GenericVector< RowScratchRegisters > *rows) |
void | RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile) |
int | InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end) |
bool | FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification) |
bool | FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after) |
bool | TextSupportsBreak (const RowScratchRegisters &before, const RowScratchRegisters &after) |
bool | LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after) |
bool | LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification j) |
ParagraphModel | InternalParagraphModelByOutline (const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance, bool *consistent) |
ParagraphModel | ParagraphModelByOutline (int debug_level, const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance) |
bool | RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model) |
void | MarkStrongEvidence (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end) |
void | ModelStrongEvidence (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, bool allow_flush_models, ParagraphTheory *theory) |
void | StrongEvidenceClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
void | SeparateSimpleLeaderLines (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory) |
void | ConvertHypothesizedModelRunsToParagraphs (int debug_level, const GenericVector< RowScratchRegisters > &rows, GenericVector< PARA * > *row_owners, ParagraphTheory *theory) |
bool | RowIsStranded (const GenericVector< RowScratchRegisters > &rows, int row) |
void | LeftoverSegments (const GenericVector< RowScratchRegisters > &rows, GenericVector< Interval > *to_fix, int row_start, int row_end) |
void | CanonicalizeDetectionResults (GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs) |
void | DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models) |
void | InitializeTextAndBoxesPreRecognition (const MutableIterator &it, RowInfo *info) |
void | InitializeRowInfo (bool after_recognition, const MutableIterator &it, RowInfo *info) |
void | DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel * > *models) |
bool | StrongModel (const ParagraphModel *model) |
bool | read_t (PAGE_RES_IT *page_res_it, TBOX *tbox) |
ICOORD | ComputeEndFromGradient (const ICOORD &start, double m) |
bool | CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2) |
bool | CompareFontSet (const FontSet &fs1, const FontSet &fs2) |
void | FontInfoDeleteCallback (FontInfo f) |
void | FontSetDeleteCallback (FontSet fs) |
bool | read_info (FILE *f, FontInfo *fi, bool swap) |
bool | write_info (FILE *f, const FontInfo &fi) |
bool | read_spacing_info (FILE *f, FontInfo *fi, bool swap) |
bool | write_spacing_info (FILE *f, const FontInfo &fi) |
bool | read_set (FILE *f, FontSet *fs, bool swap) |
bool | write_set (FILE *f, const FontSet &fs) |
void | OtsuThreshold (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int **thresholds, int **hi_values) |
void | HistogramRect (const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int *histogram) |
int | OtsuStats (const int *histogram, int *H_out, int *omega0_out) |
ELISTIZE (AmbigSpec) | |
ELISTIZEH (AmbigSpec) | |
template<typename T > | |
bool | cmp_eq (T const &t1, T const &t2) |
template<typename T > | |
int | sort_cmp (const void *t1, const void *t2) |
template<typename T > | |
int | sort_ptr_cmp (const void *t1, const void *t2) |
void | ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window) |
WERD_CHOICE * | get_best_delete_other (WERD_CHOICE *choice1, WERD_CHOICE *choice2) |
BLOB_CHOICE * | get_nth_choice (BLOB_CHOICE_LIST *blob_list, int n) |
UNICHAR_ID | get_top_choice_uid (BLOB_CHOICE_LIST *blob_list) |
int | find_choice_by_uid (BLOB_CHOICE_LIST *blob_list, UNICHAR_ID target_uid) |
WERD_CHOICE * | get_choice_from_posstr (const UNICHARSET *unicharset, const BLOB_CHOICE_LIST_VECTOR &char_choices, int start_pos, const char *pos_str, float *certainties) |
void | get_posstr_from_choice (const BLOB_CHOICE_LIST_VECTOR &char_choices, WERD_CHOICE *word_choice, int start_pos, char *pos_str) |
BLOB_CHOICE * | find_choice_by_type (BLOB_CHOICE_LIST *blob_choices, char target_type, const UNICHARSET &unicharset) |
BLOB_CHOICE * | find_choice_by_script (BLOB_CHOICE_LIST *blob_choices, int target_sid, int backup_sid, int secondary_sid) |
Pix * | GridReducedPix (const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) |
Pix * | TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom) |
Pix * | TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) |
template<class BBC > | |
int | SortByBoxLeft (const void *void1, const void *void2) |
template<class BBC > | |
int | SortRightToLeft (const void *void1, const void *void2) |
template<class BBC > | |
int | SortByBoxBottom (const void *void1, const void *void2) |
template<typename T > | |
void | DeleteObject (T *object) |
ShapeTable * | LoadShapeTable (const STRING &file_prefix) |
void | WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table) |
MasterTrainer * | LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix) |
ELISTIZE (ViterbiStateEntry) | |
ELISTIZEH (ViterbiStateEntry) | |
template<class BLOB_CHOICE > | |
int | SortByUnicharID (const void *void1, const void *void2) |
template<class BLOB_CHOICE > | |
int | SortByRating (const void *void1, const void *void2) |
recog_pseudo_word
Make a word from the selected blobs and run Tess on them.
page_res | recognise blobs |
selection_box | within this box |
fp_eval_word_spacing() Evaluation function for fixed pitch word lists.
Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.
process_selected_words()
Walk the current block list applying the specified word processor function to each word that overlaps the selection_box.
build_menu()
Construct the menu tree used by the command window
process_cmd_win_event()
Process a command returned from the command window (Just call the appropriate command handler)
word_blank_and_set_display() Word processor
Blank display of word then redisplay word according to current display mode settings
typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> tesseract::BlobGridSearch |
Definition at line 31 of file blobgrid.h.
typedef signed int tesseract::char_32 |
Definition at line 40 of file string_32.h.
typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> tesseract::ColPartitionGridSearch |
Definition at line 895 of file colpartition.h.
typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT> tesseract::ColSegmentGrid |
Definition at line 118 of file tablefind.h.
typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> tesseract::ColSegmentGridSearch |
Definition at line 121 of file tablefind.h.
typedef GenericVector<Dawg *> tesseract::DawgVector |
typedef int(Dict::* tesseract::DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const |
typedef void(Wordrec::* tesseract::FillLatticeFunc)(const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
typedef unsigned char tesseract::LanguageModelFlagsType |
Definition at line 37 of file language_model.h.
Definition at line 92 of file params_training_featdef.h.
Definition at line 33 of file colpartitionset.h.
typedef double(Dict::* tesseract::ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes) |
typedef GenericVectorEqEq<const ParagraphModel *> tesseract::SetOfModels |
Definition at line 94 of file paragraphs_internal.h.
typedef basic_string<char_32> tesseract::string_32 |
Definition at line 41 of file string_32.h.
typedef GenericVector<int> tesseract::SuccessorList |
typedef TessCallback3<const UNICHARSET &, int, PAGE_RES *> tesseract::TruthCallback |
typedef GenericVector<AmbigSpec_LIST *> tesseract::UnicharAmbigsVector |
typedef TessResultCallback1<bool, int> tesseract::WidthCallback |
Definition at line 103 of file tesseractclass.h.
enum tesseract::AmbigType |
Definition at line 44 of file ambigs.h.
Definition at line 51 of file classify.h.
Definition at line 437 of file tessedit.cpp.
Definition at line 30 of file tablefind.h.
Definition at line 47 of file colpartition.h.
Definition at line 69 of file errorcounter.h.
enum tesseract::DawgType |
enum tesseract::LineType |
Definition at line 54 of file paragraphs_internal.h.
Definition at line 1431 of file colpartitiongrid.cpp.
Definition at line 42 of file normalis.h.
When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.
ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.
Definition at line 234 of file publictypes.h.
+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a latin character rotated
| < #######  c c c |      anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto
If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.
In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).
The values of this enum match the convention of Tesseract's osdetect.h
Definition at line 104 of file publictypes.h.
enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.
Definition at line 185 of file publictypes.h.
Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.
Definition at line 147 of file publictypes.h.
JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.
NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.
Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.
JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.
JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.
JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.
Definition at line 217 of file publictypes.h.
Definition at line 34 of file params_training_featdef.h.
enum tesseract::ScriptPos |
TA_LEFT_ALIGNED | |
TA_LEFT_RAGGED | |
TA_CENTER_JUSTIFIED | |
TA_RIGHT_ALIGNED | |
TA_RIGHT_RAGGED | |
TA_SEPARATOR | |
TA_COUNT |
Definition at line 43 of file tabvector.h.
Definition at line 51 of file tessdatamanager.h.
The text lines are read in the given sequence.
In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.
Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.
Definition at line 136 of file publictypes.h.
The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".
For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.
WRITING_DIRECTION_LEFT_TO_RIGHT | |
WRITING_DIRECTION_RIGHT_TO_LEFT | |
WRITING_DIRECTION_TOP_TO_BOTTOM |
Definition at line 119 of file publictypes.h.
bool tesseract::AsciiLikelyListItem | ( | const STRING & | word | ) |
Definition at line 279 of file paragraphs.cpp.
void tesseract::CalculateTabStops | ( | GenericVector< RowScratchRegisters > * | rows, |
int | row_start, | ||
int | row_end, | ||
int | tolerance, | ||
GenericVector< Cluster > * | left_tabs, | ||
GenericVector< Cluster > * | right_tabs | ||
) |
Definition at line 703 of file paragraphs.cpp.
void tesseract::CanonicalizeDetectionResults | ( | GenericVector< PARA * > * | row_owners, |
PARA_LIST * | paragraphs | ||
) |
Definition at line 2179 of file paragraphs.cpp.
Definition at line 42 of file cube_control.cpp.
void tesseract::ClearFeatureSpaceWindow | ( | NORM_METHOD | norm_method, |
ScrollView * | window | ||
) |
Definition at line 1132 of file intproto.cpp.
int tesseract::ClosestCluster | ( | const GenericVector< Cluster > & | clusters, |
int | value | ||
) |
Definition at line 677 of file paragraphs.cpp.
bool tesseract::cmp_eq | ( | T const & | t1, |
T const & | t2 | ||
) |
Definition at line 285 of file genericvector.h.
bool tesseract::CompareFontInfo | ( | const FontInfo & | fi1, |
const FontInfo & | fi2 | ||
) |
Definition at line 25 of file fontinfo.cpp.
bool tesseract::CompareFontSet | ( | const FontSet & | fs1, |
const FontSet & | fs2 | ||
) |
Definition at line 33 of file fontinfo.cpp.
Definition at line 124 of file detlinefit.cpp.
void tesseract::ConvertHypothesizedModelRunsToParagraphs | ( | int | debug_level, |
const GenericVector< RowScratchRegisters > & | rows, | ||
GenericVector< PARA * > * | row_owners, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 1988 of file paragraphs.cpp.
bool tesseract::CrownCompatible | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | a, | ||
int | b, | ||
const ParagraphModel * | model | ||
) |
Definition at line 1237 of file paragraphs.cpp.
int tesseract::CubeAPITest | ( | Boxa * | boxa_blocks, |
Pixa * | pixa_blocks, | ||
Boxa * | boxa_words, | ||
Pixa * | pixa_words, | ||
const FCOORD & | reskew, | ||
Pix * | page_pix, | ||
PAGE_RES * | page_res | ||
) |
Placeholder for call to Cube and test that the input data is correct. reskew is the direction of baselines in the skewed image in normalized (cos theta, sin theta) form, so (0.866, 0.5) would represent a 30 degree anticlockwise skew.
Definition at line 628 of file baseapi.cpp.
void tesseract::DeleteObject | ( | T * | object | ) |
Definition at line 164 of file tablefind.cpp.
void tesseract::DetectParagraphs | ( | int | debug_level, |
GenericVector< RowInfo > * | row_infos, | ||
GenericVector< PARA * > * | row_owners, | ||
PARA_LIST * | paragraphs, | ||
GenericVector< ParagraphModel * > * | models | ||
) |
Definition at line 2211 of file paragraphs.cpp.
void tesseract::DetectParagraphs | ( | int | debug_level, |
bool | after_text_recognition, | ||
const MutableIterator * | block_start, | ||
GenericVector< ParagraphModel * > * | models | ||
) |
Definition at line 2454 of file paragraphs.cpp.
void tesseract::DiscardUnusedModels | ( | const GenericVector< RowScratchRegisters > & | rows, |
ParagraphTheory * | theory | ||
) |
Definition at line 1404 of file paragraphs.cpp.
void tesseract::DowngradeWeakestToCrowns | ( | int | debug_level, |
ParagraphTheory * | theory, | ||
GenericVector< RowScratchRegisters > * | rows | ||
) |
Definition at line 1437 of file paragraphs.cpp.
tesseract::ELISTIZE | ( | ViterbiStateEntry | ) |
tesseract::ELISTIZE | ( | AmbigSpec | ) |
tesseract::ELISTIZEH | ( | AmbigSpec | ) |
tesseract::ELISTIZEH | ( | ViterbiStateEntry | ) |
BLOB_CHOICE* tesseract::find_choice_by_script | ( | BLOB_CHOICE_LIST * | blob_choices, |
int | target_sid, | ||
int | backup_sid, | ||
int | secondary_sid | ||
) |
Iterate through all the character choices (for a single blob) and return the first that matches the target script ID. If backup_sid is not 0, then a match on either the target or backup sid is allowed. Note that there is no preference between a target or backup sid. To search for another sid only if no target_sid matched, use secondary_sid. So for example, to find first Han or Common char choice, do find_choice_by_script(cchoice, han_sid, common_sid, 0); To find first Han choice, but allow Common if none is found, do find_choice_by_script(cchoice, han_sid, 0, common_sid);
Definition at line 206 of file permute.cpp.
BLOB_CHOICE* tesseract::find_choice_by_type | ( | BLOB_CHOICE_LIST * | blob_choices, |
char | target_type, | ||
const UNICHARSET & | unicharset | ||
) |
Iterate through all the character choices (for a single blob) and return the first that matches the given type, which is one of 'aA0px*', for lower, upper, digit, punctuation, other, and 'any', respectively. If no match is found, NULL is returned.
Definition at line 181 of file permute.cpp.
int tesseract::find_choice_by_uid | ( | BLOB_CHOICE_LIST * | blob_list, |
UNICHAR_ID | target_uid | ||
) |
Returns the rank (starting at 0) of a given unichar ID in the char choice list, or -1 if not found.
Definition at line 110 of file permute.cpp.
bool tesseract::FirstWordWouldHaveFit | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after, | ||
tesseract::ParagraphJustification | justification | ||
) |
Definition at line 1568 of file paragraphs.cpp.
bool tesseract::FirstWordWouldHaveFit | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after | ||
) |
Definition at line 1593 of file paragraphs.cpp.
void tesseract::FontInfoDeleteCallback | ( | FontInfo | f | ) |
void tesseract::FontSetDeleteCallback | ( | FontSet | fs | ) |
Definition at line 51 of file fontinfo.cpp.
void tesseract::GeometricClassify | ( | int | debug_level, |
GenericVector< RowScratchRegisters > * | rows, | ||
int | row_start, | ||
int | row_end, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 1028 of file paragraphs.cpp.
void tesseract::GeometricClassifyThreeTabStopTextBlock | ( | int | debug_level, |
GeometricClassifierState & | s, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 936 of file paragraphs.cpp.
WERD_CHOICE* tesseract::get_best_delete_other | ( | WERD_CHOICE * | choice1, |
WERD_CHOICE * | choice2 | ||
) |
get_best_delete_other
Returns the best of two choices and deletes the other (worse) choice. A choice is better if it has a non-empty string and has a lower rating than the other choice. If the ratings are the same, choice2 is preferred over choice1.
Definition at line 74 of file permute.cpp.
WERD_CHOICE* tesseract::get_choice_from_posstr | ( | const UNICHARSET * | unicharset, |
const BLOB_CHOICE_LIST_VECTOR & | char_choices, | ||
int | start_pos, | ||
const char * | pos_str, | ||
float * | certainties | ||
) |
Returns a WERD formed by taking the specified position (nth choice) string from char_choices starting at the given position. For example, if start_pos=2, pos_str="0121" will form a word using the 1st choice of char 3, 2nd choice of char 4, 3rd choice of char 5, 2nd choice of char 6. If n > number of choices, the closest (last) one is used.
Definition at line 129 of file permute.cpp.
BLOB_CHOICE* tesseract::get_nth_choice | ( | BLOB_CHOICE_LIST * | blob_list, |
int | n | ||
) |
Returns the n-th choice in the given blob_list (top-K choices). If n > K, the last choice is returned.
Definition at line 91 of file permute.cpp.
void tesseract::get_posstr_from_choice | ( | const BLOB_CHOICE_LIST_VECTOR & | char_choices, |
WERD_CHOICE * | word_choice, | ||
int | start_pos, | ||
char * | pos_str | ||
) |
Given a WERD_CHOICE, find the corresponding position string from char_choices. Pos_str must have been allocated already. This is the reverse of get_choice_from_posstr.
Definition at line 161 of file permute.cpp.
UNICHAR_ID tesseract::get_top_choice_uid | ( | BLOB_CHOICE_LIST * | blob_list | ) |
Returns the top choice char id. A helper function to make code cleaner.
Definition at line 99 of file permute.cpp.
Pix* tesseract::GridReducedPix | ( | const TBOX & | box, |
int | gridsize, | ||
ICOORD | bleft, | ||
int * | left, | ||
int * | bottom | ||
) |
Definition at line 212 of file bbgrid.cpp.
void tesseract::HistogramRect | ( | const unsigned char * | imagedata, |
int | bytes_per_pixel, | ||
int | bytes_per_line, | ||
int | left, | ||
int | top, | ||
int | width, | ||
int | height, | ||
int * | histogram | ||
) |
Definition at line 93 of file otsuthr.cpp.
void tesseract::InitializeRowInfo | ( | bool | after_recognition, |
const MutableIterator & | it, | ||
RowInfo * | info | ||
) |
Definition at line 2356 of file paragraphs.cpp.
void tesseract::InitializeTextAndBoxesPreRecognition | ( | const MutableIterator & | it, |
RowInfo * | info | ||
) |
Definition at line 2305 of file paragraphs.cpp.
ParagraphModel tesseract::InternalParagraphModelByOutline | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | start, | ||
int | end, | ||
int | tolerance, | ||
bool * | consistent | ||
) |
Definition at line 1639 of file paragraphs.cpp.
int tesseract::InterwordSpace | ( | const GenericVector< RowScratchRegisters > & | rows, |
int | row_start, | ||
int | row_end | ||
) |
Definition at line 1547 of file paragraphs.cpp.
bool tesseract::IsDigitLike | ( | int | ch | ) |
Definition at line 209 of file paragraphs.cpp.
bool tesseract::IsLatinLetter | ( | int | ch | ) |
Definition at line 205 of file paragraphs.cpp.
|
inline |
Definition at line 95 of file equationdetect.cpp.
bool tesseract::IsOpeningPunct | ( | int | ch | ) |
Definition at line 213 of file paragraphs.cpp.
|
inline |
Definition at line 100 of file equationdetect.cpp.
bool tesseract::IsTerminalPunct | ( | int | ch | ) |
Definition at line 217 of file paragraphs.cpp.
|
inline |
Definition at line 91 of file equationdetect.cpp.
void tesseract::LeftoverSegments | ( | const GenericVector< RowScratchRegisters > & | rows, |
GenericVector< Interval > * | to_fix, | ||
int | row_start, | ||
int | row_end | ||
) |
Definition at line 2128 of file paragraphs.cpp.
void tesseract::LeftWordAttributes | ( | const UNICHARSET * | unicharset, |
const WERD_CHOICE * | werd, | ||
const STRING & | utf8, | ||
bool * | is_list, | ||
bool * | starts_idea, | ||
bool * | ends_idea | ||
) |
Definition at line 406 of file paragraphs.cpp.
bool tesseract::LikelyListMark | ( | const STRING & | word | ) |
Definition at line 274 of file paragraphs.cpp.
bool tesseract::LikelyListMarkUnicode | ( | int | ch | ) |
Definition at line 340 of file paragraphs.cpp.
bool tesseract::LikelyListNumeral | ( | const STRING & | word | ) |
Definition at line 240 of file paragraphs.cpp.
bool tesseract::LikelyParagraphStart | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after | ||
) |
Definition at line 1619 of file paragraphs.cpp.
bool tesseract::LikelyParagraphStart | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after, | ||
tesseract::ParagraphJustification | j | ||
) |
Definition at line 1626 of file paragraphs.cpp.
ShapeTable * tesseract::LoadShapeTable | ( | const STRING & | file_prefix | ) |
Definition at line 183 of file commontraining.cpp.
MasterTrainer * tesseract::LoadTrainingData | ( | int | argc, |
const char *const * | argv, | ||
bool | replication, | ||
ShapeTable ** | shape_table, | ||
STRING * | file_prefix | ||
) |
Definition at line 238 of file commontraining.cpp.
TBLOB* tesseract::make_tesseract_blob | ( | float | baseline, |
float | xheight, | ||
float | descender, | ||
float | ascender, | ||
bool | numeric_mode, | ||
Pix * | pix | ||
) |
Return a TBLOB * from the whole pix. To be freed later with delete.
Definition at line 1916 of file baseapi.cpp.
void tesseract::MarkRowsWithModel | ( | GenericVector< RowScratchRegisters > * | rows, |
int | row_start, | ||
int | row_end, | ||
const ParagraphModel * | model, | ||
bool | ltr, | ||
int | eop_threshold | ||
) |
Definition at line 763 of file paragraphs.cpp.
void tesseract::MarkStrongEvidence | ( | GenericVector< RowScratchRegisters > * | rows, |
int | row_start, | ||
int | row_end | ||
) |
Definition at line 1777 of file paragraphs.cpp.
void tesseract::ModelStrongEvidence | ( | int | debug_level, |
GenericVector< RowScratchRegisters > * | rows, | ||
int | row_start, | ||
int | row_end, | ||
bool | allow_flush_models, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 1847 of file paragraphs.cpp.
int tesseract::OtsuStats | ( | const int * | histogram, |
int * | H_out, | ||
int * | omega0_out | ||
) |
Definition at line 113 of file otsuthr.cpp.
void tesseract::OtsuThreshold | ( | const unsigned char * | imagedata, |
int | bytes_per_pixel, | ||
int | bytes_per_line, | ||
int | left, | ||
int | top, | ||
int | width, | ||
int | height, | ||
int ** | thresholds, | ||
int ** | hi_values | ||
) |
Definition at line 32 of file otsuthr.cpp.
ParagraphModel tesseract::ParagraphModelByOutline | ( | int | debug_level, |
const GenericVector< RowScratchRegisters > * | rows, | ||
int | start, | ||
int | end, | ||
int | tolerance | ||
) |
Definition at line 1740 of file paragraphs.cpp.
bool tesseract::read_info | ( | FILE * | f, |
FontInfo * | fi, | ||
bool | swap | ||
) |
Definition at line 57 of file fontinfo.cpp.
bool tesseract::read_set | ( | FILE * | f, |
FontSet * | fs, | ||
bool | swap | ||
) |
Definition at line 140 of file fontinfo.cpp.
bool tesseract::read_spacing_info | ( | FILE * | f, |
FontInfo * | fi, | ||
bool | swap | ||
) |
Definition at line 80 of file fontinfo.cpp.
bool tesseract::read_t | ( | PAGE_RES_IT * | page_res_it, |
TBOX * | tbox | ||
) |
Definition at line 58 of file recogtraining.cpp.
void tesseract::RecomputeMarginsAndClearHypotheses | ( | GenericVector< RowScratchRegisters > * | rows, |
int | start, | ||
int | end, | ||
int | percentile | ||
) |
Definition at line 1507 of file paragraphs.cpp.
void tesseract::RightWordAttributes | ( | const UNICHARSET * | unicharset, |
const WERD_CHOICE * | werd, | ||
const STRING & | utf8, | ||
bool * | is_list, | ||
bool * | starts_idea, | ||
bool * | ends_idea | ||
) |
Definition at line 453 of file paragraphs.cpp.
bool tesseract::RowIsStranded | ( | const GenericVector< RowScratchRegisters > & | rows, |
int | row | ||
) |
Definition at line 2086 of file paragraphs.cpp.
bool tesseract::RowsFitModel | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | start, | ||
int | end, | ||
const ParagraphModel * | model | ||
) |
Definition at line 1755 of file paragraphs.cpp.
void tesseract::SeparateSimpleLeaderLines | ( | GenericVector< RowScratchRegisters > * | rows, |
int | row_start, | ||
int | row_end, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 1972 of file paragraphs.cpp.
void tesseract::SimpleSwap | ( | T & | a, |
T & | b | ||
) |
Definition at line 62 of file paragraphs.cpp.
const char* tesseract::SkipChars | ( | const char * | str, |
const char * | toskip | ||
) |
Definition at line 222 of file paragraphs.cpp.
const char* tesseract::SkipChars | ( | const char * | str, |
bool(*)(int) | skip | ||
) |
Definition at line 227 of file paragraphs.cpp.
const char* tesseract::SkipOne | ( | const char * | str, |
const char * | toskip | ||
) |
Definition at line 232 of file paragraphs.cpp.
int tesseract::sort_cmp | ( | const void * | t1, |
const void * | t2 | ||
) |
Definition at line 294 of file genericvector.h.
int tesseract::sort_ptr_cmp | ( | const void * | t1, |
const void * | t2 | ||
) |
Definition at line 311 of file genericvector.h.
int tesseract::SortByBoxBottom | ( | const void * | void1, |
const void * | void2 | ||
) |
Definition at line 405 of file bbgrid.h.
int tesseract::SortByBoxLeft | ( | const void * | void1, |
const void * | void2 | ||
) |
Definition at line 369 of file bbgrid.h.
int tesseract::SortByRating | ( | const void * | void1, |
const void * | void2 | ||
) |
Definition at line 116 of file pieces.cpp.
int tesseract::SortByUnicharID | ( | const void * | void1, |
const void * | void2 | ||
) |
Definition at line 108 of file pieces.cpp.
int tesseract::SortRightToLeft | ( | const void * | void1, |
const void * | void2 | ||
) |
Definition at line 387 of file bbgrid.h.
void tesseract::StrongEvidenceClassify | ( | int | debug_level, |
GenericVector< RowScratchRegisters > * | rows, | ||
int | row_start, | ||
int | row_end, | ||
ParagraphTheory * | theory | ||
) |
Definition at line 1942 of file paragraphs.cpp.
|
inline |
Definition at line 75 of file paragraphs_internal.h.
bool tesseract::TextSupportsBreak | ( | const RowScratchRegisters & | before, |
const RowScratchRegisters & | after | ||
) |
Definition at line 1608 of file paragraphs.cpp.
Pix * tesseract::TraceBlockOnReducedPix | ( | BLOCK * | block, |
int | gridsize, | ||
ICOORD | bleft, | ||
int * | left, | ||
int * | bottom | ||
) |
Definition at line 258 of file bbgrid.cpp.
Pix * tesseract::TraceOutlineOnReducedPix | ( | C_OUTLINE * | outline, |
int | gridsize, | ||
ICOORD | bleft, | ||
int * | left, | ||
int * | bottom | ||
) |
Definition at line 232 of file bbgrid.cpp.
int tesseract::UnicodeFor | ( | const UNICHARSET * | u, |
const WERD_CHOICE * | werd, | ||
int | pos | ||
) |
Definition at line 286 of file paragraphs.cpp.
bool tesseract::UniLikelyListItem | ( | const UNICHARSET * | u, |
const WERD_CHOICE * | werd | ||
) |
Definition at line 369 of file paragraphs.cpp.
bool tesseract::ValidBodyLine | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | row, | ||
const ParagraphModel * | model | ||
) |
Definition at line 1226 of file paragraphs.cpp.
bool tesseract::ValidFirstLine | ( | const GenericVector< RowScratchRegisters > * | rows, |
int | row, | ||
const ParagraphModel * | model | ||
) |
Definition at line 1215 of file paragraphs.cpp.
bool tesseract::write_info | ( | FILE * | f, |
const FontInfo & | fi | ||
) |
bool tesseract::write_set | ( | FILE * | f, |
const FontSet & | fs | ||
) |
Definition at line 153 of file fontinfo.cpp.
bool tesseract::write_spacing_info | ( | FILE * | f, |
const FontInfo & | fi | ||
) |
Definition at line 112 of file fontinfo.cpp.
void tesseract::WriteShapeTable | ( | const STRING & | file_prefix, |
const ShapeTable & | shape_table | ||
) |
Definition at line 209 of file commontraining.cpp.
const int tesseract::case_state_table[6][4] |
Definition at line 35 of file context.cpp.
const int tesseract::kAdjacentLeaderSearchPadding = 2 |
Definition at line 124 of file tablefind.cpp.
const double tesseract::kAlignedFraction = 0.03125 |
Definition at line 40 of file alignedblob.cpp.
const double tesseract::kAlignedGapFraction = 0.75 |
Definition at line 44 of file alignedblob.cpp.
const char* tesseract::kAlignmentNames[] |
Definition at line 516 of file tabvector.cpp.
const double tesseract::kAllowBlobArea = 0.05 |
Definition at line 60 of file tablefind.cpp.
const double tesseract::kAllowBlobHeight = 0.3 |
Definition at line 58 of file tablefind.cpp.
const double tesseract::kAllowBlobWidth = 0.4 |
Definition at line 59 of file tablefind.cpp.
const double tesseract::kAllowTextArea = 0.8 |
Definition at line 53 of file tablefind.cpp.
const double tesseract::kAllowTextHeight = 0.5 |
Definition at line 51 of file tablefind.cpp.
const double tesseract::kAllowTextWidth = 0.6 |
Definition at line 52 of file tablefind.cpp.
const char * tesseract::kApostropheLikeUTF8 |
Definition at line 48 of file unicodes.cpp.
const double tesseract::kBigPartSizeRatio = 1.75 |
Definition at line 47 of file colpartitiongrid.cpp.
const int tesseract::kBoxClipTolerance = 2 |
Definition at line 31 of file boxword.cpp.
const double tesseract::kBrokenCJKIterationFraction = 0.125 |
Definition at line 79 of file strokewidth.cpp.
const int tesseract::kBytesPer64BitNumber = 20 |
Max bytes in the decimal representation of inT64.
Definition at line 1178 of file baseapi.cpp.
const int tesseract::kBytesPerBlob = kNumbersPerBlob * (kBytesPerNumber + 1) + 1 |
Multiplier for max expected text length: assumes (kBytesPerNumber + space) for each of the kNumbersPerBlob numbers, plus one extra byte.
Definition at line 1175 of file baseapi.cpp.
const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1 |
Definition at line 1176 of file baseapi.cpp.
const int tesseract::kBytesPerNumber = 5 |
The number of bytes taken by each number. Since we use inT16 for ICOORD, assume only 5 digits max.
Definition at line 1169 of file baseapi.cpp.
const int tesseract::kCellSplitColumnThreshold = 0 |
Definition at line 36 of file tablerecog.cpp.
const int tesseract::kCellSplitRowThreshold = 0 |
Definition at line 35 of file tablerecog.cpp.
const double tesseract::kCharVerticalOverlapFraction = 0.375 |
Definition at line 63 of file tabfind.cpp.
const double tesseract::kCJKAspectRatio = 1.25 |
Definition at line 73 of file strokewidth.cpp.
const double tesseract::kCJKAspectRatioIncrease = 1.0625 |
Definition at line 75 of file strokewidth.cpp.
const double tesseract::kCJKBrokenDistanceFraction = 0.25 |
Definition at line 69 of file strokewidth.cpp.
const int tesseract::kCJKMaxComponents = 8 |
Definition at line 71 of file strokewidth.cpp.
const int tesseract::kCJKRadius = 2 |
Definition at line 67 of file strokewidth.cpp.
const int tesseract::kColumnWidthFactor = 20 |
const double tesseract::kCosMaxSkewAngle = 0.866025 |
Definition at line 82 of file tabfind.cpp.
const int tesseract::kCrackSpacing = 100 |
Spacing of cracks across the page to break up tall vertical lines.
Definition at line 44 of file linefind.cpp.
const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F) |
Definition at line 50 of file paragraphs.cpp.
const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F) |
Definition at line 52 of file paragraphs.cpp.
const int tesseract::kDefaultResolution = 300 |
Default resolution used if input is not believable.
Definition at line 58 of file pagesegmain.cpp.
const double tesseract::kDiacriticXPadRatio = 7.0 |
Definition at line 82 of file strokewidth.cpp.
const double tesseract::kDiacriticYPadRatio = 1.75 |
Definition at line 85 of file strokewidth.cpp.
const float tesseract::kFontMergeDistance = 0.025 |
Definition at line 49 of file mastertrainer.cpp.
const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7 |
Definition at line 54 of file tablerecog.cpp.
const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 } |
Definition at line 50 of file tablerecog.cpp.
const int tesseract::kGoodRowNumberOfColumnsSmallSize |
Definition at line 51 of file tablerecog.cpp.
const int tesseract::kGutterMultiple = 4 |
Definition at line 39 of file tabvector.cpp.
const int tesseract::kGutterToNeighbourRatio = 3 |
Definition at line 41 of file tabvector.cpp.
const double tesseract::kHorizontalGapMergeFraction = 0.5 |
Definition at line 57 of file colfind.cpp.
const double tesseract::kHorizontalSpacing = 0.30 |
Definition at line 29 of file tablerecog.cpp.
const int tesseract::kHorzStrongTextlineAspect = 5 |
Definition at line 70 of file colpartition.cpp.
const int tesseract::kHorzStrongTextlineCount = 8 |
Definition at line 66 of file colpartition.cpp.
const int tesseract::kHorzStrongTextlineHeight = 10 |
Definition at line 68 of file colpartition.cpp.
const char * tesseract::kHyphenLikeUTF8 |
Definition at line 32 of file unicodes.cpp.
const float tesseract::kInfiniteDist = 999.0f |
Definition at line 896 of file mastertrainer.cpp.
const char* tesseract::kInputFile = "noname.tif" |
Filename used for input image file, from which to derive a name to search for a possible UNLV zone file, if none is specified by SetInputName.
Definition at line 88 of file baseapi.cpp.
const double tesseract::kLargeTableProjectionThreshold = 0.45 |
Definition at line 109 of file tablefind.cpp.
const int tesseract::kLargeTableRowCount = 6 |
Definition at line 111 of file tablefind.cpp.
const int tesseract::kLatinChs[] |
Latin chars corresponding to the unicode chars above.
Definition at line 1239 of file baseapi.cpp.
const int tesseract::kLeaderCutCost = 8 |
Definition at line 60 of file colpartition.cpp.
const int tesseract::kLeftIndentAlignmentCountTh = 1 |
Definition at line 88 of file equationdetect.cpp.
const double tesseract::kLineCountReciprocal = 4.0 |
Definition at line 52 of file tabvector.cpp.
const int tesseract::kLinedTableMinHorizontalLines = 3 |
Definition at line 39 of file tablerecog.cpp.
const int tesseract::kLinedTableMinVerticalLines = 3 |
Definition at line 38 of file tablerecog.cpp.
const int tesseract::kLineFindGridSize = 50 |
Grid size used by line finder. Not very critical.
Definition at line 46 of file linefind.cpp.
const double tesseract::kLineFragmentAspectRatio = 10.0 |
Definition at line 57 of file tabfind.cpp.
const double tesseract::kLineResidueAspectRatio = 8.0 |
Definition at line 108 of file strokewidth.cpp.
const int tesseract::kLineResiduePadRatio = 3 |
Definition at line 110 of file strokewidth.cpp.
const double tesseract::kLineResidueSizeRatio = 1.75 |
Definition at line 112 of file strokewidth.cpp.
const int tesseract::kLineTrapLongest = 4 |
Definition at line 101 of file strokewidth.cpp.
const int tesseract::kLineTrapShortest = 2 |
Definition at line 103 of file strokewidth.cpp.
const char * tesseract::kLRM = "\u200E" |
Definition at line 27 of file unicodes.cpp.
const double tesseract::kMarginFactor = 1.1 |
Definition at line 44 of file tablerecog.cpp.
const double tesseract::kMarginOverlapFraction = 0.25 |
Definition at line 54 of file colfind.cpp.
const float tesseract::kMathDigitDensityTh1 = 0.25 |
Definition at line 83 of file equationdetect.cpp.
const float tesseract::kMathDigitDensityTh2 = 0.1 |
Definition at line 84 of file equationdetect.cpp.
const float tesseract::kMathItalicDensityTh = 0.5 |
Definition at line 85 of file equationdetect.cpp.
const double tesseract::kMaxBaselineError = 0.4375 |
Definition at line 73 of file colpartition.cpp.
const double tesseract::kMaxBlobOverlapFactor = 4.0 |
Definition at line 79 of file tablefind.cpp.
const int tesseract::kMaxBlobWidth = 500 |
Definition at line 42 of file tablefind.cpp.
const inT16 tesseract::kMaxBoxEdgeDiff = 2 |
Definition at line 33 of file recogtraining.cpp.
const int tesseract::kMaxBoxesInDataPartition = 20 |
Definition at line 68 of file tablefind.cpp.
const int tesseract::kMaxBytesPerLine |
A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.
Definition at line 1185 of file baseapi.cpp.
const int tesseract::kMaxCaptionLines = 7 |
Definition at line 39 of file colpartitiongrid.cpp.
const int tesseract::kMaxCharTopRange = 48 |
Definition at line 61 of file fixxht.cpp.
const int tesseract::kMaxCircleErosions = 8 |
Definition at line 60 of file pagesegmain.cpp.
const int tesseract::kMaxCJKSizeRatio = 5 |
Definition at line 77 of file strokewidth.cpp.
const int tesseract::kMaxColorDistance = 900 |
Definition at line 80 of file colpartition.cpp.
const int tesseract::kMaxColumnHeaderDistance = 4 |
Definition at line 87 of file tablefind.cpp.
const int tesseract::kMaxCredibleResolution = 2400 |
Maximum believable resolution.
Definition at line 99 of file baseapi.cpp.
const double tesseract::kMaxDiacriticDistanceRatio = 1.25 |
Definition at line 91 of file strokewidth.cpp.
const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0 |
Definition at line 94 of file strokewidth.cpp.
const double tesseract::kMaxDistToPartSizeRatio = 1.5 |
Definition at line 64 of file colfind.cpp.
const int tesseract::kMaxDropCapBottom = -128 |
Definition at line 37 of file boxword.cpp.
const int tesseract::kMaxFillinMultiple = 11 |
Definition at line 48 of file tabvector.cpp.
const double tesseract::kMaxGapInTextPartition = 4.0 |
Definition at line 71 of file tablefind.cpp.
const double tesseract::kMaxGutterWidthAbsolute = 2.00 |
Definition at line 52 of file tabfind.cpp.
const double tesseract::kMaxHorizontalGap = 3.0 |
Definition at line 65 of file tabfind.cpp.
const int tesseract::kMaxIncompatibleColumnCount = 2 |
Definition at line 52 of file colfind.cpp.
const int tesseract::kMaxIntSize = 22 |
Max string length of an int.
Definition at line 92 of file baseapi.cpp.
const int tesseract::kMaxLargeOverlaps = 3 |
Definition at line 117 of file strokewidth.cpp.
const int tesseract::kMaxLargeOverlapsWithMedium = 12 |
Definition at line 40 of file ccnontextdetect.cpp.
const int tesseract::kMaxLargeOverlapsWithSmall = 3 |
Definition at line 31 of file ccnontextdetect.cpp.
const double tesseract::kMaxLeaderGapFractionOfMax = 0.25 |
Definition at line 54 of file colpartition.cpp.
const double tesseract::kMaxLeaderGapFractionOfMin = 0.5 |
Definition at line 56 of file colpartition.cpp.
const int tesseract::kMaxLineResidue = 6 |
Definition at line 52 of file linefind.cpp.
const int tesseract::kMaxMediumOverlapsWithSmall = 12 |
Definition at line 36 of file ccnontextdetect.cpp.
const int tesseract::kMaxNeighbourDistFactor = 4 |
Definition at line 33 of file colpartitiongrid.cpp.
const double tesseract::kMaxNonLineDensity = 0.25 |
Definition at line 57 of file linefind.cpp.
const int tesseract::kMaxOffsetDist = 32 |
Definition at line 32 of file intfeaturemap.cpp.
const int tesseract::kMaxPadFactor = 6 |
Definition at line 30 of file colpartitiongrid.cpp.
const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0 |
Definition at line 133 of file tablefind.cpp.
const double tesseract::kMaxPartitionSpacing = 1.75 |
Definition at line 66 of file colpartitiongrid.cpp.
const int tesseract::kMaxPartnerDepth = 4 |
Definition at line 42 of file colpartition.cpp.
const int tesseract::kMaxRaggedSearch = 25 |
Definition at line 40 of file tabfind.cpp.
const double tesseract::kMaxRectangularFraction = 0.75 |
Definition at line 47 of file imagefind.cpp.
const double tesseract::kMaxRectangularGradient = 0.1 |
Definition at line 50 of file imagefind.cpp.
const int tesseract::kMaxRMSColorNoise = 128 |
Definition at line 77 of file colpartition.cpp.
const double tesseract::kMaxRowSize = 2.5 |
Definition at line 47 of file tablerecog.cpp.
const double tesseract::kMaxSameBlockLineSpacing = 3 |
Definition at line 50 of file colpartition.cpp.
const double tesseract::kMaxSizeRatio = 1.5 |
Definition at line 52 of file colpartition.cpp.
const int tesseract::kMaxSkewFactor = 15 |
Definition at line 66 of file alignedblob.cpp.
const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32 |
Definition at line 28 of file ccnontextdetect.cpp.
const double tesseract::kMaxSpacingDrift = 1.0 / 72 |
Definition at line 44 of file colpartition.cpp.
const double tesseract::kMaxStaveHeight = 1.0 |
Definition at line 59 of file linefind.cpp.
const double tesseract::kMaxTableCellXheight = 2.0 |
Definition at line 83 of file tablefind.cpp.
const int tesseract::kMaxTextLineBlobRatio = 5 |
Definition at line 73 of file tabfind.cpp.
const double tesseract::kMaxTopSpacingFraction = 0.25 |
Definition at line 47 of file colpartition.cpp.
const int tesseract::kMaxUnicharsPerCluster = 2000 |
Definition at line 47 of file mastertrainer.cpp.
const int tesseract::kMaxVerticalSearch = 12 |
Definition at line 39 of file tabfind.cpp.
const int tesseract::kMaxVerticalSpacing = 500 |
Definition at line 40 of file tablefind.cpp.
const double tesseract::kMaxXProjectionGapFactor = 2.0 |
Definition at line 143 of file tablefind.cpp.
const double tesseract::kMinAlignedGutter = 0.25 |
Definition at line 54 of file tabvector.cpp.
const int tesseract::kMinAlignedTabs = 4 |
Definition at line 56 of file alignedblob.cpp.
const double tesseract::kMinBaselineCoverage = 0.5 |
Definition at line 75 of file colpartition.cpp.
const int tesseract::kMinBoxesInTextPartition = 10 |
Definition at line 65 of file tablefind.cpp.
const double tesseract::kMinCaptionGapHeightRatio = 0.5 |
Definition at line 43 of file colpartitiongrid.cpp.
const double tesseract::kMinCaptionGapRatio = 2.0 |
Definition at line 41 of file colpartitiongrid.cpp.
const int tesseract::kMinChainTextValue = 3 |
Definition at line 64 of file colpartition.cpp.
const int tesseract::kMinClusteredShapes = 1 |
Definition at line 45 of file mastertrainer.cpp.
const int tesseract::kMinColorDifference = 16 |
Definition at line 56 of file imagefind.cpp.
const int tesseract::kMinColumnWidth = 100 |
Definition at line 49 of file colfind.cpp.
const int tesseract::kMinCredibleResolution = 70 |
Minimum believable resolution.
Minimum believable resolution. Used as a default if there is no other information, as it is safer to under-estimate than over-estimate.
Definition at line 97 of file baseapi.cpp.
const double tesseract::kMinDiacriticSizeRatio = 1.0625 |
Definition at line 88 of file strokewidth.cpp.
const int tesseract::kMinEvaluatedTabs = 3 |
Definition at line 70 of file tabfind.cpp.
const double tesseract::kMinFilledArea = 0.35 |
Definition at line 57 of file tablerecog.cpp.
const double tesseract::kMinFractionalLinesInColumn = 0.125 |
Definition at line 46 of file tabfind.cpp.
const double tesseract::kMinGoodTextPARatio = 1.5 |
Definition at line 56 of file ccnontextdetect.cpp.
const double tesseract::kMinGutterFraction = 0.5 |
Definition at line 50 of file tabvector.cpp.
const double tesseract::kMinGutterWidthAbsolute = 0.02 |
Definition at line 50 of file tabfind.cpp.
const double tesseract::kMinGutterWidthGrid = 0.5 |
Definition at line 61 of file colfind.cpp.
const double tesseract::kMinImageArea = 0.5 |
Definition at line 78 of file tabfind.cpp.
const int tesseract::kMinImageFindSize = 100 |
Definition at line 52 of file imagefind.cpp.
const int tesseract::kMinLeaderCount = 5 |
Definition at line 58 of file colpartition.cpp.
const int tesseract::kMinLineLengthFraction = 4 |
Denominator applied to the resolution to give the minimum length, in pixels, demanded of a line.
Definition at line 42 of file linefind.cpp.
const int tesseract::kMinLinesInColumn = 10 |
Definition at line 42 of file tabfind.cpp.
const double tesseract::kMinMaxGapInTextPartition = 0.5 |
Definition at line 75 of file tablefind.cpp.
const double tesseract::kMinMusicPixelFraction = 0.75 |
Definition at line 61 of file linefind.cpp.
const double tesseract::kMinNonNoiseFraction = 0.5 |
Definition at line 59 of file colfind.cpp.
const int tesseract::kMinOutlierSamples = 5 |
Definition at line 37 of file trainingsampleset.cpp.
const double tesseract::kMinOverlapWithTable = 0.6 |
Definition at line 99 of file tablefind.cpp.
const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0 |
Definition at line 139 of file tablefind.cpp.
const double tesseract::kMinPCLengthIncrease = 1.0 / 1024 |
Definition at line 33 of file intfeaturemap.cpp.
const double tesseract::kMinRaggedGutter = 1.5 |
Definition at line 56 of file tabvector.cpp.
const int tesseract::kMinRaggedTabs = 5 |
Definition at line 54 of file alignedblob.cpp.
const double tesseract::kMinRectangularFraction = 0.125 |
Definition at line 45 of file imagefind.cpp.
const int tesseract::kMinRectSize = 10 |
Minimum sensible image size to be worth running tesseract.
Definition at line 77 of file baseapi.cpp.
const int tesseract::kMinRowsInTable = 3 |
Definition at line 114 of file tablefind.cpp.
const int tesseract::kMinStrongTextValue = 6 |
Definition at line 62 of file colpartition.cpp.
const int tesseract::kMinSubscriptOffset = 20 |
Definition at line 33 of file boxword.cpp.
const int tesseract::kMinSuperscriptOffset = 20 |
Definition at line 35 of file boxword.cpp.
const double tesseract::kMinTabGradient = 4.0 |
Definition at line 62 of file alignedblob.cpp.
const int tesseract::kMinTextLineBlobRatio = 3 |
Definition at line 76 of file tabfind.cpp.
const int tesseract::kMinThickLineWidth = 12 |
Definition at line 48 of file linefind.cpp.
const int tesseract::kMinVerticalSearch = 3 |
Definition at line 38 of file tabfind.cpp.
const int tesseract::kMostlyOneDirRatio = 3 |
Definition at line 106 of file strokewidth.cpp.
const double tesseract::kNeighbourSearchFactor = 2.5 |
Definition at line 119 of file strokewidth.cpp.
const int tesseract::kNoisePadding = 4 |
Definition at line 47 of file ccnontextdetect.cpp.
const int tesseract::kNumbersPerBlob = 5 |
The 5 numbers output for each box (the usual 4 and a page number.)
Definition at line 1164 of file baseapi.cpp.
const int tesseract::kNumEndPoints = 3 |
Definition at line 27 of file detlinefit.cpp.
const int tesseract::kNumLiteralCnt = 5 |
Definition at line 36 of file tess_lang_model.h.
const char* tesseract::kOldVarsFile = "failed_vars.txt" |
Temp file used for storing current parameters before applying retry values.
Definition at line 90 of file baseapi.cpp.
const int tesseract::kOriginalNoiseMultiple = 8 |
Definition at line 43 of file ccnontextdetect.cpp.
const double tesseract::kParagraphEndingPreviousLineRatio = 1.3 |
Definition at line 129 of file tablefind.cpp.
const char * tesseract::kPDF = "\u202C" |
Definition at line 30 of file unicodes.cpp.
const double tesseract::kPhotoOffsetFraction = 0.375 |
Definition at line 50 of file ccnontextdetect.cpp.
const int tesseract::kPrime1 = 17 |
Definition at line 34 of file trainingsampleset.cpp.
const int tesseract::kPrime2 = 13 |
Definition at line 35 of file trainingsampleset.cpp.
const double tesseract::kRaggedFraction = 2.5 |
Definition at line 42 of file alignedblob.cpp.
const double tesseract::kRaggedGapFraction = 1.0 |
Definition at line 46 of file alignedblob.cpp.
const int tesseract::kRaggedGutterMultiple = 5 |
Definition at line 54 of file tabfind.cpp.
const int tesseract::kRandomizingCenter = 128 |
Definition at line 35 of file trainingsample.cpp.
const double tesseract::kRequiredColumns = 0.7 |
Definition at line 42 of file tablerecog.cpp.
const double tesseract::kRequiredFullJustifiedSpacing = 4.0 |
Definition at line 119 of file tablefind.cpp.
const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL" |
const int tesseract::kRGBRMSColors = 4 |
Definition at line 36 of file colpartition.h.
const char * tesseract::kRLE = "\u202A" |
Definition at line 29 of file unicodes.cpp.
const char * tesseract::kRLM = "\u200F" |
Definition at line 28 of file unicodes.cpp.
const double tesseract::kRMSFitScaling = 8.0 |
Definition at line 54 of file imagefind.cpp.
const int tesseract::kRulingVerticalMargin = 3 |
Definition at line 95 of file tablefind.cpp.
const int tesseract::kSearchRadius = 2 |
Definition at line 96 of file strokewidth.cpp.
const int tesseract::kSeedBlobsCountTh = 10 |
Definition at line 87 of file equationdetect.cpp.
const int tesseract::kSideSpaceMargin = 10 |
Definition at line 104 of file tablefind.cpp.
const int tesseract::kSimilarRaggedDist = 50 |
Definition at line 46 of file tabvector.cpp.
const int tesseract::kSimilarVectorDist = 10 |
Definition at line 43 of file tabvector.cpp.
const float tesseract::kSizeRatioToReject = 2.0 |
Definition at line 114 of file strokewidth.cpp.
const double tesseract::kSmallTableProjectionThreshold = 0.35 |
Definition at line 108 of file tablefind.cpp.
const int tesseract::kSmoothDecisionMargin = 4 |
Definition at line 69 of file colpartitiongrid.cpp.
const double tesseract::kSmoothFactor = 0.25 |
Definition at line 59 of file tabfind.cpp.
const double tesseract::kSplitPartitionSize = 2.0 |
Definition at line 46 of file tablefind.cpp.
const int tesseract::kSquareLimit = 25 |
Definition at line 32 of file trainingsampleset.cpp.
const int tesseract::kStateCnt = 4 |
Definition at line 35 of file tess_lang_model.h.
const int tesseract::kStrayLinePer = 6 |
Definition at line 46 of file paragraphs.cpp.
const double tesseract::kStrokeWidthCJK = 2.0 |
Definition at line 64 of file strokewidth.cpp.
const double tesseract::kStrokeWidthConstantTolerance = 2.0 |
Definition at line 51 of file colpartitiongrid.cpp.
const double tesseract::kStrokeWidthFractionalTolerance = 0.25 |
Definition at line 147 of file tablefind.cpp.
const double tesseract::kStrokeWidthFractionCJK = 0.25 |
Definition at line 63 of file strokewidth.cpp.
const double tesseract::kStrokeWidthFractionTolerance = 0.25 |
Allowed proportional change in stroke width to be the same font.
Definition at line 49 of file colpartitiongrid.cpp.
const double tesseract::kStrokeWidthTolerance = 1.5 |
Allowed constant change in stroke width to be the same font. Really 1.5 pixels.
Definition at line 61 of file strokewidth.cpp.
const double tesseract::kTableColumnThreshold = 3.0 |
Definition at line 91 of file tablefind.cpp.
const int tesseract::kTabRadiusFactor = 5 |
Definition at line 36 of file tabfind.cpp.
const char tesseract::kTesseractReject = '~' |
Character returned when Tesseract couldn't recognize the input as anything.
Definition at line 79 of file baseapi.cpp.
const int tesseract::kTestChar = -1 |
Definition at line 30 of file trainingsampleset.cpp.
const char* tesseract::kTextordDebugPix = "psdebug_pix" |
Definition at line 69 of file alignedblob.cpp.
const double tesseract::kThickLengthMultiple = 0.75 |
Definition at line 55 of file linefind.cpp.
const int tesseract::kThinLineFraction = 20 |
Denominator applied to the resolution to give the maximum width, in pixels, allowed for thin lines.
Definition at line 40 of file linefind.cpp.
const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25 |
Definition at line 53 of file colpartitiongrid.cpp.
const float tesseract::kUnclearDensityTh = 0.25 |
Definition at line 86 of file equationdetect.cpp.
const int tesseract::kUniChs[] |
Conversion table for non-latin characters. Maps characters out of the latin set into the latin set. TODO(rays) incorporate this translation into unicharset.
Definition at line 1235 of file baseapi.cpp.
const char tesseract::kUNLVReject = '~' |
Character used by UNLV error counter as a reject.
Definition at line 81 of file baseapi.cpp.
const char tesseract::kUNLVSuspect = '^' |
Character used by UNLV as a suspect marker.
Definition at line 83 of file baseapi.cpp.
const char * tesseract::kUTF8LineSeparator = "\u2028" |
Definition at line 25 of file unicodes.cpp.
const char * tesseract::kUTF8ParagraphSeparator = "\u2029" |
Definition at line 26 of file unicodes.cpp.
const double tesseract::kVerticalSpacing = -0.2 |
Definition at line 32 of file tablerecog.cpp.
const int tesseract::kVLineAlignment = 3 |
Definition at line 48 of file alignedblob.cpp.
const int tesseract::kVLineGutter = 1 |
Definition at line 50 of file alignedblob.cpp.
const int tesseract::kVLineMinLength = 500 |
Definition at line 58 of file alignedblob.cpp.
const int tesseract::kVLineSearchSize = 150 |
Definition at line 52 of file alignedblob.cpp.
const char* const tesseract::RTLReversePolicyNames[] |
bool tesseract::textord_dump_table_images = false |
"Paint table detection output"
Definition at line 150 of file tablefind.cpp.
bool tesseract::textord_show_tables = false |
"Show table regions"
Definition at line 151 of file tablefind.cpp.
double tesseract::textord_tabfind_aligned_gap_fraction = 0.75 |
"Fraction of height used as a minimum gap for aligned blobs."
Definition at line 87 of file tabfind.cpp.
bool tesseract::textord_tabfind_find_tables = true |
"run table detection"
Definition at line 74 of file colfind.cpp.
bool tesseract::textord_tabfind_force_vertical_text = false |
"Force using vertical text page mode"
Definition at line 49 of file strokewidth.cpp.
bool tesseract::textord_tabfind_only_strokewidths = false |
"Only run stroke widths"
Definition at line 46 of file strokewidth.cpp.
bool tesseract::textord_tabfind_show_blocks = false |
"Show final block bounds"
Definition at line 73 of file colfind.cpp.
bool tesseract::textord_tabfind_show_color_fit = false |
"Show stroke widths"
Definition at line 26 of file colpartitiongrid.cpp.
bool tesseract::textord_tabfind_show_columns = false |
"Show column bounds"
Definition at line 72 of file colfind.cpp.
bool tesseract::textord_tabfind_show_finaltabs = false |
"Show tab vectors"
Definition at line 85 of file tabfind.cpp.
bool tesseract::textord_tabfind_show_initial_partitions = false |
"Show partition bounds"
Definition at line 67 of file colfind.cpp.
bool tesseract::textord_tabfind_show_initialtabs = false |
"Show tab candidates"
Definition at line 84 of file tabfind.cpp.
int tesseract::textord_tabfind_show_partitions = 0 |
"Show partition bounds, waiting if >1"
Definition at line 71 of file colfind.cpp.
bool tesseract::textord_tabfind_show_reject_blobs = false |
"Show blobs rejected as noise"
Definition at line 69 of file colfind.cpp.
int tesseract::textord_tabfind_show_strokewidths = 0 |
"Show stroke widths"
Definition at line 45 of file strokewidth.cpp.
bool tesseract::textord_tabfind_vertical_horizontal_mix = true |
"find horizontal lines such as headers in vertical page mode"
Definition at line 51 of file strokewidth.cpp.
bool tesseract::textord_tabfind_vertical_text = true |
"Enable vertical detection"
Definition at line 47 of file strokewidth.cpp.
double tesseract::textord_tabfind_vertical_text_ratio = 0.5 |
"Fraction of textlines deemed vertical to use vertical page mode"
Definition at line 53 of file strokewidth.cpp.
bool tesseract::textord_tablefind_recognize_tables = false |
"Enables the table recognizer for table layout and filtering."
Definition at line 157 of file tablefind.cpp.
bool tesseract::textord_tablefind_show_mark = false |
"Debug table marking steps in detail"
Definition at line 153 of file tablefind.cpp.
bool tesseract::textord_tablefind_show_stats = false |
"Show page stats used in table finding"
Definition at line 155 of file tablefind.cpp.
double tesseract::textord_tabvector_vertical_box_ratio = 0.5 |
"Fraction of box matches required to declare a line vertical"
Definition at line 62 of file tabvector.cpp.
double tesseract::textord_tabvector_vertical_gap_fraction = 0.5 |
"max fraction of mean blob width allowed for vertical gaps in vertical text"
"Max fraction of mean blob width allowed for vertical gaps in vertical text"
Definition at line 59 of file tabvector.cpp.
CCUtilMutex tesseract::tprintfMutex |
Definition at line 51 of file ccutil.cpp.