Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ocropus add-ons

Functions

TESS_LOCAL void tesseract::TessBaseAPI::AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
TESS_LOCAL PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 (BLOCK_LIST *block_list)
TESS_LOCAL PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result)
TESS_LOCAL void tesseract::TessBaseAPI::DetectParagraphs (bool after_text_recognition)
static TESS_LOCAL int tesseract::TessBaseAPI::TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
TESS_LOCAL const PAGE_RES * tesseract::TessBaseAPI::GetPageRes () const

Detailed Description


Function Documentation

void tesseract::TessBaseAPI::AdaptToCharacter ( const char *  unichar_repr,
int  length,
float  baseline,
float  xheight,
float  descender,
float  ascender 
)
protected

Adapt to recognize the current image as the given character. The image must be preloaded and be just an image of a single character.

Adapt to recognize the current image as the given character. The image must be preloaded into pix_binary_ and be just an image of a single character.

Definition at line 1934 of file baseapi.cpp.

{
// Look up the unichar id for the character string (unichar_repr, length).
UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
// NOTE(review): the make_tesseract_blob argument list below ends on a comma,
// so the remainder of the call appears to be missing from this listing --
// confirm against baseapi.cpp.
TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
float threshold;
UNICHAR_ID best_class = 0;
float best_rating = -100;
// Classify to get a raw choice.
BLOB_CHOICE_LIST choices;
DENORM denorm;
tesseract_->AdaptiveClassifier(blob, denorm, &choices, NULL);
BLOB_CHOICE_IT choice_it;
choice_it.set_to_list(&choices);
// Walk the choice list and remember the entry with the largest rating()
// value. best_class and best_rating are not read again in the lines shown.
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
choice_it.forward()) {
if (choice_it.data()->rating() > best_rating) {
best_rating = choice_it.data()->rating();
best_class = choice_it.data()->unichar_id();
}
}
// NOTE(review): no assignment to threshold is visible before it is passed to
// AdaptToChar below; presumably that line is also missing from this listing
// -- confirm against baseapi.cpp.
// Adaptation is attempted only when the blob has outlines; the blob is
// deleted in either case.
if (blob->outlines)
tesseract_->AdaptToChar(blob, denorm, id, kUnknownFontinfoId, threshold);
delete blob;
}
void tesseract::TessBaseAPI::DetectParagraphs ( bool  after_text_recognition)
protected

Definition at line 1987 of file baseapi.cpp.

{
// Fetch the "paragraph_debug_level" integer variable; debug_level stays at
// its initial 0 unless GetIntVariable writes to it.
int debug_level = 0;
GetIntVariable("paragraph_debug_level", &debug_level);
MutableIterator *result_it = GetMutableIterator();
// NOTE(review): no declaration of `models` appears in the lines shown; it is
// presumably on a line missing from this listing -- confirm against
// baseapi.cpp.
do { // Detect paragraphs for this block
::tesseract::DetectParagraphs(debug_level, after_text_recognition,
result_it, &models);
// Fold this iteration's models into *paragraph_models_ via operator+=.
*paragraph_models_ += models;
} while (result_it->Next(RIL_BLOCK));
// The iterator obtained from GetMutableIterator() is deleted here.
delete result_it;
}
TESS_LOCAL const PAGE_RES* tesseract::TessBaseAPI::GetPageRes ( ) const
inline, protected

Definition at line 755 of file baseapi.h.

{
// Returns the page_res_ member unchanged.
return page_res_;
};
PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 ( BLOCK_LIST *  block_list)
protected

Recognize text doing one pass only, using settings for a given pass.

Definition at line 1971 of file baseapi.cpp.

{
// NOTE(review): the PAGE_RES constructor call below ends on a comma, so its
// remaining argument(s) -- and possibly the recognition call itself -- appear
// to be missing from this listing; confirm against baseapi.cpp.
// The newly allocated PAGE_RES is returned to the caller.
PAGE_RES *page_res = new PAGE_RES(block_list,
return page_res;
}
PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 ( BLOCK_LIST *  block_list,
PAGE_RES *  pass1_result 
)
protected

Definition at line 1978 of file baseapi.cpp.

{
// A fresh PAGE_RES is built from block_list only when the caller supplied no
// pass-1 result.
// NOTE(review): the PAGE_RES constructor call below ends on a comma, so its
// remaining argument(s) appear to be missing from this listing -- confirm
// against baseapi.cpp.
if (!pass1_result)
pass1_result = new PAGE_RES(block_list,
// Runs recog_all_words on the result; the trailing 2 presumably selects the
// second pass -- confirm against recog_all_words' declaration.
tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2);
return pass1_result;
}
int tesseract::TessBaseAPI::TesseractExtractResult ( char **  text,
int **  lengths,
float **  costs,
int **  x0,
int **  y0,
int **  x1,
int **  y1,
PAGE_RES *  page_res 
)
static, protected

Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.

Definition at line 2073 of file baseapi.cpp.

{
  // Collect one TESS_CHAR entry per result from page_res into a local list.
  TESS_CHAR_LIST char_list;
  TESS_CHAR_IT char_it(&char_list);
  extract_result(&char_it, page_res);
  char_it.move_to_first();

  // One slot per entry in each of the parallel output arrays. All arrays are
  // allocated with new[] and handed back through the out-parameters.
  int count = char_list.length();
  int total_bytes = 0;
  *lengths = new int[count];
  *costs = new float[count];
  *x0 = new int[count];
  *y0 = new int[count];
  *x1 = new int[count];
  *y1 = new int[count];

  // First sweep: fill length, cost and bounding box for every entry while
  // accumulating the total number of text bytes needed.
  int idx = 0;
  char_it.mark_cycle_pt();
  while (!char_it.cycled_list()) {
    TESS_CHAR *entry = char_it.data();
    (*lengths)[idx] = entry->length;
    total_bytes += (*lengths)[idx];
    (*costs)[idx] = entry->cost;
    (*x0)[idx] = entry->box.left();
    (*y0)[idx] = entry->box.bottom();
    (*x1)[idx] = entry->box.right();
    (*y1)[idx] = entry->box.top();
    char_it.forward();
    idx++;
  }

  // The text buffer is sized to exactly the sum of the lengths, so nothing is
  // appended after the last entry's bytes.
  char *cursor = new char[total_bytes];
  *text = cursor;

  // Second sweep: copy each entry's bytes back to back into the text buffer.
  char_it.move_to_first();
  char_it.mark_cycle_pt();
  while (!char_it.cycled_list()) {
    TESS_CHAR *entry = char_it.data();
    strncpy(cursor, entry->unicode_repr, entry->length);
    cursor += entry->length;
    char_it.forward();
  }

  // The return value is the number of entries written to each array.
  return count;
}