Tesseract
3.02
|
#include <classify.h>
Public Member Functions | |
Classify () | |
virtual | ~Classify () |
Dict & | getDict () |
const ShapeTable * | shape_table () const |
ADAPT_TEMPLATES | NewAdaptedTemplates (bool InitFromUnicharset) |
int | GetFontinfoId (ADAPT_CLASS Class, uinT8 ConfigId) |
int | PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, CP_RESULT_STRUCT *results) |
void | ReadNewCutoffs (FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs) |
void | PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) |
void | WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates) |
ADAPT_TEMPLATES | ReadAdaptedTemplates (FILE *File) |
FLOAT32 | ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch) |
void | FreeNormProtos () |
NORM_PROTOS * | ReadNormProtos (FILE *File, inT64 end_offset) |
void | ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class) |
INT_TEMPLATES | CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset) |
void | LearnWord (const char *filename, const char *rejmap, WERD_RES *word) |
void | LearnPieces (const char *filename, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word) |
void | InitAdaptiveClassifier (bool load_pre_trained_templates) |
void | InitAdaptedClass (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) |
void | AdaptToPunc (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold) |
void | AmbigClassifier (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results) |
void | MasterMatcher (INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int num_classes, const TBOX &blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS *final_results) |
void | ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, const uinT8 *cn_factors, INT_RESULT_STRUCT &int_result, ADAPT_RESULTS *final_results) |
double | ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, const uinT8 *cn_factors) |
void | ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices) |
void | AddNewResult (ADAPT_RESULTS *results, CLASS_ID class_id, int shape_id, FLOAT32 rating, bool adapted, int config, int fontinfo_id, int fontinfo_id2) |
int | GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures) |
void | DebugAdaptiveClassifier (TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results) |
void | GetAdaptThresholds (TWERD *Word, const DENORM &denorm, const WERD_CHOICE &BestChoice, const WERD_CHOICE &BestRawChoice, FLOAT32 Thresholds[]) |
PROTO_ID | MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) |
int | MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) |
void | MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, const DENORM &denorm, TBLOB *Blob) |
void | PrintAdaptiveMatchResults (FILE *File, ADAPT_RESULTS *Results) |
void | RemoveExtraPuncs (ADAPT_RESULTS *Results) |
void | RemoveBadMatches (ADAPT_RESULTS *Results) |
void | SetAdaptiveThreshold (FLOAT32 Threshold) |
void | ShowBestMatchFor (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int shape_id, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn, ADAPT_RESULTS *Results) |
STRING | ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const |
int | ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const |
int | ShapeIDToClassID (int shape_id) const |
UNICHAR_ID * | BaselineClassifier (TBLOB *Blob, const DENORM &denorm, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) |
int | CharNormClassifier (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results) |
int | CharNormTrainingSample (bool pruner_only, const TrainingSample &sample, GenericVector< ShapeRating > *results) |
UNICHAR_ID * | GetAmbiguities (TBLOB *Blob, const DENORM &denorm, CLASS_ID CorrectClass) |
void | DoAdaptiveMatch (TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results) |
void | AdaptToChar (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold) |
void | DisplayAdaptedChar (TBLOB *blob, const DENORM &denorm, INT_CLASS_STRUCT *int_class) |
int | AdaptableWord (TWERD *Word, const WERD_CHOICE &BestChoiceWord, const WERD_CHOICE &RawChoiceWord) |
void | EndAdaptiveClassifier () |
void | PrintAdaptiveStatistics (FILE *File) |
void | SettupPass1 () |
void | SettupPass2 () |
void | AdaptiveClassifier (TBLOB *Blob, const DENORM &denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results) |
void | ClassifyAsNoise (ADAPT_RESULTS *Results) |
void | ResetAdaptiveClassifierInternal () |
int | GetBaselineFeatures (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *CharNormArray, inT32 *BlobLength) |
int | GetCharNormFeatures (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *PrunerNormArray, uinT8 *CharNormArray, inT32 *BlobLength, inT32 *FeatureOutlineIndex) |
void | ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array) |
bool | TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config) |
void | UpdateAmbigsGroup (CLASS_ID class_id, const DENORM &denorm, TBLOB *Blob) |
void | ResetFeaturesHaveBeenExtracted () |
bool | AdaptiveClassifierIsFull () |
bool | LooksLikeGarbage (const DENORM &denorm, TBLOB *blob) |
void | RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox) |
void | ClearCharNormArray (uinT8 *char_norm_array) |
void | ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array) |
void | ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) |
INT_TEMPLATES | ReadIntTemplates (FILE *File) |
void | WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset) |
CLASS_ID | GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id) |
void | ShowMatchDisplay () |
UnicityTable< FontInfo > & | get_fontinfo_table () |
UnicityTable< FontSet > & | get_fontset_table () |
void | NormalizeOutlines (LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale) |
FEATURE_SET | ExtractOutlineFeatures (TBLOB *Blob) |
FEATURE_SET | ExtractPicoFeatures (TBLOB *Blob) |
ReadClassFile | |
Read in the training data from a file. All of the classes are read in. The results are stored in the global variable, 'TrainingData'. | |
void | ReadClassFile () |
![]() | |
CCStruct () | |
~CCStruct () | |
![]() | |
CUtil () | |
~CUtil () | |
void | read_variables (const char *filename, bool global_only) |
![]() | |
CCUtil () | |
virtual | ~CCUtil () |
void | main_setup (const char *argv0, const char *basename) |
ParamsVectors * | params () |
Protected Attributes | |
IntegerMatcher | im_ |
FEATURE_DEFS_STRUCT | feature_defs_ |
ShapeTable * | shape_table_ |
![]() | |
Image | image_ |
Additional Inherited Members | |
![]() | |
static const double | kDescenderFraction = 0.25 |
static const double | kXHeightFraction = 0.5 |
static const double | kAscenderFraction = 0.25 |
static const double | kXHeightCapRatio |
Definition at line 58 of file classify.h.
tesseract::Classify::Classify | ( | ) |
Definition at line 34 of file classify.cpp.
|
virtual |
Definition at line 178 of file classify.cpp.
int tesseract::Classify::AdaptableWord | ( | TWERD * | Word, |
const WERD_CHOICE & | BestChoiceWord, | ||
const WERD_CHOICE & | RawChoiceWord | ||
) |
Return TRUE if the specified word is acceptable for adaptation.
Globals: none
Word | current word |
BestChoiceWord | best overall choice for word with context |
RawChoiceWord | best choice for word without context |
Definition at line 894 of file adaptmatch.cpp.
void tesseract::Classify::AdaptiveClassifier | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
BLOB_CHOICE_LIST * | Choices, | ||
CLASS_PRUNER_RESULTS | CPResults | ||
) |
This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.
It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.
Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.
This routine also performs some simple speckle filtering.
Blob | blob to be classified | |
denorm | normalization/denormalization parameters | |
[out] | Choices | List of choices found by adaptive matcher. |
[out] | CPResults | Array of CPResultStruct of size MAX_NUM_CLASSES is filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher. |
Definition at line 178 of file adaptmatch.cpp.
|
inline |
Definition at line 319 of file classify.h.
void tesseract::Classify::AdaptToChar | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
CLASS_ID | ClassId, | ||
int | FontinfoId, | ||
FLOAT32 | Threshold | ||
) |
Blob | blob to add to templates for ClassId |
denorm | normalization/denormalization parameters |
ClassId | class to add blob to |
FontinfoId | font information from pre-trained templates |
Threshold | minimum match rating to existing template |
Globals:
Definition at line 933 of file adaptmatch.cpp.
void tesseract::Classify::AdaptToPunc | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
CLASS_ID | ClassId, | ||
int | FontinfoId, | ||
FLOAT32 | Threshold | ||
) |
Blob | blob to add to templates for ClassId |
denorm | normalization/denormalization parameters |
ClassId | class to add blob to |
FontinfoId | font information from pre-trained templates |
Threshold | minimum match rating to existing template |
Globals:
Definition at line 1082 of file adaptmatch.cpp.
void tesseract::Classify::AddNewResult | ( | ADAPT_RESULTS * | results, |
CLASS_ID | class_id, | ||
int | shape_id, | ||
FLOAT32 | rating, | ||
bool | adapted, | ||
int | config, | ||
int | fontinfo_id, | ||
int | fontinfo_id2 | ||
) |
This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.
Globals:
[out] | results | results to add new result to |
class_id | class of new result | |
shape_id | shape index | |
rating | rating of new result | |
adapted | adapted match or not | |
config | config id of new result | |
fontinfo_id | font information of the new result | |
fontinfo_id2 | font information of the 2nd choice result |
Definition at line 1142 of file adaptmatch.cpp.
void tesseract::Classify::AmbigClassifier | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
INT_TEMPLATES | Templates, | ||
ADAPT_CLASS * | Classes, | ||
UNICHAR_ID * | Ambiguities, | ||
ADAPT_RESULTS * | Results | ||
) |
This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.
Globals:
Blob | blob to be classified | |
denorm | normalization/denormalization parameters | |
Templates | built-in templates to classify against | |
Classes | adapted class templates | |
Ambiguities | array of class id's to match against | |
[out] | Results | place to put match results |
Definition at line 1205 of file adaptmatch.cpp.
UNICHAR_ID * tesseract::Classify::BaselineClassifier | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
ADAPT_TEMPLATES | Templates, | ||
ADAPT_RESULTS * | Results | ||
) |
This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.
Globals:
Blob | blob to be classified |
denorm | normalization/denormalization parameters |
Templates | current set of adapted templates |
Results | place to put match results |
Definition at line 1423 of file adaptmatch.cpp.
int tesseract::Classify::CharNormClassifier | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
INT_TEMPLATES | Templates, | ||
ADAPT_RESULTS * | Results | ||
) |
This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.
Blob | blob to be classified |
denorm | normalization/denormalization parameters |
Templates | templates to classify unknown against |
Results | place to put match results |
Globals:
Definition at line 1487 of file adaptmatch.cpp.
int tesseract::Classify::CharNormTrainingSample | ( | bool | pruner_only, |
const TrainingSample & | sample, | ||
GenericVector< ShapeRating > * | results | ||
) |
Definition at line 1531 of file adaptmatch.cpp.
int tesseract::Classify::ClassAndConfigIDToFontOrShapeID | ( | int | class_id, |
int | int_result_config | ||
) | const |
Definition at line 2733 of file adaptmatch.cpp.
STRING tesseract::Classify::ClassIDToDebugStr | ( | const INT_TEMPLATES_STRUCT * | templates, |
int | class_id, | ||
int | config_id | ||
) | const |
Definition at line 2720 of file adaptmatch.cpp.
void tesseract::Classify::ClassifyAsNoise | ( | ADAPT_RESULTS * | Results | ) |
This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.
Results | results to add noise classification to |
Globals:
Definition at line 1615 of file adaptmatch.cpp.
void tesseract::Classify::ClearCharNormArray | ( | uinT8 * | char_norm_array | ) |
For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.
Globals:
char_norm_array | array to be cleared |
Definition at line 48 of file float2int.cpp.
void tesseract::Classify::ComputeCharNormArrays | ( | FEATURE_STRUCT * | norm_feature, |
INT_TEMPLATES_STRUCT * | templates, | ||
uinT8 * | char_norm_array, | ||
uinT8 * | pruner_array | ||
) |
Definition at line 2092 of file adaptmatch.cpp.
double tesseract::Classify::ComputeCorrectedRating | ( | bool | debug, |
int | unichar_id, | ||
double | cp_rating, | ||
double | im_rating, | ||
int | feature_misses, | ||
int | bottom, | ||
int | top, | ||
int | blob_length, | ||
const uinT8 * | cn_factors | ||
) |
Definition at line 1360 of file adaptmatch.cpp.
void tesseract::Classify::ComputeIntCharNormArray | ( | const FEATURE_STRUCT & | norm_feature, |
uinT8 * | char_norm_array | ||
) |
For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. char_norm_array is indexed by unichar_id.
Globals:
norm_feature | character normalization feature | |
[out] | char_norm_array | place to put results of size unicharset.size() |
Definition at line 69 of file float2int.cpp.
void tesseract::Classify::ComputeIntFeatures | ( | FEATURE_SET | Features, |
INT_FEATURE_ARRAY | IntFeatures | ||
) |
This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.
Globals:
Features | floating point pico-features to be converted | |
[out] | IntFeatures | array to put converted features into |
Definition at line 94 of file float2int.cpp.
FLOAT32 tesseract::Classify::ComputeNormMatch | ( | CLASS_ID | ClassId, |
const FEATURE_STRUCT & | feature, | ||
BOOL8 | DebugMatch | ||
) |
Definition at line 73 of file normmatch.cpp.
void tesseract::Classify::ConvertMatchesToChoices | ( | const DENORM & | denorm, |
const TBOX & | box, | ||
ADAPT_RESULTS * | Results, | ||
BLOB_CHOICE_LIST * | Choices | ||
) |
The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.
Definition at line 1675 of file adaptmatch.cpp.
Definition at line 528 of file intproto.cpp.
INT_TEMPLATES tesseract::Classify::CreateIntTemplates | ( | CLASSES | FloatProtos, |
const UNICHARSET & | target_unicharset | ||
) |
Definition at line 573 of file intproto.cpp.
void tesseract::Classify::DebugAdaptiveClassifier | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
ADAPT_RESULTS * | Results | ||
) |
Blob | blob whose classification is being debugged |
denorm | normalization/denormalization parameters |
Results | results of match being debugged |
Globals: none
Definition at line 1748 of file adaptmatch.cpp.
void tesseract::Classify::DisplayAdaptedChar | ( | TBLOB * | blob, |
const DENORM & | denorm, | ||
INT_CLASS_STRUCT * | int_class | ||
) |
Definition at line 1036 of file adaptmatch.cpp.
void tesseract::Classify::DoAdaptiveMatch | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
ADAPT_RESULTS * | Results | ||
) |
This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.
Blob | blob to be classified |
denorm | normalization/denormalization parameters |
Results | place to put match results |
Globals:
Definition at line 1808 of file adaptmatch.cpp.
void tesseract::Classify::EndAdaptiveClassifier | ( | ) |
This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.
Globals:
Definition at line 476 of file adaptmatch.cpp.
void tesseract::Classify::ExpandShapesAndApplyCorrections | ( | ADAPT_CLASS * | classes, |
bool | debug, | ||
int | class_id, | ||
int | bottom, | ||
int | top, | ||
float | cp_rating, | ||
int | blob_length, | ||
const uinT8 * | cn_factors, | ||
INT_RESULT_STRUCT & | int_result, | ||
ADAPT_RESULTS * | final_results | ||
) |
Definition at line 1295 of file adaptmatch.cpp.
FEATURE_SET tesseract::Classify::ExtractOutlineFeatures | ( | TBLOB * | Blob | ) |
Definition at line 36 of file outfeat.cpp.
FEATURE_SET tesseract::Classify::ExtractPicoFeatures | ( | TBLOB * | Blob | ) |
Definition at line 57 of file picofeat.cpp.
void tesseract::Classify::FreeNormProtos | ( | ) |
Definition at line 157 of file normmatch.cpp.
|
inline |
Definition at line 336 of file classify.h.
|
inline |
Definition at line 339 of file classify.h.
int tesseract::Classify::GetAdaptiveFeatures | ( | TBLOB * | Blob, |
INT_FEATURE_ARRAY | IntFeatures, | ||
FEATURE_SET * | FloatFeatures | ||
) |
This routine sets up the feature extractor to extract baseline normalized pico-features.
The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.
Globals: none
Blob | blob to extract features from | |
[out] | IntFeatures | array to fill with integer features |
[out] | FloatFeatures | place to return actual floating-pt features |
Definition at line 854 of file adaptmatch.cpp.
void tesseract::Classify::GetAdaptThresholds | ( | TWERD * | Word, |
const DENORM & | denorm, | ||
const WERD_CHOICE & | BestChoice, | ||
const WERD_CHOICE & | BestRawChoice, | ||
FLOAT32 | Thresholds[] | ||
) |
This routine tries to estimate how tight the adaptation threshold should be set for each character in the current word. In general, the routine tries to set tighter thresholds for a character when the current set of templates would have made an error on that character. It tries to set a threshold tight enough to eliminate the error. Two different sets of rules can be used to determine the desired thresholds.
Word | current word | |
denorm | normalization/denormalization parameters | |
BestChoice | best choice for current word with context | |
BestRawChoice | best choice for current word without context | |
[out] | Thresholds | array of thresholds to be filled in |
Globals:
Definition at line 1869 of file adaptmatch.cpp.
UNICHAR_ID * tesseract::Classify::GetAmbiguities | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
CLASS_ID | CorrectClass | ||
) |
This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.
Blob | blob to get classification ambiguities for |
denorm | normalization/denormalization parameters |
CorrectClass | correct class for Blob |
Globals:
Definition at line 1898 of file adaptmatch.cpp.
int tesseract::Classify::GetBaselineFeatures | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
INT_TEMPLATES | Templates, | ||
INT_FEATURE_ARRAY | IntFeatures, | ||
uinT8 * | CharNormArray, | ||
inT32 * | BlobLength | ||
) |
This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob. The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features. It then copies the baseline features into the IntFeatures array provided by the caller.
Blob | blob to extract features from |
denorm | normalization/denormalization parameters |
Templates | used to compute char norm adjustments |
IntFeatures | array to fill with integer features |
CharNormArray | array to fill with dummy char norm adjustments |
BlobLength | length of blob in baseline-normalized units |
Globals:
Definition at line 1957 of file adaptmatch.cpp.
int tesseract::Classify::GetCharNormFeatures | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
INT_TEMPLATES | Templates, | ||
INT_FEATURE_ARRAY | IntFeatures, | ||
uinT8 * | PrunerNormArray, | ||
uinT8 * | CharNormArray, | ||
inT32 * | BlobLength, | ||
inT32 * | FeatureOutlineArray | ||
) |
This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.
The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.
It then copies the char norm features into the IntFeatures array provided by the caller.
Blob | blob to extract features from |
denorm | normalization/denormalization parameters |
Templates | used to compute char norm adjustments |
IntFeatures | array to fill with integer features |
PrunerNormArray | Array of factors from blob normalization process |
CharNormArray | array to fill with dummy char norm adjustments |
BlobLength | length of blob in baseline-normalized units |
FeatureOutlineArray | |
Globals:
Definition at line 2045 of file adaptmatch.cpp.
CLASS_ID tesseract::Classify::GetClassToDebug | ( | const char * | Prompt, |
bool * | adaptive_on, | ||
bool * | pretrained_on, | ||
int * | shape_id | ||
) |
Definition at line 1432 of file intproto.cpp.
|
inline |
Definition at line 62 of file classify.h.
int tesseract::Classify::GetFontinfoId | ( | ADAPT_CLASS | Class, |
uinT8 | ConfigId | ||
) |
Definition at line 190 of file adaptive.cpp.
void tesseract::Classify::InitAdaptedClass | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
CLASS_ID | ClassId, | ||
int | FontinfoId, | ||
ADAPT_CLASS | Class, | ||
ADAPT_TEMPLATES | Templates | ||
) |
This routine creates a new adapted class and uses Blob as the model for the first config in that class.
Blob | blob to model new class after |
denorm | normalization/denormalization parameters |
ClassId | id of the class to be initialized |
FontinfoId | font information inferred from pre-trained templates |
Class | adapted class to be initialized |
Templates | adapted templates to add new class to |
Globals:
Definition at line 758 of file adaptmatch.cpp.
void tesseract::Classify::InitAdaptiveClassifier | ( | bool | load_pre_trained_templates | ) |
This routine reads in the training information needed by the adaptive classifier and saves it into global variables. Parameters: load_pre_trained_templates - indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file. Globals: BuiltInTemplatesFile - file to get built-in temps from; BuiltInCutoffsFile - file to get avg. feat per class from; classify_use_pre_adapted_templates - enables use of pre-adapted templates.
Definition at line 545 of file adaptmatch.cpp.
void tesseract::Classify::LearnPieces | ( | const char * | filename, |
int | start, | ||
int | length, | ||
float | threshold, | ||
CharSegmentationType | segmentation, | ||
const char * | correct_text, | ||
WERD_RES * | word | ||
) |
Definition at line 394 of file adaptmatch.cpp.
void tesseract::Classify::LearnWord | ( | const char * | filename, |
const char * | rejmap, | ||
WERD_RES * | word | ||
) |
Definition at line 254 of file adaptmatch.cpp.
Definition at line 1991 of file adaptmatch.cpp.
int tesseract::Classify::MakeNewTemporaryConfig | ( | ADAPT_TEMPLATES | Templates, |
CLASS_ID | ClassId, | ||
int | FontinfoId, | ||
int | NumFeatures, | ||
INT_FEATURE_ARRAY | Features, | ||
FEATURE_SET | FloatFeatures | ||
) |
Templates | adapted templates to add new config to |
ClassId | class id to associate with new config |
FontinfoId | font information inferred from pre-trained templates |
NumFeatures | number of features in IntFeatures |
Features | features describing model for new config |
FloatFeatures | floating-pt representation of features |
Definition at line 2136 of file adaptmatch.cpp.
PROTO_ID tesseract::Classify::MakeNewTempProtos | ( | FEATURE_SET | Features, |
int | NumBadFeat, | ||
FEATURE_ID | BadFeat[], | ||
INT_CLASS | IClass, | ||
ADAPT_CLASS | Class, | ||
BIT_VECTOR | TempProtoMask | ||
) |
This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.
Features | floating-pt features describing new character |
NumBadFeat | number of bad features to turn into protos |
BadFeat | feature id's of bad features |
IClass | integer class templates to add new protos to |
Class | adapted class templates to add new protos to |
TempProtoMask | proto mask to add new protos to |
Globals: none
Definition at line 2233 of file adaptmatch.cpp.
void tesseract::Classify::MakePermanent | ( | ADAPT_TEMPLATES | Templates, |
CLASS_ID | ClassId, | ||
int | ConfigId, | ||
const DENORM & | denorm, | ||
TBLOB * | Blob | ||
) |
Templates | current set of adaptive templates |
ClassId | class containing config to be made permanent |
ConfigId | config to be made permanent |
denorm | normalization/denormalization parameters |
Blob | current blob being adapted to |
Globals: none
Definition at line 2323 of file adaptmatch.cpp.
void tesseract::Classify::MasterMatcher | ( | INT_TEMPLATES | templates, |
inT16 | num_features, | ||
const INT_FEATURE_STRUCT * | features, | ||
const uinT8 * | norm_factors, | ||
ADAPT_CLASS * | classes, | ||
int | debug, | ||
int | num_classes, | ||
const TBOX & | blob_box, | ||
CLASS_PRUNER_RESULTS | results, | ||
ADAPT_RESULTS * | final_results | ||
) |
Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.
Definition at line 1257 of file adaptmatch.cpp.
ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates | ( | bool | InitFromUnicharset | ) |
Allocates memory for adapted templates.
InitFromUnicharset | if true, add an empty class for each char in unicharset to the newly created templates |
Definition at line 167 of file adaptive.cpp.
Definition at line 346 of file mfoutline.cpp.
void tesseract::Classify::PrintAdaptedTemplates | ( | FILE * | File, |
ADAPT_TEMPLATES | Templates | ||
) |
This routine prints a summary of the adapted templates in Templates to File.
File | open text file to print Templates to |
Templates | adapted templates to print to File |
Definition at line 273 of file adaptive.cpp.
void tesseract::Classify::PrintAdaptiveMatchResults | ( | FILE * | File, |
ADAPT_RESULTS * | Results | ||
) |
This routine writes the matches in Results to File.
File | open text file to write Results to |
Results | match results to write to File |
Globals: none
Definition at line 2424 of file adaptmatch.cpp.
void tesseract::Classify::PrintAdaptiveStatistics | ( | FILE * | File | ) |
Print to File the statistics which have been gathered for the adaptive matcher.
File | open text file to print adaptive statistics to |
Globals: none
Definition at line 659 of file adaptmatch.cpp.
int tesseract::Classify::PruneClasses | ( | const INT_TEMPLATES_STRUCT * | int_templates, |
int | num_features, | ||
const INT_FEATURE_STRUCT * | features, | ||
const uinT8 * | normalization_factors, | ||
const uinT16 * | expected_num_features, | ||
CP_RESULT_STRUCT * | results | ||
) |
Definition at line 406 of file intmatcher.cpp.
ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates | ( | FILE * | File | ) |
Read a set of adapted templates from File and return a ptr to the templates.
File | open text file to read adapted templates from |
Definition at line 371 of file adaptive.cpp.
void tesseract::Classify::ReadClassFile | ( | ) |
Definition at line 293 of file protos.cpp.
INT_TEMPLATES tesseract::Classify::ReadIntTemplates | ( | FILE * | File | ) |
Definition at line 786 of file intproto.cpp.
void tesseract::Classify::ReadNewCutoffs | ( | FILE * | CutoffFile, |
bool | swap, | ||
inT64 | end_offset, | ||
CLASS_CUTOFF_ARRAY | Cutoffs | ||
) |
Definition at line 42 of file cutoffs.cpp.
NORM_PROTOS * tesseract::Classify::ReadNormProtos | ( | FILE * | File, |
inT64 | end_offset | ||
) |
Definition at line 230 of file normmatch.cpp.
void tesseract::Classify::RefreshDebugWindow | ( | ScrollView ** | win, |
const char * | msg, | ||
int | y_offset, | ||
const TBOX & | wbox | ||
) |
Definition at line 228 of file adaptmatch.cpp.
void tesseract::Classify::RemoveBadMatches | ( | ADAPT_RESULTS * | Results | ) |
This routine steps thru each matching class in Results and removes it from the match list if its rating is worse than the BestRating plus a pad. In other words, all good matches get moved to the front of the classes array.
Results | contains matches to be filtered |
Globals:
Definition at line 2450 of file adaptmatch.cpp.
void tesseract::Classify::RemoveExtraPuncs | ( | ADAPT_RESULTS * | Results | ) |
This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.
Results | contains matches to be filtered |
Definition at line 2503 of file adaptmatch.cpp.
void tesseract::Classify::ResetAdaptiveClassifierInternal | ( | ) |
Definition at line 636 of file adaptmatch.cpp.
void tesseract::Classify::ResetFeaturesHaveBeenExtracted | ( | ) |
Definition at line 1985 of file adaptmatch.cpp.
void tesseract::Classify::SetAdaptiveThreshold | ( | FLOAT32 | Threshold | ) |
This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.
Threshold | threshold for creating new templates |
Globals:
Definition at line 2548 of file adaptmatch.cpp.
void tesseract::Classify::SettupPass1 | ( | ) |
This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).
Globals:
Definition at line 710 of file adaptmatch.cpp.
void tesseract::Classify::SettupPass2 | ( | ) |
This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.
Globals:
Definition at line 730 of file adaptmatch.cpp.
const ShapeTable* tesseract::Classify::shape_table | ( | ) | const |
inline |
Definition at line 66 of file classify.h.
int tesseract::Classify::ShapeIDToClassID | ( | int | shape_id | ) | const |
Definition at line 2746 of file adaptmatch.cpp.
void tesseract::Classify::ShowBestMatchFor | ( | TBLOB * | Blob, |
const DENORM & | denorm, | ||
CLASS_ID | ClassId, | ||
int | shape_id, | ||
BOOL8 | AdaptiveOn, | ||
BOOL8 | PreTrainedOn, | ||
ADAPT_RESULTS * | Results | ||
) |
This routine compares Blob to both sets of templates (adaptive and pre-trained) and then displays debug information for the config which matched best.
Blob | blob to show best matching config for |
denorm | normalization/denormalization parameters |
ClassId | class whose configs are to be searched |
shape_id | shape index |
AdaptiveOn | TRUE if adaptive configs are enabled |
PreTrainedOn | TRUE if pretrained configs are enabled |
Results | results of match being debugged |
Globals:
Definition at line 2579 of file adaptmatch.cpp.
void tesseract::Classify::ShowMatchDisplay | ( | ) |
Definition at line 1096 of file intproto.cpp.
bool tesseract::Classify::TempConfigReliable | ( | CLASS_ID | class_id, |
const TEMP_CONFIG & | config | ||
) |
Definition at line 2762 of file adaptmatch.cpp.
void tesseract::Classify::UpdateAmbigsGroup | ( | CLASS_ID | class_id, |
const DENORM & | denorm, | ||
TBLOB * | Blob | ||
) |
Definition at line 2799 of file adaptmatch.cpp.
void tesseract::Classify::WriteAdaptedTemplates | ( | FILE * | File, |
ADAPT_TEMPLATES | Templates | ||
) |
This routine saves Templates to File in a binary format.
File | open file to write Templates to |
Templates | set of adapted templates to write to File |
Definition at line 507 of file adaptive.cpp.
void tesseract::Classify::WriteIntTemplates | ( | FILE * | File, |
INT_TEMPLATES | Templates, | ||
const UNICHARSET & | target_unicharset | ||
) |
Definition at line 1155 of file intproto.cpp.
ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates |
Definition at line 430 of file classify.h.
BIT_VECTOR tesseract::Classify::AllConfigsOff |
Definition at line 437 of file classify.h.
BIT_VECTOR tesseract::Classify::AllConfigsOn |
Definition at line 435 of file classify.h.
BIT_VECTOR tesseract::Classify::AllProtosOff |
Definition at line 436 of file classify.h.
BIT_VECTOR tesseract::Classify::AllProtosOn |
Definition at line 433 of file classify.h.
double tesseract::Classify::certainty_scale = 20.0 |
"Certainty scaling factor"
Definition at line 398 of file classify.h.
int tesseract::Classify::classify_adapt_feature_threshold = 230 |
"Threshold for good features during adaptive 0-255"
Definition at line 404 of file classify.h.
int tesseract::Classify::classify_adapt_proto_threshold = 230 |
"Threshold for good protos during adaptive 0-255"
Definition at line 402 of file classify.h.
bool tesseract::Classify::classify_bln_numeric_mode = 0 |
"Assume the input is numbers [0-9]."
Definition at line 455 of file classify.h.
double tesseract::Classify::classify_char_norm_range = 0.2 |
"Character Normalization Range ..."
Definition at line 363 of file classify.h.
double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0 |
"Exclude fragments that do not match any whole character with at least this certainty"
Definition at line 410 of file classify.h.
int tesseract::Classify::classify_class_pruner_multiplier = 30 |
"Class Pruner Multiplier 0-255: "
Definition at line 422 of file classify.h.
int tesseract::Classify::classify_class_pruner_threshold = 229 |
"Class Pruner Threshold 0-255"
Definition at line 420 of file classify.h.
int tesseract::Classify::classify_cp_cutoff_strength = 7 |
"Class Pruner CutoffStrength: "
Definition at line 424 of file classify.h.
bool tesseract::Classify::classify_debug_character_fragments = FALSE |
"Bring up graphical debugging windows for fragments training"
Definition at line 412 of file classify.h.
int tesseract::Classify::classify_debug_level = 0 |
"Classify debug level"
Definition at line 357 of file classify.h.
bool tesseract::Classify::classify_enable_adaptive_debugger = 0 |
"Enable match debugger"
Definition at line 377 of file classify.h.
bool tesseract::Classify::classify_enable_adaptive_matcher = 1 |
"Enable adaptive classifier"
Definition at line 372 of file classify.h.
bool tesseract::Classify::classify_enable_learning = true |
"Enable adaptive classifier"
Definition at line 356 of file classify.h.
int tesseract::Classify::classify_integer_matcher_multiplier = 14 |
"Integer Matcher Multiplier 0-255: "
Definition at line 426 of file classify.h.
char* tesseract::Classify::classify_learn_debug_str = "" |
"Class str to debug learning"
Definition at line 416 of file classify.h.
int tesseract::Classify::classify_learning_debug_level = 0 |
"Learning Debug Level: "
Definition at line 380 of file classify.h.
double tesseract::Classify::classify_max_norm_scale_x = 0.325 |
"Max char x-norm scale ..."
Definition at line 365 of file classify.h.
double tesseract::Classify::classify_max_norm_scale_y = 0.325 |
"Max char y-norm scale ..."
Definition at line 367 of file classify.h.
double tesseract::Classify::classify_min_norm_scale_x = 0.0 |
"Min char x-norm scale ..."
Definition at line 364 of file classify.h.
double tesseract::Classify::classify_min_norm_scale_y = 0.0 |
"Min char y-norm scale ..."
Definition at line 366 of file classify.h.
double tesseract::Classify::classify_misfit_junk_penalty = 0.0 |
"Penalty to apply when a non-alnum is vertically out of its expected textline position"
Definition at line 396 of file classify.h.
int tesseract::Classify::classify_norm_method = character |
"Normalization Method ..."
Definition at line 361 of file classify.h.
bool tesseract::Classify::classify_save_adapted_templates = 0 |
"Save adapted templates to a file"
Definition at line 376 of file classify.h.
bool tesseract::Classify::classify_use_pre_adapted_templates = 0 |
"Use pre-adapted classifier templates"
Definition at line 374 of file classify.h.
bool tesseract::Classify::disable_character_fragments = TRUE |
"Do not include character fragments in the results of the classifier"
Definition at line 407 of file classify.h.
bool tesseract::Classify::EnableLearning |
Definition at line 439 of file classify.h.
|
protected |
Definition at line 459 of file classify.h.
UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_ |
Definition at line 443 of file classify.h.
UnicityTable<FontSet> tesseract::Classify::fontset_table_ |
Definition at line 451 of file classify.h.
int tesseract::Classify::il1_adaption_test = 0 |
"Dont adapt to i/I at beginning of word"
Definition at line 453 of file classify.h.
|
protected |
Definition at line 455 of file classify.h.
double tesseract::Classify::matcher_avg_noise_size = 12.0 |
"Avg. noise blob length: "
Definition at line 386 of file classify.h.
double tesseract::Classify::matcher_bad_match_pad = 0.15 |
"Bad Match Pad (0-1)"
Definition at line 384 of file classify.h.
double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015 |
"Maximum angle delta for prototype clustering"
Definition at line 393 of file classify.h.
int tesseract::Classify::matcher_debug_flags = 0 |
"Matcher Debug Flags"
Definition at line 379 of file classify.h.
int tesseract::Classify::matcher_debug_level = 0 |
"Matcher Debug Level"
Definition at line 378 of file classify.h.
bool tesseract::Classify::matcher_debug_separate_windows = FALSE |
"Use two different windows for debugging the matching: One for the protos and one for the features."
Definition at line 415 of file classify.h.
double tesseract::Classify::matcher_good_threshold = 0.125 |
"Good Match (0-1)"
Definition at line 381 of file classify.h.
double tesseract::Classify::matcher_great_threshold = 0.0 |
"Great Match (0-1)"
Definition at line 382 of file classify.h.
int tesseract::Classify::matcher_min_examples_for_prototyping = 3 |
"Reliable Config Threshold"
Definition at line 389 of file classify.h.
double tesseract::Classify::matcher_perfect_threshold = 0.02 |
"Perfect Match (0-1)"
Definition at line 383 of file classify.h.
int tesseract::Classify::matcher_permanent_classes_min = 1 |
"Min # of permanent classes"
Definition at line 387 of file classify.h.
double tesseract::Classify::matcher_rating_margin = 0.1 |
"New template margin (0-1)"
Definition at line 385 of file classify.h.
int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5 |
"Enable adaption even if the ambiguities have not been seen"
Definition at line 391 of file classify.h.
NORM_PROTOS* tesseract::Classify::NormProtos |
Definition at line 441 of file classify.h.
INT_TEMPLATES tesseract::Classify::PreTrainedTemplates |
Definition at line 426 of file classify.h.
bool tesseract::Classify::prioritize_division = FALSE |
"Prioritize blob division over chopping"
Definition at line 354 of file classify.h.
BIT_VECTOR tesseract::Classify::PrunedProtos |
Definition at line 434 of file classify.h.
double tesseract::Classify::rating_scale = 1.5 |
"Rating scaling factor"
Definition at line 397 of file classify.h.
|
protected |
Definition at line 464 of file classify.h.
BIT_VECTOR tesseract::Classify::TempProtoMask |
Definition at line 438 of file classify.h.
bool tesseract::Classify::tess_bn_matching = 0 |
"Baseline Normalized Matching"
Definition at line 371 of file classify.h.
bool tesseract::Classify::tess_cn_matching = 0 |
"Character Normalized Matching"
Definition at line 370 of file classify.h.
double tesseract::Classify::tessedit_class_miss_scale = 0.00390625 |
"Scale factor for features not used"
Definition at line 400 of file classify.h.
int tesseract::Classify::tessedit_single_match = FALSE |
"Top choice only from CP"
Definition at line 355 of file classify.h.