65 #include "config_auto.h"
// NOTE(review): each line below carries a stray leading number from a bad
// extraction of the original file — content otherwise left byte-identical.

// File-name suffix for saved adapted-template files.
68 #define ADAPT_TEMPLATE_SUFFIX ".a"
// Cap on the number of match results kept per classification call —
// presumably the size of a fixed match array; TODO confirm against the
// (not visible here) results structure.
70 #define MAX_MATCHES 10
// Feature-count threshold above which a blob is treated as unlikely/noise —
// assumption from the name; the use site is not visible here.
71 #define UNLIKELY_NUM_FEAT 200
// Maximum word length (in blobs/chars) still considered for adaptation —
// assumption from the name; TODO confirm at the use site.
73 #define MAX_ADAPTABLE_WERD_SIZE 40
// Small score adjustment applied when deciding adaptability; exact role not
// visible in this chunk.
75 #define ADAPTABLE_WERD_ADJUSTMENT (0.05)
// Vertical offset between the feature-space Y shift and the baseline-
// normalized Y shift (both defined elsewhere).
77 #define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
// Ratings are in [0, 1]; 1.0 is the worst possible score.
79 #define WORST_POSSIBLE_RATING (1.0)
// True when a rating is only marginally acceptable, i.e. worse (numerically
// greater) than matcher_great_threshold.
124 #define MarginalMatch(Rating) \
125 ((Rating) > matcher_great_threshold)
// Resets the cached feature-extraction flag so the next classification
// re-runs integer feature extraction (FALSE == not yet extracted).
127 #define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
180 BLOB_CHOICE_LIST *Choices,
182 assert(Choices !=
NULL);
189 if (CPResults !=
NULL)
207 #ifndef GRAPHICS_DISABLED
212 NumClassesOutput += Choices->length();
213 if (Choices->length() == 0) {
215 tprintf (
"Empty classification!\n");
216 Choices =
new BLOB_CHOICE_LIST();
217 BLOB_CHOICE_IT temp_it;
218 temp_it.set_to_list(Choices);
229 int y_offset,
const TBOX &wbox) {
230 #ifndef GRAPHICS_DISABLED
231 const int kSampleSpaceWidth = 500;
233 *win =
new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
234 kSampleSpaceWidth * 2, 200,
true);
237 (*win)->Pen(64, 64, 64);
242 (*win)->ZoomToRectangle(wbox.
left(), wbox.
top(),
244 #endif // GRAPHICS_DISABLED
257 if (word_len == 0)
return;
259 float* thresholds =
NULL;
260 if (filename ==
NULL) {
271 tprintf(
"\n\nAdapting to word = %s\n",
273 thresholds =
new float[word_len];
278 char prev_map_char =
'0';
280 #ifndef GRAPHICS_DISABLED
282 if (learn_fragmented_word_debug_win_ !=
NULL) {
292 #endif // GRAPHICS_DISABLED
294 for (
int ch = 0; ch < word_len; ++ch) {
298 char rej_map_char = rejmap !=
NULL ? *rejmap++ :
'1';
301 float threshold = thresholds !=
NULL ? thresholds[ch] : 0.0f;
310 bool garbage =
false;
312 for (
int i = 0; i < start_blob; ++i) frag_blob = frag_blob->
next;
314 for (frag = 0; frag < word->
best_state[ch]; ++frag) {
318 frag_blob = frag_blob->
next;
325 for (frag = 0; frag < word->
best_state[ch]; ++frag) {
330 tokens[0].
string(), frag, word->
best_state[ch],
334 for (
int i = 0; i < tokens.
size(); i++) {
335 full_string += tokens[i];
336 if (i != tokens.
size() - 1)
380 prev_map_char = rej_map_char;
382 delete [] thresholds;
396 const char* correct_text,
WERD_RES *word) {
405 start, start + length - 1);
408 for (
int i = 0; i < start; ++i)
413 if (rotated_blob ==
NULL)
416 #ifndef GRAPHICS_DISABLED
422 learn_debug_win_->
Update();
427 blob->
plot(learn_fragments_debug_win_,
429 learn_fragments_debug_win_->
Update();
431 #endif // GRAPHICS_DISABLED
433 if (filename !=
NULL) {
445 tprintf(
"Adapting to char = %s, thr= %g font_id= %d\n",
449 AdaptToChar(rotated_blob, *denorm, class_id, font_id, threshold);
451 tprintf(
"Can't adapt to %s not in unicharset\n", correct_text);
453 if (rotated_blob != blob) {
484 File = fopen (Filename.
string(),
"wb");
486 cprintf (
"Unable to save adapted templates to %s!\n", Filename.
string());
488 cprintf (
"\nSaving adapted templates to %s ...", Filename.
string());
554 load_pre_trained_templates) {
564 tprintf(
"Error loading shape table!\n");
568 tprintf(
"Successfully loaded shape table!\n");
602 BaselineCutoffs[i] = 0;
611 File = fopen(Filename.
string(),
"rb");
616 cprintf(
"\nReading pre-adapted templates from %s ...\n",
626 BaselineCutoffs[i] = CharNormCutoffs[i];
638 tprintf(
"Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
639 NumAdaptationsFailed);
643 NumAdaptationsFailed = 0;
662 fprintf (File,
"\nADAPTIVE MATCHER STATISTICS:\n");
663 fprintf (File,
"\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
664 fprintf (File,
"\tNum classes output = %d (Avg = %4.2f)\n",
666 ((AdaptiveMatcherCalls == 0) ? (0.0) :
667 ((
float) NumClassesOutput / AdaptiveMatcherCalls)));
668 fprintf (File,
"\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
669 BaselineClassifierCalls,
670 ((BaselineClassifierCalls == 0) ? (0.0) :
671 ((
float) NumBaselineClassesTried / BaselineClassifierCalls)));
672 fprintf (File,
"\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
673 CharNormClassifierCalls,
674 ((CharNormClassifierCalls == 0) ? (0.0) :
675 ((
float) NumCharNormClassesTried / CharNormClassifierCalls)));
676 fprintf (File,
"\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
677 AmbigClassifierCalls,
678 ((AmbigClassifierCalls == 0) ? (0.0) :
679 ((
float) NumAmbigClassesTried / AmbigClassifierCalls)));
681 fprintf (File,
"\nADAPTIVE LEARNER STATISTICS:\n");
682 fprintf (File,
"\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
683 fprintf (File,
"\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
786 BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
790 for (Fid = 0; Fid < Features->
NumFeatures; Fid++) {
796 Proto = &(TempProto->
Proto);
822 cprintf (
"Added new class '%s' with class id %d and %d protos.\n",
870 *FloatFeatures = Features;
897 int BestChoiceLength = BestChoiceWord.
length();
898 float adaptable_score =
901 BestChoiceLength > 0 &&
902 BestChoiceLength == Word->
NumBlobs() &&
953 assert(Class !=
NULL);
962 if (NumFeatures <= 0)
968 for (
int cfg = 0; cfg < IClass->
NumConfigs; ++cfg) {
970 SET_BIT(MatchingFontConfigs, cfg);
976 NumFeatures, IntFeatures,
983 if (IntResult.
Rating <= Threshold) {
986 cprintf (
"Found good match to perm config %d = %4.1f%%.\n",
998 cprintf (
"Increasing reliability of temp config %d to %d.\n",
1009 cprintf (
"Found poor match to temp config %d = %4.1f%%.\n",
1020 if (NewTempConfigId >= 0 &&
1026 #ifndef GRAPHICS_DISABLED
1038 #ifndef GRAPHICS_DISABLED
1044 norm_array, &bloblength);
1045 delete [] norm_array;
1049 num_features, features,
1052 cprintf (
"Best match to temp config %d = %4.1f%%.\n",
1056 ConfigMask = 1 << IntResult.
Config;
1059 num_features, features,
1096 cprintf (
"Rejecting punc = %s (Alternatives = ",
1104 #ifndef SECURE_NAMES
1106 cprintf (
"Adapting to punc = %s, thr= %g\n",
1109 AdaptToChar(Blob, denorm, ClassId, FontinfoId, Threshold);
1156 static_cast<inT16>(config),
1157 static_cast<inT16>(fontinfo_id),
1158 static_cast<inT16>(fontinfo_id2) };
1161 (old_match && rating >= old_match->
rating))
1168 old_match->
rating = rating;
1172 if (rating < results->best_match.rating &&
1217 AmbigClassifierCalls++;
1220 NULL, CharNormArray,
1222 if (NumFeatures <= 0) {
1223 delete [] CharNormArray;
1233 while (*Ambiguities >= 0) {
1234 ClassId = *Ambiguities;
1239 NumFeatures, IntFeatures,
1246 IntResult, Results);
1249 NumAmbigClassesTried++;
1251 delete [] CharNormArray;
1260 const uinT8* norm_factors,
1264 const TBOX& blob_box,
1267 int top = blob_box.
top();
1268 int bottom = blob_box.
bottom();
1269 for (
int c = 0; c < num_classes; c++) {
1270 CLASS_ID class_id = results[c].Class;
1279 num_features, features,
1286 int_result, final_results);
1296 ADAPT_CLASS* classes,
bool debug,
int class_id,
int bottom,
int top,
1297 float cp_rating,
int blob_length,
const uinT8* cn_factors,
1300 int fontinfo_id = kBlankFontinfoId;
1301 int fontinfo_id2 = kBlankFontinfoId;
1302 if (classes !=
NULL) {
1310 if (int_result.
Config2 >= 0) {
1317 int shape_id = fontinfo_id;
1319 double min_rating = 0.0;
1320 for (
int c = 0; c < shape.
size(); ++c) {
1321 int unichar_id = shape[c].unichar_id;
1322 fontinfo_id = shape[c].font_ids[0];
1323 if (shape[c].font_ids.
size() > 1)
1324 fontinfo_id2 = shape[c].font_ids[1];
1325 else if (fontinfo_id2 != kBlankFontinfoId)
1330 bottom, top, blob_length,
1332 if (c == 0 || rating < min_rating)
1333 min_rating = rating;
1335 AddNewResult(final_results, unichar_id, shape_id, rating,
1337 fontinfo_id, fontinfo_id2);
1340 int_result.
Rating = min_rating;
1347 bottom, top, blob_length,
1352 fontinfo_id, fontinfo_id2);
1354 int_result.
Rating = rating;
1361 double cp_rating,
double im_rating,
1363 int bottom,
int top,
1365 const uinT8* cn_factors) {
1368 cn_factors[unichar_id]);
1370 double vertical_penalty = 0.0;
1375 int min_bottom, max_bottom, min_top, max_top;
1377 &min_top, &max_top);
1379 tprintf(
"top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1380 top, min_top, max_top, bottom, min_bottom, max_bottom);
1382 if (top < min_top || top > max_top ||
1383 bottom < min_bottom || bottom > max_bottom) {
1387 double result =cn_corrected + miss_penalty + vertical_penalty;
1391 tprintf(
"%s: %2.1f(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1396 (cn_corrected - im_rating) * 100.0,
1397 cn_factors[unichar_id],
1398 miss_penalty * 100.0,
1399 vertical_penalty * 100.0);
1433 BaselineClassifierCalls++;
1436 Blob, denorm, Templates->
Templates, IntFeatures, CharNormArray,
1438 if (NumFeatures <= 0) {
1439 delete [] CharNormArray;
1444 CharNormArray, BaselineCutoffs, Results->
CPResults);
1446 NumBaselineClassesTried += NumClasses;
1456 delete [] CharNormArray;
1462 return Templates->
Class[ClassId]->
1495 CharNormClassifierCalls++;
1500 uinT8* PrunerNormArray =
new uinT8[num_pruner_classes];
1502 PrunerNormArray, CharNormArray,
1504 if (NumFeatures <= 0) {
1505 delete [] CharNormArray;
1506 delete [] PrunerNormArray;
1510 NumClasses =
PruneClasses(Templates, NumFeatures, IntFeatures,
1518 NumCharNormClassesTried += NumClasses;
1521 MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray,
1524 delete [] CharNormArray;
1525 delete [] PrunerNormArray;
1540 for (
int f = 0;
f < num_features; ++
f) {
1542 TBOX fbox(feature.
X, feature.
Y, feature.
X, feature.
Y);
1554 uinT8* pruner_norm_array =
new uinT8[num_pruner_classes];
1566 delete [] pruner_norm_array;
1569 for (
int i = 0; i < num_classes; ++i) {
1570 int class_id = adapt_results->
CPResults[i].Class;
1571 int shape_id = class_id;
1586 blob_box, adapt_results->
CPResults, adapt_results);
1588 for (
int i = 0; i < adapt_results->
NumMatches; i++) {
1594 delete [] char_norm_array;
1595 delete adapt_results;
1596 return num_features;
1620 Rating /= 1.0 + Rating;
1623 kBlankFontinfoId, kBlankFontinfoId);
1631 for (
int i = 0; i < results->
NumMatches; i++) {
1633 return &results->
match[i];
1643 kBlankFontinfoId, kBlankFontinfoId};
1645 return (entry ==
NULL) ? poor_result : *entry;
1677 BLOB_CHOICE_LIST *Choices) {
1678 assert(Choices !=
NULL);
1681 BLOB_CHOICE_IT temp_it;
1682 bool contains_nonfrag =
false;
1683 temp_it.set_to_list(Choices);
1684 int choices_length = 0;
1691 if (shape_table_ !=
NULL) {
1692 max_matches = shape_table_->MaxNumUnichars() * 2;
1697 for (
int i = 0; i < Results->
NumMatches; i++) {
1702 bool current_is_frag = (unicharset.get_fragment(next.
unichar_id) !=
NULL);
1703 if (temp_it.length()+1 == max_matches &&
1704 !contains_nonfrag && current_is_frag) {
1716 Rating = Certainty = next.
rating;
1717 Rating *= rating_scale * Results->
BlobLength;
1718 Certainty *= -(getDict().certainty_scale);
1720 inT16 min_xheight, max_xheight;
1722 &min_xheight, &max_xheight);
1724 fontinfo_id, fontinfo_id2,
1726 min_xheight, max_xheight, adapted));
1727 contains_nonfrag |= !current_is_frag;
1729 if (choices_length >= max_matches)
break;
1736 #ifndef GRAPHICS_DISABLED
1751 for (
int i = 0; i < Results->
NumMatches; i++) {
1755 const char *Prompt =
1756 "Left-click in IntegerMatch Window to continue or right click to debug...";
1759 bool adaptive_on =
true;
1760 bool pretrained_on =
true;
1762 const char* debug_mode;
1765 debug_mode =
"Adaptive Templates Only";
1766 else if (!adaptive_on)
1767 debug_mode =
"PreTrained Templates Only";
1769 debug_mode =
"All Templates";
1771 tprintf(
"Debugging class %d = %s in mode %s ...",
1772 unichar_id, unicharset.id_to_unichar(unichar_id), debug_mode);
1773 if (shape_id >= 0 && shape_table_ !=
NULL) {
1774 tprintf(
" from shape %s\n", shape_table_->DebugStr(shape_id).string());
1776 ShowBestMatchFor(Blob, denorm, unichar_id, shape_id, adaptive_on,
1777 pretrained_on, Results);
1779 }
while ((unichar_id = GetClassToDebug(Prompt, &adaptive_on,
1780 &pretrained_on, &shape_id)) != 0);
1813 AdaptiveMatcherCalls++;
1816 if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min ||
1818 CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results);
1820 Ambiguities = BaselineClassifier(Blob, denorm, AdaptedTemplates, Results);
1823 !tess_bn_matching) ||
1825 CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results);
1826 }
else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1827 AmbigClassifier(Blob, denorm,
1828 PreTrainedTemplates,
1829 AdaptedTemplates->Class,
1840 ClassifyAsNoise(Results);
1874 getDict().FindClassifierErrors(matcher_perfect_threshold,
1875 matcher_good_threshold,
1876 matcher_rating_margin,
1907 CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results);
1908 RemoveBadMatches(Results);
1921 Ambiguities[i] = -1;
1923 Ambiguities[0] = -1;
1961 uinT8* CharNormArray,
1962 inT32 *BlobLength) {
1965 if (!FeaturesHaveBeenExtracted) {
1967 CharNormFeatures, &FXInfo,
NULL);
1968 FeaturesHaveBeenExtracted =
TRUE;
1972 *BlobLength = FXInfo.NumBL;
1976 for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures;
1980 ClearCharNormArray(CharNormArray);
1981 *BlobLength = FXInfo.NumBL;
1982 return FXInfo.NumBL;
1986 FeaturesHaveBeenExtracted =
FALSE;
1992 BLOB_CHOICE_LIST *ratings =
new BLOB_CHOICE_LIST();
1993 AdaptiveClassifier(blob, denorm, ratings,
NULL);
1994 BLOB_CHOICE_IT ratings_it(ratings);
1995 const UNICHARSET &unicharset = getDict().getUnicharset();
1996 if (classify_debug_character_fragments) {
1998 ratings, unicharset);
2000 for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
2001 ratings_it.forward()) {
2006 return (ratings_it.data()->certainty() <
2007 classify_character_fragments_garbage_certainty_threshold);
2049 uinT8* PrunerNormArray,
2050 uinT8* CharNormArray,
2052 inT32 *FeatureOutlineArray) {
2058 if (!FeaturesHaveBeenExtracted) {
2060 CharNormFeatures, &FXInfo,
2061 FeatureOutlineIndex);
2062 FeaturesHaveBeenExtracted =
TRUE;
2066 *BlobLength = FXInfo.NumBL;
2070 for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures;
2073 for (
int i = 0; FeatureOutlineArray && i < FXInfo.NumCN; ++i) {
2074 FeatureOutlineArray[i] = FeatureOutlineIndex[i];
2085 ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray);
2086 *BlobLength = FXInfo.NumBL;
2087 return (FXInfo.NumCN);
2094 uinT8* char_norm_array,
2095 uinT8* pruner_array) {
2096 ComputeIntCharNormArray(*norm_feature, char_norm_array);
2097 if (pruner_array !=
NULL) {
2098 if (shape_table_ ==
NULL) {
2099 ComputeIntCharNormArray(*norm_feature, pruner_array);
2102 templates->
NumClasses *
sizeof(pruner_array[0]));
2105 for (
int id = 0;
id < templates->
NumClasses; ++id) {
2107 const FontSet &fs = fontset_table_.get(font_set_id);
2108 for (
int config = 0; config < fs.
size; ++config) {
2109 const Shape& shape = shape_table_->GetShape(fs.
configs[config]);
2110 for (
int c = 0; c < shape.
size(); ++c) {
2111 if (char_norm_array[shape[c].unichar_id] < pruner_array[
id])
2112 pruner_array[id] = char_norm_array[shape[c].unichar_id];
2148 int MaxProtoId, OldMaxProtoId;
2156 if (classify_learning_debug_level >= 3)
2161 Class = Templates->
Class[ClassId];
2164 ++NumAdaptationsFailed;
2165 if (classify_learning_debug_level >= 1)
2166 cprintf(
"Cannot make new temporary config: maximum number exceeded.\n");
2172 NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
2173 BlobLength, NumFeatures, Features,
2174 OldProtos, classify_adapt_proto_threshold,
2179 for (i = 0; i < NumOldProtos; i++)
2180 SET_BIT(TempProtoMask, OldProtos[i]);
2182 NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
2183 BlobLength, NumFeatures, Features,
2185 classify_adapt_feature_threshold,
2188 MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
2189 IClass, Class, TempProtoMask);
2191 ++NumAdaptationsFailed;
2192 if (classify_learning_debug_level >= 1)
2193 cprintf(
"Cannot make new temp protos: maximum number exceeded.\n");
2203 if (classify_learning_debug_level >= 1)
2204 cprintf(
"Making new temp config %d fontinfo id %d"
2205 " using %d old and %d new protos.\n",
2207 NumOldProtos, MaxProtoId - OldMaxProtoId);
2250 for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
2251 ProtoStart < LastBad; ProtoStart = ProtoEnd) {
2252 F1 = Features->
Features[*ProtoStart];
2257 for (ProtoEnd = ProtoStart + 1,
2261 F2 = Features->
Features[*ProtoEnd];
2266 AngleDelta = fabs(A1 - A2);
2267 if (AngleDelta > 0.5)
2268 AngleDelta = 1.0 - AngleDelta;
2270 if (AngleDelta > matcher_clustering_max_angle_delta ||
2271 fabs(X1 - X2) > SegmentLength ||
2272 fabs(Y1 - Y2) > SegmentLength)
2276 F2 = Features->
Features[*(ProtoEnd - 1)];
2286 Proto = &(TempProto->
Proto);
2291 Proto->
Length = SegmentLength;
2293 Proto->
X = (X1 + X2) / 2.0;
2300 ConvertProto(Proto, Pid, IClass);
2302 classify_learning_debug_level >= 2);
2333 Class = Templates->
Class[ClassId];
2342 Ambigs = GetAmbiguities(Blob, denorm, ClassId);
2344 "PERM_CONFIG_STRUCT");
2359 if (classify_learning_debug_level >= 1) {
2360 tprintf(
"Making config %d for %s (ClassId %d) permanent:"
2361 " fontinfo id %d, ambiguities '",
2362 ConfigId, getDict().getUnicharset().debug_str(ClassId).
string(),
2365 *AmbigsPointer >= 0; ++AmbigsPointer)
2366 tprintf(
"%s", unicharset.id_to_unichar(*AmbigsPointer));
2425 for (
int i = 0; i < Results->
NumMatches; ++i) {
2426 tprintf(
"%s(%d), shape %d, %.2f ",
2453 static const char* romans =
"i v x I V X";
2456 if (classify_bln_numeric_mode) {
2457 UNICHAR_ID unichar_id_one = unicharset.contains_unichar(
"1") ?
2458 unicharset.unichar_to_id(
"1") : -1;
2459 UNICHAR_ID unichar_id_zero = unicharset.contains_unichar(
"0") ?
2460 unicharset.unichar_to_id(
"0") : -1;
2464 for (Next = NextGood = 0; Next < Results->
NumMatches; Next++) {
2465 if (Results->
match[Next].
rating <= BadMatchThreshold) {
2467 if (!unicharset.get_isalpha(match.
unichar_id) ||
2470 Results->
match[NextGood++] = Results->
match[Next];
2471 }
else if (unicharset.eq(match.
unichar_id,
"l") &&
2472 scored_one.
rating >= BadMatchThreshold) {
2473 Results->
match[NextGood] = scored_one;
2476 }
else if (unicharset.eq(match.
unichar_id,
"O") &&
2477 scored_zero.
rating >= BadMatchThreshold) {
2478 Results->
match[NextGood] = scored_zero;
2485 for (Next = NextGood = 0; Next < Results->
NumMatches; Next++) {
2486 if (Results->
match[Next].
rating <= BadMatchThreshold)
2487 Results->
match[NextGood++] = Results->
match[Next];
2508 static char punc_chars[] =
". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2509 static char digit_chars[] =
"0 1 2 3 4 5 6 7 8 9";
2513 for (Next = NextGood = 0; Next < Results->
NumMatches; Next++) {
2515 if (strstr(punc_chars,
2518 Results->
match[NextGood++] = match;
2521 if (strstr(digit_chars,
2523 if (digit_count < 1)
2524 Results->
match[NextGood++] = match;
2527 Results->
match[NextGood++] = match;
2549 Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2550 classify_adapt_proto_threshold.set_value(
2551 ClipToRange<int>(255 * Threshold, 0, 255));
2552 classify_adapt_feature_threshold.set_value(
2553 ClipToRange<int>(255 * Threshold, 0, 255));
2586 int NumCNFeatures = 0, NumBLFeatures = 0;
2591 static int next_config = -1;
2593 if (PreTrainedOn) next_config = -1;
2598 cprintf (
"%d is not a legal class id!!\n", ClassId);
2605 if (shape_table_ ==
NULL)
2608 shape_id = ShapeIDToClassID(shape_id);
2609 if (PreTrainedOn && shape_id >= 0) {
2611 tprintf(
"No built-in templates for class/shape %d\n", shape_id);
2613 NumCNFeatures = GetCharNormFeatures(Blob, denorm, PreTrainedTemplates,
2614 CNFeatures,
NULL, CNAdjust,
2616 if (NumCNFeatures <= 0) {
2617 tprintf(
"Illegal blob (char norm features)!\n");
2621 AllProtosOn, AllConfigsOn,
2622 NumCNFeatures, CNFeatures,
2624 classify_adapt_feature_threshold,
NO_DEBUG,
2625 matcher_debug_separate_windows);
2626 ExpandShapesAndApplyCorrections(
NULL,
false, shape_id,
2629 0, BlobLength, CNAdjust,
2636 if (ClassId < 0 || ClassId >= AdaptedTemplates->Templates->NumClasses) {
2637 tprintf(
"Invalid adapted class id: %d\n", ClassId);
2639 AdaptedTemplates->Class[ClassId] ==
NULL ||
2641 tprintf(
"No AD templates for class %d = %s\n",
2642 ClassId, unicharset.id_to_unichar(ClassId));
2644 NumBLFeatures = GetBaselineFeatures(Blob,
2646 AdaptedTemplates->Templates,
2647 BLFeatures, BLAdjust,
2649 if (NumBLFeatures <= 0)
2650 tprintf(
"Illegal blob (baseline features)!\n");
2652 im_.SetBaseLineMatch();
2654 AllProtosOn, AllConfigsOn,
2655 NumBLFeatures, BLFeatures,
2657 classify_adapt_feature_threshold,
NO_DEBUG,
2658 matcher_debug_separate_windows);
2659 ExpandShapesAndApplyCorrections(
2660 AdaptedTemplates->Class,
false,
2670 if (next_config < 0) {
2671 ConfigMask = 1 << BLResult.
Config;
2674 ConfigMask = 1 << next_config;
2677 classify_norm_method.set_value(
baseline);
2679 im_.SetBaseLineMatch();
2680 tprintf(
"Adaptive Class ID: %d\n", ClassId);
2683 NumBLFeatures, BLFeatures,
2685 classify_adapt_feature_threshold,
2686 matcher_debug_flags,
2687 matcher_debug_separate_windows);
2688 ExpandShapesAndApplyCorrections(
2689 AdaptedTemplates->Class,
true,
2693 }
else if (shape_id >= 0) {
2694 ConfigMask = 1 << CNResult.
Config;
2695 classify_norm_method.set_value(
character);
2697 tprintf(
"Static Shape ID: %d\n", shape_id);
2701 NumCNFeatures, CNFeatures,
2703 classify_adapt_feature_threshold,
2704 matcher_debug_flags,
2705 matcher_debug_separate_windows);
2706 ExpandShapesAndApplyCorrections(
NULL,
true, shape_id,
2709 0, BlobLength, CNAdjust,
2721 int class_id,
int config_id)
const {
2723 if (templates == PreTrainedTemplates && shape_table_ !=
NULL) {
2724 int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2725 class_string = shape_table_->DebugStr(shape_id);
2727 class_string = unicharset.debug_str(class_id);
2729 return class_string;
2734 int int_result_config)
const {
2735 int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2737 if (font_set_id < 0)
2738 return kBlankFontinfoId;
2739 const FontSet &fs = fontset_table_.get(font_set_id);
2741 return fs.
configs[int_result_config];
2747 for (
int id = 0;
id < PreTrainedTemplates->NumClasses; ++id) {
2748 int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2750 const FontSet &fs = fontset_table_.get(font_set_id);
2751 for (
int config = 0; config < fs.
size; ++config) {
2752 if (fs.
configs[config] == shape_id)
2756 tprintf(
"Shape %d not found\n", shape_id);
2764 if (classify_learning_debug_level >= 1) {
2765 tprintf(
"NumTimesSeen for config of %s is %d\n",
2766 getDict().getUnicharset().debug_str(class_id).
string(),
2769 if (config->
NumTimesSeen >= matcher_sufficient_examples_for_prototyping) {
2771 }
else if (config->
NumTimesSeen < matcher_min_examples_for_prototyping) {
2773 }
else if (use_ambigs_for_adaption) {
2777 getDict().getUnicharAmbigs().AmbigsForAdaption(class_id);
2778 int ambigs_size = (ambigs ==
NULL) ? 0 : ambigs->
size();
2779 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2780 ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2781 assert(ambig_class !=
NULL);
2784 matcher_min_examples_for_prototyping) {
2785 if (classify_learning_debug_level >= 1) {
2786 tprintf(
"Ambig %s has not been seen enough times,"
2787 " not making config for %s permanent\n",
2788 getDict().getUnicharset().debug_str(
2789 (*ambigs)[ambig]).
string(),
2790 getDict().getUnicharset().debug_str(class_id).
string());
2802 getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id);
2803 int ambigs_size = (ambigs ==
NULL) ? 0 : ambigs->
size();
2804 if (classify_learning_debug_level >= 1) {
2805 tprintf(
"Running UpdateAmbigsGroup for %s class_id=%d\n",
2806 getDict().getUnicharset().debug_str(class_id).
string(), class_id);
2808 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2809 CLASS_ID ambig_class_id = (*ambigs)[ambig];
2810 const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2814 TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2815 if (config !=
NULL && TempConfigReliable(ambig_class_id, config)) {
2816 if (classify_learning_debug_level >= 1) {
2817 tprintf(
"Making config %d of %s permanent\n", cfg,
2818 getDict().getUnicharset().debug_str(
2819 ambig_class_id).
string());
2821 MakePermanent(AdaptedTemplates, ambig_class_id, cfg, denorm, Blob);