22 #include "config_auto.h"
// Lookup table indexed by a byte value: offset_table[b] is the zero-based
// position of the lowest set bit of b (1 -> 0, 2 -> 1, 4 -> 2, ... 128 -> 7).
// Entry 0 holds 255 because a zero byte has no set bit; the visible callers
// only index the table after checking that the byte is non-zero.
// It is used together with next_table to walk the set bits of a byte from
// least to most significant.
48 static const uinT8 offset_table[256] = {
49 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
50 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
51 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
52 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
53 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
54 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
55 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
56 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
57 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
58 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
59 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
60 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
61 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
62 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
63 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
// Lookup table indexed by a byte value: next_table[b] is b with its lowest
// set bit cleared (e.g. 0x03 -> 0x02, 0x06 -> 0x04, 0xf0 -> 0xe0, 0x80 -> 0).
// After offset_table[b] has reported the position of the lowest set bit,
// replacing b with next_table[b] advances to the next set bit; the walk ends
// when the byte becomes 0.
67 static const uinT8 next_table[256] = {
68 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
69 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, 0x18,
71 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, 0x20, 0x28, 0x28, 0x2a, 0x28,
73 0x20, 0x30, 0x30, 0x32, 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a,
74 0x38, 0x3c, 0x3c, 0x3e,
75 0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, 0x48,
77 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, 0x50, 0x58, 0x58, 0x5a,
78 0x58, 0x5c, 0x5c, 0x5e,
79 0x40, 0x60, 0x60, 0x62, 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a,
80 0x68, 0x6c, 0x6c, 0x6e,
81 0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
82 0x78, 0x7c, 0x7c, 0x7e,
83 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, 0x80, 0x88, 0x88, 0x8a, 0x88,
85 0x80, 0x90, 0x90, 0x92, 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a,
86 0x98, 0x9c, 0x9c, 0x9e,
87 0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
88 0xa8, 0xac, 0xac, 0xae,
89 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, 0xb0, 0xb8, 0xb8, 0xba,
90 0xb8, 0xbc, 0xbc, 0xbe,
91 0x80, 0xc0, 0xc0, 0xc2, 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca,
92 0xc8, 0xcc, 0xcc, 0xce,
93 0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
94 0xd8, 0xdc, 0xdc, 0xde,
95 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, 0xe0, 0xe8, 0xe8, 0xea,
96 0xe8, 0xec, 0xec, 0xee,
97 0xe0, 0xf0, 0xf0, 0xf2, 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa,
98 0xf8, 0xfc, 0xfc, 0xfe
118 max_classes_ = max_classes;
121 class_count_ =
new int[rounded_classes_];
122 norm_count_ =
new int[rounded_classes_];
123 sort_key_ =
new int[rounded_classes_ + 1];
124 sort_index_ =
new int[rounded_classes_ + 1];
125 for (
int i = 0; i < rounded_classes_; i++) {
128 pruning_threshold_ = 0;
134 delete []class_count_;
135 delete []norm_count_;
137 delete []sort_index_;
144 num_features_ = num_features;
146 for (
int f = 0;
f < num_features; ++
f) {
155 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
158 const uinT32* pruner_word_ptr =
161 uinT32 pruner_word = *pruner_word_ptr++;
214 int cutoff_strength) {
215 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
216 if (num_features_ < expected_num_features[class_id]) {
217 int deficit = expected_num_features[class_id] - num_features_;
218 class_count_[class_id] -= class_count_[class_id] * deficit /
219 (num_features_ * cutoff_strength + deficit);
227 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
229 class_count_[class_id] = 0;
235 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
239 class_count_[class_id] = 0;
249 const uinT8* normalization_factors) {
250 for (
int class_id = 0; class_id < max_classes_; class_id++) {
251 norm_count_[class_id] = class_count_[class_id] -
252 ((norm_multiplier * normalization_factors[class_id]) >> 8);
258 for (
int class_id = 0; class_id < max_classes_; class_id++) {
259 norm_count_[class_id] = class_count_[class_id];
269 for (
int c = 0; c < max_classes_; ++c) {
270 if (norm_count_[c] > max_count &&
276 (!max_of_non_fragments || !unicharset.
get_fragment(c))) {
277 max_count = norm_count_[c];
281 pruning_threshold_ = (max_count * pruning_factor) >> 8;
283 if (pruning_threshold_ < 1)
284 pruning_threshold_ = 1;
286 for (
int class_id = 0; class_id < max_classes_; class_id++) {
287 if (norm_count_[class_id] >= pruning_threshold_) {
289 sort_index_[num_classes_] = class_id;
290 sort_key_[num_classes_] = norm_count_[class_id];
295 if (num_classes_ > 1)
296 HeapSort(num_classes_, sort_key_, sort_index_);
304 int max_num_classes = int_templates->
NumClasses;
305 for (
int f = 0;
f < num_features_; ++
f) {
307 tprintf(
"F=%3d(%d,%d,%d),",
f, feature->
X, feature->
Y, feature->
Theta);
313 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
316 const uinT32* pruner_word_ptr =
319 uinT32 pruner_word = *pruner_word_ptr++;
320 for (
int word_class = 0; word_class < 16 &&
321 class_id < max_num_classes; ++word_class, ++class_id) {
322 if (norm_count_[class_id] >= pruning_threshold_) {
339 const uinT16* expected_num_features,
341 const uinT8* normalization_factors)
const {
342 tprintf(
"CP:%d classes, %d features:\n", num_classes_, num_features_);
343 for (
int i = 0; i < num_classes_; ++i) {
344 int class_id = sort_index_[num_classes_ - i];
347 tprintf(
"%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
349 class_count_[class_id],
350 expected_num_features[class_id],
351 (norm_multiplier * normalization_factors[class_id]) >> 8,
352 sort_key_[num_classes_ - i],
353 100.0 - 100.0 * sort_key_[num_classes_ - i] /
361 for (
int c = 0; c < num_classes_; ++c) {
362 results[c].
Class = sort_index_[num_classes_ - c];
363 results[c].
Rating = 1.0 - sort_key_[num_classes_ - c] /
382 int rounded_classes_;
384 int pruning_threshold_;
409 const uinT8* normalization_factors,
410 const uinT16* expected_num_features,
435 if (normalization_factors !=
NULL) {
437 normalization_factors);
446 pruner.
DebugMatch(*
this, int_templates, features);
451 normalization_factors);
466 int AdaptFeatureThreshold,
468 bool SeparateDebugWindows) {
496 cprintf (
"Integer Matcher -------------------------------------------\n");
498 tables->
Clear(ClassTemplate);
501 for (Feature = 0; Feature < NumFeatures; Feature++) {
502 int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
503 Feature, &Features[Feature],
510 #ifndef GRAPHICS_DISABLED
512 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
517 DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
518 *tables, SeparateDebugWindows);
522 DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
523 Features, AdaptFeatureThreshold, Debug,
524 SeparateDebugWindows);
529 tables->
NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);
531 BestMatch = FindBestMatch(ClassTemplate, *tables, Result);
533 #ifndef GRAPHICS_DISABLED
535 DebugBestMatch(BestMatch, Result);
538 cprintf(
"Match Complete --------------------------------------------\n");
554 int AdaptProtoThreshold,
579 int NumGoodProtos = 0;
584 (
"Find Good Protos -------------------------------------------\n");
586 tables->
Clear(ClassTemplate);
588 for (
int Feature = 0; Feature < NumFeatures; Feature++)
589 UpdateTablesForFeature(
590 ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
593 #ifndef GRAPHICS_DISABLED
595 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
600 for (
int proto = 0; proto < ClassTemplate->
NumProtos; proto++) {
603 for (
int i = 0; i < ClassTemplate->
ProtoLengths[proto]; i++)
609 if (Temp >= AdaptProtoThreshold) {
617 cprintf (
"Match Complete --------------------------------------------\n");
620 return NumGoodProtos;
633 int AdaptFeatureThreshold,
654 int NumBadFeatures = 0;
658 cprintf(
"Find Bad Features -------------------------------------------\n");
660 tables->
Clear(ClassTemplate);
662 for (
int Feature = 0; Feature < NumFeatures; Feature++) {
663 UpdateTablesForFeature(
664 ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
669 for (
int i = 0; i < ClassTemplate->
NumConfigs; i++)
674 if (best < AdaptFeatureThreshold) {
675 *FeatureArray = Feature;
681 #ifndef GRAPHICS_DISABLED
683 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
688 cprintf(
"Match Complete --------------------------------------------\n");
691 return NumBadFeatures;
697 int classify_integer_matcher_multiplier) {
698 classify_debug_level_ = classify_debug_level;
706 double Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0;
708 evidence = 255.0 / (evidence * evidence + 1.0);
716 similarity_evidence_table_[i] = (
uinT8) (evidence + 0.5);
720 evidence_table_mask_ =
723 table_trunc_shift_bits_ = (27 -
SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
729 local_matcher_multiplier_ = 0;
735 local_matcher_multiplier_ = integer_matcher_multiplier;
771 cprintf (
"F = %3d, P = %3d, E = %3d, Configs = ",
772 FeatureNum, (
int) ActualProtoNum, (
int) Evidence);
786 uinT8 *FeatureEvidence,
797 cprintf(
"F=%3d, C=", FeatureNum);
798 for (
int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
799 cprintf(
"%4d", FeatureEvidence[ConfigNum]);
807 int IntegerMatcher::UpdateTablesForFeature(
828 register uinT32 ConfigWord;
829 register uinT32 ProtoWord;
831 register uinT32 ActualProtoNum;
833 inT32 proto_word_offset;
844 uinT32 ThetaFeatureAddress;
845 register uinT8 *UINT8Pointer;
846 register int ProtoIndex;
848 register int *IntPointer;
857 XFeatureAddress = ((Feature->
X >> 2) << 1);
861 for (ProtoSetIndex = 0, ActualProtoNum = 0;
862 ProtoSetIndex < ClassTemplate->
NumProtoSets; ProtoSetIndex++) {
863 ProtoSet = ClassTemplate->
ProtoSets[ProtoSetIndex];
864 ProtoPrunerPtr = (
uinT32 *) ((*ProtoSet).ProtoPruner);
866 ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
867 (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
869 ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
870 ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
871 ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
872 ProtoWord &= *ProtoMask;
874 if (ProtoWord != 0) {
875 proto_byte = ProtoWord & 0xff;
877 proto_word_offset = 0;
878 while (ProtoWord != 0 || proto_byte != 0) {
879 while (proto_byte == 0) {
880 proto_byte = ProtoWord & 0xff;
882 proto_word_offset += 8;
884 proto_offset = offset_table[proto_byte] + proto_word_offset;
885 proto_byte = next_table[proto_byte];
886 Proto = &(ProtoSet->
Protos[ProtoNum + proto_offset]);
887 ConfigWord = Proto->
Configs[0];
888 A3 = (((Proto->
A * (Feature->
X - 128)) << 1)
889 - (Proto->
B * (Feature->
Y - 128)) + (Proto->
C << 9));
897 A3 >>= mult_trunc_shift_bits_;
898 M3 >>= mult_trunc_shift_bits_;
899 if (A3 > evidence_mult_mask_)
900 A3 = evidence_mult_mask_;
901 if (M3 > evidence_mult_mask_)
902 M3 = evidence_mult_mask_;
904 A4 = (A3 * A3) + (M3 * M3);
905 A4 >>= table_trunc_shift_bits_;
906 if (A4 > evidence_table_mask_)
909 Evidence = similarity_evidence_table_[A4];
913 ActualProtoNum + proto_offset,
914 Evidence, ConfigMask, ConfigWord);
916 ConfigWord &= *ConfigMask;
920 while (ConfigWord != 0 || config_byte != 0) {
921 while (config_byte == 0) {
922 config_byte = ConfigWord & 0xff;
926 config_offset = offset_table[config_byte];
927 config_byte = next_table[config_byte];
928 if (Evidence > UINT8Pointer[config_offset])
929 UINT8Pointer[config_offset] = Evidence;
935 ClassTemplate->
ProtoLengths[ActualProtoNum + proto_offset];
936 ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
937 if (Evidence > *UINT8Pointer) {
938 Temp = *UINT8Pointer;
939 *UINT8Pointer = Evidence;
942 else if (Evidence == 0)
957 int SumOverConfigs = 0;
958 for (ConfigNum = ClassTemplate->
NumConfigs; ConfigNum > 0; ConfigNum--) {
959 int evidence = *UINT8Pointer++;
960 SumOverConfigs += evidence;
961 *IntPointer++ += evidence;
963 return SumOverConfigs;
968 #ifndef GRAPHICS_DISABLED
969 void IntegerMatcher::DebugFeatureProtoError(
995 cprintf(
"Configuration Mask:\n");
996 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++)
997 cprintf(
"%1d", (((*ConfigMask) >> ConfigNum) & 1));
1000 cprintf(
"Feature Error for Configurations:\n");
1001 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++) {
1006 / NumFeatures / 256.0));
1013 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->
NumProtoSets;
1016 for (ProtoWordNum = 0; ProtoWordNum < 2;
1017 ProtoWordNum++, ProtoMask++) {
1020 ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
1021 && (ActualProtoNum < ClassTemplate->
NumProtos));
1022 ProtoNum++, ActualProtoNum++)
1023 cprintf (
"%1d", (((*ProtoMask) >> ProtoNum) & 1));
1030 for (
int i = 0; i < ClassTemplate->
NumConfigs; i++)
1031 ProtoConfigs[i] = 0;
1034 cprintf (
"Proto Evidence:\n");
1035 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->
NumProtoSets;
1037 ProtoSet = ClassTemplate->
ProtoSets[ProtoSetIndex];
1041 (ActualProtoNum < ClassTemplate->NumProtos));
1042 ProtoNum++, ActualProtoNum++) {
1043 cprintf (
"P %3d =", ActualProtoNum);
1045 for (
int j = 0; j < ClassTemplate->
ProtoLengths[ActualProtoNum]; j++) {
1052 temp / 256.0 / ClassTemplate->
ProtoLengths[ActualProtoNum]);
1056 while (ConfigWord) {
1057 cprintf (
"%5d", ConfigWord & 1 ? temp : 0);
1059 ProtoConfigs[ConfigNum] += temp;
1069 cprintf (
"Proto Error for Configurations:\n");
1070 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++)
1073 ProtoConfigs[ConfigNum] /
1079 cprintf (
"Proto Sum for Configurations:\n");
1080 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++)
1081 cprintf (
" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
1084 cprintf (
"Proto Length for Configurations:\n");
1085 for (ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++)
1095 void IntegerMatcher::DisplayProtoDebugInfo(
1100 bool SeparateDebugWindows) {
1107 if (SeparateDebugWindows) {
1113 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->
NumProtoSets;
1115 ProtoSet = ClassTemplate->
ProtoSets[ProtoSetIndex];
1119 (ActualProtoNum < ClassTemplate->NumProtos));
1120 ProtoNum++, ActualProtoNum++) {
1123 for (
int i = 0; i < ClassTemplate->
ProtoLengths[ActualProtoNum]; i++)
1128 if ((ProtoSet->
Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
1137 void IntegerMatcher::DisplayFeatureDebugInfo(
1143 int AdaptFeatureThreshold,
1145 bool SeparateDebugWindows) {
1148 tables->
Clear(ClassTemplate);
1151 if (SeparateDebugWindows) {
1156 for (
int Feature = 0; Feature < NumFeatures; Feature++) {
1157 UpdateTablesForFeature(
1158 ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
1163 for (
int i = 0; i < ClassTemplate->
NumConfigs; i++)
1169 if (best < AdaptFeatureThreshold)
1197 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->
NumProtoSets;
1199 ProtoSet = ClassTemplate->
ProtoSets[ProtoSetIndex];
1203 ProtoNum++, ActualProtoNum++) {
1205 for (
int i = 0; i < ClassTemplate->
ProtoLengths[ActualProtoNum]; i++)
1209 ConfigWord &= *ConfigMask;
1211 while (ConfigWord) {
1213 *IntPointer += temp;
1229 for (
int i = 0; i < ClassTemplate->
NumConfigs; i++) {
1237 int IntegerMatcher::FindBestMatch(
1258 for (
int ConfigNum = 0; ConfigNum < ClassTemplate->
NumConfigs; ConfigNum++) {
1260 if (*classify_debug_level_ > 2)
1261 cprintf(
"Config %d, rating=%d\n", ConfigNum, rating);
1262 if (rating > BestMatch) {
1263 if (BestMatch > 0) {
1265 Best2Match = BestMatch;
1269 Result->
Config = ConfigNum;
1271 }
else if (rating > Best2Match) {
1273 Best2Match = rating;
1278 Result->
Rating = (65536.0 - BestMatch) / 65536.0;
1286 int normalization_factor) {
1287 return (rating * blob_length +
1288 local_matcher_multiplier_ * normalization_factor / 256.0) /
1289 (blob_length + local_matcher_multiplier_);
1293 #ifndef GRAPHICS_DISABLED
1295 void IntegerMatcher::DebugBestMatch(
1297 tprintf(
"Rating = %5.1f%% Best Config = %3d, Distance = %5.1f\n",
1299 100.0 * (65536.0 - BestMatch) / 65536.0);
1305 HeapSort (
int n,
register int ra[],
register int rb[]) {
1320 register int i, rra, rrb;
1344 if (j < ir && ra[j] < ra[j + 1])