Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
intfeaturemap.h
Go to the documentation of this file.
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Author: rays@google.com (Ray Smith)
4
// File: intfeaturemap.h
5
// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
6
// to provide a subspace mapping and fast feature lookup.
7
// Created: Tue Oct 26 08:58:30 PDT 2010
8
//
9
// Licensed under the Apache License, Version 2.0 (the "License");
10
// you may not use this file except in compliance with the License.
11
// You may obtain a copy of the License at
12
// http://www.apache.org/licenses/LICENSE-2.0
13
// Unless required by applicable law or agreed to in writing, software
14
// distributed under the License is distributed on an "AS IS" BASIS,
15
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
// See the License for the specific language governing permissions and
17
// limitations under the License.
18
//
20
21
#ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H__
22
#define TESSERACT_CLASSIFY_INTFEATUREMAP_H__
23
24
#include "
intfeaturespace.h
"
25
#include "
indexmapbidi.h
"
26
#include "
intproto.h
"
27
28
namespace
tesseract
{
29
30
// Forward declaration only: this header refers to SampleIterator solely
// through pointers, so the full definition is not needed here.
class SampleIterator;
31
32
// Number of positive and negative offset maps.
33
// Also used as the length of the offset_plus_/offset_minus_ tables in
// IntFeatureMap and as the bound on |dir| for its offset-feature helpers.
static const int kNumOffsetMaps = 2;
34
35
// Class to map a feature space defined by INT_FEATURE_STRUCT to a compact
36
// down-sampled subspace of actually used features.
37
// The IntFeatureMap copes with 2 stages of transformation:
38
// The first step is down-sampling (re-quantization) and converting to a
39
// single index value from the 3-D input:
40
// INT_FEATURE_STRUCT <-> index feature (via IntFeatureSpace) and
41
// the second is a feature-space compaction to map only the feature indices
42
// that are actually used. This saves space in classifiers that are built
43
// using the mapped feature space.
44
// index (sparse) feature <-> map (compact) feature via IndexMapBiDi.
45
// Although the transformations are reversible, the inverses are lossy and do
46
// not return the exact input INT_FEATURE_STRUCT, due to the many->one nature
47
// of both transformations.
48
class
IntFeatureMap
{
49
public
:
50
IntFeatureMap
();
51
~IntFeatureMap
();
52
53
// Accessors.
54
int
sparse_size
()
const
{
55
return
feature_space_.
Size
();
56
}
57
int
compact_size
()
const
{
58
return
compact_size_;
59
}
60
const
IntFeatureSpace
&
feature_space
()
const
{
61
return
feature_space_;
62
}
63
const
IndexMapBiDi
&
feature_map
()
const
{
64
return
feature_map_;
65
}
66
67
// Pseudo-accessors.
68
int
IndexFeature
(
const
INT_FEATURE_STRUCT
&
f
)
const
;
69
int
MapFeature
(
const
INT_FEATURE_STRUCT
&
f
)
const
;
70
int
MapIndexFeature
(
int
index_feature)
const
;
71
INT_FEATURE_STRUCT
InverseIndexFeature
(
int
index_feature)
const
;
72
INT_FEATURE_STRUCT
InverseMapFeature
(
int
map_feature)
const
;
73
void
DeleteMapFeature
(
int
map_feature);
74
bool
IsMapFeatureDeleted
(
int
map_feature)
const
;
75
76
// Copies the given feature_space and uses it as the index feature map
77
// from INT_FEATURE_STRUCT.
78
void
Init
(
const
IntFeatureSpace
&
feature_space
);
79
80
// Helper to return an offset index feature. In this context an offset
81
// feature with a dir of +/-1 is a feature of a similar direction,
82
// but shifted perpendicular to the direction of the feature. An offset
83
// feature with a dir of +/-2 is feature at the same position, but rotated
84
// by +/- one [compact] quantum. Returns the index of the generated offset
85
// feature, or -1 if it doesn't exist. Dir should be in
86
// [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
87
// A dir of 0 is an identity transformation.
88
// Both input and output are from the index(sparse) feature space, not
89
// the mapped/compact feature space, but the offset feature is the minimum
90
// distance moved from the input to guarantee that it maps to the next
91
// available quantum in the mapped/compact space.
92
int
OffsetFeature
(
int
index_feature,
int
dir)
const
;
93
94
// Computes the features used by the subset of samples defined by
95
// the iterator and sets up the feature mapping.
96
// Returns the size of the compacted feature space.
97
int
FindNZFeatureMapping
(
SampleIterator
* it);
98
99
// After deleting some features, finish setting up the mapping, and map
100
// all the samples. Returns the size of the compacted feature space.
101
int
FinalizeMapping
(
SampleIterator
* it);
102
103
// Indexes the given array of features to a vector of sorted indices.
104
void
IndexAndSortFeatures
(
const
INT_FEATURE_STRUCT
* features,
105
int
num_features,
106
GenericVector<int>
* sorted_features)
const
{
107
feature_space_.
IndexAndSortFeatures
(features, num_features,
108
sorted_features);
109
}
110
// Maps the given array of index/sparse features to an array of map/compact
111
// features.
112
// Assumes the input is sorted. The output indices are sorted and uniqued.
113
// Returns the number of "missed" features, being features that
114
// don't map to the compact feature space.
115
int
MapIndexedFeatures
(
const
GenericVector<int>
& index_features,
116
GenericVector<int>
* map_features)
const
{
117
return
feature_map_.
MapFeatures
(index_features, map_features);
118
}
119
120
// Prints the map features from the set in human-readable form.
121
void
DebugMapFeatures
(
const
GenericVector<int>
& map_features)
const
;
122
123
private
:
124
void
Clear();
125
126
// Helper to compute an offset index feature. In this context an offset
127
// feature with a dir of +/-1 is a feature of a similar direction,
128
// but shifted perpendicular to the direction of the feature. An offset
129
// feature with a dir of +/-2 is feature at the same position, but rotated
130
// by +/- one [compact] quantum. Returns the index of the generated offset
131
// feature, or -1 if it doesn't exist. Dir should be in
132
// [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
133
// A dir of 0 is an identity transformation.
134
// Both input and output are from the index(sparse) feature space, not
135
// the mapped/compact feature space, but the offset feature is the minimum
136
// distance moved from the input to guarantee that it maps to the next
137
// available quantum in the mapped/compact space.
138
int
ComputeOffsetFeature(
int
index_feature,
int
dir)
const
;
139
140
// True if the mapping has changed since it was last finalized.
141
bool
mapping_changed_;
142
// Size of the compacted feature space, after unused features are removed.
143
int
compact_size_;
144
// Feature space quantization definition and indexing from INT_FEATURE_STRUCT.
145
IntFeatureSpace
feature_space_;
146
// Mapping from indexed feature space to the compacted space with unused
147
// features mapping to -1.
148
IndexMapBiDi
feature_map_;
149
// Index tables to map a feature index to the corresponding feature after a
150
// shift perpendicular to the feature direction, or a rotation in place.
151
// An entry of -1 indicates that there is no corresponding feature.
152
// Array of arrays of size feature_space_.Size() owned by this class.
153
int
* offset_plus_[kNumOffsetMaps];
154
int
* offset_minus_[kNumOffsetMaps];
155
156
// Don't use default copy and assign!
157
IntFeatureMap
(
const
IntFeatureMap
&);
158
void
operator=(
const
IntFeatureMap
&);
159
};
160
161
}
// namespace tesseract.
162
163
#endif // TESSERACT_CLASSIFY_INTFEATUREMAP_H__
mnt/data/src/tesseract-ocr/classify/intfeaturemap.h
Generated on Thu Nov 1 2012 20:19:46 for Tesseract by Doxygen 1.8.1