Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
picofeat.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: picofeat.c
3  ** Purpose: Definition of pico-features.
4  ** Author: Dan Johnson
5  ** History: 9/4/90, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
21 #include "picofeat.h"
22 
23 #include "classify.h"
24 #include "efio.h"
25 #include "featdefs.h"
26 #include "fpoint.h"
27 #include "mfoutline.h"
28 #include "ocrfeatures.h"
29 #include "params.h"
30 #include "trainingsample.h"
31 
32 #include <math.h>
33 #include <stdio.h>
34 
35 /*---------------------------------------------------------------------------
36  Variables
37 ----------------------------------------------------------------------------*/
38 
/* Length of a single pico-feature. The segment-conversion routine below
   (ConvertSegmentToPicoFeat) divides each outline segment's length by this
   value and rounds to the nearest integer (minimum 1) to decide how many
   pico-features the segment yields. Declared here with the value 0.05. */
39 double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length");
40 
41 /*---------------------------------------------------------------------------
42  Private Function Prototypes
43 ----------------------------------------------------------------------------*/
45  FPOINT *End,
46  FEATURE_SET FeatureSet);
47 
/* Walks one outline and appends pico-features for every non-hidden edge to
   FeatureSet; returns without adding anything for a degenerate outline. */
48 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
49 
/* Subtracts the mean PicoFeatX value from every feature in FeatureSet so
   that the average x position of the set becomes 0. */
50 void NormalizePicoX(FEATURE_SET FeatureSet);
51 
55 /*---------------------------------------------------------------------------*/
56 namespace tesseract {
/* NOTE(review): listing line 57 -- the line that opens this function's
   definition -- is absent from this extract. The closing comment names the
   function ExtractPicoFeatures; confirm the signature against the original
   file before editing. */
58 /*
59  ** Parameters:
60  ** Blob blob to extract pico-features from
61  ** LineStats statistics on text row blob is in
       (LineStats is not referenced by the visible body -- TODO confirm
       it is still a parameter)
62  ** Globals:
63  ** classify_norm_method normalization method currently specified
       (not referenced by the visible body; listing line 83 is absent
       from this extract -- TODO confirm where it is used)
64  ** Operation: Builds a feature set with NewFeatureSet(MAX_PICO_FEATURES),
       converts Blob to a list of outlines with ConvertBlob, passes the
       list through NormalizeOutlines, feeds every outline in the list
       to ConvertToPicoFeatures2, calls NormalizePicoX on the result and
       finally releases the outline list with FreeOutlines.
65  ** Return: Pico-features for Blob.
66  ** Exceptions: none
67  ** History: 9/4/90, DSJ, Created.
68  */
69  LIST Outlines;
70  LIST RemainingOutlines;
71  MFOUTLINE Outline;
72  FEATURE_SET FeatureSet;
/* XScale/YScale are passed by address to NormalizeOutlines and are not read
   again in the visible body. */
73  FLOAT32 XScale, YScale;
74 
75  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
76  Outlines = ConvertBlob(Blob);
77  NormalizeOutlines(Outlines, &XScale, &YScale);
/* Iterate with a second list variable so that Outlines still refers to the
   head of the list when it is handed to FreeOutlines below. */
78  RemainingOutlines = Outlines;
79  iterate(RemainingOutlines) {
80  Outline = (MFOUTLINE) first_node (RemainingOutlines);
81  ConvertToPicoFeatures2(Outline, FeatureSet);
82  }
/* NOTE(review): listing line 83 is absent from this extract; a statement
   preceding (possibly guarding) the call below may be missing -- confirm
   against the original file. */
84  NormalizePicoX(FeatureSet);
85  FreeOutlines(Outlines);
86  return (FeatureSet);
87 
88 } /* ExtractPicoFeatures */
89 } // namespace tesseract
90 
94 /*---------------------------------------------------------------------------*/
/* NOTE(review): listing line 95 -- the first line of this definition, which
   carries the return type, the function name and the Start parameter -- is
   absent from this extract. The closing comment names the function
   ConvertSegmentToPicoFeat. */
96  FPOINT *End,
97  FEATURE_SET FeatureSet) {
98 /*
99  ** Parameters:
100  ** Start starting point of pico-feature
101  ** End ending point of pico-feature
102  ** FeatureSet set to add pico-feature to
103  ** Globals:
104  ** classify_pico_feature_length length of a single pico-feature
105  ** Operation: This routine converts an entire segment of an outline
106  ** into a set of pico features which are added to
107  ** FeatureSet. The length of the segment is rounded to the
108  ** nearest whole number of pico-features. The pico-features
109  ** are spaced evenly over the entire segment.
110  ** Return: none (results are placed in FeatureSet)
111  ** Exceptions: none
112  ** History: Tue Apr 30 15:44:34 1991, DSJ, Created.
113  */
114  FEATURE Feature;
115  FLOAT32 Angle;
116  FLOAT32 Length;
117  int NumFeatures;
118  FPOINT Center;
119  FPOINT Delta;
120  int i;
121 
/* The direction is computed once; every pico-feature cut from this segment
   receives the same Angle. NOTE(review): the meaning of the third argument
   (1.0) to NormalizedAngleFrom is not visible here -- confirm. */
122  Angle = NormalizedAngleFrom (Start, End, 1.0);
123  Length = DistanceBetween (*Start, *End);
/* floor(x + 0.5) rounds the feature count to the nearest whole number. */
124  NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5);
/* Always emit at least one feature, so a segment shorter than half a
   pico-feature still contributes (and the divisions below never see 0). */
125  if (NumFeatures < 1)
126  NumFeatures = 1;
127 
128  /* compute vector for one pico feature */
129  Delta.x = XDelta (*Start, *End) / NumFeatures;
130  Delta.y = YDelta (*Start, *End) / NumFeatures;
131 
132  /* compute position of first pico feature */
/* Half a step from Start: each feature is placed at the centre of its own
   sub-segment rather than at the sub-segment's starting end. */
133  Center.x = Start->x + Delta.x / 2.0;
134  Center.y = Start->y + Delta.y / 2.0;
135 
136  /* compute each pico feature in segment and add to feature set */
137  for (i = 0; i < NumFeatures; i++) {
138  Feature = NewFeature (&PicoFeatDesc);
139  Feature->Params[PicoFeatDir] = Angle;
140  Feature->Params[PicoFeatX] = Center.x;
141  Feature->Params[PicoFeatY] = Center.y;
142  AddFeature(FeatureSet, Feature);
143 
/* advance to the centre of the next sub-segment */
144  Center.x += Delta.x;
145  Center.y += Delta.y;
146  }
147 } /* ConvertSegmentToPicoFeat */
148 
149 
150 /*---------------------------------------------------------------------------*/
151 void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
152 /*
153  ** Parameters:
154  ** Outline outline to extract micro-features from
155  ** FeatureSet set of features to add pico-features to
156  ** Globals:
157  ** classify_pico_feature_length
158  ** length of features to be extracted
159  ** Operation:
160  ** This routine steps thru the specified outline and cuts it
161  ** up into pieces of equal length. These pieces become the
162  ** desired pico-features. Each segment in the outline
163  ** is converted into an integral number of pico-features.
164  ** Return: none (results are returned in FeatureSet)
165  ** Exceptions: none
166  ** History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures().
167  */
168  MFOUTLINE Next;
169  MFOUTLINE First;
170  MFOUTLINE Current;
171 
172  if (DegenerateOutline(Outline))
173  return;
174 
175  First = Outline;
176  Current = First;
177  Next = NextPointAfter(Current);
178  do {
179  /* note that an edge is hidden if the ending point of the edge is
180  marked as hidden. This situation happens because the order of
181  the outlines is reversed when they are converted from the old
182  format. In the old format, a hidden edge is marked by the
183  starting point for that edge. */
184  if (!(PointAt(Next)->Hidden))
185  ConvertSegmentToPicoFeat (&(PointAt(Current)->Point),
186  &(PointAt(Next)->Point), FeatureSet);
187 
188  Current = Next;
189  Next = NextPointAfter(Current);
190  }
191  while (Current != First);
192 
193 } /* ConvertToPicoFeatures2 */
194 
195 
196 /*---------------------------------------------------------------------------*/
197 void NormalizePicoX(FEATURE_SET FeatureSet) {
198 /*
199  ** Parameters:
200  ** FeatureSet pico-features to be normalized
201  ** Globals: none
202  ** Operation: This routine computes the average x position over all
203  ** of the pico-features in FeatureSet and then renormalizes
204  ** the pico-features to force this average to be the x origin
205  ** (i.e. x=0).
206  ** Return: none (FeatureSet is changed)
207  ** Exceptions: none
208  ** History: Tue Sep 4 16:50:08 1990, DSJ, Created.
209  */
210  int i;
211  FEATURE Feature;
212  FLOAT32 Origin = 0.0;
213 
214  for (i = 0; i < FeatureSet->NumFeatures; i++) {
215  Feature = FeatureSet->Features[i];
216  Origin += Feature->Params[PicoFeatX];
217  }
218  Origin /= FeatureSet->NumFeatures;
219 
220  for (i = 0; i < FeatureSet->NumFeatures; i++) {
221  Feature = FeatureSet->Features[i];
222  Feature->Params[PicoFeatX] -= Origin;
223  }
224 } /* NormalizePicoX */
225 
226 /*---------------------------------------------------------------------------*/
/* NOTE(review): listing lines 227 (the line that opens this function's
   definition) and 236 (the start of the statement that initialises `sample`)
   are absent from this extract; line 237 below is the tail of that
   statement. The closing comment names the function ExtractIntCNFeatures. */
228 /*
229  ** Parameters:
230  ** blob blob to extract features from
231  ** denorm normalization/denormalization parameters.
232  ** Return: Integer character-normalized features for blob.
       NULL is returned when no sample is obtained.
233  ** Exceptions: none
234  ** History: 8/8/2011, rays, Created.
235  */
237  tesseract::NM_CHAR_ANISOTROPIC, blob, denorm);
238  if (sample == NULL) return NULL;
239 
240  int num_features = sample->num_features();
241  const INT_FEATURE_STRUCT* features = sample->features();
/* The set is created with room for exactly one entry per sample feature. */
242  FEATURE_SET feature_set = NewFeatureSet(num_features);
243  for (int f = 0; f < num_features; ++f) {
244  FEATURE feature = NewFeature(&IntFeatDesc);
245 
/* Copy the X, Y and Theta fields of the sample feature into the IntX, IntY
   and IntDir parameter slots of the new feature. */
246  feature->Params[IntX] = features[f].X;
247  feature->Params[IntY] = features[f].Y;
248  feature->Params[IntDir] = features[f].Theta;
249  AddFeature(feature_set, feature);
250  }
/* All values have been copied out of the sample above, so it is deleted
   before returning. */
251  delete sample;
252 
253  return feature_set;
254 } /* ExtractIntCNFeatures */
255 
256 /*---------------------------------------------------------------------------*/
/* NOTE(review): listing lines 257 (the line that opens this function's
   definition) and 266 (the start of the statement that initialises `sample`)
   are absent from this extract; line 267 below is the tail of that
   statement. The closing comment names the function ExtractIntGeoFeatures. */
258 /*
259  ** Parameters:
260  ** blob blob to extract features from
261  ** denorm normalization/denormalization parameters.
262  ** Return: Geometric (top/bottom/width) features for blob.
       The returned set holds a single feature. NULL is returned when
       no sample is obtained.
263  ** Exceptions: none
264  ** History: 8/8/2011, rays, Created.
265  */
267  tesseract::NM_CHAR_ANISOTROPIC, blob, denorm);
268  if (sample == NULL) return NULL;
269 
270  FEATURE_SET feature_set = NewFeatureSet(1);
/* NOTE(review): the feature is allocated with IntFeatDesc, yet it is filled
   below through the GeoBottom/GeoTop/GeoWidth indices. Confirm that this
   descriptor is the intended one for geometric features. */
271  FEATURE feature = NewFeature(&IntFeatDesc);
272 
273  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
274  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
275  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
276  AddFeature(feature_set, feature);
/* All values have been copied out of the sample above, so it is deleted
   before returning. */
277  delete sample;
278 
279  return feature_set;
280 } /* ExtractIntGeoFeatures */