Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
associate.h
Go to the documentation of this file.
1
2
// File: associate.h
3
// Description: Structs, classes, typedefs useful for the segmentation
4
// search. Functions for scoring segmentation paths according
5
// to their character widths, gap widths and seam cuts.
6
// Author: Daria Antonova
7
// Created: Mon Mar 8 11:26:43 PDT 2010
8
//
9
// (C) Copyright 2010, Google Inc.
10
// Licensed under the Apache License, Version 2.0 (the "License");
11
// you may not use this file except in compliance with the License.
12
// You may obtain a copy of the License at
13
// http://www.apache.org/licenses/LICENSE-2.0
14
// Unless required by applicable law or agreed to in writing, software
15
// distributed under the License is distributed on an "AS IS" BASIS,
16
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
// See the License for the specific language governing permissions and
18
// limitations under the License.
19
//
21
22
#ifndef ASSOCIATE_H
23
#define ASSOCIATE_H
24
25
#include "
blobs.h
"
26
#include "
elst.h
"
27
#include "
matrix.h
"
28
#include "
seam.h
"
29
#include "
split.h
"
30
#include "
states.h
"
31
32
// Forward declaration: only pointers to WERD_RES are used in this header,
// so the full class definition is not required here.
class WERD_RES;
33
34
// Fixed-size array of inT16 values with MAX_NUM_CHUNKS entries.
// NOTE(review): presumably one weight per chunk - confirm against users.
typedef inT16 BLOB_WEIGHTS[MAX_NUM_CHUNKS];
35
36
// Record kept for each unichar evaluated.
// NOTE(review): the exact meaning and units of the fields are not visible
// in this header - confirm against the code that fills the record.
struct EVALUATION_RECORD {
  float match;
  float certainty;
  char character;
  int width;
  int gap;
};
44
45
// Fixed-size array of EVALUATION_RECORD with MAX_NUM_CHUNKS entries.
typedef EVALUATION_RECORD EVALUATION_ARRAY[MAX_NUM_CHUNKS];
46
47
// Classification info for chunks.
48
//
49
// TODO(daria): move to tesseract namespace when obsolete code using
50
// this struct that is not in tesseract namespace is deprecated.
51
struct
CHUNKS_RECORD
{
52
MATRIX
*
ratings
;
53
TBLOB
*
chunks
;
54
WERD_RES
*
word_res
;
// Borrowed pointer - do not delete!
55
SEAMS
splits
;
56
int
x_height
;
57
WIDTH_RECORD
*
chunk_widths
;
58
WIDTH_RECORD
*
char_widths
;
59
inT16
*
weights
;
60
};
61
62
namespace
tesseract
{
63
64
// Statisitcs about character widths, gaps and seams.
65
struct
AssociateStats
{
66
AssociateStats
() {
Clear
(); }
67
68
void
Clear
() {
69
shape_cost
= 0.0f;
70
bad_shape
=
false
;
71
full_wh_ratio
= 0.0f;
72
full_wh_ratio_total
= 0.0f;
73
full_wh_ratio_var
= 0.0f;
74
bad_fixed_pitch_right_gap
=
false
;
75
bad_fixed_pitch_wh_ratio
=
false
;
76
}
77
78
void
Print
() {
79
tprintf
(
"AssociateStats: w(%g %d) s(%g %d)\n"
,
shape_cost
,
bad_shape
);
80
}
81
82
float
shape_cost
;
// cost of blob shape
83
bool
bad_shape
;
// true if the shape of the blob is unacceptable
84
float
full_wh_ratio
;
// width-to-hight ratio + gap on the right
85
float
full_wh_ratio_total
;
// sum of width-to-hight ratios
86
// on the path terminating at this blob
87
float
full_wh_ratio_var
;
// variance of full_wh_ratios on the path
88
bool
bad_fixed_pitch_right_gap
;
// true if there is no gap before
89
// the blob on the right
90
bool
bad_fixed_pitch_wh_ratio
;
// true if the blobs has width-to-hight
91
// ratio > kMaxFixedPitchCharAspectRatio
92
};
93
94
// Utility functions for scoring segmentation paths according to their
95
// character widths, gap widths, seam characteristics.
96
class
AssociateUtils
{
97
public
:
98
static
const
float
kMaxFixedPitchCharAspectRatio
;
99
static
const
float
kMinGap
;
100
101
// Computes character widths, gaps and seams stats given the
102
// AssociateStats of the path so far, col, row of the blob that
103
// is being added to the path, and CHUNKS_RECORD containing information
104
// about character widths, gaps and seams.
105
// Fills associate_cost with the combined shape, gap and seam cost
106
// of adding a unichar from (col, row) to the path (note that since
107
// this function could be used to compute the prioritization for
108
// pain points, (col, row) entry might not be classified yet; thus
109
// information in the (col, row) entry of the ratings matrix is not used).
110
//
111
// Note: the function assumes that chunks_record, stats and
112
// associate_cost pointers are not NULL.
113
static
void
ComputeStats
(
int
col,
int
row,
114
const
AssociateStats
*parent_stats,
115
int
parent_path_length,
116
bool
fixed_pitch,
117
float
max_char_wh_ratio,
118
const
DENORM
*denorm,
119
CHUNKS_RECORD
*chunks_record,
120
int
debug_level,
121
AssociateStats
*stats);
122
123
// Returns the width of a chunk which is a composed of several blobs
124
// blobs[start_blob..last_blob] inclusively.
125
// Widths/gaps records are in the form:
126
// width_record->num_char = n
127
// width_record->widths[2*n-1] = w0,g0,w1,g1..w(n-1),g(n-1)
128
static
int
GetChunksWidth
(
WIDTH_RECORD
*width_record,
129
int
start_blob,
int
last_blob);
130
131
// Returns the width of a gap between the specified chunk and the next one.
132
static
inline
int
GetChunksGap
(
WIDTH_RECORD
*width_record,
int
last_chunk) {
133
return
(last_chunk >= 0 && last_chunk < width_record->num_chars - 1) ?
134
width_record->
widths
[last_chunk * 2 + 1] : 0;
135
}
136
137
// Returns the width cost for fixed-pitch text.
138
static
float
FixedPitchWidthCost
(
float
norm_width,
float
right_gap,
139
bool
end_pos,
float
max_char_wh_ratio);
140
141
// Returns the gap cost for fixed-pitch text (penalizes vertically
142
// overlapping components).
143
static
inline
float
FixedPitchGapCost
(
float
norm_gap,
bool
end_pos) {
144
return
(norm_gap < 0.05 && !end_pos) ? 5.0f : 0.0f;
145
}
146
};
147
148
}
// namespace tesseract
149
150
#endif
mnt
data
src
tesseract-ocr
wordrec
associate.h
Generated on Thu Nov 1 2012 20:19:51 for Tesseract by
1.8.1