Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
cube_search_object.h
Go to the documentation of this file.
1
/**********************************************************************
2
* File: cube_search_object.h
3
* Description: Declaration of the Cube Search Object Class
4
* Author: Ahmad Abdulkader
5
* Created: 2007
6
*
7
* (C) Copyright 2008, Google Inc.
8
** Licensed under the Apache License, Version 2.0 (the "License");
9
** you may not use this file except in compliance with the License.
10
** You may obtain a copy of the License at
11
** http://www.apache.org/licenses/LICENSE-2.0
12
** Unless required by applicable law or agreed to in writing, software
13
** distributed under the License is distributed on an "AS IS" BASIS,
14
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
** See the License for the specific language governing permissions and
16
** limitations under the License.
17
*
18
**********************************************************************/
19
20
// The CubeSearchObject class represents a char_samp (a word bitmap) that is
21
// being searched for characters (or recognizable entities).
22
// The Class detects the connected components and performs an oversegmentation
23
// on each ConComp. The result of which is a list of segments that are ordered
24
// in reading order.
25
// The class provides methods that inquire about the number of segments, the
26
// CharSamp corresponding to any segment range and the recognition results
27
// of any segment range
28
// An object of Class CubeSearchObject is used by the BeamSearch algorithm
29
// to recognize a CharSamp into a list of word alternates
30
31
#ifndef CUBE_SEARCH_OBJECT_H
32
#define CUBE_SEARCH_OBJECT_H
33
34
#include "
search_object.h
"
35
#include "
char_samp.h
"
36
#include "
conv_net_classifier.h
"
37
#include "
cube_reco_context.h
"
38
#include "allheaders.h"
39
40
namespace
tesseract
{
41
class
CubeSearchObject
:
public
SearchObject
{
42
public
:
43
CubeSearchObject
(
CubeRecoContext
*cntxt,
CharSamp
*samp);
44
~CubeSearchObject
();
45
46
// returns the Segmentation Point count of the CharSamp owned by the class
47
int
SegPtCnt
();
48
// Recognize the set of segments given by the specified range and return
49
// a list of possible alternate answers
50
CharAltList
*
RecognizeSegment
(
int
start_pt,
int
end_pt);
51
// Returns the CharSamp corresponding to the specified segment range
52
CharSamp
*
CharSample
(
int
start_pt,
int
end_pt);
53
// Returns a leptonica box corresponding to the specified segment range
54
Box *
CharBox
(
int
start_pt,
int
end_pt);
55
// Returns the cost of having a space before the specified segmentation pt
56
int
SpaceCost
(
int
seg_pt);
57
// Returns the cost of not having a space before the specified
58
// segmentation pt
59
int
NoSpaceCost
(
int
seg_pt);
60
// Returns the cost of not having any spaces within the specified range
61
// of segmentation points
62
int
NoSpaceCost
(
int
seg_pt,
int
end_pt);
63
64
private
:
65
// Maximum reasonable segment count
66
static
const
int
kMaxSegmentCnt = 128;
67
// Use cropped samples
68
static
const
bool
kUseCroppedChars;
69
70
// reading order flag
71
bool
rtl_;
72
// cached dimensions of char samp
73
int
left_;
74
int
itop_;
75
int
wid_;
76
int
hgt_;
77
// minimum and maximum and possible inter-segment gaps for spaces
78
int
min_spc_gap_;
79
int
max_spc_gap_;
80
// initialization flag
81
bool
init_;
82
// maximum segments per character: Cached from tuning parameters object
83
int
max_seg_per_char_;
84
// char sample to be processed
85
CharSamp
*samp_;
86
// segment count
87
int
segment_cnt_;
88
// segments of the processed char samp
89
ConComp
**segments_;
90
// Cache data members:
91
// There are two caches kept; a CharSamp cache and a CharAltList cache
92
// Each is a 2-D array of CharSamp and CharAltList pointers respectively
93
// hence the triple pointer.
94
CharAltList
***reco_cache_;
95
CharSamp
***samp_cache_;
96
// Cached costs of space and no-space after every segment. Computed only
97
// in phrase mode
98
int
*space_cost_;
99
int
*no_space_cost_;
100
101
// init and allocate variables, perform segmentation
102
bool
Init();
103
// Cleanup
104
void
Cleanup();
105
// Perform segmentation of the bitmap by detecting connected components,
106
// segmenting each connected component using windowed vertical pixel density
107
// histogram and sorting the resulting segments in reading order
108
// Returns true on success
109
bool
Segment();
110
// validate the segment ranges.
111
inline
bool
IsValidSegmentRange(
int
start_pt,
int
end_pt) {
112
return
(end_pt > start_pt && start_pt >= -1 && start_pt < segment_cnt_ &&
113
end_pt >= 0 && end_pt <= segment_cnt_ &&
114
end_pt <= (start_pt + max_seg_per_char_));
115
}
116
// computes the space and no space costs at gaps between segments
117
// return true on sucess
118
bool
ComputeSpaceCosts();
119
};
120
}
121
122
#endif // CUBE_SEARCH_OBJECT_H
mnt
data
src
tesseract-ocr
cube
cube_search_object.h
Generated on Thu Nov 1 2012 20:19:47 for Tesseract by
1.8.1