Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
pageiterator.h
Go to the documentation of this file.
1
2
// File: pageiterator.h
3
// Description: Iterator for tesseract page structure that avoids using
4
// tesseract internal data structures.
5
// Author: Ray Smith
6
// Created: Fri Feb 26 11:01:06 PST 2010
7
//
8
// (C) Copyright 2010, Google Inc.
9
// Licensed under the Apache License, Version 2.0 (the "License");
10
// you may not use this file except in compliance with the License.
11
// You may obtain a copy of the License at
12
// http://www.apache.org/licenses/LICENSE-2.0
13
// Unless required by applicable law or agreed to in writing, software
14
// distributed under the License is distributed on an "AS IS" BASIS,
15
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
// See the License for the specific language governing permissions and
17
// limitations under the License.
18
//
20
21
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H__
22
#define TESSERACT_CCMAIN_PAGEITERATOR_H__
23
24
#include "
publictypes.h
"
25
#include "
platform.h
"
26
27
// Forward declarations of types defined elsewhere in the project. Where this
// header uses them (C_BLOB_IT, PAGE_RES, PAGE_RES_IT, WERD, Pix), it does so
// only through pointers, so their complete definitions are not needed here.
class C_BLOB_IT;
// NOTE(review): PBLOB_IT is not referenced anywhere else in the visible part
// of this header; kept in case files that include this header depend on it.
class PBLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
struct Pix;
33
34
namespace
tesseract
{
35
36
// Forward declaration: this header refers to tesseract::Tesseract only through
// pointers (the PageIterator constructor argument and the tesseract_ member).
class Tesseract;
37
51
class
TESS_API
PageIterator
{
52
public
:
67
PageIterator
(
PAGE_RES
* page_res,
Tesseract
*
tesseract
,
68
int
scale,
int
scaled_yres,
69
int
rect_left,
int
rect_top,
70
int
rect_width,
int
rect_height);
71
virtual
~
PageIterator
();
72
79
PageIterator
(
const
PageIterator
& src);
80
const
PageIterator
& operator=(
const
PageIterator
& src);
81
83
bool
PositionedAtSameWord(
const
PAGE_RES_IT
* other)
const
;
84
85
// ============= Moving around within the page ============.
86
91
virtual
void
Begin();
92
98
virtual
void
RestartParagraph();
99
104
bool
IsWithinFirstTextlineOfParagraph()
const
;
105
111
virtual
void
RestartRow();
112
124
virtual
bool
Next(
PageIteratorLevel
level);
125
139
virtual
bool
IsAtBeginningOf(
PageIteratorLevel
level)
const
;
140
157
virtual
bool
IsAtFinalElement(
PageIteratorLevel
level,
158
PageIteratorLevel
element)
const
;
159
166
int
Cmp(
const
PageIterator
&other)
const
;
167
168
// ============= Accessing data ==============.
169
// Coordinate system:
170
// Integer coordinates are at the cracks between the pixels.
171
// The top-left corner of the top-left pixel in the image is at (0,0).
172
// The bottom-right corner of the bottom-right pixel in the image is at
173
// (width, height).
174
// Every bounding box goes from the top-left of the top-left contained
175
// pixel to the bottom-right of the bottom-right contained pixel, so
176
// the bounding box of the single top-left pixel in the image is:
177
// (0,0)->(1,1).
178
// If an image rectangle has been set in the API, then returned coordinates
179
// relate to the original (full) image, rather than the rectangle.
180
190
bool
BoundingBox(
PageIteratorLevel
level,
191
int
* left,
int
* top,
int
* right,
int
* bottom)
const
;
197
bool
BoundingBoxInternal(
PageIteratorLevel
level,
198
int
* left,
int
* top,
int
* right,
int
* bottom)
const
;
199
201
bool
Empty(
PageIteratorLevel
level)
const
;
202
207
PolyBlockType
BlockType()
const
;
208
215
Pix* GetBinaryImage(
PageIteratorLevel
level)
const
;
216
227
Pix* GetImage(
PageIteratorLevel
level,
int
padding,
228
int
* left,
int
* top)
const
;
229
236
bool
Baseline(
PageIteratorLevel
level,
237
int
* x1,
int
* y1,
int
* x2,
int
* y2)
const
;
238
247
void
Orientation
(
tesseract::Orientation
*orientation,
248
tesseract::WritingDirection
*writing_direction,
249
tesseract::TextlineOrder
*textline_order,
250
float
*deskew_angle)
const
;
251
280
void
ParagraphInfo(
tesseract::ParagraphJustification
*justification,
281
bool
*is_list_item,
282
bool
*is_crown,
283
int
*first_line_indent)
const
;
284
285
protected
:
290
TESS_LOCAL
void
BeginWord(
int
offset);
291
293
PAGE_RES
*
page_res_
;
295
Tesseract
*
tesseract_
;
300
PAGE_RES_IT
*
it_
;
305
WERD
*
word_
;
307
int
word_length_
;
309
int
blob_index_
;
315
C_BLOB_IT*
cblob_it_
;
317
int
scale_
;
318
int
scaled_yres_
;
319
int
rect_left_
;
320
int
rect_top_
;
321
int
rect_width_
;
322
int
rect_height_
;
323
};
324
325
}
// namespace tesseract.
326
327
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H__
mnt
data
src
tesseract-ocr
ccmain
pageiterator.h
Generated on Thu Nov 1 2012 20:19:43 for Tesseract by Doxygen 1.8.1