Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
resultiterator.h
Go to the documentation of this file.
1 
2 // File: resultiterator.h
3 // Description: Iterator for tesseract results that is capable of
4 // iterating in proper reading order over Bi Directional
5 // (e.g. mixed Hebrew and English) text.
6 // Author: David Eger
7 // Created: Fri May 27 13:58:06 PST 2011
8 //
9 // (C) Copyright 2011, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H__
23 #define TESSERACT_CCMAIN_RESULT_ITERATOR_H__
24 
25 #include "platform.h"
26 #include "ltrresultiterator.h"
27 #include "genericvector.h"
28 
29 class BLOB_CHOICE_IT;
30 class WERD_RES;
31 class STRING;
32 
33 namespace tesseract {
34 
35 class Tesseract;
36 
37 class TESS_API ResultIterator : public LTRResultIterator {
38  public:
39  static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
40 
45  virtual ~ResultIterator() {}
46 
47  // ============= Moving around within the page ============.
52  virtual void Begin();
53 
66  virtual bool Next(PageIteratorLevel level);
67 
74  virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
75 
81  virtual bool IsAtFinalElement(PageIteratorLevel level,
82  PageIteratorLevel element) const;
83 
84  // ============= Accessing data ==============.
85 
90  virtual char* GetUTF8Text(PageIteratorLevel level) const;
91 
96  bool ParagraphIsLtr() const;
97 
98  // ============= Exposed only for testing =============.
99 
122  static void CalculateTextlineOrder(
123  bool paragraph_is_ltr,
124  const GenericVector<StrongScriptDirection> &word_dirs,
125  GenericVectorEqEq<int> *reading_order);
126 
127  static const int kMinorRunStart;
128  static const int kMinorRunEnd;
129  static const int kComplexWord;
130 
131  protected:
138  TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit);
139 
140  private:
145  bool CurrentParagraphIsLtr() const;
146 
158  void CalculateTextlineOrder(bool paragraph_is_ltr,
159  const LTRResultIterator &resit,
160  GenericVectorEqEq<int> *indices) const;
162  void CalculateTextlineOrder(bool paragraph_is_ltr,
163  const LTRResultIterator &resit,
164  GenericVector<StrongScriptDirection> *ssd,
165  GenericVectorEqEq<int> *indices) const;
166 
171  int LTRWordIndex() const;
172 
177  void CalculateBlobOrder(GenericVector<int> *blob_indices) const;
178 
180  void MoveToLogicalStartOfTextline();
181 
186  void MoveToLogicalStartOfWord();
187 
189  bool IsAtFinalSymbolOfWord() const;
190 
192  bool IsAtFirstSymbolOfWord() const;
193 
198  void AppendSuffixMarks(STRING *text) const;
199 
201  void AppendUTF8WordText(STRING *text) const;
202 
210  void IterateAndAppendUTF8TextlineText(STRING *text);
211 
218  void AppendUTF8ParagraphText(STRING *text) const;
219 
221  bool BidiDebug(int min_level) const;
222 
223  bool current_paragraph_is_ltr_;
224 
229  bool at_beginning_of_minor_run_;
230 
232  bool in_minor_direction_;
233 };
234 
235 } // namespace tesseract.
236 
237 #endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H__