Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pageiterator.cpp
Go to the documentation of this file.
1 
2 // File: pageiterator.cpp
3 // Description: Iterator for tesseract page structure that avoids using
4 // tesseract internal data structures.
5 // Author: Ray Smith
6 // Created: Fri Feb 26 14:32:09 PST 2010
7 //
8 // (C) Copyright 2010, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #include "pageiterator.h"
22 #include "allheaders.h"
23 #include "helpers.h"
24 #include "pageres.h"
25 #include "tesseractclass.h"
26 
27 namespace tesseract {
28 
30  int scale, int scaled_yres,
31  int rect_left, int rect_top,
32  int rect_width, int rect_height)
33  : page_res_(page_res), tesseract_(tesseract),
34  word_(NULL), word_length_(0), blob_index_(0), cblob_it_(NULL),
35  scale_(scale), scaled_yres_(scaled_yres),
36  rect_left_(rect_left), rect_top_(rect_top),
37  rect_width_(rect_width), rect_height_(rect_height) {
38  it_ = new PAGE_RES_IT(page_res);
40 }
41 
43  delete it_;
44  delete cblob_it_;
45 }
46 
53  : page_res_(src.page_res_), tesseract_(src.tesseract_),
54  word_(NULL), word_length_(src.word_length_),
55  blob_index_(src.blob_index_), cblob_it_(NULL),
56  scale_(src.scale_), scaled_yres_(src.scaled_yres_),
57  rect_left_(src.rect_left_), rect_top_(src.rect_top_),
58  rect_width_(src.rect_width_), rect_height_(src.rect_height_) {
59  it_ = new PAGE_RES_IT(*src.it_);
61 }
62 
64  page_res_ = src.page_res_;
65  tesseract_ = src.tesseract_;
66  scale_ = src.scale_;
68  rect_left_ = src.rect_left_;
69  rect_top_ = src.rect_top_;
72  if (it_ != NULL) delete it_;
73  it_ = new PAGE_RES_IT(*src.it_);
75  return *this;
76 }
77 
79  return (it_ == NULL && it_ == other) ||
80  ((other != NULL) && (it_ != NULL) && (*it_ == *other));
81 }
82 
83 // ============= Moving around within the page ============.
84 
88  BeginWord(0);
89 }
90 
92  if (it_->block() == NULL) return; // At end of the document.
93  PAGE_RES_IT para(page_res_);
94  PAGE_RES_IT next_para(para);
95  next_para.forward_paragraph();
96  while (next_para.cmp(*it_) <= 0) {
97  para = next_para;
98  next_para.forward_paragraph();
99  }
100  *it_ = para;
101  BeginWord(0);
102 }
103 
105  PageIterator p_start(*this);
106  p_start.RestartParagraph();
107  return p_start.it_->row() == it_->row();
108 }
109 
111  it_->restart_row();
112  BeginWord(0);
113 }
114 
129  if (it_->block() == NULL) return false; // Already at the end!
130  if (it_->word() == NULL)
131  level = RIL_BLOCK;
132 
133  switch (level) {
134  case RIL_BLOCK:
135  it_->forward_block();
136  break;
137  case RIL_PARA:
139  break;
140  case RIL_TEXTLINE:
141  for (it_->forward_with_empties(); it_->row() == it_->prev_row();
143  break;
144  case RIL_WORD:
146  break;
147  case RIL_SYMBOL:
148  if (cblob_it_ != NULL)
149  cblob_it_->forward();
150  ++blob_index_;
151  if (blob_index_ >= word_length_)
153  else
154  return true;
155  break;
156  }
157  BeginWord(0);
158  return it_->block() != NULL;
159 }
160 
167  if (it_->block() == NULL) return false; // Already at the end!
168  if (it_->word() == NULL) return true; // In an image block.
169  switch (level) {
170  case RIL_BLOCK:
171  return blob_index_ == 0 && it_->block() != it_->prev_block();
172  case RIL_PARA:
173  return blob_index_ == 0 &&
174  (it_->block() != it_->prev_block() ||
175  it_->row()->row->para() != it_->prev_row()->row->para());
176  case RIL_TEXTLINE:
177  return blob_index_ == 0 && it_->row() != it_->prev_row();
178  case RIL_WORD:
179  return blob_index_ == 0;
180  case RIL_SYMBOL:
181  return true;
182  }
183  return false;
184 }
185 
191  PageIteratorLevel element) const {
192  if (Empty(element)) return true; // Already at the end!
193  // The result is true if we step forward by element and find we are
194  // at the end of the page or at beginning of *all* levels in:
195  // [level, element).
196  // When there is more than one level difference between element and level,
197  // we could for instance move forward one symbol and still be at the first
198  // word on a line, so we also have to be at the first symbol in a word.
199  PageIterator next(*this);
200  next.Next(element);
201  if (next.Empty(element)) return true; // Reached the end of the page.
202  while (element > level) {
203  element = static_cast<PageIteratorLevel>(element - 1);
204  if (!next.IsAtBeginningOf(element))
205  return false;
206  }
207  return true;
208 }
209 
216 int PageIterator::Cmp(const PageIterator &other) const {
217  int word_cmp = it_->cmp(*other.it_);
218  if (word_cmp != 0)
219  return word_cmp;
220  if (blob_index_ < other.blob_index_)
221  return -1;
222  if (blob_index_ == other.blob_index_)
223  return 0;
224  return 1;
225 }
226 
227 // ============= Accessing data ==============.
228 // Coordinate system:
229 // Integer coordinates are at the cracks between the pixels.
230 // The top-left corner of the top-left pixel in the image is at (0,0).
231 // The bottom-right corner of the bottom-right pixel in the image is at
232 // (width, height).
233 // Every bounding box goes from the top-left of the top-left contained
234 // pixel to the bottom-right of the bottom-right contained pixel, so
235 // the bounding box of the single top-left pixel in the image is:
236 // (0,0)->(1,1).
237 // If an image rectangle has been set in the API, then returned coordinates
238 // relate to the original (full) image, rather than the rectangle.
239 
247  int* left, int* top,
248  int* right, int* bottom) const {
249  if (Empty(level))
250  return false;
251  TBOX box;
252  PARA *para = NULL;
253  switch (level) {
254  case RIL_BLOCK:
255  box = it_->block()->block->bounding_box();
256  break;
257  case RIL_PARA:
258  para = it_->row()->row->para();
259  // explicit fall-through.
260  case RIL_TEXTLINE:
261  box = it_->row()->row->bounding_box();
262  break;
263  case RIL_WORD:
264  box = it_->word()->word->bounding_box();
265  break;
266  case RIL_SYMBOL:
267  if (cblob_it_ == NULL)
268  box = it_->word()->box_word->BlobBox(blob_index_);
269  else
270  box = cblob_it_->data()->bounding_box();
271  }
272  if (level == RIL_PARA) {
273  PageIterator other = *this;
274  other.Begin();
275  do {
276  if (other.it_->block() &&
277  other.it_->block()->block == it_->block()->block &&
278  other.it_->row() && other.it_->row()->row &&
279  other.it_->row()->row->para() == para) {
280  box = box.bounding_union(other.it_->row()->row->bounding_box());
281  }
282  } while (other.Next(RIL_TEXTLINE));
283  }
284  if (level != RIL_SYMBOL || cblob_it_ != NULL)
285  box.rotate(it_->block()->block->re_rotation());
286  // Now we have a box in tesseract coordinates relative to the image rectangle,
287  // we have to convert the coords to a top-down system.
288  const int pix_height = pixGetHeight(tesseract_->pix_binary());
289  const int pix_width = pixGetWidth(tesseract_->pix_binary());
290  *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
291  *top = ClipToRange(pix_height - box.top(), 0, pix_height);
292  *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
293  *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
294  return true;
295 }
296 
304  int* left, int* top,
305  int* right, int* bottom) const {
306  if (!BoundingBoxInternal(level, left, top, right, bottom))
307  return false;
308  // Convert to the coordinate system of the original image.
309  *left = ClipToRange(*left / scale_ + rect_left_,
311  *top = ClipToRange(*top / scale_ + rect_top_,
313  *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_,
314  *left, rect_left_ + rect_width_);
315  *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_,
316  *top, rect_top_ + rect_height_);
317  return true;
318 }
319 
322  if (it_->block() == NULL) return true; // Already at the end!
323  if (it_->word() == NULL && level != RIL_BLOCK) return true; // image block
324  if (level == RIL_SYMBOL && blob_index_ >= word_length_)
325  return true; // Zero length word, or already at the end of it.
326  return false;
327 }
328 
331  if (it_->block() == NULL || it_->block()->block == NULL)
332  return PT_UNKNOWN; // Already at the end!
333  if (it_->block()->block->poly_block() == NULL)
334  return PT_FLOWING_TEXT; // No layout analysis used - assume text.
335  return it_->block()->block->poly_block()->isA();
336 }
337 
361  int left, top, right, bottom;
362  if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
363  return NULL;
364  Pix* pix = NULL;
365  switch (level) {
366  case RIL_BLOCK:
367  case RIL_PARA:
368  int bleft, btop, bright, bbottom;
369  BoundingBoxInternal(RIL_BLOCK, &bleft, &btop, &bright, &bbottom);
370  pix = it_->block()->block->render_mask();
371  // AND the mask and the image.
372  pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix),
373  PIX_SRC & PIX_DST, tesseract_->pix_binary(),
374  bleft, btop);
375  if (level == RIL_PARA) {
376  // RIL_PARA needs further attention:
377  // clip the paragraph from the block mask.
378  Box* box = boxCreate(left - bleft, top - btop,
379  right - left, bottom - top);
380  Pix* pix2 = pixClipRectangle(pix, box, NULL);
381  boxDestroy(&box);
382  pixDestroy(&pix);
383  pix = pix2;
384  }
385  break;
386  case RIL_TEXTLINE:
387  case RIL_WORD:
388  case RIL_SYMBOL:
389  if (level == RIL_SYMBOL && cblob_it_ != NULL &&
390  cblob_it_->data()->area() != 0)
391  return cblob_it_->data()->render();
392  // Just clip from the bounding box.
393  Box* box = boxCreate(left, top, right - left, bottom - top);
394  pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
395  boxDestroy(&box);
396  break;
397  }
398  return pix;
399 }
400 
412  int* left, int* top) const {
413  int right, bottom;
414  if (!BoundingBox(level, left, top, &right, &bottom))
415  return NULL;
416  Pix* pix = tesseract_->pix_grey();
417  if (pix == NULL)
418  return GetBinaryImage(level);
419 
420  // Expand the box.
421  *left = MAX(*left - padding, 0);
422  *top = MAX(*top - padding, 0);
423  right = MIN(right + padding, rect_width_);
424  bottom = MIN(bottom + padding, rect_height_);
425  Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
426  Pix* grey_pix = pixClipRectangle(pix, box, NULL);
427  boxDestroy(&box);
428  if (level == RIL_BLOCK) {
429  Pix* mask = it_->block()->block->render_mask();
430  Pix* expanded_mask = pixCreate(right - *left, bottom - *top, 1);
431  pixRasterop(expanded_mask, padding, padding,
432  pixGetWidth(mask), pixGetHeight(mask),
433  PIX_SRC, mask, 0, 0);
434  pixDestroy(&mask);
435  pixDilateBrick(expanded_mask, expanded_mask, 2*padding + 1, 2*padding + 1);
436  pixInvert(expanded_mask, expanded_mask);
437  pixSetMasked(grey_pix, expanded_mask, 255);
438  pixDestroy(&expanded_mask);
439  }
440  return grey_pix;
441 }
442 
449  int* x1, int* y1, int* x2, int* y2) const {
450  if (it_->word() == NULL) return false; // Already at the end!
451  ROW* row = it_->row()->row;
452  WERD* word = it_->word()->word;
453  TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
454  ? word->bounding_box()
455  : row->bounding_box();
456  int left = box.left();
457  ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5));
458  int right = box.right();
459  ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5));
460  // Rotate to image coordinates and convert to global image coords.
461  startpt.rotate(it_->block()->block->re_rotation());
462  endpt.rotate(it_->block()->block->re_rotation());
463  *x1 = startpt.x() / scale_ + rect_left_;
464  *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
465  *x2 = endpt.x() / scale_ + rect_left_;
466  *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
467  return true;
468 }
469 
471  tesseract::WritingDirection *writing_direction,
472  tesseract::TextlineOrder *textline_order,
473  float *deskew_angle) const {
474  BLOCK* block = it_->block()->block;
475 
476  // Orientation
477  FCOORD up_in_image(0.0, 1.0);
478  up_in_image.unrotate(block->classify_rotation());
479  up_in_image.rotate(block->re_rotation());
480 
481  if (up_in_image.x() == 0.0F) {
482  if (up_in_image.y() > 0.0F) {
483  *orientation = ORIENTATION_PAGE_UP;
484  } else {
485  *orientation = ORIENTATION_PAGE_DOWN;
486  }
487  } else if (up_in_image.x() > 0.0F) {
488  *orientation = ORIENTATION_PAGE_RIGHT;
489  } else {
490  *orientation = ORIENTATION_PAGE_LEFT;
491  }
492 
493  // Writing direction
494  bool is_vertical_text = (block->classify_rotation().x() == 0.0);
495  bool right_to_left = block->right_to_left();
496  *writing_direction =
497  is_vertical_text
499  : (right_to_left
502 
503  // Textline Order
504  bool is_mongolian = false; // TODO(eger): fix me
505  *textline_order = is_vertical_text
506  ? (is_mongolian
510 
511  // Deskew angle
512  FCOORD skew = block->skew(); // true horizontal for textlines
513  *deskew_angle = -skew.angle();
514 }
515 
517  bool *is_list_item,
518  bool *is_crown,
519  int *first_line_indent) const {
521  if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
522  !it_->row()->row->para()->model)
523  return;
524 
525  PARA *para = it_->row()->row->para();
526  *is_list_item = para->is_list_item;
527  *is_crown = para->is_very_first_or_continuation;
528  *first_line_indent = para->model->first_indent() -
529  para->model->body_indent();
530 }
531 
536 void PageIterator::BeginWord(int offset) {
537  WERD_RES* word_res = it_->word();
538  if (word_res == NULL) {
539  // This is a non-text block, so there is no word.
540  word_length_ = 0;
541  blob_index_ = 0;
542  word_ = NULL;
543  return;
544  }
545  if (word_res->best_choice != NULL) {
546  // Recognition has been done, so we are using the box_word, which
547  // is already baseline denormalized.
548  word_length_ = word_res->best_choice->length();
549  ASSERT_HOST(word_res->box_word != NULL);
550  if (word_res->box_word->length() != word_length_) {
551  tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
553  word_res->box_word->length());
554  word_res->box_word->bounding_box().print();
555  }
556  ASSERT_HOST(word_res->box_word->length() == word_length_);
557  word_ = NULL;
558  // We will be iterating the box_word.
559  if (cblob_it_ != NULL) {
560  delete cblob_it_;
561  cblob_it_ = NULL;
562  }
563  } else {
564  // No recognition yet, so a "symbol" is a cblob.
565  word_ = word_res->word;
567  word_length_ = word_->cblob_list()->length();
568  if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT;
569  cblob_it_->set_to_list(word_->cblob_list());
570  }
571  for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
572  if (cblob_it_ != NULL)
573  cblob_it_->forward();
574  }
575 }
576 
577 } // namespace tesseract.