Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pagesegmain.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: pagesegmain.cpp
3  * Description: Top-level page segmenter for Tesseract.
4  * Author: Ray Smith
5  * Created: Thu Sep 25 17:12:01 PDT 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifdef _WIN32
21 #ifndef __GNUC__
22 #include <windows.h>
23 #endif /* __GNUC__ */
24 #else
25 #include <unistd.h>
26 #endif
27 #ifdef _MSC_VER
28 #pragma warning(disable:4244) // Conversion warnings
29 #endif
30 
31 // Include automatically generated configuration file if running autoconf.
32 #ifdef HAVE_CONFIG_H
33 #include "config_auto.h"
34 #endif
35 
36 #include "allheaders.h"
37 #include "blobbox.h"
38 #include "blread.h"
39 #include "colfind.h"
40 #include "equationdetect.h"
41 #include "imagefind.h"
42 #include "img.h"
43 #include "linefind.h"
44 #include "makerow.h"
45 #include "osdetect.h"
46 #include "tabvector.h"
47 #include "tesseractclass.h"
48 #include "tessvars.h"
49 #include "textord.h"
50 #include "tordmain.h"
51 #include "wordseg.h"
52 
53 namespace tesseract {
54 
// NOTE(review): presumably the lowest input resolution that is treated as
// believable; this constant is not referenced in the visible part of this
// listing, so confirm against its users.
56 const int kMinCredibleResolution = 70;
// NOTE(review): presumably the resolution substituted when the input value is
// not believable; also not referenced in the visible part of this listing.
58 const int kDefaultResolution = 300;
59 // Max erosions to perform in removing an enclosing circle.
// Used as the exclusive upper bound of the erosion loop in
// RemoveEnclosingCircle(), which starts counting at 1.
60 const int kMaxCircleErosions = 8;
61 
62 // Helper to remove an enclosing circle from an image.
63 // If there isn't one, then the image will most likely get badly mangled.
64 // The returned pix must be pixDestroyed after use. NULL may be returned
65 // if the image doesn't meet the trivial conditions that it uses to determine
66 // success.
67 static Pix* RemoveEnclosingCircle(Pix* pixs) {
68  Pix* pixsi = pixInvert(NULL, pixs);
69  Pix* pixc = pixCreateTemplate(pixs);
70  pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
71  pixSeedfillBinary(pixc, pixc, pixsi, 4);
72  pixInvert(pixc, pixc);
73  pixDestroy(&pixsi);
74  Pix* pixt = pixAnd(NULL, pixs, pixc);
75  l_int32 max_count;
76  pixCountConnComp(pixt, 8, &max_count);
77  // The count has to go up before we start looking for the minimum.
78  l_int32 min_count = MAX_INT32;
79  Pix* pixout = NULL;
80  for (int i = 1; i < kMaxCircleErosions; i++) {
81  pixDestroy(&pixt);
82  pixErodeBrick(pixc, pixc, 3, 3);
83  pixt = pixAnd(NULL, pixs, pixc);
84  l_int32 count;
85  pixCountConnComp(pixt, 8, &count);
86  if (i == 1 || count > max_count) {
87  max_count = count;
88  min_count = count;
89  } else if (i > 1 && count < min_count) {
90  min_count = count;
91  pixDestroy(&pixout);
92  pixout = pixCopy(NULL, pixt); // Save the best.
93  } else if (count >= min_count) {
94  break; // We have passed by the best.
95  }
96  }
97  pixDestroy(&pixt);
98  pixDestroy(&pixc);
99  return pixout;
100 }
101 
// Top-level page segmentation for the binary image held in pix_binary_
// (which must be non-NULL).
//   input_file: optional file name. When column finding is disabled for the
//     current tessedit_pageseg_mode and the name is non-empty, the text from
//     its last '.' onwards is cut off and the result is handed to
//     read_unlv_file() to look for a UNLV zone file.
//   blocks: in/out block list. If it is still empty after the UNLV step, a
//     single block covering the whole image is added; otherwise the mode is
//     forced to PSM_SINGLE_BLOCK.
//   osd_tess, osr: forwarded unchanged to AutoPageSeg().
// Returns -1 if AutoPageSeg() reported a negative result, 0 if the block list
// is empty afterwards, and otherwise AutoPageSeg()'s result (0 when it was
// not run). When AutoPageSeg() is run in PSM_OSD_ONLY mode its result is
// returned directly and TextordPage() is not called.
107 int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
108  Tesseract* osd_tess, OSResults* osr) {
109  ASSERT_HOST(pix_binary_ != NULL);
110  int width = pixGetWidth(pix_binary_);
111  int height = pixGetHeight(pix_binary_);
112  // Get page segmentation mode.
113  PageSegMode pageseg_mode = static_cast<PageSegMode>(
114  static_cast<int>(tessedit_pageseg_mode));
115  // If a UNLV zone file can be found, use that instead of segmentation.
116  if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&
117  input_file != NULL && input_file->length() > 0) {
118  STRING name = *input_file;
     // Cut the name at its last '.' before passing it to read_unlv_file().
119  const char* lastdot = strrchr(name.string(), '.');
120  if (lastdot != NULL)
121  name[lastdot - name.string()] = '\0';
122  read_unlv_file(name, width, height, blocks);
123  }
124  if (blocks->empty()) {
125  // No UNLV file present. Work according to the PageSegMode.
126  // First make a single block covering the whole image.
127  BLOCK_IT block_it(blocks);
128  BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
     // NOTE(review): the listing numbering jumps from 128 to 130 here; a
     // statement may have been lost in extraction - check the real source.
130  block_it.add_to_end(block);
131  } else {
132  // UNLV file present. Use PSM_SINGLE_BLOCK.
133  pageseg_mode = PSM_SINGLE_BLOCK;
134  }
     // These flags are derived after the possible override to PSM_SINGLE_BLOCK
     // above, so they reflect the mode that is actually used.
135  bool single_column = !PSM_COL_FIND_ENABLED(pageseg_mode);
136  bool osd_enabled = PSM_OSD_ENABLED(pageseg_mode);
137  bool osd_only = pageseg_mode == PSM_OSD_ONLY;
138 
139  int auto_page_seg_ret_val = 0;
140  TO_BLOCK_LIST to_blocks;
141  if (osd_enabled || PSM_BLOCK_FIND_ENABLED(pageseg_mode)) {
142  auto_page_seg_ret_val =
143  AutoPageSeg(single_column, osd_enabled, osd_only,
144  blocks, &to_blocks, osd_tess, osr);
145  if (osd_only)
146  return auto_page_seg_ret_val;
147  // To create blobs from the image region bounds uncomment this line:
148  // to_blocks.clear(); // Uncomment to go back to the old mode.
149  } else {
     // AutoPageSeg() is not run: set both skew vectors to (1, 0).
     // NOTE(review): presumably this is the "no rotation" value - confirm.
150  deskew_ = FCOORD(1.0f, 0.0f);
151  reskew_ = FCOORD(1.0f, 0.0f);
152  if (pageseg_mode == PSM_CIRCLE_WORD) {
     // Swap in the cleaned image only when circle removal produced one; the
     // previous binary image is destroyed in that case.
153  Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);
154  if (pixcleaned != NULL) {
155  pixDestroy(&pix_binary_);
156  pix_binary_ = pixcleaned;
157  }
158  }
159  }
160 
161  if (auto_page_seg_ret_val < 0) {
162  return -1;
163  }
164 
165  if (blocks->empty()) {
     // NOTE(review): the listing numbering jumps from 165 to 167 here; a
     // statement (possibly a condition on the message below) may be missing.
167  tprintf("Empty page\n");
168  return 0; // AutoPageSeg found an empty page.
169  }
170 
171  textord_.TextordPage(pageseg_mode, width, height, pix_binary_,
172  blocks, &to_blocks);
173  return auto_page_seg_ret_val;
174 }
175 
176 // Helper writes a grey image to a file for use by scrollviewer.
177 // Normally for speed we don't display the image in the layout debug windows.
178 // If textord_debug_images is true, we draw the image as a background to some
179 // of the debug windows. printable determines whether these
180 // images are optimized for printing instead of screen display.
//   printable: selects the print palette (pixSetAll background, masked pixels
//     set to 192) instead of the screen palette (background 64, masked
//     pixels set to 0).
//   pix_binary: supplies the output dimensions and is used as the mask for
//     pixSetMasked(); this function does not modify or destroy it.
// The image is written as PNG to the path given by
// AlignedBlob::textord_debug_pix(); the temporary grey image is destroyed
// before returning.
181 static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
     // 8 bits per pixel, same width and height as the binary input.
182  Pix* grey_pix = pixCreate(pixGetWidth(pix_binary),
183  pixGetHeight(pix_binary), 8);
184  // Printable images are light grey on white, but for screen display
185  // they are black on dark grey so the other colors show up well.
186  if (printable) {
187  pixSetAll(grey_pix);
188  pixSetMasked(grey_pix, pix_binary, 192);
189  } else {
190  pixSetAllArbitrary(grey_pix, 64);
191  pixSetMasked(grey_pix, pix_binary, 0);
192  }
     // NOTE(review): the listing numbering jumps from 192 to 194 here; a
     // statement may have been lost in extraction - check the real source.
194  pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
195  pixDestroy(&grey_pix);
196 }
197 
198 
// Runs automatic page segmentation on pix_binary_.
//   single_column: passed to the setup call and to ColumnFinder::FindBlocks().
//   osd, only_osd, osd_tess, osr: passed through to the setup call.
//   blocks: in/out. Given to the setup call as input; before a successful
//     return it is cleared and refilled with the blocks that FindBlocks()
//     put into found_blocks.
//   to_blocks: output list handed to ColumnFinder::FindBlocks().
// Side effects: updates deskew_ and reskew_ from the finder on success, and
// when textord_debug_images is set writes a debug background image at the
// start and unlinks it before a successful return.
// Returns 0 on success (including when the setup call yields no finder, in
// which case blocks ends up empty), or -1 if FindBlocks() fails.
218 int Tesseract::AutoPageSeg(bool single_column, bool osd, bool only_osd,
219  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks,
220  Tesseract* osd_tess, OSResults* osr) {
221  if (textord_debug_images) {
222  WriteDebugBackgroundImage(textord_debug_printable, pix_binary_);
223  }
224  Pix* photomask_pix = NULL;
225  Pix* musicmask_pix = NULL;
226  // The blocks made by the ColumnFinder. Moved to blocks before return.
227  BLOCK_LIST found_blocks;
228  TO_BLOCK_LIST temp_blocks;
229 
     // NOTE(review): listing line 230 is missing. It must hold the start of
     // the statement that declares `finder` and opens the call whose
     // arguments follow; restore it from the real source.
231  single_column, osd, only_osd, blocks, osd_tess, osr,
232  &temp_blocks, &photomask_pix, &musicmask_pix);
233  if (finder != NULL) {
234  TO_BLOCK_IT to_block_it(&temp_blocks);
235  TO_BLOCK* to_block = to_block_it.data();
236  if (musicmask_pix != NULL) {
237  // TODO(rays) pass the musicmask_pix into FindBlocks and mark music
238  // blocks separately. For now combine with photomask_pix.
239  pixOr(photomask_pix, photomask_pix, musicmask_pix);
240  }
241  if (equ_detect_) {
242  finder->SetEquationDetect(equ_detect_);
243  }
244  if (finder->FindBlocks(single_column, scaled_color_, scaled_factor_,
245  to_block, photomask_pix,
246  &found_blocks, to_blocks) < 0) {
247  pixDestroy(&photomask_pix);
248  pixDestroy(&musicmask_pix);
     // The finder is owned by this function (it is deleted on the success
     // path below), so release it on the failure path as well.
     delete finder;
249  return -1;
250  }
251  finder->GetDeskewVectors(&deskew_, &reskew_);
252  delete finder;
253  }
254  pixDestroy(&photomask_pix);
255  pixDestroy(&musicmask_pix);
     // Replace whatever was in blocks with the blocks found above.
256  blocks->clear();
257  BLOCK_IT block_it(blocks);
258  // Move the found blocks to the input/output blocks.
259  block_it.add_list_after(&found_blocks);
260 
261  if (textord_debug_images) {
262  // The debug image is no longer needed so delete it.
263  unlink(AlignedBlob::textord_debug_pix().string());
264  }
265  return 0;
266 }
267 
282  bool single_column, bool osd, bool only_osd,
283  BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
284  TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix) {
285  int vertical_x = 0;
286  int vertical_y = 1;
287  TabVector_LIST v_lines;
288  TabVector_LIST h_lines;
289  ICOORD bleft(0, 0);
290 
291  ASSERT_HOST(pix_binary_ != NULL);
293  pixWrite("tessinput.png", pix_binary_, IFF_PNG);
294  }
295  // Leptonica is used to find the rule/separator lines in the input.
296  LineFinder::FindAndRemoveLines(source_resolution_,
297  textord_tabfind_show_vlines, pix_binary_,
298  &vertical_x, &vertical_y, music_mask_pix,
299  &v_lines, &h_lines);
301  pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
302  // Leptonica is used to find a mask of the photo regions in the input.
303  *photo_mask_pix = ImageFind::FindImages(pix_binary_);
305  pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
306  if (single_column)
307  v_lines.clear();
308 
309  // The rest of the algorithm uses the usual connected components.
310  textord_.find_components(pix_binary_, blocks, to_blocks);
311 
312  TO_BLOCK_IT to_block_it(to_blocks);
313  // There must be exactly one input block.
314  // TODO(rays) handle new textline finding with a UNLV zone file.
315  ASSERT_HOST(to_blocks->singleton());
316  TO_BLOCK* to_block = to_block_it.data();
317  TBOX blkbox = to_block->block->bounding_box();
318  ColumnFinder* finder = NULL;
319 
320  if (to_block->line_size >= 2) {
321  finder = new ColumnFinder(static_cast<int>(to_block->line_size),
322  blkbox.botleft(), blkbox.topright(),
323  source_resolution_,
324  &v_lines, &h_lines, vertical_x, vertical_y);
325 
326  finder->SetupAndFilterNoise(*photo_mask_pix, to_block);
327 
328  if (equ_detect_) {
329  equ_detect_->LabelSpecialText(to_block);
330  }
331 
332  BLOBNBOX_CLIST osd_blobs;
333  // osd_orientation is the number of 90 degree rotations to make the
334  // characters upright. (See osdetect.h for precise definition.)
335  // We want the text lines horizontal, (vertical text indicates vertical
336  // textlines) which may conflict (eg vertically written CJK).
337  int osd_orientation = 0;
338  bool vertical_text = finder->IsVerticallyAlignedText(to_block, &osd_blobs);
339  if (osd && osd_tess != NULL && osr != NULL) {
340  os_detect_blobs(&osd_blobs, osr, osd_tess);
341  if (only_osd) {
342  delete finder;
343  return NULL;
344  }
345  osd_orientation = osr->best_result.orientation_id;
346  double osd_score = osr->orientations[osd_orientation];
347  double osd_margin = min_orientation_margin * 2;
348  for (int i = 0; i < 4; ++i) {
349  if (i != osd_orientation &&
350  osd_score - osr->orientations[i] < osd_margin) {
351  osd_margin = osd_score - osr->orientations[i];
352  }
353  }
354  if (osd_margin < min_orientation_margin) {
355  // The margin is weak.
356  int best_script_id = osr->best_result.script_id;
357  bool cjk = (best_script_id == osd_tess->unicharset.han_sid()) ||
358  (best_script_id == osd_tess->unicharset.hiragana_sid()) ||
359  (best_script_id == osd_tess->unicharset.katakana_sid());
360 
361  if (!cjk && !vertical_text && osd_orientation == 2) {
362  // upside down latin text is improbable with such a weak margin.
363  tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "
364  "Don't rotate.\n", osd_margin);
365  osd_orientation = 0;
366  } else {
367  tprintf("OSD: Weak margin (%.2f) for %d blob text block, "
368  "but using orientation anyway: %d\n",
369  osd_blobs.length(), osd_margin, osd_orientation);
370  }
371  }
372  }
373  osd_blobs.shallow_clear();
374  finder->CorrectOrientation(to_block, vertical_text, osd_orientation);
375  }
376 
377  return finder;
378 }
379 
380 } // namespace tesseract.