Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ccnontextdetect.cpp
Go to the documentation of this file.
1 
2 // File: ccnontextdetect.cpp
3 // Description: Connected-Component-based photo (non-text) detection.
4 // Copyright 2011 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 // Created: Sat Jun 11 10:12:01 PST 2011
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "ccnontextdetect.h"
21 #include "imagefind.h"
22 #include "strokewidth.h"
23 
24 namespace tesseract {
25 
// Tuning constants for connected-component based non-text detection.

// Max number of neighbour small objects per squared gridsize before a grid
// cell becomes image.
const double kMaxSmallNeighboursPerPix = 1.0 / 32;
// Max number of small blobs a large blob may overlap before it is rejected
// and determined to be image.
const int kMaxLargeOverlapsWithSmall = 3;
// Max number of small blobs a medium blob may overlap before it is rejected
// and determined to be image. Larger than for large blobs as medium blobs
// may be complex Chinese characters. Very large Chinese characters are going
// to overlap more medium blobs than small.
const int kMaxMediumOverlapsWithSmall = 12;
// Max number of normal blobs a large blob may overlap before it is rejected
// and determined to be image. This is set higher to allow for drop caps, which
// may overlap a lot of good text blobs.
const int kMaxLargeOverlapsWithMedium = 12;
// Multiplier of original noise_count used to test for the case of spreading
// noise beyond where it should really be.
const int kOriginalNoiseMultiple = 8;
// Pixel padding for noise blobs when rendering on the image
// mask to encourage them to join together. Make it too big and images
// will fatten out too much and have to be clipped to text.
const int kNoisePadding = 4;
// Fraction of max_noise_count_ to be added to the noise count if there is
// photo mask in the background.
const double kPhotoOffsetFraction = 0.375;
// Min ratio of perimeter^2/16area for a "good" blob in estimating noise
// density. Good blobs are supposed to be highly likely real text.
// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor
// of 16. Digital circles are weird and have a minimum ratio of pi/64, not
// the 1/(4pi) that you would expect.
const double kMinGoodTextPARatio = 1.5;
57 
59  const ICOORD& bleft, const ICOORD& tright)
60  : BlobGrid(gridsize, bleft, tright),
61  max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix *
62  gridsize * gridsize)),
63  noise_density_(NULL) {
64  // TODO(rays) break max_noise_count_ out into an area-proportional
65  // value, as now plus an additive constant for the number of text blobs
66  // in the 3x3 neigbourhood - maybe 9.
67 }
68 
70  delete noise_density_;
71 }
72 
73 // Creates and returns a Pix with the same resolution as the original
74 // in which 1 (black) pixels represent likely non text (photo, line drawing)
75 // areas of the page, deleting from the blob_block the blobs that were
76 // determined to be non-text.
77 // The photo_map is used to bias the decision towards non-text, rather than
78 // supplying definite decision.
79 // The blob_block is the usual result of connected component analysis,
80 // holding the detected blobs.
81 // The returned Pix should be PixDestroyed after use.
82 Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
83  TO_BLOCK* blob_block) {
84  // Insert the smallest blobs into the grid.
85  InsertBlobList(&blob_block->small_blobs);
86  InsertBlobList(&blob_block->noise_blobs);
87  // Add the medium blobs that don't have a good strokewidth neighbour.
88  // Those that do go into good_grid as an antidote to spreading beyond the
89  // real reaches of a noise region.
90  BlobGrid good_grid(gridsize(), bleft(), tright());
91  BLOBNBOX_IT blob_it(&blob_block->blobs);
92  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
93  BLOBNBOX* blob = blob_it.data();
94  double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
95  perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
96  if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
97  InsertBBox(true, true, blob);
98  else
99  good_grid.InsertBBox(true, true, blob);
100  }
101  noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
102  good_grid.Clear(); // Not needed any more.
103  Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
104  if (debug) {
105  pixWrite("junknoisemask.png", pix, IFF_PNG);
106  }
107  ScrollView* win = NULL;
108  #ifndef GRAPHICS_DISABLED
109  if (debug) {
110  win = MakeWindow(0, 400, "Photo Mask Blobs");
111  }
112  #endif // GRAPHICS_DISABLED
113  // Large and medium blobs are not text if they overlap with "a lot" of small
114  // blobs.
115  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
117  win, ScrollView::DARK_GREEN, pix);
118  MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
119  win, ScrollView::WHITE, pix);
120  // Clear the grid of small blobs and insert the medium blobs.
121  Clear();
122  InsertBlobList(&blob_block->blobs);
123  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
125  win, ScrollView::DARK_GREEN, pix);
126  // Clear again before we start deleting the blobs in the grid.
127  Clear();
128  MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
129  win, ScrollView::CORAL, pix);
130  MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
131  win, ScrollView::GOLDENROD, pix);
132  MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
133  win, ScrollView::WHITE, pix);
134  if (debug) {
135  #ifndef GRAPHICS_DISABLED
136  win->Update();
137  #endif // GRAPHICS_DISABLED
138  pixWrite("junkccphotomask.png", pix, IFF_PNG);
139  #ifndef GRAPHICS_DISABLED
140  delete win->AwaitEvent(SVET_DESTROY);
141  delete win;
142  #endif // GRAPHICS_DISABLED
143  }
144  return pix;
145 }
146 
147 // Computes and returns the noise_density IntGrid, at the same gridsize as
148 // this by summing the number of small elements in a 3x3 neighbourhood of
149 // each grid cell. good_grid is filled with blobs that are considered most
150 // likely good text, and this is filled with small and medium blobs that are
151 // more likely non-text.
152 // The photo_map is used to bias the decision towards non-text, rather than
153 // supplying definite decision.
154 IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
155  BlobGrid* good_grid) {
156  IntGrid* noise_counts = CountCellElements();
157  IntGrid* noise_density = noise_counts->NeighbourhoodSum();
158  IntGrid* good_counts = good_grid->CountCellElements();
159  // Now increase noise density in photo areas, to bias the decision and
160  // minimize hallucinated text on image, but trim the noise_density where
161  // there are good blobs and the original count is low in non-photo areas,
162  // indicating that most of the result came from neighbouring cells.
163  int height = pixGetHeight(photo_map);
164  int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
165  for (int y = 0; y < gridheight(); ++y) {
166  for (int x = 0; x < gridwidth(); ++x) {
167  int noise = noise_density->GridCellValue(x, y);
168  if (max_noise_count_ < noise + photo_offset &&
169  noise <= max_noise_count_) {
170  // Test for photo.
171  int left = x * gridsize();
172  int right = left + gridsize();
173  int bottom = height - y * gridsize();
174  int top = bottom - gridsize();
175  if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
176  &bottom)) {
177  noise_density->SetGridCell(x, y, noise + photo_offset);
178  }
179  }
180  if (debug && noise > max_noise_count_ &&
181  good_counts->GridCellValue(x, y) > 0) {
182  tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
183  x * gridsize(), y * gridsize(),
184  noise_density->GridCellValue(x, y),
185  good_counts->GridCellValue(x, y),
186  noise_counts->GridCellValue(x, y), max_noise_count_);
187  }
188  if (noise > max_noise_count_ &&
189  good_counts->GridCellValue(x, y) > 0 &&
190  noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <=
191  max_noise_count_) {
192  noise_density->SetGridCell(x, y, 0);
193  }
194  }
195  }
196  delete noise_counts;
197  delete good_counts;
198  return noise_density;
199 }
200 
201 // Helper to expand a box in one of the 4 directions by the given pad,
202 // provided it does not expand into any cell with a zero noise density.
203 // If that is not possible, try expanding all round by a small constant.
204 static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
205  int pad) {
206  TBOX expanded_box(box);
207  expanded_box.set_right(box.right() + pad);
208  if (!noise_density.AnyZeroInRect(expanded_box))
209  return expanded_box;
210  expanded_box = box;
211  expanded_box.set_left(box.left() - pad);
212  if (!noise_density.AnyZeroInRect(expanded_box))
213  return expanded_box;
214  expanded_box = box;
215  expanded_box.set_top(box.top() + pad);
216  if (!noise_density.AnyZeroInRect(expanded_box))
217  return expanded_box;
218  expanded_box = box;
219  expanded_box.set_bottom(box.bottom() + pad);
220  if (!noise_density.AnyZeroInRect(expanded_box))
221  return expanded_box;
222  expanded_box = box;
223  expanded_box.pad(kNoisePadding, kNoisePadding);
224  if (!noise_density.AnyZeroInRect(expanded_box))
225  return expanded_box;
226  return box;
227 }
228 
229 // Tests each blob in the list to see if it is certain non-text using 2
230 // conditions:
231 // 1. blob overlaps a cell with high value in noise_density_ (previously set
232 // by ComputeNoiseDensity).
233 // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
234 // condition is disabled with max_blob_overlaps == -1.
235 // If it does, the blob is declared non-text, and is used to mark up the
236 // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
237 // neighbours reset, as they may now point to deleted data.
238 // WARNING: The blobs list blobs may be in the *this grid, but they are
239 // not removed. If any deleted blobs might be in *this, then this must be
240 // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
241 // If the win is not NULL, deleted blobs are drawn on it in red, and kept
242 // blobs are drawn on it in ok_color.
243 void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
244  int max_blob_overlaps,
245  ScrollView* win,
246  ScrollView::Color ok_color,
247  Pix* nontext_mask) {
248  int imageheight = tright().y() - bleft().x();
249  BLOBNBOX_IT blob_it(blobs);
250  BLOBNBOX_LIST dead_blobs;
251  BLOBNBOX_IT dead_it(&dead_blobs);
252  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
253  BLOBNBOX* blob = blob_it.data();
254  TBOX box = blob->bounding_box();
255  if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
256  (max_blob_overlaps < 0 ||
257  !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
258  blob->ClearNeighbours();
259  #ifndef GRAPHICS_DISABLED
260  if (win != NULL)
261  blob->plot(win, ok_color, ok_color);
262  #endif // GRAPHICS_DISABLED
263  } else {
264  if (noise_density_->AnyZeroInRect(box)) {
265  // There is a danger that the bounding box may overlap real text, so
266  // we need to render the outline.
267  Pix* blob_pix = blob->cblob()->render_outline();
268  pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
269  box.width(), box.height(), PIX_SRC | PIX_DST,
270  blob_pix, 0, 0);
271  pixDestroy(&blob_pix);
272  } else {
273  if (box.area() < gridsize() * gridsize()) {
274  // It is a really bad idea to make lots of small components in the
275  // photo mask, so try to join it to a bigger area by expanding the
276  // box in a way that does not touch any zero noise density cell.
277  box = AttemptBoxExpansion(box, *noise_density_, gridsize());
278  }
279  // All overlapped cells are non-zero, so just mark the rectangle.
280  pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
281  box.width(), box.height(), PIX_SET, NULL, 0, 0);
282  }
283  #ifndef GRAPHICS_DISABLED
284  if (win != NULL)
285  blob->plot(win, ScrollView::RED, ScrollView::RED);
286  #endif // GRAPHICS_DISABLED
287  // It is safe to delete the cblob now, as it isn't used by the grid
288  // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
289  // dead_blobs list.
290  // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
291  // the cblob.
292  delete blob->cblob();
293  dead_it.add_to_end(blob_it.extract());
294  }
295  }
296 }
297 
298 // Returns true if the given blob overlaps more than max_overlaps blobs
299 // in the current grid.
300 bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
301  // Search the grid to see what intersects it.
302  // Setup a Rectangle search for overlapping this blob.
303  BlobGridSearch rsearch(this);
304  TBOX box = blob->bounding_box();
305  rsearch.StartRectSearch(box);
306  rsearch.SetUniqueMode(true);
307  BLOBNBOX* neighbour;
308  int overlap_count = 0;
309  while (overlap_count <= max_overlaps &&
310  (neighbour = rsearch.NextRectSearch()) != NULL) {
311  if (box.major_overlap(neighbour->bounding_box())) {
312  ++overlap_count;
313  if (overlap_count > max_overlaps)
314  return true;
315  }
316  }
317  return false;
318 }
319 
320 } // namespace tesseract.