Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
colfind.cpp
Go to the documentation of this file.
1 
2 // File: colfind.cpp
3 // Description: Class to hold BLOBNBOXs in a grid for fast access
4 // to neighbours.
5 // Author: Ray Smith
6 // Created: Wed Jun 06 17:22:01 PDT 2007
7 //
8 // (C) Copyright 2007, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifdef _MSC_VER
22 #pragma warning(disable:4244) // Conversion warnings
23 #endif
24 
25 // Include automatically generated configuration file if running autoconf.
26 #ifdef HAVE_CONFIG_H
27 #include "config_auto.h"
28 #endif
29 
30 #include "colfind.h"
31 
32 #include "ccnontextdetect.h"
33 #include "colpartition.h"
34 #include "colpartitionset.h"
35 #include "equationdetectbase.h"
36 #include "linefind.h"
37 #include "normalis.h"
38 #include "strokewidth.h"
39 #include "blobbox.h"
40 #include "scrollview.h"
41 #include "tablefind.h"
42 #include "params.h"
43 #include "workingpartset.h"
44 
45 namespace tesseract {
46 
47 // Minimum width (in pixels) to be considered when making columns.
48 // TODO(rays) convert to inches, dependent on resolution.
49 const int kMinColumnWidth = 100;
50 // When assigning columns, the max number of misfit grid rows/ColPartitionSets
51 // that can be ignored.
53 // Min fraction of ColPartition height to be overlapping for margin purposes.
54 const double kMarginOverlapFraction = 0.25;
55 // Max fraction of mean_column_gap_ for the gap between two partitions within a
56 // column to allow them to merge.
57 const double kHorizontalGapMergeFraction = 0.5;
58 // Min fraction of grid size to not be considered likely noise.
59 const double kMinNonNoiseFraction = 0.5;
60 // Minimum gutter width as a fraction of gridsize
61 const double kMinGutterWidthGrid = 0.5;
62 // Max multiple of a partition's median size as a distance threshold for
63 // adding noise blobs.
64 const double kMaxDistToPartSizeRatio = 1.5;
65 
67  false, "Show partition bounds");
69  false, "Show blobs rejected as noise");
71  "Show partition bounds, waiting if >1");
72 BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds");
73 BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds");
74 BOOL_VAR(textord_tabfind_find_tables, true, "run table detection");
75 
76 ScrollView* ColumnFinder::blocks_win_ = NULL;
77 
78 // Gridsize is an estimate of the text size in the image. A suitable value
79 // is in TO_BLOCK::line_size after find_components has been used to make
80 // the blobs.
81 // bleft and tright are the bounds of the image (or rectangle) being processed.
82 // vlines is a (possibly empty) list of TabVector and vertical_x and y are
83 // the sum logical vertical vector produced by LineFinder::FindVerticalLines.
85  const ICOORD& bleft, const ICOORD& tright,
86  int resolution,
87  TabVector_LIST* vlines, TabVector_LIST* hlines,
88  int vertical_x, int vertical_y)
89  : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
90  resolution),
91  min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
92  mean_column_gap_(tright.x() - bleft.x()),
93  reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f),
94  best_columns_(NULL), stroke_width_(NULL),
95  part_grid_(gridsize, bleft, tright), nontext_map_(NULL),
96  projection_(resolution),
97  denorm_(NULL), input_blobs_win_(NULL), equation_detect_(NULL) {
98  TabVector_IT h_it(&horizontal_lines_);
99  h_it.add_list_after(hlines);
100 }
101 
103  column_sets_.delete_data_pointers();
104  if (best_columns_ != NULL) {
105  delete [] best_columns_;
106  }
107  if (stroke_width_ != NULL)
108  delete stroke_width_;
109  delete input_blobs_win_;
110  pixDestroy(&nontext_map_);
111  while (denorm_ != NULL) {
112  DENORM* dead_denorm = denorm_;
113  denorm_ = const_cast<DENORM*>(denorm_->predecessor());
114  delete dead_denorm;
115  }
116 
117  // The ColPartitions are destroyed automatically, but any boxes in
118  // the noise_parts_ list are owned and need to be deleted explicitly.
119  ColPartition_IT part_it(&noise_parts_);
120  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
121  ColPartition* part = part_it.data();
122  part->DeleteBoxes();
123  }
124  // Likewise any boxes in the good_parts_ list need to be deleted.
125  // These are just the image parts. Text parts have already given their
126  // boxes on to the TO_BLOCK, and have empty lists.
127  part_it.set_to_list(&good_parts_);
128  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
129  ColPartition* part = part_it.data();
130  part->DeleteBoxes();
131  }
132  // Also, any blobs on the image_bblobs_ list need to have their cblobs
133  // deleted. This only happens if there has been an early return from
134  // FindColumns, as in a normal return, the blobs go into the grid and
135  // end up in noise_parts_, good_parts_ or the output blocks.
136  BLOBNBOX_IT bb_it(&image_bblobs_);
137  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
138  BLOBNBOX* bblob = bb_it.data();
139  delete bblob->cblob();
140  }
141 }
142 
143 // Performs initial processing on the blobs in the input_block:
144 // Setup the part_grid, stroke_width_, nontext_map.
145 // Obvious noise blobs are filtered out and used to mark the nontext_map_.
146 // Initial stroke-width analysis is used to get local text alignment
147 // direction, so the textline projection_ map can be setup.
148 // On return, IsVerticallyAlignedText may be called (now optionally) to
149 // determine the gross textline alignment of the page.
150 void ColumnFinder::SetupAndFilterNoise(Pix* photo_mask_pix,
151  TO_BLOCK* input_block) {
152  part_grid_.Init(gridsize(), bleft(), tright());
153  if (stroke_width_ != NULL)
154  delete stroke_width_;
155  stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright());
156  min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize());
157  input_block->ReSetAndReFilterBlobs();
158  #ifndef GRAPHICS_DISABLED
160  input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs");
161  input_block->plot_graded_blobs(input_blobs_win_);
162  }
163  #endif // GRAPHICS_DISABLED
164  SetBlockRuleEdges(input_block);
165  pixDestroy(&nontext_map_);
166  // Run a preliminary strokewidth neighbour detection on the medium blobs.
167  stroke_width_->SetNeighboursOnMediumBlobs(input_block);
168  CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
169  // Remove obvious noise and make the initial non-text map.
170  nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
171  photo_mask_pix, input_block);
172  // TODO(rays) experiment with making broken CJK fixing dependent on the
173  // language, and keeping the merged blobs on output instead of exploding at
174  // ColPartition::MakeBlock.
175  stroke_width_->FindTextlineDirectionAndFixBrokenCJK(true, input_block);
176  // Clear the strokewidth grid ready for rotation or leader finding.
177  stroke_width_->Clear();
178 }
179 
180 // Tests for vertical alignment of text (returning true if so), and generates
181 // a list of blobs of moderate aspect ratio, in the most frequent writing
182 // direction (in osd_blobs) for orientation and script detection to test
183 // the character orientation.
184 // block is the single block for the whole page or rectangle to be OCRed.
185 // Note that the vertical alignment may be due to text whose writing direction
186 // is vertical, like say Japanese, or due to text whose writing direction is
187 // horizontal but whose text appears vertically aligned because the image is
188 // not the right way up.
190  BLOBNBOX_CLIST* osd_blobs) {
191  return stroke_width_->TestVerticalTextDirection(block, osd_blobs);
192 }
193 
194 // Rotates the blobs and the TabVectors so that the gross writing direction
195 // (text lines) are horizontal and lines are read down the page.
196 // Applied rotation stored in rotation_.
197 // A second rotation is calculated for application during recognition to
198 // make the rotated blobs upright for recognition.
199 // Subsequent rotation stored in text_rotation_.
200 //
201 // Arguments:
202 // vertical_text_lines true if the text lines are vertical.
203 // recognition_rotation [0..3] is the number of anti-clockwise 90 degree
204 // rotations from osd required for the text to be upright and readable.
206  bool vertical_text_lines,
207  int recognition_rotation) {
208  const FCOORD anticlockwise90(0.0f, 1.0f);
209  const FCOORD clockwise90(0.0f, -1.0f);
210  const FCOORD rotation180(-1.0f, 0.0f);
211  const FCOORD norotation(1.0f, 0.0f);
212 
213  text_rotation_ = norotation;
214  // Rotate the page to make the text upright, as implied by
215  // recognition_rotation.
216  rotation_ = norotation;
217  if (recognition_rotation == 1) {
218  rotation_ = anticlockwise90;
219  } else if (recognition_rotation == 2) {
220  rotation_ = rotation180;
221  } else if (recognition_rotation == 3) {
222  rotation_ = clockwise90;
223  }
224  // We infer text writing direction to be vertical if there are several
225  // vertical text lines detected, and horizontal if not. But if the page
226  // orientation was determined to be 90 or 270 degrees, the true writing
227  // direction is the opposite of what we inferred.
228  if (recognition_rotation & 1) {
229  vertical_text_lines = !vertical_text_lines;
230  }
231  // If we still believe the writing direction is vertical, we use the
232  // convention of rotating the page ccw 90 degrees to make the text lines
233  // horizontal, and mark the blobs for rotation cw 90 degrees for
234  // classification so that the text order is correct after recognition.
235  if (vertical_text_lines) {
236  rotation_.rotate(anticlockwise90);
237  text_rotation_.rotate(clockwise90);
238  }
239  // Set rerotate_ to the inverse of rotation_.
240  rerotate_ = FCOORD(rotation_.x(), -rotation_.y());
241  if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) {
242  // Rotate all the blobs and tab vectors.
243  RotateBlobList(rotation_, &block->large_blobs);
244  RotateBlobList(rotation_, &block->blobs);
245  RotateBlobList(rotation_, &block->small_blobs);
246  RotateBlobList(rotation_, &block->noise_blobs);
247  TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_,
248  &min_gutter_width_);
249  part_grid_.Init(gridsize(), bleft(), tright());
250  // Reset all blobs to initial state and filter by size.
251  // Since they have rotated, the list they belong on could have changed.
252  block->ReSetAndReFilterBlobs();
253  SetBlockRuleEdges(block);
254  stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
255  }
256  if (textord_debug_tabfind) {
257  tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
258  vertical_text_lines, recognition_rotation,
259  rotation_.x(), rotation_.y(),
260  text_rotation_.x(), text_rotation_.y());
261  }
262  // Setup the denormalization.
263  ASSERT_HOST(denorm_ == NULL);
264  denorm_ = new DENORM;
265  denorm_->SetupNormalization(NULL, NULL, &rotation_, NULL, NULL, 0,
266  0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
267 }
268 
269 // Finds blocks of text, image, rule line, table etc, returning them in the
270 // blocks and to_blocks
271 // (Each TO_BLOCK points to the basic BLOCK and adds more information.)
272 // Image blocks are generated by a combination of photo_mask_pix (which may
273 // NOT be NULL) and the rejected text found during preliminary textline
274 // finding.
275 // The input_block is the result of a call to find_components, and contains
276 // the blobs found in the image or rectangle to be OCRed. These blobs will be
277 // removed and placed in the output blocks, while unused ones will be deleted.
278 // If single_column is true, the input is treated as single column, but
279 // it is still divided into blocks of equal line spacing/text size.
280 // scaled_color is scaled down by scaled_factor from the input color image,
281 // and may be NULL if the input was not color.
282 // Returns -1 if the user hits the 'd' key in the blocks window while running
283 // in debug mode, which requests a retry with more debug info.
284 int ColumnFinder::FindBlocks(bool single_column,
285  Pix* scaled_color, int scaled_factor,
286  TO_BLOCK* input_block, Pix* photo_mask_pix,
287  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
288  pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
289  stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
290  stroke_width_->RemoveLineResidue(&big_parts_);
291  FindInitialTabVectors(NULL, min_gutter_width_, input_block);
292  SetBlockRuleEdges(input_block);
293  stroke_width_->GradeBlobsIntoPartitions(rerotate_, input_block, nontext_map_,
294  denorm_, &projection_,
295  &part_grid_, &big_parts_);
296  ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
297  input_block, this, &part_grid_, &big_parts_);
298  ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
299  photo_mask_pix);
300  ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
301  input_block, this, &part_grid_, &big_parts_);
302  part_grid_.ReTypeBlobs(&image_bblobs_);
303  TidyBlobs(input_block);
304  Reset();
305  // TODO(rays) need to properly handle big_parts_.
306  ColPartition_IT p_it(&big_parts_);
307  for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
308  p_it.data()->DisownBoxes();
309  big_parts_.clear();
310  delete stroke_width_;
311  stroke_width_ = NULL;
312 
313  // A note about handling right-to-left scripts (Hebrew/Arabic):
314  // The columns must be reversed and come out in right-to-left instead of
315  // the normal left-to-right order. Because the left-to-right ordering
316  // is implicit in many data structures, it is simpler to fool the algorithms
317  // into thinking they are dealing with left-to-right text.
318  // To do this, we reflect the needed data in the y-axis and then reflect
319  // the blocks back after they have been created. This is a temporary
320  // arrangment that is confined to this function only, so the reflection
321  // is completely invisible in the output blocks.
322  // The only objects reflected are:
323  // The vertical separator lines that have already been found;
324  // The bounding boxes of all BLOBNBOXES on all lists on the input_block
325  // plus the image_bblobs. The outlines are not touched, since they are
326  // not looked at.
327  bool input_is_rtl = input_block->block->right_to_left();
328  if (input_is_rtl) {
329  // Reflect the vertical separator lines (member of TabFind).
330  ReflectInYAxis();
331  // Reflect the blob boxes.
332  ReflectForRtl(input_block, &image_bblobs_);
333  part_grid_.ReflectInYAxis();
334  }
335 
336  if (single_column) {
337  // No tab stops needed. Just the grid that FindTabVectors makes.
338  DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
339  } else {
340  SetBlockRuleEdges(input_block);
341  // Find the tab stops, estimate skew, and deskew the tabs, blobs and
342  // part_grid_.
343  FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block,
344  min_gutter_width_, &part_grid_, &deskew_, &reskew_);
345  // Add the deskew to the denorm_.
346  DENORM* new_denorm = new DENORM;
347  new_denorm->SetupNormalization(NULL, NULL, &deskew_, denorm_, NULL, 0,
348  0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
349  denorm_ = new_denorm;
350  }
351  SetBlockRuleEdges(input_block);
352  part_grid_.SetTabStops(this);
353 
354  // Make the column_sets_.
355  if (!MakeColumns(single_column)) {
356  tprintf("Empty page!!\n");
357  return 0; // This is an empty page.
358  }
359 
360  // Refill the grid using rectangular spreading, and get the benefit
361  // of the completed tab vectors marking the rule edges of each blob.
362  Clear();
363  #ifndef GRAPHICS_DISABLED
365  ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs");
366  input_block->plot_graded_blobs(rej_win);
367  }
368  #endif // GRAPHICS_DISABLED
369  InsertBlobsToGrid(false, false, &image_bblobs_, this);
370  InsertBlobsToGrid(true, true, &input_block->blobs, this);
371 
372  part_grid_.GridFindMargins(best_columns_);
373  // Split and merge the partitions by looking at local neighbours.
374  GridSplitPartitions();
375  // Resolve unknown partitions by adding to an existing partition, fixing
376  // the type, or declaring them noise.
377  part_grid_.GridFindMargins(best_columns_);
378  GridMergePartitions();
379  // Insert any unused noise blobs that are close enough to an appropriate
380  // partition.
381  InsertRemainingNoise(input_block);
382  // Add horizontal line separators as partitions.
383  GridInsertHLinePartitions();
384  GridInsertVLinePartitions();
385  // Recompute margins based on a local neighbourhood search.
386  part_grid_.GridFindMargins(best_columns_);
387  SetPartitionTypes();
389  ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions");
390  part_grid_.DisplayBoxes(part_win);
391  DisplayTabVectors(part_win);
392  }
393 
394  if (equation_detect_) {
395  equation_detect_->FindEquationParts(&part_grid_, best_columns_);
396  }
398  TableFinder table_finder;
399  table_finder.Init(gridsize(), bleft(), tright());
400  table_finder.set_resolution(resolution_);
401  table_finder.set_left_to_right_language(
402  !input_block->block->right_to_left());
403  // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
404  // insert dot-like noise into period_grid_
405  table_finder.InsertCleanPartitions(&part_grid_, input_block);
406  // Get Table Regions
407  table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
408  }
409  GridRemoveUnderlinePartitions();
410  part_grid_.DeleteUnknownParts(input_block);
411 
412  // Build the partitions into chains that belong in the same block and
413  // refine into one-to-one links, then smooth the types within each chain.
414  part_grid_.FindPartitionPartners();
415  part_grid_.FindFigureCaptions();
416  part_grid_.RefinePartitionPartners(true);
417  SmoothPartnerRuns();
418 
419  #ifndef GRAPHICS_DISABLED
421  ScrollView* window = MakeWindow(400, 300, "Partitions");
423  window->Image(AlignedBlob::textord_debug_pix().string(),
424  image_origin().x(), image_origin().y());
425  part_grid_.DisplayBoxes(window);
427  DisplayTabVectors(window);
428  if (window != NULL && textord_tabfind_show_partitions > 1) {
429  delete window->AwaitEvent(SVET_DESTROY);
430  }
431  }
432  #endif // GRAPHICS_DISABLED
433  part_grid_.AssertNoDuplicates();
434  // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
435  // and ownership of the BLOBNBOXes moves to the ColPartitions.
436  // (They were previously owned by the block or the image_bblobs list.)
437  ReleaseBlobsAndCleanupUnused(input_block);
438  // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
439  // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
440  // from the ColPartitions to the output TO_BLOCK. In non-text, the
441  // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
442  TransformToBlocks(blocks, to_blocks);
443  if (textord_debug_tabfind) {
444  tprintf("Found %d blocks, %d to_blocks\n",
445  blocks->length(), to_blocks->length());
446  }
447 
448  DisplayBlocks(blocks);
449  RotateAndReskewBlocks(input_is_rtl, to_blocks);
450  int result = 0;
451  #ifndef GRAPHICS_DISABLED
452  if (blocks_win_ != NULL) {
453  bool waiting = false;
454  do {
455  waiting = false;
456  SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY);
457  if (event->type == SVET_INPUT && event->parameter != NULL) {
458  if (*event->parameter == 'd')
459  result = -1;
460  else
461  blocks->clear();
462  } else if (event->type == SVET_DESTROY) {
463  blocks_win_ = NULL;
464  } else {
465  waiting = true;
466  }
467  delete event;
468  } while (waiting);
469  }
470  #endif // GRAPHICS_DISABLED
471  return result;
472 }
473 
474 // Get the rotation required to deskew, and its inverse rotation.
476  *reskew = reskew_;
477  *deskew = reskew_;
478  deskew->set_y(-deskew->y());
479 }
480 
482  equation_detect_ = detect;
483 }
484 
486 
487 // Displays the blob and block bounding boxes in a window called Blocks.
488 void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
489 #ifndef GRAPHICS_DISABLED
491  if (blocks_win_ == NULL)
492  blocks_win_ = MakeWindow(700, 300, "Blocks");
493  else
494  blocks_win_->Clear();
496  blocks_win_->Image(AlignedBlob::textord_debug_pix().string(),
497  image_origin().x(), image_origin().y());
498  else
499  DisplayBoxes(blocks_win_);
500  BLOCK_IT block_it(blocks);
501  int serial = 1;
502  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
503  block_it.forward()) {
504  BLOCK* block = block_it.data();
505  block->plot(blocks_win_, serial++,
508  }
509  blocks_win_->Update();
510  }
511 #endif
512 }
513 
514 // Displays the column edges at each grid y coordinate defined by
515 // best_columns_.
516 void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) {
517 #ifndef GRAPHICS_DISABLED
518  ScrollView* col_win = MakeWindow(50, 300, "Columns");
520  col_win->Image(AlignedBlob::textord_debug_pix().string(),
521  image_origin().x(), image_origin().y());
522  else
523  DisplayBoxes(col_win);
525  for (int i = 0; i < gridheight_; ++i) {
526  ColPartitionSet* columns = best_columns_[i];
527  if (columns != NULL)
528  columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win);
529  }
530 #endif
531 }
532 
533 // Sets up column_sets_ (the determined column layout at each horizontal
534 // slice). Returns false if the page is empty.
535 bool ColumnFinder::MakeColumns(bool single_column) {
536  // The part_sets_ are a temporary structure used during column creation,
537  // and is a vector of ColPartitionSets, representing ColPartitions found
538  // at horizontal slices through the page.
539  PartSetVector part_sets;
540  if (!single_column) {
541  if (!part_grid_.MakeColPartSets(&part_sets))
542  return false; // Empty page.
543  ASSERT_HOST(part_grid_.gridheight() == gridheight_);
544  // Try using only the good parts first.
545  bool good_only = true;
546  do {
547  for (int i = 0; i < gridheight_; ++i) {
548  ColPartitionSet* line_set = part_sets.get(i);
549  if (line_set != NULL && line_set->LegalColumnCandidate()) {
550  ColPartitionSet* column_candidate = line_set->Copy(good_only);
551  if (column_candidate != NULL)
552  column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB());
553  }
554  }
555  good_only = !good_only;
556  } while (column_sets_.empty() && !good_only);
558  PrintColumnCandidates("Column candidates");
559  // Improve the column candidates against themselves.
560  ImproveColumnCandidates(&column_sets_, &column_sets_);
562  PrintColumnCandidates("Improved columns");
563  // Improve the column candidates using the part_sets_.
564  ImproveColumnCandidates(&part_sets, &column_sets_);
565  }
566  ColPartitionSet* single_column_set =
567  part_grid_.MakeSingleColumnSet(WidthCB());
568  if (single_column_set != NULL) {
569  // Always add the single column set as a backup even if not in
570  // single column mode.
571  single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB());
572  }
574  PrintColumnCandidates("Final Columns");
575  if (!column_sets_.empty()) {
576  // Divide the page into sections of uniform column layout.
577  AssignColumns(part_sets);
579  DisplayColumnBounds(&part_sets);
580  }
581  ComputeMeanColumnGap();
582  ColPartition_LIST parts;
583  for (int i = 0; i < part_sets.size(); ++i) {
584  ColPartitionSet* line_set = part_sets.get(i);
585  if (line_set != NULL) {
586  line_set->RelinquishParts();
587  delete line_set;
588  }
589  }
590  return true;
591  }
592  return false;
593 }
594 
595 // Attempt to improve the column_candidates by expanding the columns
596 // and adding new partitions from the partition sets in src_sets.
597 // Src_sets may be equal to column_candidates, in which case it will
598 // use them as a source to improve themselves.
599 void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets,
600  PartSetVector* column_sets) {
601  PartSetVector temp_cols;
602  temp_cols.move(column_sets);
603  if (src_sets == column_sets)
604  src_sets = &temp_cols;
605  int set_size = temp_cols.size();
606  // Try using only the good parts first.
607  bool good_only = true;
608  do {
609  for (int i = 0; i < set_size; ++i) {
610  ColPartitionSet* column_candidate = temp_cols.get(i);
611  ASSERT_HOST(column_candidate != NULL);
612  ColPartitionSet* improved = column_candidate->Copy(good_only);
613  if (improved != NULL) {
614  improved->ImproveColumnCandidate(WidthCB(), src_sets);
615  improved->AddToColumnSetsIfUnique(column_sets, WidthCB());
616  }
617  }
618  good_only = !good_only;
619  } while (column_sets->empty() && !good_only);
620  if (column_sets->empty())
621  column_sets->move(&temp_cols);
622  else
623  temp_cols.delete_data_pointers();
624 }
625 
626 // Prints debug information on the column candidates.
627 void ColumnFinder::PrintColumnCandidates(const char* title) {
628  int set_size = column_sets_.size();
629  tprintf("Found %d %s:\n", set_size, title);
630  if (textord_debug_tabfind >= 3) {
631  for (int i = 0; i < set_size; ++i) {
632  ColPartitionSet* column_set = column_sets_.get(i);
633  column_set->Print();
634  }
635  }
636 }
637 
638 // Finds the optimal set of columns that cover the entire image with as
639 // few changes in column partition as possible.
640 // NOTE: this could be thought of as an optimization problem, but a simple
641 // greedy algorithm is used instead. The algorithm repeatedly finds the modal
642 // compatible column in an unassigned region and uses that with the extra
643 // tweak of extending the modal region over small breaks in compatibility.
644 // Where modal regions overlap, the boundary is chosen so as to minimize
645 // the cost in terms of ColPartitions not fitting an approved column.
646 void ColumnFinder::AssignColumns(const PartSetVector& part_sets) {
647  int set_count = part_sets.size();
648  ASSERT_HOST(set_count == gridheight());
649  // Allocate and init the best_columns_.
650  best_columns_ = new ColPartitionSet*[set_count];
651  for (int y = 0; y < set_count; ++y)
652  best_columns_[y] = NULL;
653  int column_count = column_sets_.size();
654  // column_set_costs[part_sets_ index][column_sets_ index] is
655  // < MAX_INT32 if the partition set is compatible with the column set,
656  // in which case its value is the cost for that set used in deciding
657  // which competing set to assign.
658  // any_columns_possible[part_sets_ index] is true if any of
659  // possible_column_sets[part_sets_ index][*] is < MAX_INT32.
660  // assigned_costs[part_sets_ index] is set to the column_set_costs
661  // of the assigned column_sets_ index or MAX_INT32 if none is set.
662  // On return the best_columns_ member is set.
663  bool* any_columns_possible = new bool[set_count];
664  int* assigned_costs = new int[set_count];
665  int** column_set_costs = new int*[set_count];
666  // Set possible column_sets to indicate whether each set is compatible
667  // with each column.
668  for (int part_i = 0; part_i < set_count; ++part_i) {
669  ColPartitionSet* line_set = part_sets.get(part_i);
670  bool debug = line_set != NULL &&
671  WithinTestRegion(2, line_set->bounding_box().left(),
672  line_set->bounding_box().bottom());
673  column_set_costs[part_i] = new int[column_count];
674  any_columns_possible[part_i] = false;
675  assigned_costs[part_i] = MAX_INT32;
676  for (int col_i = 0; col_i < column_count; ++col_i) {
677  if (line_set != NULL &&
678  column_sets_.get(col_i)->CompatibleColumns(debug, line_set,
679  WidthCB())) {
680  column_set_costs[part_i][col_i] =
681  column_sets_.get(col_i)->UnmatchedWidth(line_set);
682  any_columns_possible[part_i] = true;
683  } else {
684  column_set_costs[part_i][col_i] = MAX_INT32;
685  if (debug)
686  tprintf("Set id %d did not match at y=%d, lineset =%p\n",
687  col_i, part_i, line_set);
688  }
689  }
690  }
691  // Assign a column set to each vertical grid position.
692  // While there is an unassigned range, find its mode.
693  int start, end;
694  while (BiggestUnassignedRange(set_count, any_columns_possible,
695  &start, &end)) {
696  if (textord_debug_tabfind >= 2)
697  tprintf("Biggest unassigned range = %d- %d\n", start, end);
698  // Find the modal column_set_id in the range.
699  int column_set_id = RangeModalColumnSet(column_set_costs,
700  assigned_costs, start, end);
701  if (textord_debug_tabfind >= 2) {
702  tprintf("Range modal column id = %d\n", column_set_id);
703  column_sets_.get(column_set_id)->Print();
704  }
705  // Now find the longest run of the column_set_id in the range.
706  ShrinkRangeToLongestRun(column_set_costs, assigned_costs,
707  any_columns_possible,
708  column_set_id, &start, &end);
709  if (textord_debug_tabfind >= 2)
710  tprintf("Shrunk range = %d- %d\n", start, end);
711  // Extend the start and end past the longest run, while there are
712  // only small gaps in compatibility that can be overcome by larger
713  // regions of compatibility beyond.
714  ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
715  any_columns_possible,
716  column_set_id, -1, -1, &start);
717  --end;
718  ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
719  any_columns_possible,
720  column_set_id, 1, set_count, &end);
721  ++end;
723  tprintf("Column id %d applies to range = %d - %d\n",
724  column_set_id, start, end);
725  // Assign the column to the range, which now may overlap with other ranges.
726  AssignColumnToRange(column_set_id, start, end, column_set_costs,
727  assigned_costs);
728  }
729  // If anything remains unassigned, the whole lot is unassigned, so
730  // arbitrarily assign id 0.
731  if (best_columns_[0] == NULL) {
732  AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs);
733  }
734  // Free memory.
735  for (int i = 0; i < set_count; ++i) {
736  delete [] column_set_costs[i];
737  }
738  delete [] assigned_costs;
739  delete [] any_columns_possible;
740  delete [] column_set_costs;
741 }
742 
743 // Finds the biggest range in part_sets_ that has no assigned column, but
744 // column assignment is possible.
745 bool ColumnFinder::BiggestUnassignedRange(int set_count,
746  const bool* any_columns_possible,
747  int* best_start, int* best_end) {
748  int best_range_size = 0;
749  *best_start = set_count;
750  *best_end = set_count;
751  int end = set_count;
752  for (int start = 0; start < gridheight_; start = end) {
753  // Find the first unassigned index in start.
754  while (start < set_count) {
755  if (best_columns_[start] == NULL && any_columns_possible[start])
756  break;
757  ++start;
758  }
759  // Find the first past the end and count the good ones in between.
760  int range_size = 1; // Number of non-null, but unassigned line sets.
761  end = start + 1;
762  while (end < set_count) {
763  if (best_columns_[end] != NULL)
764  break;
765  if (any_columns_possible[end])
766  ++range_size;
767  ++end;
768  }
769  if (start < set_count && range_size > best_range_size) {
770  best_range_size = range_size;
771  *best_start = start;
772  *best_end = end;
773  }
774  }
775  return *best_start < *best_end;
776 }
777 
778 // Finds the modal compatible column_set_ index within the given range.
779 int ColumnFinder::RangeModalColumnSet(int** column_set_costs,
780  const int* assigned_costs,
781  int start, int end) {
782  int column_count = column_sets_.size();
783  STATS column_stats(0, column_count);
784  for (int part_i = start; part_i < end; ++part_i) {
785  for (int col_j = 0; col_j < column_count; ++col_j) {
786  if (column_set_costs[part_i][col_j] < assigned_costs[part_i])
787  column_stats.add(col_j, 1);
788  }
789  }
790  ASSERT_HOST(column_stats.get_total() > 0);
791  return column_stats.mode();
792 }
793 
794 // Given that there are many column_set_id compatible columns in the range,
795 // shrinks the range to the longest contiguous run of compatibility, allowing
796 // gaps where no columns are possible, but not where competing columns are
797 // possible.
798 void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs,
799  const int* assigned_costs,
800  const bool* any_columns_possible,
801  int column_set_id,
802  int* best_start, int* best_end) {
803  // orig_start and orig_end are the maximum range we will look at.
804  int orig_start = *best_start;
805  int orig_end = *best_end;
806  int best_range_size = 0;
807  *best_start = orig_end;
808  *best_end = orig_end;
809  int end = orig_end;
810  for (int start = orig_start; start < orig_end; start = end) {
811  // Find the first possible
812  while (start < orig_end) {
813  if (column_set_costs[start][column_set_id] < assigned_costs[start] ||
814  !any_columns_possible[start])
815  break;
816  ++start;
817  }
818  // Find the first past the end.
819  end = start + 1;
820  while (end < orig_end) {
821  if (column_set_costs[end][column_set_id] >= assigned_costs[start] &&
822  any_columns_possible[end])
823  break;
824  ++end;
825  }
826  if (start < orig_end && end - start > best_range_size) {
827  best_range_size = end - start;
828  *best_start = start;
829  *best_end = end;
830  }
831  }
832 }
833 
834 // Moves start in the direction of step, upto, but not including end while
835 // the only incompatible regions are no more than kMaxIncompatibleColumnCount
836 // in size, and the compatible regions beyond are bigger.
837 void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs,
838  const int* assigned_costs,
839  const bool* any_columns_possible,
840  int column_set_id,
841  int step, int end, int* start) {
842  if (textord_debug_tabfind > 2)
843  tprintf("Starting expansion at %d, step=%d, limit=%d\n",
844  *start, step, end);
845  if (*start == end)
846  return; // Cannot be expanded.
847 
848  int barrier_size = 0;
849  int good_size = 0;
850  do {
851  // Find the size of the incompatible barrier.
852  barrier_size = 0;
853  int i;
854  for (i = *start + step; i != end; i += step) {
855  if (column_set_costs[i][column_set_id] < assigned_costs[i])
856  break; // We are back on.
857  // Locations where none are possible don't count.
858  if (any_columns_possible[i])
859  ++barrier_size;
860  }
861  if (textord_debug_tabfind > 2)
862  tprintf("At %d, Barrier size=%d\n", i, barrier_size);
863  if (barrier_size > kMaxIncompatibleColumnCount)
864  return; // Barrier too big.
865  if (i == end) {
866  // We can't go any further, but the barrier was small, so go to the end.
867  *start = i - step;
868  return;
869  }
870  // Now find the size of the good region on the other side.
871  good_size = 1;
872  for (i += step; i != end; i += step) {
873  if (column_set_costs[i][column_set_id] < assigned_costs[i])
874  ++good_size;
875  else if (any_columns_possible[i])
876  break;
877  }
878  if (textord_debug_tabfind > 2)
879  tprintf("At %d, good size = %d\n", i, good_size);
880  // If we had enough good ones we can extend the start and keep looking.
881  if (good_size >= barrier_size)
882  *start = i - step;
883  } while (good_size >= barrier_size);
884 }
885 
886 // Assigns the given column_set_id to the given range.
887 void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end,
888  int** column_set_costs,
889  int* assigned_costs) {
890  ColPartitionSet* column_set = column_sets_.get(column_set_id);
891  for (int i = start; i < end; ++i) {
892  assigned_costs[i] = column_set_costs[i][column_set_id];
893  best_columns_[i] = column_set;
894  }
895 }
896 
897 // Computes the mean_column_gap_.
898 void ColumnFinder::ComputeMeanColumnGap() {
899  int total_gap = 0;
900  int total_width = 0;
901  int gap_samples = 0;
902  int width_samples = 0;
903  for (int i = 0; i < gridheight_; ++i) {
904  ASSERT_HOST(best_columns_[i] != NULL);
905  best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width,
906  &width_samples,
907  &total_gap,
908  &gap_samples);
909  }
910  mean_column_gap_ = gap_samples > 0 ? total_gap / gap_samples
911  : total_width / width_samples;
912 }
913 
916 
917 // Helper to delete all the deletable blobs on the list. Owned blobs are
918 // extracted from the list, but not deleted, leaving them owned by the owner().
919 static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) {
920  for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) {
921  BLOBNBOX* blob = blob_it.extract();
922  if (blob->owner() == NULL) {
923  delete blob->cblob();
924  delete blob;
925  }
926  }
927 }
928 
929 // Hoovers up all un-owned blobs and deletes them.
930 // The rest get released from the block so the ColPartitions can pass
931 // ownership to the output blocks.
932 void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) {
933  ReleaseAllBlobsAndDeleteUnused(&block->blobs);
934  ReleaseAllBlobsAndDeleteUnused(&block->small_blobs);
935  ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs);
936  ReleaseAllBlobsAndDeleteUnused(&block->large_blobs);
937  ReleaseAllBlobsAndDeleteUnused(&image_bblobs_);
938 }
939 
940 // Splits partitions that cross columns where they have nothing in the gap.
941 void ColumnFinder::GridSplitPartitions() {
942  // Iterate the ColPartitions in the grid.
943  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
944  gsearch(&part_grid_);
945  gsearch.StartFullSearch();
946  ColPartition* dont_repeat = NULL;
947  ColPartition* part;
948  while ((part = gsearch.NextFullSearch()) != NULL) {
949  if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat)
950  continue; // Only applies to text partitions.
951  ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
952  int first_col = -1;
953  int last_col = -1;
954  // Find which columns the partition spans.
955  part->ColumnRange(resolution_, column_set, &first_col, &last_col);
956  if (first_col > 0)
957  --first_col;
958  // Convert output column indices to physical column indices.
959  first_col /= 2;
960  last_col /= 2;
961  // We will only consider cases where a partition spans two columns,
962  // since a heading that spans more columns than that is most likely
963  // genuine.
964  if (last_col != first_col + 1)
965  continue;
966  // Set up a rectangle search x-bounded by the column gap and y by the part.
967  int y = part->MidY();
968  TBOX margin_box = part->bounding_box();
969  bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(),
970  margin_box.bottom());
971  if (debug) {
972  tprintf("Considering partition for GridSplit:");
973  part->Print();
974  }
975  ColPartition* column = column_set->GetColumnByIndex(first_col);
976  if (column == NULL)
977  continue;
978  margin_box.set_left(column->RightAtY(y) + 2);
979  column = column_set->GetColumnByIndex(last_col);
980  if (column == NULL)
981  continue;
982  margin_box.set_right(column->LeftAtY(y) - 2);
983  // TODO(rays) Decide whether to keep rectangular filling or not in the
984  // main grid and therefore whether we need a fancier search here.
985  // Now run the rect search on the main blob grid.
986  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this);
987  if (debug) {
988  tprintf("Searching box (%d,%d)->(%d,%d)\n",
989  margin_box.left(), margin_box.bottom(),
990  margin_box.right(), margin_box.top());
991  part->Print();
992  }
993  rectsearch.StartRectSearch(margin_box);
994  BLOBNBOX* bbox;
995  while ((bbox = rectsearch.NextRectSearch()) != NULL) {
996  if (bbox->bounding_box().overlap(margin_box))
997  break;
998  }
999  if (bbox == NULL) {
1000  // There seems to be nothing in the hole, so split the partition.
1001  gsearch.RemoveBBox();
1002  int x_middle = (margin_box.left() + margin_box.right()) / 2;
1003  if (debug) {
1004  tprintf("Splitting part at %d:", x_middle);
1005  part->Print();
1006  }
1007  ColPartition* split_part = part->SplitAt(x_middle);
1008  if (split_part != NULL) {
1009  if (debug) {
1010  tprintf("Split result:");
1011  part->Print();
1012  split_part->Print();
1013  }
1014  part_grid_.InsertBBox(true, true, split_part);
1015  } else {
1016  // Split had no effect
1017  if (debug)
1018  tprintf("Split had no effect\n");
1019  dont_repeat = part;
1020  }
1021  part_grid_.InsertBBox(true, true, part);
1022  gsearch.RepositionIterator();
1023  } else if (debug) {
1024  tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n",
1025  bbox->bounding_box().left(), bbox->bounding_box().bottom(),
1026  bbox->bounding_box().right(), bbox->bounding_box().top());
1027  }
1028  }
1029 }
1030 
1031 // Merges partitions where there is vertical overlap, within a single column,
1032 // and the horizontal gap is small enough.
1033 void ColumnFinder::GridMergePartitions() {
1034  // Iterate the ColPartitions in the grid.
1035  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1036  gsearch(&part_grid_);
1037  gsearch.StartFullSearch();
1038  ColPartition* part;
1039  while ((part = gsearch.NextFullSearch()) != NULL) {
1040  if (part->IsUnMergeableType())
1041  continue;
1042  // Set up a rectangle search x-bounded by the column and y by the part.
1043  ColPartitionSet* columns = best_columns_[gsearch.GridY()];
1044  TBOX box = part->bounding_box();
1045  bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom());
1046  if (debug) {
1047  tprintf("Considering part for merge at:");
1048  part->Print();
1049  }
1050  int y = part->MidY();
1051  ColPartition* left_column = columns->ColumnContaining(box.left(), y);
1052  ColPartition* right_column = columns->ColumnContaining(box.right(), y);
1053  if (left_column == NULL || right_column != left_column) {
1054  if (debug)
1055  tprintf("In different columns\n");
1056  continue;
1057  }
1058  box.set_left(left_column->LeftAtY(y));
1059  box.set_right(right_column->RightAtY(y));
1060  // Now run the rect search.
1061  bool modified_box = false;
1062  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1063  rsearch(&part_grid_);
1064  rsearch.SetUniqueMode(true);
1065  rsearch.StartRectSearch(box);
1066  ColPartition* neighbour;
1067 
1068  while ((neighbour = rsearch.NextRectSearch()) != NULL) {
1069  if (neighbour == part || neighbour->IsUnMergeableType())
1070  continue;
1071  const TBOX& neighbour_box = neighbour->bounding_box();
1072  if (debug) {
1073  tprintf("Considering merge with neighbour at:");
1074  neighbour->Print();
1075  }
1076  if (neighbour_box.right() < box.left() ||
1077  neighbour_box.left() > box.right())
1078  continue; // Not within the same column.
1079  if (part->VSignificantCoreOverlap(*neighbour) &&
1080  part->TypesMatch(*neighbour)) {
1081  // There is vertical overlap and the gross types match, but only
1082  // merge if the horizontal gap is small enough, as one of the
1083  // partitions may be a figure caption within a column.
1084  // If there is only one column, then the mean_column_gap_ is large
1085  // enough to allow almost any merge, by being the mean column width.
1086  const TBOX& part_box = part->bounding_box();
1087  // Don't merge if there is something else in the way. Use the margin
1088  // to decide, and check both to allow a bit of overlap.
1089  if (neighbour_box.left() > part->right_margin() &&
1090  part_box.right() < neighbour->left_margin())
1091  continue; // Neighbour is too far to the right.
1092  if (neighbour_box.right() < part->left_margin() &&
1093  part_box.left() > neighbour->right_margin())
1094  continue; // Neighbour is too far to the left.
1095  int h_gap = MAX(part_box.left(), neighbour_box.left()) -
1096  MIN(part_box.right(), neighbour_box.right());
1097  if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction ||
1098  part_box.width() < mean_column_gap_ ||
1099  neighbour_box.width() < mean_column_gap_) {
1100  if (debug) {
1101  tprintf("Running grid-based merge between:\n");
1102  part->Print();
1103  neighbour->Print();
1104  }
1105  rsearch.RemoveBBox();
1106  gsearch.RepositionIterator();
1107  part->Absorb(neighbour, WidthCB());
1108  modified_box = true;
1109  } else if (debug) {
1110  tprintf("Neighbour failed hgap test\n");
1111  }
1112  } else if (debug) {
1113  tprintf("Neighbour failed overlap or typesmatch test\n");
1114  }
1115  }
1116  if (modified_box) {
1117  // We modified the box of part, so re-insert it into the grid.
1118  // This does no harm in the current cell, as it already exists there,
1119  // but it needs to exist in all the cells covered by its bounding box,
1120  // or it will never be found by a full search.
1121  // Because the box has changed, it has to be removed first, otherwise
1122  // add_sorted may fail to keep a single copy of the pointer.
1123  gsearch.RemoveBBox();
1124  part_grid_.InsertBBox(true, true, part);
1125  gsearch.RepositionIterator();
1126  }
1127  }
1128 }
1129 
1130 // Inserts remaining noise blobs into the most applicable partition if any.
1131 // If there is no applicable partition, then the blobs are deleted.
1132 void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) {
1133  BLOBNBOX_IT blob_it(&block->noise_blobs);
1134  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1135  BLOBNBOX* blob = blob_it.data();
1136  if (blob->owner() != NULL) continue;
1137  TBOX search_box(blob->bounding_box());
1138  bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom());
1139  search_box.pad(gridsize(), gridsize());
1140  // Setup a rectangle search to find the best partition to merge with.
1141  ColPartitionGridSearch rsearch(&part_grid_);
1142  rsearch.SetUniqueMode(true);
1143  rsearch.StartRectSearch(search_box);
1144  ColPartition* part;
1145  ColPartition* best_part = NULL;
1146  int best_distance = 0;
1147  while ((part = rsearch.NextRectSearch()) != NULL) {
1148  if (part->IsUnMergeableType())
1149  continue;
1150  int distance = projection_.DistanceOfBoxFromPartition(
1151  blob->bounding_box(), *part, denorm_, debug);
1152  if (best_part == NULL || distance < best_distance) {
1153  best_part = part;
1154  best_distance = distance;
1155  }
1156  }
1157  if (best_part != NULL &&
1158  best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
1159  // Close enough to merge.
1160  if (debug) {
1161  tprintf("Adding noise blob with distance %d, thr=%g:box:",
1162  best_distance,
1163  kMaxDistToPartSizeRatio * best_part->median_size());
1164  blob->bounding_box().print();
1165  tprintf("To partition:");
1166  best_part->Print();
1167  }
1168  part_grid_.RemoveBBox(best_part);
1169  best_part->AddBox(blob);
1170  part_grid_.InsertBBox(true, true, best_part);
1171  blob->set_owner(best_part);
1172  blob->set_flow(best_part->flow());
1173  blob->set_region_type(best_part->blob_type());
1174  } else {
1175  // Mark the blob for deletion.
1176  blob->set_region_type(BRT_NOISE);
1177  }
1178  }
1179  // Delete the marked blobs, clearing neighbour references.
1180  block->DeleteUnownedNoise();
1181 }
1182 
1183 // Helper makes a box from a horizontal line.
1184 static TBOX BoxFromHLine(const TabVector* hline) {
1185  int top = MAX(hline->startpt().y(), hline->endpt().y());
1186  int bottom = MIN(hline->startpt().y(), hline->endpt().y());
1187  top += hline->mean_width();
1188  if (top == bottom) {
1189  if (bottom > 0)
1190  --bottom;
1191  else
1192  ++top;
1193  }
1194  return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top);
1195 }
1196 
1197 // Remove partitions that come from horizontal lines that look like
1198 // underlines, but are not part of a table.
1199 void ColumnFinder::GridRemoveUnderlinePartitions() {
1200  TabVector_IT hline_it(&horizontal_lines_);
1201  for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
1202  TabVector* hline = hline_it.data();
1203  if (hline->intersects_other_lines())
1204  continue;
1205  TBOX line_box = BoxFromHLine(hline);
1206  TBOX search_box = line_box;
1207  search_box.pad(0, line_box.height());
1208  ColPartitionGridSearch part_search(&part_grid_);
1209  part_search.SetUniqueMode(true);
1210  part_search.StartRectSearch(search_box);
1211  ColPartition* covered;
1212  bool touched_table = false;
1213  bool touched_text = false;
1214  ColPartition* line_part = NULL;
1215  while ((covered = part_search.NextRectSearch()) != NULL) {
1216  if (covered->type() == PT_TABLE) {
1217  touched_table = true;
1218  break;
1219  } else if (covered->IsTextType()) {
1220  // TODO(rays) Add a list of underline sections to ColPartition.
1221  int text_bottom = covered->median_bottom();
1222  if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top())
1223  touched_text = true;
1224  } else if (covered->blob_type() == BRT_HLINE &&
1225  line_box.contains(covered->bounding_box())) {
1226  line_part = covered;
1227  }
1228  }
1229  if (line_part != NULL && !touched_table && touched_text) {
1230  part_grid_.RemoveBBox(line_part);
1231  delete line_part;
1232  }
1233  }
1234 }
1235 
1236 // Add horizontal line separators as partitions.
1237 void ColumnFinder::GridInsertHLinePartitions() {
1238  TabVector_IT hline_it(&horizontal_lines_);
1239  for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
1240  TabVector* hline = hline_it.data();
1241  TBOX line_box = BoxFromHLine(hline);
1242  ColPartition* part = ColPartition::MakeLinePartition(
1244  line_box.left(), line_box.bottom(), line_box.right(), line_box.top());
1245  part->set_type(PT_HORZ_LINE);
1246  bool any_image = false;
1247  ColPartitionGridSearch part_search(&part_grid_);
1248  part_search.SetUniqueMode(true);
1249  part_search.StartRectSearch(line_box);
1250  ColPartition* covered;
1251  while ((covered = part_search.NextRectSearch()) != NULL) {
1252  if (covered->IsImageType()) {
1253  any_image = true;
1254  break;
1255  }
1256  }
1257  if (!any_image)
1258  part_grid_.InsertBBox(true, true, part);
1259  else
1260  delete part;
1261  }
1262 }
1263 
1264 // Add horizontal line separators as partitions.
1265 void ColumnFinder::GridInsertVLinePartitions() {
1266  TabVector_IT vline_it(dead_vectors());
1267  for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) {
1268  TabVector* vline = vline_it.data();
1269  if (!vline->IsSeparator())
1270  continue;
1271  int left = MIN(vline->startpt().x(), vline->endpt().x());
1272  int right = MAX(vline->startpt().x(), vline->endpt().x());
1273  right += vline->mean_width();
1274  if (left == right) {
1275  if (left > 0)
1276  --left;
1277  else
1278  ++right;
1279  }
1280  ColPartition* part = ColPartition::MakeLinePartition(
1282  left, vline->startpt().y(), right, vline->endpt().y());
1283  part->set_type(PT_VERT_LINE);
1284  bool any_image = false;
1285  ColPartitionGridSearch part_search(&part_grid_);
1286  part_search.SetUniqueMode(true);
1287  part_search.StartRectSearch(part->bounding_box());
1288  ColPartition* covered;
1289  while ((covered = part_search.NextRectSearch()) != NULL) {
1290  if (covered->IsImageType()) {
1291  any_image = true;
1292  break;
1293  }
1294  }
1295  if (!any_image)
1296  part_grid_.InsertBBox(true, true, part);
1297  else
1298  delete part;
1299  }
1300 }
1301 
1302 // For every ColPartition in the grid, sets its type based on position
1303 // in the columns.
1304 void ColumnFinder::SetPartitionTypes() {
1305  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1306  gsearch(&part_grid_);
1307  gsearch.StartFullSearch();
1308  ColPartition* part;
1309  while ((part = gsearch.NextFullSearch()) != NULL) {
1310  part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]);
1311  }
1312 }
1313 
1314 // Only images remain with multiple types in a run of partners.
1315 // Sets the type of all in the group to the maximum of the group.
1316 void ColumnFinder::SmoothPartnerRuns() {
1317  // Iterate the ColPartitions in the grid.
1318  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1319  gsearch(&part_grid_);
1320  gsearch.StartFullSearch();
1321  ColPartition* part;
1322  while ((part = gsearch.NextFullSearch()) != NULL) {
1323  ColPartition* partner = part->SingletonPartner(true);
1324  if (partner != NULL) {
1325  if (partner->SingletonPartner(false) != part) {
1326  tprintf("Ooops! Partition:(%d partners)",
1327  part->upper_partners()->length());
1328  part->Print();
1329  tprintf("has singleton partner:(%d partners",
1330  partner->lower_partners()->length());
1331  partner->Print();
1332  tprintf("but its singleton partner is:");
1333  if (partner->SingletonPartner(false) == NULL)
1334  tprintf("NULL\n");
1335  else
1336  partner->SingletonPartner(false)->Print();
1337  }
1338  ASSERT_HOST(partner->SingletonPartner(false) == part);
1339  } else if (part->SingletonPartner(false) != NULL) {
1340  ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
1341  int column_count = column_set->ColumnCount();
1342  part->SmoothPartnerRun(column_count * 2 + 1);
1343  }
1344  }
1345 }
1346 
1347 // Helper functions for TransformToBlocks.
1348 // Add the part to the temp list in the correct order.
1349 void ColumnFinder::AddToTempPartList(ColPartition* part,
1350  ColPartition_CLIST* temp_list) {
1351  int mid_y = part->MidY();
1352  ColPartition_C_IT it(temp_list);
1353  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1354  ColPartition* test_part = it.data();
1355  if (part->type() == PT_NOISE || test_part->type() == PT_NOISE)
1356  continue; // Noise stays in sequence.
1357  if (test_part == part->SingletonPartner(false))
1358  break; // Insert before its lower partner.
1359  int neighbour_bottom = test_part->median_bottom();
1360  int neighbour_top = test_part->median_top();
1361  int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1362  if (neighbour_y < mid_y)
1363  break; // part is above test_part so insert it.
1364  if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part))
1365  continue; // Incompatibles stay in order
1366  }
1367  if (it.cycled_list()) {
1368  it.add_to_end(part);
1369  } else {
1370  it.add_before_stay_put(part);
1371  }
1372 }
1373 
1374 // Add everything from the temp list to the work_set assuming correct order.
1375 void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list,
1376  WorkingPartSet_LIST* work_set) {
1377  ColPartition_C_IT it(temp_list);
1378  while (!it.empty()) {
1379  it.extract()->AddToWorkingSet(bleft_, tright_, resolution_,
1380  &good_parts_, work_set);
1381  it.forward();
1382  }
1383 }
1384 
1385 // Transform the grid of partitions to the output blocks.
1386 void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks,
1387  TO_BLOCK_LIST* to_blocks) {
1388  WorkingPartSet_LIST work_set;
1389  ColPartitionSet* column_set = NULL;
1390  ColPartition_IT noise_it(&noise_parts_);
1391  // The temp_part_list holds a list of parts at the same grid y coord
1392  // so they can be added in the correct order. This prevents thin objects
1393  // like horizontal lines going before the text lines above them.
1394  ColPartition_CLIST temp_part_list;
1395  // Iterate the ColPartitions in the grid. It starts at the top
1396  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1397  gsearch(&part_grid_);
1398  gsearch.StartFullSearch();
1399  int prev_grid_y = -1;
1400  ColPartition* part;
1401  while ((part = gsearch.NextFullSearch()) != NULL) {
1402  int grid_y = gsearch.GridY();
1403  if (grid_y != prev_grid_y) {
1404  EmptyTempPartList(&temp_part_list, &work_set);
1405  prev_grid_y = grid_y;
1406  }
1407  if (best_columns_[grid_y] != column_set) {
1408  column_set = best_columns_[grid_y];
1409  // Every line should have a non-null best column.
1410  ASSERT_HOST(column_set != NULL);
1411  column_set->ChangeWorkColumns(bleft_, tright_, resolution_,
1412  &good_parts_, &work_set);
1414  tprintf("Changed column groups at grid index %d, y=%d\n",
1415  gsearch.GridY(), gsearch.GridY() * gridsize());
1416  }
1417  if (part->type() == PT_NOISE) {
1418  noise_it.add_to_end(part);
1419  } else {
1420  AddToTempPartList(part, &temp_part_list);
1421  }
1422  }
1423  EmptyTempPartList(&temp_part_list, &work_set);
1424  // Now finish all working sets and transfer ColPartitionSets to block_sets.
1425  WorkingPartSet_IT work_it(&work_set);
1426  while (!work_it.empty()) {
1427  WorkingPartSet* working_set = work_it.extract();
1428  working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_,
1429  &good_parts_, blocks, to_blocks);
1430  delete working_set;
1431  work_it.forward();
1432  }
1433 }
1434 
1435 // Helper reflects a list of blobs in the y-axis.
1436 // Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below.
1437 static void ReflectBlobList(BLOBNBOX_LIST* bblobs) {
1438  BLOBNBOX_IT it(bblobs);
1439  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1440  it.data()->reflect_box_in_y_axis();
1441  }
1442 }
1443 
1444 // Reflect the blob boxes (but not the outlines) in the y-axis so that
1445 // the blocks get created in the correct RTL order. Reflects the blobs
1446 // in the input_block and the bblobs list.
1447 // The reflection is undone in RotateAndReskewBlocks by
1448 // reflecting the blocks themselves, and then recomputing the blob bounding
1449 // boxes.
1450 void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
1451  ReflectBlobList(bblobs);
1452  ReflectBlobList(&input_block->blobs);
1453  ReflectBlobList(&input_block->small_blobs);
1454  ReflectBlobList(&input_block->noise_blobs);
1455  ReflectBlobList(&input_block->large_blobs);
1456  // Update the denorm with the reflection.
1457  DENORM* new_denorm = new DENORM;
1458  new_denorm->SetupNormalization(NULL, NULL, NULL, denorm_, NULL, 0,
1459  0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
1460  denorm_ = new_denorm;
1461 }
1462 
1463 // Undo the deskew that was done in FindTabVectors, as recognition is done
1464 // without correcting blobs or blob outlines for skew.
1465 // Reskew the completed blocks to put them back to the original rotated coords
1466 // that were created by CorrectOrientation.
1467 // If the input_is_rtl, then reflect the blocks in the y-axis to undo the
1468 // reflection that was done before FindTabVectors.
1469 // Blocks that were identified as vertical text (relative to the rotated
1470 // coordinates) are further rotated so the text lines are horizontal.
1471 // blob polygonal outlines are rotated to match the position of the blocks
1472 // that they are in, and their bounding boxes are recalculated to be accurate.
1473 // Record appropriate inverse transformations and required
1474 // classifier transformation in the blocks.
1475 void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl,
1476  TO_BLOCK_LIST* blocks) {
1477  if (input_is_rtl) {
1478  // The skew is backwards because of the reflection.
1479  FCOORD tmp = deskew_;
1480  deskew_ = reskew_;
1481  reskew_ = tmp;
1482  }
1483  TO_BLOCK_IT it(blocks);
1484  int block_index = 1;
1485  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1486  TO_BLOCK* to_block = it.data();
1487  BLOCK* block = to_block->block;
1488  // Blocks are created on the deskewed blob outlines in TransformToBlocks()
1489  // so we need to reskew them back to page coordinates.
1490  if (input_is_rtl) {
1491  block->reflect_polygon_in_y_axis();
1492  }
1493  block->rotate(reskew_);
1494  // Copy the right_to_left flag to the created block.
1495  block->set_right_to_left(input_is_rtl);
1496  // Save the skew angle in the block for baseline computations.
1497  block->set_skew(reskew_);
1498  block->set_index(block_index++);
1499  FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
1500  // Rotate all the blobs if needed and recompute the bounding boxes.
1501  // Compute the block median blob width and height as we go.
1502  STATS widths(0, block->bounding_box().width());
1503  STATS heights(0, block->bounding_box().height());
1504  BLOBNBOX_IT blob_it(&to_block->blobs);
1505  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1506  BLOBNBOX* blob = blob_it.data();
1507  if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) {
1508  blob->cblob()->rotate(blob_rotation);
1509  }
1510  blob->compute_bounding_box();
1511  widths.add(blob->bounding_box().width(), 1);
1512  heights.add(blob->bounding_box().height(), 1);
1513  }
1514  block->set_median_size(static_cast<int>(widths.median() + 0.5),
1515  static_cast<int>(heights.median() + 0.5));
1516  if (textord_debug_tabfind >= 2)
1517  tprintf("Block median size = (%d, %d)\n",
1518  block->median_size().x(), block->median_size().y());
1519  }
1520 }
1521 
1522 // Computes the rotations for the block (to make textlines horizontal) and
1523 // for the blobs (for classification) and sets the appropriate members
1524 // of the given block.
1525 // Returns the rotation that needs to be applied to the blobs to make
1526 // them sit in the rotated block.
1527 FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) {
1528  // The text_rotation_ tells us the gross page text rotation that needs
1529  // to be applied for classification
1530  // TODO(rays) find block-level classify rotation by orientation detection.
1531  // In the mean time, assume that "up" for text printed in the minority
1532  // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading.
1533  // Accomplish this by zero-ing out the text rotation. This covers the
1534  // common cases of image credits in documents written in Latin scripts
1535  // and page headings for predominantly vertically written CJK books.
1536  FCOORD classify_rotation(text_rotation_);
1537  FCOORD block_rotation(1.0f, 0.0f);
1538  if (block->poly_block()->isA() == PT_VERTICAL_TEXT) {
1539  // Vertical text needs to be 90 degrees rotated relative to the rest.
1540  // If the rest has a 90 degree rotation already, use the inverse, making
1541  // the vertical text the original way up. Otherwise use 90 degrees
1542  // clockwise.
1543  if (rerotate_.x() == 0.0f)
1544  block_rotation = rerotate_;
1545  else
1546  block_rotation = FCOORD(0.0f, -1.0f);
1547  block->rotate(block_rotation);
1548  classify_rotation = FCOORD(1.0f, 0.0f);
1549  }
1550  block_rotation.rotate(rotation_);
1551  // block_rotation is now what we have done to the blocks. Now do the same
1552  // thing to the blobs, but save the inverse rotation in the block, as that
1553  // is what we need to DENORM back to the image coordinates.
1554  FCOORD blob_rotation(block_rotation);
1555  block_rotation.set_y(-block_rotation.y());
1556  block->set_re_rotation(block_rotation);
1557  block->set_classify_rotation(classify_rotation);
1558  if (textord_debug_tabfind) {
1559  tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:",
1560  block->index(), block->poly_block()->isA(),
1561  block->re_rotation().x(), block->re_rotation().y(),
1562  classify_rotation.x(), classify_rotation.y());
1563  }
1564  return blob_rotation;
1565 }
1566 
1567 } // namespace tesseract.