Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
blobbox.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: blobbox.h (Formerly blobnbox.h)
3  * Description: Code for the textord blob class.
4  * Author: Ray Smith
5  * Created: Thu Jul 30 09:08:51 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef BLOBBOX_H
21 #define BLOBBOX_H
22 
23 #include "clst.h"
24 #include "elst2.h"
25 #include "werd.h"
26 #include "ocrblock.h"
27 #include "statistc.h"
28 
30 {
31  PITCH_DUNNO, // insufficient data
32  PITCH_DEF_FIXED, // definitely fixed
33  PITCH_MAYBE_FIXED, // could be
38 };
39 
40 // The possible tab-stop types of each side of a BLOBNBOX.
41 // The ordering is important, as it is used for deleting dead-ends in the
42 // search. ALIGNED, CONFIRMED and VLINE should remain greater than the
43 // non-aligned, unset, or deleted members.
44 enum TabType {
45  TT_NONE, // Not a tab.
46  TT_DELETED, // Not a tab after detailed analysis.
47  TT_MAYBE_RAGGED, // Initial designation of a tab-stop candidate.
48  TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
49  TT_CONFIRMED, // Aligned with neighbours.
50  TT_VLINE // Detected as a vertical line.
51 };
52 
53 // The possible region types of a BLOBNBOX.
54 // Note: keep all the text types > BRT_UNKNOWN and all the image types less.
55 // Keep in sync with kBlobTypes in colpartition.cpp and BoxColor, and the
56 // *Type static functions below.
58  BRT_NOISE, // Neither text nor image.
59  BRT_HLINE, // Horizontal separator line.
60  BRT_VLINE, // Vertical separator line.
61  BRT_RECTIMAGE, // Rectangular image.
62  BRT_POLYIMAGE, // Non-rectangular image.
63  BRT_UNKNOWN, // Not determined yet.
64  BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
65  BRT_TEXT, // Convincing text.
66 
67  BRT_COUNT // Number of possibilities.
68 };
69 
70 // enum for elements of arrays that refer to neighbours.
71 // NOTE: keep in this order, so ^2 can be used to flip direction.
78 };
79 
80 // Enum for special types of text characters, such as math symbols or italics.
82  BSTT_NONE, // No special type.
83  BSTT_ITALIC, // Italic style.
84  BSTT_DIGIT, // Digit symbols.
85  BSTT_MATH, // Mathematical symbols (not including digits).
86  BSTT_UNCLEAR, // Characters with low recognition rate.
87  BSTT_SKIP, // Characters that we skip labeling (usually too small).
89 };
90 
92  return static_cast<BlobNeighbourDir>(dir ^ 2);
93 }
94 
95 // BlobTextFlowType indicates the quality of neighbouring information
96 // related to a chain of connected components, either horizontally or
97 // vertically. Also used by ColPartition for the collection of blobs
98 // within, which should all have the same value in most cases.
100  BTFT_NONE, // No text flow set yet.
101  BTFT_NONTEXT, // Flow too poor to be likely text.
102  BTFT_NEIGHBOURS, // Neighbours support flow in this direction.
103  BTFT_CHAIN, // There is a weak chain of text in this direction.
104  BTFT_STRONG_CHAIN, // There is a strong chain of text in this direction.
105  BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
106  BTFT_LEADER, // Leader dots/dashes etc.
108 };
109 
110 // Returns true if type1 dominates type2 in a merge. Mostly determined by the
111 // ordering of the enum, except that LEADER is weak and dominates nothing.
112 // For distinct types the result is anti-symmetric: f(t1, t2) == !f(t2, t1).
113 // For equal types it is true (>= is used), or false if both are BTFT_LEADER.
115  // LEADER always loses, even against another LEADER.
116  if (type1 == BTFT_LEADER) return false;
117  if (type2 == BTFT_LEADER) return true;
118  // With those out of the way, the ordering of the enum determines the result.
119  return type1 >= type2;
120 }
121 
122 namespace tesseract {
123 class ColPartition;
124 }
125 
126 class BLOBNBOX;
128 class BLOBNBOX:public ELIST_LINK
129 {
130  public:
133  }
134  explicit BLOBNBOX(C_BLOB *srcblob) {
135  box = srcblob->bounding_box();
137  cblob_ptr = srcblob;
138  area = static_cast<int>(srcblob->area());
139  }
140  static BLOBNBOX* RealBlob(C_OUTLINE* outline) {
141  C_BLOB* blob = new C_BLOB(outline);
142  return new BLOBNBOX(blob);
143  }
144 
145  // Rotates the box and the underlying blob.
146  void rotate(FCOORD rotation);
147 
148  // Methods that act on the box without touching the underlying blob.
149  // Reflect the box in the y-axis, leaving the underlying blob untouched.
150  void reflect_box_in_y_axis();
151  // Rotates the box by the angle given by rotation.
152  // If the blob is a diacritic, then only small rotations for skew
153  // correction can be applied.
154  void rotate_box(FCOORD rotation);
155  // Moves just the box by the given vector.
157  if (IsDiacritic()) {  // Base-char extent differs from box: shift it by v.y().
158  box.move(v);
159  base_char_top_ += v.y();
160  base_char_bottom_ += v.y();
161  } else {  // Otherwise re-sync the base-char extent to the moved box.
162  box.move(v);
163  set_diacritic_box(box);
164  }
165  }
166  void merge(BLOBNBOX *nextblob);
167  void really_merge(BLOBNBOX* other);
168  void chop( // fake chop blob
169  BLOBNBOX_IT *start_it, // location of this
170  BLOBNBOX_IT *blob_it, // iterator
171  FCOORD rotation, // for landscape
172  float xheight); // line height
173 
174  void NeighbourGaps(int gaps[BND_COUNT]) const;
175  void MinMaxGapsClipped(int* h_min, int* h_max,
176  int* v_min, int* v_max) const;
177  void CleanNeighbours();
178  // Returns positive if there is at least one side neighbour that has a
179  // similar stroke width and is not on the other side of a rule line.
180  int GoodTextBlob() const;
181  // Returns the number of side neighbours that are of type BRT_NOISE.
182  int NoisyNeighbours() const;
183 
184  // Returns true if the blob is noise and has no owner.
185  bool DeletableNoise() const {
186  return owner() == NULL && region_type() == BRT_NOISE;
187  }
188 
189  // Returns true, and sets vert_possible/horz_possible if the blob has some
190  // feature that makes it individually appear to flow one way.
191  // eg if it has a high aspect ratio, yet has a complex shape, such as a
192  // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1.
193  bool DefiniteIndividualFlow();
194 
195  // Returns true if there is no tabstop violation in merging this and other.
196  bool ConfirmNoTabViolation(const BLOBNBOX& other) const;
197 
198  // Returns true if other has a similar stroke width to this.
199  bool MatchingStrokeWidth(const BLOBNBOX& other,
200  double fractional_tolerance,
201  double constant_tolerance) const;
202 
203  // Returns a bounding box of the outline contained within the
204  // given horizontal range.
205  TBOX BoundsWithinLimits(int left, int right);
206 
207  // Simple accessors.
208  const TBOX& bounding_box() const {
209  return box;
210  }
211  // Set the bounding box. Use with caution.
212  // Normally use compute_bounding_box instead.
213  void set_bounding_box(const TBOX& new_box) {
214  box = new_box;
215  base_char_top_ = box.top();
216  base_char_bottom_ = box.bottom();
217  }
219  box = cblob_ptr->bounding_box();
220  base_char_top_ = box.top();
221  base_char_bottom_ = box.bottom();
222  }
223  const TBOX& reduced_box() const {
224  return red_box;
225  }
226  void set_reduced_box(TBOX new_box) {
227  red_box = new_box;
228  reduced = TRUE;
229  }
231  return area;
232  }
233  bool joined_to_prev() const {
234  return joined != 0;
235  }
236  bool red_box_set() const {
237  return reduced != 0;
238  }
239  int repeated_set() const {
240  return repeated_set_;
241  }
242  void set_repeated_set(int set_id) {
243  repeated_set_ = set_id;
244  }
245  C_BLOB *cblob() const {
246  return cblob_ptr;
247  }
249  return left_tab_type_;
250  }
251  void set_left_tab_type(TabType new_type) {
252  left_tab_type_ = new_type;
253  }
255  return right_tab_type_;
256  }
257  void set_right_tab_type(TabType new_type) {
258  right_tab_type_ = new_type;
259  }
261  return region_type_;
262  }
264  region_type_ = new_type;
265  }
267  return spt_type_;
268  }
270  spt_type_ = new_type;
271  }
273  return flow_;
274  }
276  flow_ = value;
277  }
278  bool vert_possible() const {
279  return vert_possible_;
280  }
281  void set_vert_possible(bool value) {
282  vert_possible_ = value;
283  }
284  bool horz_possible() const {
285  return horz_possible_;
286  }
287  void set_horz_possible(bool value) {
288  horz_possible_ = value;
289  }
290  int left_rule() const {
291  return left_rule_;
292  }
293  void set_left_rule(int new_left) {
294  left_rule_ = new_left;
295  }
296  int right_rule() const {
297  return right_rule_;
298  }
299  void set_right_rule(int new_right) {
300  right_rule_ = new_right;
301  }
302  int left_crossing_rule() const {
303  return left_crossing_rule_;
304  }
305  void set_left_crossing_rule(int new_left) {
306  left_crossing_rule_ = new_left;
307  }
308  int right_crossing_rule() const {
309  return right_crossing_rule_;
310  }
311  void set_right_crossing_rule(int new_right) {
312  right_crossing_rule_ = new_right;
313  }
314  float horz_stroke_width() const {
315  return horz_stroke_width_;
316  }
317  void set_horz_stroke_width(float width) {
318  horz_stroke_width_ = width;
319  }
320  float vert_stroke_width() const {
321  return vert_stroke_width_;
322  }
323  void set_vert_stroke_width(float width) {
324  vert_stroke_width_ = width;
325  }
326  float area_stroke_width() const {
327  return area_stroke_width_;
328  }
330  return owner_;
331  }
333  owner_ = new_owner;
334  }
335  bool leader_on_left() const {
336  return leader_on_left_;
337  }
338  void set_leader_on_left(bool flag) {
339  leader_on_left_ = flag;
340  }
341  bool leader_on_right() const {
342  return leader_on_right_;
343  }
344  void set_leader_on_right(bool flag) {
345  leader_on_right_ = flag;
346  }
348  return neighbours_[n];
349  }
351  return good_stroke_neighbours_[n];
352  }
354  neighbours_[n] = neighbour;
355  good_stroke_neighbours_[n] = good;
356  }
357  bool IsDiacritic() const {
358  return base_char_top_ != box.top() || base_char_bottom_ != box.bottom();
359  }
360  int base_char_top() const {
361  return base_char_top_;
362  }
363  int base_char_bottom() const {
364  return base_char_bottom_;
365  }
366  int line_crossings() const {
367  return line_crossings_;
368  }
369  void set_line_crossings(int value) {
370  line_crossings_ = value;
371  }
372  void set_diacritic_box(const TBOX& diacritic_box) {
373  base_char_top_ = diacritic_box.top();
374  base_char_bottom_ = diacritic_box.bottom();
375  }
377  return base_char_blob_;
378  }
380  base_char_blob_ = blob;
381  }
382 
383  bool UniquelyVertical() const {
384  return vert_possible_ && !horz_possible_;
385  }
386  bool UniquelyHorizontal() const {
387  return horz_possible_ && !vert_possible_;
388  }
389 
390  // Returns true if the region type is text.
391  static bool IsTextType(BlobRegionType type) {
392  return type == BRT_TEXT || type == BRT_VERT_TEXT;
393  }
394  // Returns true if the region type is image.
395  static bool IsImageType(BlobRegionType type) {
396  return type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
397  }
398  // Returns true if the region type is line.
399  static bool IsLineType(BlobRegionType type) {
400  return type == BRT_HLINE || type == BRT_VLINE;
401  }
402  // Returns true if the region type cannot be merged.
403  static bool UnMergeableType(BlobRegionType type) {
404  return IsLineType(type) || IsImageType(type);
405  }
406  // Helper to call CleanNeighbours on all blobs on the list.
407  static void CleanNeighbours(BLOBNBOX_LIST* blobs);
408  // Helper to delete all the deletable blobs on the list.
409  static void DeleteNoiseBlobs(BLOBNBOX_LIST* blobs);
410 
411 #ifndef GRAPHICS_DISABLED
412  // Helper to draw all the blobs on the list in the given body_colour,
413  // with child outlines in the child_colour.
414  static void PlotBlobs(BLOBNBOX_LIST* list,
415  ScrollView::Color body_colour,
416  ScrollView::Color child_colour,
417  ScrollView* win);
418  // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
419  // given list in the given body_colour, with child outlines in the
420  // child_colour.
421  static void PlotNoiseBlobs(BLOBNBOX_LIST* list,
422  ScrollView::Color body_colour,
423  ScrollView::Color child_colour,
424  ScrollView* win);
425 
427  BlobTextFlowType flow_type);
428 
429  // Keep in sync with BlobRegionType.
430  ScrollView::Color BoxColor() const;
431 
432  void plot(ScrollView* window, // window to draw in
433  ScrollView::Color blob_colour, // for outer bits
434  ScrollView::Color child_colour); // for holes
435 #endif
436 
437  // Initializes the bulk of the members to default values for use at
438  // construction time.
440  cblob_ptr = NULL;
441  area = 0;
442  area_stroke_width_ = 0.0f;
443  horz_stroke_width_ = 0.0f;
444  vert_stroke_width_ = 0.0f;
445  ReInit();
446  }
447  // Initializes members set by StrokeWidth and beyond, without discarding
448  // stored area and strokewidth values, which are expensive to calculate.
449  void ReInit() {
450  joined = false;
451  reduced = false;
452  repeated_set_ = 0;
453  left_tab_type_ = TT_NONE;
454  right_tab_type_ = TT_NONE;
455  region_type_ = BRT_UNKNOWN;
456  flow_ = BTFT_NONE;
457  spt_type_ = BSTT_SKIP;  // Note: the reset value is SKIP, not BSTT_NONE.
458  left_rule_ = 0;
459  right_rule_ = 0;
460  left_crossing_rule_ = 0;
461  right_crossing_rule_ = 0;
462  if (area_stroke_width_ == 0.0f && area > 0 && cblob() != NULL)  // Still unset.
463  area_stroke_width_ = 2.0f * area / cblob()->perimeter();
464  owner_ = NULL;
465  base_char_top_ = box.top();  // Top/bottom equal to the box means
466  base_char_bottom_ = box.bottom();  // IsDiacritic() returns false.
467  line_crossings_ = 0;
468  base_char_blob_ = NULL;
469  horz_possible_ = false;
470  vert_possible_ = false;
471  leader_on_left_ = false;
472  leader_on_right_ = false;
473  ClearNeighbours();
474  }
475 
477  for (int n = 0; n < BND_COUNT; ++n) {
478  neighbours_[n] = NULL;
479  good_stroke_neighbours_[n] = false;
480  }
481  }
482 
483  private:
484  C_BLOB *cblob_ptr; // edgestep blob
485  TBOX box; // bounding box
486  TBOX red_box; // reduced bounding box, stored by set_reduced_box
487  int area:30; // enclosed area
488  int joined:1; // joined to prev (1-bit field; read via != 0)
489  int reduced:1; // reduced box set (1-bit field; read via != 0)
490  int repeated_set_; // id of the set of repeated blobs
491  TabType left_tab_type_; // Indicates tab-stop assessment
492  TabType right_tab_type_; // Indicates tab-stop assessment
493  BlobRegionType region_type_; // Type of region this blob belongs to
494  BlobTextFlowType flow_; // Quality of text flow.
495  inT16 left_rule_; // x-coord of nearest but not crossing rule line
496  inT16 right_rule_; // x-coord of nearest but not crossing rule line
497  inT16 left_crossing_rule_; // x-coord of nearest or crossing rule line
498  inT16 right_crossing_rule_; // x-coord of nearest or crossing rule line
499  inT16 base_char_top_; // y-coord of top/bottom of diacritic base,
500  inT16 base_char_bottom_; // if it exists else top/bottom of this blob.
501  int line_crossings_; // Number of line intersections touched.
502  BLOBNBOX* base_char_blob_; // The blob that was the base char.
503  float horz_stroke_width_; // Median horizontal stroke width
504  float vert_stroke_width_; // Median vertical stroke width
505  float area_stroke_width_; // Stroke width from 2 * area / perimeter.
506  tesseract::ColPartition* owner_; // Who will delete me when I am not needed
507  BlobSpecialTextType spt_type_; // Special text type.
508  BLOBNBOX* neighbours_[BND_COUNT]; // Neighbouring blob per slot, or NULL.
509  bool good_stroke_neighbours_[BND_COUNT]; // Flag stored with each neighbour.
510  bool horz_possible_; // Could be part of horizontal flow.
511  bool vert_possible_; // Could be part of vertical flow.
512  bool leader_on_left_; // There is a leader to the left.
513  bool leader_on_right_; // There is a leader to the right.
514 };
515 
516 class TO_ROW: public ELIST2_LINK
517 {
518  public:
519  static const int kErrorWeight = 3;
520 
521  TO_ROW() {
522  clear();
523  } //empty
524  TO_ROW( //constructor
525  BLOBNBOX *blob, //from first blob
526  float top, //of row //target height
527  float bottom,
528  float row_size);
529 
530  float max_y() const { //access function
531  return y_max;
532  }
533  float min_y() const {
534  return y_min;
535  }
536  float mean_y() const {
537  return (y_min + y_max) / 2.0f;
538  }
539  float initial_min_y() const {
540  return initial_y_min;
541  }
542  float line_m() const { //access to line fit
543  return m;
544  }
545  float line_c() const {
546  return c;
547  }
548  float line_error() const {
549  return error;
550  }
551  float parallel_c() const {
552  return para_c;
553  }
554  float parallel_error() const {
555  return para_error;
556  }
557  float believability() const { //baseline goodness
558  return credibility;
559  }
560  float intercept() const { //real parallel_c
561  return y_origin;
562  }
563  void add_blob( //put in row
564  BLOBNBOX *blob, //blob to add
565  float top, //of row //target height
566  float bottom,
567  float row_size);
568  void insert_blob( //put in row in order
569  BLOBNBOX *blob);
570 
571  BLOBNBOX_LIST *blob_list() { //get list
572  return &blobs;
573  }
574 
575  void set_line( //set line spec
576  float new_m, //line to set
577  float new_c,
578  float new_error) {
579  m = new_m;
580  c = new_c;
581  error = new_error;
582  }
583  void set_parallel_line( //set fixed gradient line
584  float gradient, //page gradient
585  float new_c,
586  float new_error) {
587  para_c = new_c;
588  para_error = new_error;
589  credibility =
590  (float) (blobs.length () - kErrorWeight * new_error);
591  y_origin = (float) (new_c / sqrt (1 + gradient * gradient));
592  //real intercept
593  }
594  void set_limits( //set min,max
595  float new_min, //bottom and
596  float new_max) { //top of row
597  y_min = new_min;
598  y_max = new_max;
599  }
601  //get projection
602 
603  bool rep_chars_marked() const {
604  return num_repeated_sets_ != -1;
605  }
607  num_repeated_sets_ = -1;
608  }
609  int num_repeated_sets() const {
610  return num_repeated_sets_;
611  }
612  void set_num_repeated_sets(int num_sets) {
613  num_repeated_sets_ = num_sets;
614  }
615 
616  // true when dead
618  BOOL8 all_caps; // had no ascenders
619  BOOL8 used_dm_model; // in guessing pitch
620  inT16 projection_left; // start of projection
621  inT16 projection_right; // end of projection
622  PITCH_TYPE pitch_decision; // how strong is decision
623  float fixed_pitch; // pitch or 0
624  float fp_space; // space if fixed pitch
625  float fp_nonsp; // non-space if fixed pitch
626  float pr_space; // space if proportional
627  float pr_nonsp; // non-space if proportional
628  float spacing; // to "next" row
629  float xheight; // of line
630  int xheight_evidence; // number of blobs of height xheight
631  float ascrise; // ascenders
632  float descdrop; // descenders
633  float body_size; // of CJK characters. Assumed to be
634  // xheight+ascrise for non-CJK text.
635  inT32 min_space; // min size for real space
636  inT32 max_nonspace; // max size of non-space
637  inT32 space_threshold; // space vs nonspace
638  float kern_size; // average non-space
639  float space_size; // average space
640  WERD_LIST rep_words; // repeated chars
641  ICOORDELT_LIST char_cells; // fixed pitch cells
642  QSPLINE baseline; // curved baseline
643  STATS projection; // vertical projection
644 
645  private:
646  void clear(); // clear all values to reasonable defaults
647 
648  BLOBNBOX_LIST blobs; //blobs in row
649  float y_min; //coords
650  float y_max;
651  float initial_y_min;
652  float m, c; //line spec
653  float error; //line error
654  float para_c; //constrained fit
655  float para_error;
656  float y_origin; //rotated para_c;
657  float credibility; //baseline believability
658  int num_repeated_sets_; // number of sets of repeated blobs
659  // set to -1 if we have not searched
660  // for repeated blobs in this row yet
661 };
662 
664 class TO_BLOCK:public ELIST_LINK
665 {
666  public:
667  TO_BLOCK() : pitch_decision(PITCH_DUNNO) {
668  clear();
669  } //empty
670  TO_BLOCK( //constructor
671  BLOCK *src_block); //real block
672  ~TO_BLOCK();
673 
674  void clear(); // clear all scalar members.
675 
676  TO_ROW_LIST *get_rows() { //access function
677  return &row_list;
678  }
679 
680  // Rotate all the blobnbox lists and the underlying block. Then update the
681  // median size statistic from the blobs list.
682  void rotate(const FCOORD& rotation) {
683  BLOBNBOX_LIST* blobnbox_list[] = {&blobs, &underlines, &noise_blobs,
684  &small_blobs, &large_blobs, NULL};
685  for (BLOBNBOX_LIST** list = blobnbox_list; *list != NULL; ++list) {
686  BLOBNBOX_IT it(*list);
687  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
688  it.data()->rotate(rotation);
689  }
690  }
691  // Rotate the block
692  ASSERT_HOST(block->poly_block() != NULL);
693  block->rotate(rotation);
694  // Update the median size statistic from the blobs list.
695  STATS widths(0, block->bounding_box().width());
696  STATS heights(0, block->bounding_box().height());
697  BLOBNBOX_IT blob_it(&blobs);
698  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
699  widths.add(blob_it.data()->bounding_box().width(), 1);
700  heights.add(blob_it.data()->bounding_box().height(), 1);
701  }
702  block->set_median_size(static_cast<int>(widths.median() + 0.5),
703  static_cast<int>(heights.median() + 0.5));
704  }
705 
706  void print_rows() { //debug info
707  TO_ROW_IT row_it = &row_list;
708  TO_ROW *row;
709 
710  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
711  row_it.forward ()) {
712  row = row_it.data ();
713  printf ("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT
714  "\n", row->min_y (), row->max_y (), row->parallel_c (),
715  row->blob_list ()->length ());
716  }
717  }
718 
719  // Reorganizes the blob lists with a different definition of small, medium
720  // and large, compared to the original definition.
721  // Height is still the primary filter key, but medium width blobs of small
722  // height become medium, and very wide blobs of small height stay small.
723  void ReSetAndReFilterBlobs();
724 
725  // Deletes noise blobs from all lists where not owned by a ColPartition.
726  void DeleteUnownedNoise();
727 
728 #ifndef GRAPHICS_DISABLED
729  // Draw the noise blobs from all lists in red.
730  void plot_noise_blobs(ScrollView* to_win);
731  // Draw the blobs on the various lists in the block in different colors.
732  void plot_graded_blobs(ScrollView* to_win);
733 #endif
734 
735  BLOBNBOX_LIST blobs; //medium size
736  BLOBNBOX_LIST underlines; //underline blobs
737  BLOBNBOX_LIST noise_blobs; //very small
738  BLOBNBOX_LIST small_blobs; //fairly small
739  BLOBNBOX_LIST large_blobs; //big blobs
740  BLOCK *block; //real block
741  PITCH_TYPE pitch_decision; //how strong is decision
742  float line_spacing; //estimate
743  // line_size is a lower-bound estimate of the font size in pixels of
744  // the text in the block (with ascenders and descenders), being a small
745  // (1.25) multiple of the median height of filtered blobs.
746  // In most cases the font size will be bigger, but it will be closer
747  // if the text is allcaps, or in a no-x-height script.
748  float line_size; //estimate
749  float max_blob_size; //line assignment limit
750  float baseline_offset; //phase shift
751  float xheight; //median blob size
752  float fixed_pitch; //pitch or 0
753  float kern_size; //average non-space
754  float space_size; //average space
755  inT32 min_space; //min definite space
756  inT32 max_nonspace; //max definite
757  float fp_space; //sp if fixed pitch
758  float fp_nonsp; //nonsp if fixed pitch
759  float pr_space; //sp if prop
760  float pr_nonsp; //non sp if prop
761  TO_ROW *key_row; //starting row
762 
763  private:
764  TO_ROW_LIST row_list; //temporary rows
765 };
766 
768 extern double_VAR_H (textord_error_weight, 3,
769 "Weighting for error in believability");
770 void find_cblob_limits( //get y limits
771  C_BLOB *blob, //blob to search
772  float leftx, //x limits
773  float rightx,
774  FCOORD rotation, //for landscape
775  float &ymin, //output y limits
776  float &ymax);
777 void find_cblob_vlimits( //get y limits
778  C_BLOB *blob, //blob to search
779  float leftx, //x limits
780  float rightx,
781  float &ymin, //output y limits
782  float &ymax);
783 void find_cblob_hlimits( //get x limits
784  C_BLOB *blob, //blob to search
785  float bottomy, //y limits
786  float topy,
787  float &xmin, //output x limits
788  float &xymax); // NOTE(review): presumably the max x output; name looks like a typo for xmax
789 C_BLOB *crotate_cblob( //rotate it
790  C_BLOB *blob, //blob to search
791  FCOORD rotation //for landscape
792  );
793 TBOX box_next( //get bounding box
794  BLOBNBOX_IT *it //iterator to blobs
795  );
796 TBOX box_next_pre_chopped( //get bounding box
797  BLOBNBOX_IT *it //iterator to blobs
798  );
799 void vertical_cblob_projection( //project outlines
800  C_BLOB *blob, //blob to project
801  STATS *stats //output
802  );
803 void vertical_coutline_projection( //project outlines
804  C_OUTLINE *outline, //outline to project
805  STATS *stats //output
806  );
807 #ifndef GRAPHICS_DISABLED
808 void plot_blob_list(ScrollView* win, // window to draw in
809  BLOBNBOX_LIST *list, // blob list
810  ScrollView::Color body_colour, // colour to draw
811  ScrollView::Color child_colour); // colour of child
812 #endif // GRAPHICS_DISABLED
813 #endif