Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rejctmap.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: rejctmap.h (Formerly rejmap.h)
3  * Description: REJ and REJMAP class functions.
4  * Author: Phil Cheatle
5  * Created: Thu Jun 9 13:46:38 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18 
19 This module may look unnecessarily verbose, but here's the philosophy...
20 
21 ALL processing of the reject map is done in this module. There are lots of
22 separate calls to set reject/accept flags. These have DELIBERATELY been kept
23 distinct so that this module can decide what to do.
24 
25 Basically, there is a flag for each sort of rejection or acceptance. This
26 provides a history of what has happened to EACH character.
27 
28 Determining whether a character is CURRENTLY rejected depends on implicit
29 understanding of the SEQUENCE of possible calls. The flags are defined and
30 grouped in the REJ_FLAGS enum. These groupings are used in determining a
31 character's CURRENT rejection status. Basically, a character is ACCEPTED if
32 
33  none of the permanent rej flags are set
34  AND ( the character has never been rejected
35  OR an accept flag is set which is LATER than the latest reject flag )
36 
37 IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
38 OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
39 **********************************************************************/
40 
41 #ifndef REJCTMAP_H
42 #define REJCTMAP_H
43 
44 #ifdef __UNIX__
45 #include <assert.h>
46 #endif
47 #include "memry.h"
48 #include "bits16.h"
49 #include "params.h"
50 #include "notdll.h"
51 
// One flag per kind of rejection or acceptance that can be recorded against a
// character.  The enumerator VALUES are used as bit positions by REJ::set_flag
// and REJ::flag: values 0..15 (R_TESS_FAILURE .. R_MOSTLY_REJ) live in the
// first 16-bit word, values 16 and above (R_XHT_FIXUP onwards) in the second.
// The groups below follow the implied temporal ordering described in the file
// header, so enumerators must not be reordered or inserted casually.
enum REJ_FLAGS
{
  /* Reject modes which are NEVER overridden */
  R_TESS_FAILURE,                // PERM Tess didn't classify
  R_SMALL_XHT,                   // PERM Xht too small
  R_EDGE_CHAR,                   // PERM Too close to edge of image
  R_1IL_CONFLICT,                // PERM 1Il confusion
  R_POSTNN_1IL,                  // PERM 1Il unrejected by NN
  R_REJ_CBLOB,                   // PERM Odd blob
  R_MM_REJECT,                   // PERM Matrix match rejection (m's)
  // NOTE(review): tagged TEMP although it sits in the "never overridden"
  // group - confirm which of the two is intended.
  R_BAD_REPETITION,              // TEMP Repeated char which doesn't match trend

  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH,                  // TEMP Ray's original heuristic (Not used)
  R_NOT_TESS_ACCEPTED,           // TEMP Tess didn't accept WERD
  R_CONTAINS_BLANKS,             // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER,                // POTENTIAL Bad permuter for WERD

  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
  R_HYPHEN,                      // TEMP Post NN dodgy hyphen or full stop
  R_DUBIOUS,                     // TEMP Post NN dodgy chars
  R_NO_ALPHANUMS,                // TEMP No alphanumerics in word after NN
  R_MOSTLY_REJ,                  // TEMP Most of word rejected so rej the rest
  R_XHT_FIXUP,                   // TEMP Xht tests unsure

  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
  R_BAD_QUALITY,                 // TEMP Quality metrics bad for WERD

  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accept */
  R_DOC_REJ,                     // TEMP Document rejection
  R_BLOCK_REJ,                   // TEMP Block rejection
  R_ROW_REJ,                     // TEMP Row rejection
  R_UNLV_REJ,                    // TEMP ~ turned to - or ^ turned to space

  /* Accept modes which occur in between the above rejection groups */
  R_NN_ACCEPT,                   // NN acceptance
  R_HYPHEN_ACCEPT,               // Hyphen acceptance
  R_MM_ACCEPT,                   // Matrix match acceptance
  R_QUALITY_ACCEPT,              // Accept word in good quality doc
  R_MINIMAL_REJ_ACCEPT           // Accept EVERYTHING except tess failures
};
93 
94 /* REJECT MAP VALUES */
95 
// Characters returned by REJ::display_char() to summarise a character's
// current state: accepted, permanently rejected, temporarily rejected, or
// rejected but acceptable if the quality is good ("potential").
#define MAP_ACCEPT '1'
#define MAP_REJECT_PERM '0'
#define MAP_REJECT_TEMP '2'
#define MAP_REJECT_POTENTIAL '3'
100 
101 class REJ
102 {
103  BITS16 flags1;
104  BITS16 flags2;
105 
106  void set_flag(REJ_FLAGS rej_flag) {
107  if (rej_flag < 16)
108  flags1.turn_on_bit (rej_flag);
109  else
110  flags2.turn_on_bit (rej_flag - 16);
111  }
112 
113  BOOL8 rej_before_nn_accept();
114  BOOL8 rej_between_nn_and_mm();
115  BOOL8 rej_between_mm_and_quality_accept();
116  BOOL8 rej_between_quality_and_minimal_rej_accept();
117  BOOL8 rej_before_mm_accept();
118  BOOL8 rej_before_quality_accept();
119 
120  public:
121  REJ() { //constructor
122  }
123 
124  REJ( //classwise copy
125  const REJ &source) {
126  flags1 = source.flags1;
127  flags2 = source.flags2;
128  }
129 
130  REJ & operator= ( //assign REJ
131  const REJ & source) { //from this
132  flags1 = source.flags1;
133  flags2 = source.flags2;
134  return *this;
135  }
136 
137  BOOL8 flag(REJ_FLAGS rej_flag) {
138  if (rej_flag < 16)
139  return flags1.bit (rej_flag);
140  else
141  return flags2.bit (rej_flag - 16);
142  }
143 
144  char display_char() {
145  if (perm_rejected ())
146  return MAP_REJECT_PERM;
147  else if (accept_if_good_quality ())
148  return MAP_REJECT_POTENTIAL;
149  else if (rejected ())
150  return MAP_REJECT_TEMP;
151  else
152  return MAP_ACCEPT;
153  }
154 
155  BOOL8 perm_rejected(); //Is char perm reject?
156 
157  BOOL8 rejected(); //Is char rejected?
158 
159  BOOL8 accepted() { //Is char accepted?
160  return !rejected ();
161  }
162 
163  //potential rej?
165 
167  return (rejected () && !perm_rejected ());
168  }
169 
170  void setrej_tess_failure(); //Tess generated blank
171  void setrej_small_xht(); //Small xht char/wd
172  void setrej_edge_char(); //Close to image edge
173  void setrej_1Il_conflict(); //Initial reject map
174  void setrej_postNN_1Il(); //1Il after NN
175  void setrej_rej_cblob(); //Insert duff blob
176  void setrej_mm_reject(); //Matrix matcher
177  //Odd repeated char
178  void setrej_bad_repetition();
179  void setrej_poor_match(); //Failed Rays heuristic
180  //TEMP reject_word
182  //TEMP reject_word
183  void setrej_contains_blanks();
184  void setrej_bad_permuter(); //POTENTIAL reject_word
185  void setrej_hyphen(); //PostNN dubious hyph or .
186  void setrej_dubious(); //PostNN dubious limit
187  void setrej_no_alphanums(); //TEMP reject_word
188  void setrej_mostly_rej(); //TEMP reject_word
189  void setrej_xht_fixup(); //xht fixup
190  void setrej_bad_quality(); //TEMP reject_word
191  void setrej_doc_rej(); //TEMP reject_word
192  void setrej_block_rej(); //TEMP reject_word
193  void setrej_row_rej(); //TEMP reject_word
194  void setrej_unlv_rej(); //TEMP reject_word
195  void setrej_nn_accept(); //NN Flipped a char
196  void setrej_hyphen_accept(); //Good aspect ratio
197  void setrej_mm_accept(); //Matrix matcher
198  //Quality flip a char
199  void setrej_quality_accept();
200  //Accept all except blank
202 
203  void full_print(FILE *fp);
204 };
205 
206 class REJMAP
207 {
208  REJ *ptr; //ptr to the chars
209  inT16 len; //Number of chars
210 
211  public:
212  REJMAP() { //constructor
213  ptr = NULL;
214  len = 0;
215  }
216 
217  REJMAP( //classwise copy
218  const REJMAP &rejmap);
219 
220  REJMAP & operator= ( //assign REJMAP
221  const REJMAP & source); //from this
222 
223  ~REJMAP () { //destructor
224  if (ptr != NULL)
225  free_struct (ptr, len * sizeof (REJ), "REJ");
226  }
227 
228  void initialise( //Redefine map
229  inT16 length);
230 
231  REJ & operator[]( //access function
232  inT16 index) const //map index
233  {
234  ASSERT_HOST (index < len);
235  return ptr[index]; //no bounds checks
236  }
237 
238  inT32 length() const { //map length
239  return len;
240  }
241 
242  inT16 accept_count(); //How many accepted?
243 
244  inT16 reject_count() { //How many rejects?
245  return len - accept_count ();
246  }
247 
248  void remove_pos( //Cut out an element
249  inT16 pos); //element to remove
250 
251  void print(FILE *fp);
252 
253  void full_print(FILE *fp);
254 
255  BOOL8 recoverable_rejects(); //Any non perm rejs?
256 
258  //Any potential rejs?
259 
260  void rej_word_small_xht(); //Reject whole word
261  //Reject whole word
262  void rej_word_tess_failure();
264  //Reject whole word
265  //Reject whole word
267  //Reject whole word
268  void rej_word_bad_permuter();
269  void rej_word_xht_fixup(); //Reject whole word
270  //Reject whole word
271  void rej_word_no_alphanums();
272  void rej_word_mostly_rej(); //Reject whole word
273  void rej_word_bad_quality(); //Reject whole word
274  void rej_word_doc_rej(); //Reject whole word
275  void rej_word_block_rej(); //Reject whole word
276  void rej_word_row_rej(); //Reject whole word
277 };
278 #endif