Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
trie.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: trie.c (Formerly trie.c)
5  * Description: Functions to build a trie data structure.
6  * Author: Mark Seaman, OCR Technology
7  * Created: Fri Oct 16 14:37:00 1987
8  * Modified: Fri Jul 26 12:18:10 1991 (Mark Seaman) marks@hpgrlt
9  * Language: C
10  * Package: N/A
11  * Status: Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *********************************************************************************/
25 /*----------------------------------------------------------------------
26  I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #ifdef _MSC_VER
29 #pragma warning(disable:4244) // Conversion warnings
30 #pragma warning(disable:4800) // int/bool warnings
31 #endif
32 #include "trie.h"
33 
34 #include "callcpp.h"
35 #include "dawg.h"
36 #include "dict.h"
37 #include "freelist.h"
38 #include "genericvector.h"
39 #include "helpers.h"
40 
41 namespace tesseract {
42 
43 const char kDoNotReverse[] = "RRP_DO_NO_REVERSE";
44 const char kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL";
45 const char kForceReverse[] = "RRP_FORCE_REVERSE";
46 
47 const char * const RTLReversePolicyNames[] = {
51 };
52 
53 const char Trie::kAlphaPatternUnicode[] = "\u2000";
54 const char Trie::kDigitPatternUnicode[] = "\u2001";
55 const char Trie::kAlphanumPatternUnicode[] = "\u2002";
56 const char Trie::kPuncPatternUnicode[] = "\u2003";
57 const char Trie::kLowerPatternUnicode[] = "\u2004";
58 const char Trie::kUpperPatternUnicode[] = "\u2005";
59 
60 const char *Trie::get_reverse_policy_name(RTLReversePolicy reverse_policy) {
61  return RTLReversePolicyNames[reverse_policy];
62 }
63 
64 // Reset the Trie to empty.
65 void Trie::clear() {
67  nodes_.clear();
68  num_edges_ = 0;
69  new_dawg_node(); // Need to allocate node 0.
70 }
71 
72 bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node,
73  int direction, bool word_end, UNICHAR_ID unichar_id,
74  EDGE_RECORD **edge_ptr, EDGE_INDEX *edge_index) const {
75  if (debug_level_ == 3) {
76  tprintf("edge_char_of() given node_ref " REFFORMAT " next_node " REFFORMAT
77  " direction %d word_end %d unichar_id %d, exploring node:\n",
78  node_ref, next_node, direction, word_end, unichar_id);
79  if (node_ref != NO_EDGE) {
80  print_node(node_ref, nodes_[node_ref]->forward_edges.size());
81  }
82  }
83  if (node_ref == NO_EDGE) return false;
84  assert(node_ref < nodes_.size());
85  EDGE_VECTOR &vec = (direction == FORWARD_EDGE) ?
86  nodes_[node_ref]->forward_edges : nodes_[node_ref]->backward_edges;
87  int vec_size = vec.size();
88  if (node_ref == 0) { // binary search
89  EDGE_INDEX start = 0;
90  EDGE_INDEX end = vec_size - 1;
91  EDGE_INDEX k;
92  int compare;
93  while (start <= end) {
94  k = (start + end) >> 1; // (start + end) / 2
95  compare = given_greater_than_edge_rec(next_node, word_end,
96  unichar_id, vec[k]);
97  if (compare == 0) { // given == vec[k]
98  *edge_ptr = &(vec[k]);
99  *edge_index = k;
100  return true;
101  } else if (compare == 1) { // given > vec[k]
102  start = k + 1;
103  } else { // given < vec[k]
104  end = k - 1;
105  }
106  }
107  } else { // linear search
108  for (int i = 0; i < vec_size; ++i) {
109  EDGE_RECORD &edge_rec = vec[i];
110  if (edge_rec_match(next_node, word_end, unichar_id,
111  next_node_from_edge_rec(edge_rec),
112  end_of_word_from_edge_rec(edge_rec),
113  unichar_id_from_edge_rec(edge_rec))) {
114  *edge_ptr = &(edge_rec);
115  *edge_index = i;
116  return true;
117  }
118  }
119  }
120  return false; // not found
121 }
122 
123 bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, bool marker_flag,
124  int direction, bool word_end,
125  UNICHAR_ID unichar_id) {
126  if (num_edges_ == max_num_edges_) return false;
127  EDGE_VECTOR *vec = (direction == FORWARD_EDGE) ?
128  &(nodes_[node1]->forward_edges) : &(nodes_[node1]->backward_edges);
129  int search_index;
130  if (node1 == 0) {
131  search_index = 0; // find the index to make the add sorted
132  while (search_index < vec->size() &&
133  given_greater_than_edge_rec(node2, word_end, unichar_id,
134  (*vec)[search_index]) == 1) {
135  search_index++;
136  }
137  } else {
138  search_index = vec->size(); // add is unsorted, so index does not matter
139  }
140  EDGE_RECORD edge_rec;
141  link_edge(&edge_rec, node2, marker_flag, direction, word_end, unichar_id);
142  if (search_index < vec->size()) {
143  vec->insert(edge_rec, search_index);
144  } else {
145  vec->push_back(edge_rec);
146  }
147  if (debug_level_ > 1) {
148  tprintf("new edge in nodes_[" REFFORMAT "]: ", node1);
149  print_edge_rec(edge_rec);
150  tprintf("\n");
151  }
152  num_edges_++;
153  return true;
154 }
155 
157  NODE_REF the_next_node,
158  bool marker_flag,
159  UNICHAR_ID unichar_id) {
160  EDGE_RECORD *back_edge_ptr;
161  EDGE_INDEX back_edge_index;
162  ASSERT_HOST(edge_char_of(the_next_node, NO_EDGE, BACKWARD_EDGE, false,
163  unichar_id, &back_edge_ptr, &back_edge_index));
164  if (marker_flag) {
165  *back_edge_ptr |= (MARKER_FLAG << flag_start_bit_);
166  *edge_ptr |= (MARKER_FLAG << flag_start_bit_);
167  }
168  // Mark both directions as end of word.
169  *back_edge_ptr |= (WERD_END_FLAG << flag_start_bit_);
170  *edge_ptr |= (WERD_END_FLAG << flag_start_bit_);
171 }
172 
174  const GenericVector<bool> *repetitions) {
175  if (word.length() <= 0) return false; // can't add empty words
176  if (repetitions != NULL) ASSERT_HOST(repetitions->size() == word.length());
177  // Make sure the word does not contain invalid unchar ids.
178  for (int i = 0; i < word.length(); ++i) {
179  if (word.unichar_id(i) < 0 ||
180  word.unichar_id(i) >= unicharset_size_) return false;
181  }
182 
183  EDGE_RECORD *edge_ptr;
184  NODE_REF last_node = 0;
185  NODE_REF the_next_node;
186  bool marker_flag = false;
187  EDGE_INDEX edge_index;
188  int i;
189  inT32 still_finding_chars = true;
190  inT32 word_end = false;
191  bool add_failed = false;
192  bool found;
193 
194  if (debug_level_ > 1) word.print("\nAdding word: ");
195 
196  UNICHAR_ID unichar_id;
197  for (i = 0; i < word.length() - 1; ++i) {
198  unichar_id = word.unichar_id(i);
199  marker_flag = (repetitions != NULL) ? (*repetitions)[i] : false;
200  if (debug_level_ > 1) tprintf("Adding letter %d\n", unichar_id);
201  if (still_finding_chars) {
202  found = edge_char_of(last_node, NO_EDGE, FORWARD_EDGE, word_end,
203  unichar_id, &edge_ptr, &edge_index);
204  if (found && debug_level_ > 1) {
205  tprintf("exploring edge " REFFORMAT " in node " REFFORMAT "\n",
206  edge_index, last_node);
207  }
208  if (!found) {
209  still_finding_chars = false;
210  } else if (next_node_from_edge_rec(*edge_ptr) == 0) {
211  word_end = true;
212  still_finding_chars = false;
213  remove_edge(last_node, 0, word_end, unichar_id);
214  } else {
215  if (marker_flag) set_marker_flag_in_edge_rec(edge_ptr);
216  last_node = next_node_from_edge_rec(*edge_ptr);
217  }
218  }
219  if (!still_finding_chars) {
220  the_next_node = new_dawg_node();
221  if (debug_level_ > 1)
222  tprintf("adding node " REFFORMAT "\n", the_next_node);
223  if (the_next_node == 0) {
224  add_failed = true;
225  break;
226  }
227  if (!add_new_edge(last_node, the_next_node,
228  marker_flag, word_end, unichar_id)) {
229  add_failed = true;
230  break;
231  }
232  word_end = false;
233  last_node = the_next_node;
234  }
235  }
236  the_next_node = 0;
237  unichar_id = word.unichar_id(i);
238  marker_flag = (repetitions != NULL) ? (*repetitions)[i] : false;
239  if (debug_level_ > 1) tprintf("Adding letter %d\n", unichar_id);
240  if (still_finding_chars &&
241  edge_char_of(last_node, NO_EDGE, FORWARD_EDGE, false,
242  unichar_id, &edge_ptr, &edge_index)) {
243  // An extension of this word already exists in the trie, so we
244  // only have to add the ending flags in both directions.
245  add_word_ending(edge_ptr, next_node_from_edge_rec(*edge_ptr),
246  marker_flag, unichar_id);
247  } else {
248  if (!add_failed &&
249  !add_new_edge(last_node, the_next_node, marker_flag, true, unichar_id))
250  add_failed = true;
251  }
252  if (add_failed) {
253  tprintf("Re-initializing document dictionary...\n");
254  clear();
255  return false;
256  } else {
257  return true;
258  }
259 }
260 
262  TRIE_NODE_RECORD *node = new TRIE_NODE_RECORD();
263  if (node == NULL) return 0; // failed to create new node
264  nodes_.push_back(node);
265  return nodes_.length() - 1;
266 }
267 
269  const UNICHARSET &unicharset,
270  Trie::RTLReversePolicy reverse_policy) {
271  FILE *word_file;
272  char string[CHARS_PER_LINE];
273  int word_count = 0;
274 
275  word_file = open_file (filename, "r");
276 
277  while (fgets(string, CHARS_PER_LINE, word_file) != NULL) {
278  chomp_string(string); // remove newline
279  WERD_CHOICE word(string, unicharset);
280  if ((reverse_policy == RRP_REVERSE_IF_HAS_RTL &&
281  word.has_rtl_unichar_id()) ||
282  reverse_policy == RRP_FORCE_REVERSE) {
284  }
285  ++word_count;
286  if (debug_level_ && word_count % 10000 == 0)
287  tprintf("Read %d words so far\n", word_count);
288  if (word.length() != 0 && !word.contains_unichar_id(INVALID_UNICHAR_ID)) {
289  if (!this->word_in_dawg(word)) {
290  this->add_word_to_dawg(word);
291  if (!this->word_in_dawg(word)) {
292  tprintf("Error: word '%s' not in DAWG after adding it\n", string);
293  return false;
294  }
295  }
296  } else if (debug_level_) {
297  tprintf("Skipping invalid word %s\n", string);
298  if (debug_level_ >= 3) word.print();
299  }
300  }
301  if (debug_level_)
302  tprintf("Read %d words total.\n", word_count);
303  fclose(word_file);
304  return true;
305 }
306 
314  unicharset->unichar_insert(kPuncPatternUnicode);
320  initialized_patterns_ = true;
321  unicharset_size_ = unicharset->size();
322 }
323 
325  const UNICHARSET &unicharset,
326  GenericVector<UNICHAR_ID> *vec) const {
327  bool is_alpha = unicharset.get_isalpha(unichar_id);
328  if (is_alpha) {
331  if (unicharset.get_islower(unichar_id)) {
333  } else if (unicharset.get_isupper(unichar_id)) {
335  }
336  }
337  if (unicharset.get_isdigit(unichar_id)) {
339  if (!is_alpha) vec->push_back(alphanum_pattern_);
340  }
341  if (unicharset.get_ispunctuation(unichar_id)) {
342  vec->push_back(punc_pattern_);
343  }
344 }
345 
347  if (ch == 'c') {
348  return alpha_pattern_;
349  } else if (ch == 'd') {
350  return digit_pattern_;
351  } else if (ch == 'n') {
352  return alphanum_pattern_;
353  } else if (ch == 'p') {
354  return punc_pattern_;
355  } else if (ch == 'a') {
356  return lower_pattern_;
357  } else if (ch == 'A') {
358  return upper_pattern_;
359  } else {
360  return INVALID_UNICHAR_ID;
361  }
362 }
363 
365  const UNICHARSET &unicharset) {
366  if (!initialized_patterns_) {
367  tprintf("please call initialize_patterns() before read_pattern_list()\n");
368  return false;
369  }
370 
371  FILE *pattern_file = open_file (filename, "r");
372  if (pattern_file == NULL) {
373  tprintf("Error opening pattern file %s\n", filename);
374  return false;
375  }
376 
377  int pattern_count = 0;
378  char string[CHARS_PER_LINE];
379  while (fgets(string, CHARS_PER_LINE, pattern_file) != NULL) {
380  chomp_string(string); // remove newline
381  // Parse the pattern and construct a unichar id vector.
382  // Record the number of repetitions of each unichar in the parallel vector.
383  WERD_CHOICE word(&unicharset);
384  GenericVector<bool> repetitions_vec;
385  const char *str_ptr = string;
386  int step = unicharset.step(str_ptr);
387  bool failed = false;
388  while (step > 0) {
389  UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID;
390  if (step == 1 && *str_ptr == '\\') {
391  ++str_ptr;
392  if (*str_ptr == '\\') { // regular '\' unichar that was escaped
393  curr_unichar_id = unicharset.unichar_to_id(str_ptr, step);
394  } else {
395  if (word.length() < kSaneNumConcreteChars) {
396  tprintf("Please provide at least %d concrete characters at the"
397  " beginning of the pattern\n", kSaneNumConcreteChars);
398  failed = true;
399  break;
400  }
401  // Parse character class from expression.
402  curr_unichar_id = character_class_to_pattern(*str_ptr);
403  }
404  } else {
405  curr_unichar_id = unicharset.unichar_to_id(str_ptr, step);
406  }
407  if (curr_unichar_id == INVALID_UNICHAR_ID) {
408  failed = true;
409  break; // failed to parse this pattern
410  }
411  word.append_unichar_id(curr_unichar_id, 1, 0.0, 0.0);
412  repetitions_vec.push_back(false);
413  str_ptr += step;
414  step = unicharset.step(str_ptr);
415  // Check if there is a repetition pattern specified after this unichar.
416  if (step == 1 && *str_ptr == '\\' && *(str_ptr+1) == '*') {
417  repetitions_vec[repetitions_vec.size()-1] = true;
418  str_ptr += 2;
419  step = unicharset.step(str_ptr);
420  }
421  }
422  if (failed) {
423  tprintf("Invalid user pattern %s\n", string);
424  continue;
425  }
426  // Insert the pattern into the trie.
427  if (debug_level_ > 2) {
428  tprintf("Inserting expanded user pattern %s\n",
429  word.debug_string().string());
430  }
431  if (!this->word_in_dawg(word)) {
432  this->add_word_to_dawg(word, &repetitions_vec);
433  if (!this->word_in_dawg(word)) {
434  tprintf("Error: failed to insert pattern '%s'\n", string);
435  }
436  }
437  ++pattern_count;
438  }
439  if (debug_level_) {
440  tprintf("Read %d valid patterns from %s\n", pattern_count, filename);
441  }
442  fclose(pattern_file);
443  return true;
444 }
445 
447  bool word_end, UNICHAR_ID unichar_id) {
448  EDGE_RECORD *edge_ptr = NULL;
449  EDGE_INDEX edge_index = 0;
450  ASSERT_HOST(edge_char_of(node1, node2, direction, word_end,
451  unichar_id, &edge_ptr, &edge_index));
452  if (debug_level_ > 1) {
453  tprintf("removed edge in nodes_[" REFFORMAT "]: ", node1);
454  print_edge_rec(*edge_ptr);
455  tprintf("\n");
456  }
457  if (direction == FORWARD_EDGE) {
458  nodes_[node1]->forward_edges.remove(edge_index);
459  } else {
460  nodes_[node1]->backward_edges.remove(edge_index);
461  }
462  --num_edges_;
463 }
464 
466  if (debug_level_ > 2) {
467  print_all("Before reduction:", MAX_NODE_EDGES_DISPLAY);
468  }
469  NODE_MARKER reduced_nodes = new bool[nodes_.size()];
470  for (int i = 0; i < nodes_.size(); i++) reduced_nodes[i] = 0;
471  this->reduce_node_input(0, reduced_nodes);
472  delete[] reduced_nodes;
473 
474  if (debug_level_ > 2) {
475  print_all("After reduction:", MAX_NODE_EDGES_DISPLAY);
476  }
477  // Build a translation map from node indices in nodes_ vector to
478  // their target indices in EDGE_ARRAY.
479  NODE_REF *node_ref_map = new NODE_REF[nodes_.size() + 1];
480  int i, j;
481  node_ref_map[0] = 0;
482  for (i = 0; i < nodes_.size(); ++i) {
483  node_ref_map[i+1] = node_ref_map[i] + nodes_[i]->forward_edges.size();
484  }
485  int num_forward_edges = node_ref_map[i];
486 
487  // Convert nodes_ vector into EDGE_ARRAY translating the next node references
488  // in edges using node_ref_map. Empty nodes and backward edges are dropped.
489  EDGE_ARRAY edge_array =
490  (EDGE_ARRAY)memalloc(num_forward_edges * sizeof(EDGE_RECORD));
491  EDGE_ARRAY edge_array_ptr = edge_array;
492  for (i = 0; i < nodes_.size(); ++i) {
493  TRIE_NODE_RECORD *node_ptr = nodes_[i];
494  int end = node_ptr->forward_edges.size();
495  for (j = 0; j < end; ++j) {
496  EDGE_RECORD &edge_rec = node_ptr->forward_edges[j];
497  NODE_REF node_ref = next_node_from_edge_rec(edge_rec);
498  ASSERT_HOST(node_ref < nodes_.size());
499  UNICHAR_ID unichar_id = unichar_id_from_edge_rec(edge_rec);
500  link_edge(edge_array_ptr, node_ref_map[node_ref], false, FORWARD_EDGE,
501  end_of_word_from_edge_rec(edge_rec), unichar_id);
502  if (j == end - 1) set_marker_flag_in_edge_rec(edge_array_ptr);
503  ++edge_array_ptr;
504  }
505  }
506  delete[] node_ref_map;
507 
508  return new SquishedDawg(edge_array, num_forward_edges, type_, lang_,
510 }
511 
513  const EDGE_RECORD &edge1,
514  const EDGE_RECORD &edge2) {
515  if (debug_level_ > 1) {
516  tprintf("\nCollapsing node %d:\n", node);
518  tprintf("Candidate edges: ");
519  print_edge_rec(edge1);
520  tprintf(", ");
521  print_edge_rec(edge2);
522  tprintf("\n\n");
523  }
524  NODE_REF next_node1 = next_node_from_edge_rec(edge1);
525  NODE_REF next_node2 = next_node_from_edge_rec(edge2);
526  TRIE_NODE_RECORD *next_node2_ptr = nodes_[next_node2];
527  // Translate all edges going to/from next_node2 to go to/from next_node1.
528  EDGE_RECORD *edge_ptr = NULL;
529  EDGE_INDEX edge_index;
530  int i;
531  // Remove the backward link in node to next_node2.
532  const EDGE_RECORD &fwd_edge = next_node2_ptr->forward_edges[0];
533  remove_edge_linkage(node, next_node2, BACKWARD_EDGE,
534  end_of_word_from_edge_rec(fwd_edge),
535  unichar_id_from_edge_rec(fwd_edge));
536  // Copy all the backward links in next_node2 to node next_node1
537  for (i = 0; i < next_node2_ptr->backward_edges.size(); ++i) {
538  const EDGE_RECORD &bkw_edge = next_node2_ptr->backward_edges[i];
539  NODE_REF curr_next_node = next_node_from_edge_rec(bkw_edge);
540  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(bkw_edge);
541  int curr_word_end = end_of_word_from_edge_rec(bkw_edge);
542  bool marker_flag = marker_flag_from_edge_rec(bkw_edge);
543  add_edge_linkage(next_node1, curr_next_node, marker_flag, BACKWARD_EDGE,
544  curr_word_end, curr_unichar_id);
545  // Relocate the corresponding forward edge in curr_next_node
546  ASSERT_HOST(edge_char_of(curr_next_node, next_node2, FORWARD_EDGE,
547  curr_word_end, curr_unichar_id,
548  &edge_ptr, &edge_index));
549  set_next_node_in_edge_rec(edge_ptr, next_node1);
550  }
551  int next_node2_num_edges = (next_node2_ptr->forward_edges.size() +
552  next_node2_ptr->backward_edges.size());
553  if (debug_level_ > 1) {
554  tprintf("removed %d edges from node " REFFORMAT "\n",
555  next_node2_num_edges, next_node2);
556  }
557  next_node2_ptr->forward_edges.clear();
558  next_node2_ptr->backward_edges.clear();
559  num_edges_ -= next_node2_num_edges;
560  return true;
561 }
562 
564  UNICHAR_ID unichar_id,
565  NODE_REF node,
566  const EDGE_VECTOR &backward_edges,
567  NODE_MARKER reduced_nodes) {
568  if (debug_level_ > 1)
569  tprintf("reduce_lettered_edges(edge=" REFFORMAT ")\n", edge_index);
570  // Compare each of the edge pairs with the given unichar_id.
571  bool did_something = false;
572  for (int i = edge_index; i < backward_edges.size() - 1; ++i) {
573  // Find the first edge that can be eliminated.
574  UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID;
575  while (i < backward_edges.size() &&
576  ((curr_unichar_id = unichar_id_from_edge_rec(backward_edges[i])) ==
577  unichar_id) &&
578  !can_be_eliminated(backward_edges[i])) ++i;
579  if (i == backward_edges.size() || curr_unichar_id != unichar_id) break;
580  const EDGE_RECORD &edge_rec = backward_edges[i];
581  // Compare it to the rest of the edges with the given unichar_id.
582  for (int j = i + 1; j < backward_edges.size(); ++j) {
583  const EDGE_RECORD &next_edge_rec = backward_edges[j];
584  if (unichar_id_from_edge_rec(next_edge_rec) != unichar_id) break;
585  if (end_of_word_from_edge_rec(next_edge_rec) ==
586  end_of_word_from_edge_rec(edge_rec) &&
587  can_be_eliminated(next_edge_rec) &&
588  eliminate_redundant_edges(node, edge_rec, next_edge_rec)) {
589  reduced_nodes[next_node_from_edge_rec(edge_rec)] = 0;
590  did_something = true;
591  --j; // do not increment j if next_edge_rec was removed
592  }
593  }
594  }
595  return did_something;
596 }
597 
599  int num_edges = edges->size();
600  if (num_edges <= 1) return;
601  for (int i = 0; i < num_edges - 1; ++i) {
602  int min = i;
603  for (int j = (i + 1); j < num_edges; ++j) {
604  if (unichar_id_from_edge_rec((*edges)[j]) <
605  unichar_id_from_edge_rec((*edges)[min])) min = j;
606  }
607  if (i != min) {
608  EDGE_RECORD temp = (*edges)[i];
609  (*edges)[i] = (*edges)[min];
610  (*edges)[min] = temp;
611  }
612  }
613 }
614 
616  NODE_MARKER reduced_nodes) {
617  if (debug_level_ > 1) {
618  tprintf("reduce_node_input(node=" REFFORMAT ")\n", node);
620  }
621 
622  EDGE_VECTOR &backward_edges = nodes_[node]->backward_edges;
623  if (node != 0) sort_edges(&backward_edges);
624  EDGE_INDEX edge_index = 0;
625  while (edge_index < backward_edges.size()) {
626  UNICHAR_ID unichar_id =
627  unichar_id_from_edge_rec(backward_edges[edge_index]);
628  while (reduce_lettered_edges(edge_index, unichar_id, node,
629  backward_edges, reduced_nodes));
630  while (++edge_index < backward_edges.size() &&
631  unichar_id_from_edge_rec(backward_edges[edge_index]) == unichar_id);
632  }
633  reduced_nodes[node] = true; // mark as reduced
634 
635  if (debug_level_ > 1) {
636  tprintf("Node " REFFORMAT " after reduction:\n", node);
638  }
639 
640  for (int i = 0; i < backward_edges.size(); ++i) {
641  NODE_REF next_node = next_node_from_edge_rec(backward_edges[i]);
642  if (next_node != 0 && !reduced_nodes[next_node]) {
643  reduce_node_input(next_node, reduced_nodes);
644  }
645  }
646 }
647 
648 void Trie::print_node(NODE_REF node, int max_num_edges) const {
649  if (node == NO_EDGE) return; // nothing to print
650  TRIE_NODE_RECORD *node_ptr = nodes_[node];
651  int num_fwd = node_ptr->forward_edges.size();
652  int num_bkw = node_ptr->backward_edges.size();
653  EDGE_VECTOR *vec;
654  for (int dir = 0; dir < 2; ++dir) {
655  if (dir == 0) {
656  vec = &(node_ptr->forward_edges);
657  tprintf(REFFORMAT " (%d %d): ", node, num_fwd, num_bkw);
658  } else {
659  vec = &(node_ptr->backward_edges);
660  tprintf("\t");
661  }
662  int i;
663  for (i = 0; (dir == 0 ? i < num_fwd : i < num_bkw) &&
664  i < max_num_edges; ++i) {
665  print_edge_rec((*vec)[i]);
666  tprintf(" ");
667  }
668  if (dir == 0 ? i < num_fwd : i < num_bkw) tprintf("...");
669  tprintf("\n");
670  }
671 }
672 } // namespace tesseract