Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
word_altlist.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: word_altlist.cpp
3  * Description: Implementation of the Word Alternate List Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "word_altlist.h"
21 
22 namespace tesseract {
24  : AltList(max_alt) {
// max_alt is forwarded to the AltList base. The word array starts out
// unallocated; Insert() allocates it when it finds word_alt_ == NULL.
25  word_alt_ = NULL;
26 }
27 
// Free each stored alternate string (the first alt_cnt_ slots of word_alt_),
// then the pointer array itself, and reset word_alt_ to NULL.
// alt_cost_ and alt_tag_ are not released in this block -- presumably the
// AltList base takes care of them; TODO confirm.
29  if (word_alt_ != NULL) {
30  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
// delete[] on a NULL pointer is a no-op, so this guard is purely defensive
31  if (word_alt_[alt_idx] != NULL) {
32  delete []word_alt_[alt_idx];
33  }
34  }
35  delete []word_alt_;
36  word_alt_ = NULL;
37  }
38 }
39 
40 // insert an alternate word with the specified cost and tag
41 bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
42  if (word_alt_ == NULL || alt_cost_ == NULL) {
43  word_alt_ = new char_32*[max_alt_];
44  alt_cost_ = new int[max_alt_];
45  alt_tag_ = new void *[max_alt_];
46 
47  if (word_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) {
48  return false;
49  }
50 
51  memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
52  } else {
53  // check if alt already exists
54  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
55  if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) {
56  // update the cost if we have a lower one
57  if (cost < alt_cost_[alt_idx]) {
58  alt_cost_[alt_idx] = cost;
59  alt_tag_[alt_idx] = tag;
60  }
61  return true;
62  }
63  }
64  }
65 
66  // determine length of alternate
67  int len = CubeUtils::StrLen(word_str);
68 
69  word_alt_[alt_cnt_] = new char_32[len + 1];
70  if (word_alt_[alt_cnt_] == NULL) {
71  return false;
72  }
73 
74  if (len > 0) {
75  memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
76  }
77 
78  word_alt_[alt_cnt_][len] = 0;
79  alt_cost_[alt_cnt_] = cost;
80  alt_tag_[alt_cnt_] = tag;
81 
82  alt_cnt_++;
83 
84  return true;
85 }
86 
87 // sort the alternates in ascending order of cost (lowest cost first)
// Pairwise exchange sort over the first alt_cnt_ entries: after each pass of
// the inner loop, slot alt_idx holds the lowest remaining cost. The number
// of comparisons is quadratic in alt_cnt_.
89  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
90  for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
91  if (alt_cost_[alt_idx] > alt_cost_[alt]) {
// swap the same two slots in all three parallel arrays so that each word
// keeps its own cost and tag
92  char_32 *pchTemp = word_alt_[alt_idx];
93  word_alt_[alt_idx] = word_alt_[alt];
94  word_alt_[alt] = pchTemp;
95 
96  int temp = alt_cost_[alt_idx];
97  alt_cost_[alt_idx] = alt_cost_[alt];
98  alt_cost_[alt] = temp;
99 
100  void *tag = alt_tag_[alt_idx];
101  alt_tag_[alt_idx] = alt_tag_[alt];
102  alt_tag_[alt] = tag;
103  }
104  }
105  }
106 }
107 
// Dump every stored alternate to stderr, one line per alternate: its index,
// its text converted to UTF-8, its cost, its length in char_32 units, and
// then the numeric value of each char_32 unit.
109  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
110  char_32 *word_32 = word_alt_[alt_idx];
111  string word_str;
112  CubeUtils::UTF32ToUTF8(word_32, &word_str);
113  int num_unichars = CubeUtils::StrLen(word_32);
114  fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx,
115  word_str.c_str(), alt_cost_[alt_idx], num_unichars);
// NOTE(review): "%d" assumes a char_32 value is passed as an int through the
// varargs call -- confirm against the char_32 typedef.
116  for (int i = 0; i < num_unichars; ++i)
117  fprintf(stderr, "%d ", word_32[i]);
118  fprintf(stderr, "\n");
119  }
120 }
121 } // namespace tesseract