Tesseract  3.02
conv_net_classifier.cpp
/**********************************************************************
 * File:        conv_net_classifier.cpp
 * Description: Implementation of Convolutional-NeuralNet Character Classifier
 * Author:      Ahmad Abdulkader
 * Created:     2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include <algorithm>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <wctype.h>

#include "char_set.h"
#include "classifier_base.h"
#include "const.h"
#include "conv_net_classifier.h"
#include "cube_utils.h"
#include "feature_base.h"
#include "feature_bmp.h"
#include "tess_lang_model.h"

namespace tesseract {

ConvNetCharClassifier::ConvNetCharClassifier(CharSet *char_set,
                                             TuningParams *params,
                                             FeatureBase *feat_extract)
    : CharClassifier(char_set, params, feat_extract) {
  char_net_ = NULL;
  net_input_ = NULL;
  net_output_ = NULL;
}

ConvNetCharClassifier::~ConvNetCharClassifier() {
  if (char_net_ != NULL) {
    delete char_net_;
    char_net_ = NULL;
  }

  if (net_input_ != NULL) {
    delete []net_input_;
    net_input_ = NULL;
  }

  if (net_output_ != NULL) {
    delete []net_output_;
    net_output_ = NULL;
  }
}

// The main training function. Given a sample and a class ID the classifier
// updates its parameters according to its learning algorithm. This function
// is currently not implemented. TODO(ahmadab): implement end-2-end training
bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
  return false;
}

// A secondary function needed for training. Allows the trainer to set the
// value of any train-time parameter. This function is currently not
// implemented. TODO(ahmadab): implement end-2-end training
bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) {
  // TODO(ahmadab): implement parameter initialization.
  return false;
}

// Folds the output of the NeuralNet using the loaded folding sets
void ConvNetCharClassifier::Fold() {
  // in case-insensitive mode
  if (case_sensitive_ == false) {
    int class_cnt = char_set_->ClassCount();
    // fold case
    for (int class_id = 0; class_id < class_cnt; class_id++) {
      // get class string
      const char_32 *str32 = char_set_->ClassString(class_id);
      // get the upper-case form of the string
      string_32 upper_form32 = str32;
      for (int ch = 0; ch < upper_form32.length(); ch++) {
        if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
          upper_form32[ch] = towupper(upper_form32[ch]);
        }
      }

      // find the class-id of the upper-case form, if any
      int upper_class_id =
          char_set_->ClassID(reinterpret_cast<const char_32 *>(
              upper_form32.c_str()));
      if (upper_class_id != -1 && class_id != upper_class_id) {
        float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]);
        net_output_[class_id] = max_out;
        net_output_[upper_class_id] = max_out;
      }
    }
  }

  // The folding sets specify how groups of classes should be folded.
  // Folding involves assigning a min-activation to all the members
  // of the folding set. The min-activation is a fraction of the max-activation
  // of the members of the folding set.
  for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
    if (fold_set_len_[fold_set] == 0)
      continue;
    float max_prob = net_output_[fold_sets_[fold_set][0]];
    for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) {
      if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
        max_prob = net_output_[fold_sets_[fold_set][ch]];
      }
    }
    for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
      net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio,
          net_output_[fold_sets_[fold_set][ch]]);
    }
  }
}
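
// Worked example of the folding rule above (illustrative values only; the
// actual kFoldingRatio is defined elsewhere): if a folding set contains
// classes {A, B, C} with activations {0.8, 0.3, 0.05} and kFoldingRatio
// were 0.5, the max activation is 0.8, so every member is raised to at
// least 0.8 * 0.5 = 0.4, giving {0.8, 0.4, 0.4}. Members already above
// that floor keep their own activation.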

// Compute the features of the specified charsamp and feed them forward
// through the loaded net
bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
  if (char_net_ == NULL) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
            "NeuralNet is NULL\n");
    return false;
  }
  int feat_cnt = char_net_->in_cnt();
  int class_cnt = char_set_->ClassCount();

  // allocate i/p and o/p buffers if needed
  if (net_input_ == NULL) {
    net_input_ = new float[feat_cnt];
    if (net_input_ == NULL) {
      fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
              "unable to allocate memory for input nodes\n");
      return false;
    }

    net_output_ = new float[class_cnt];
    if (net_output_ == NULL) {
      fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
              "unable to allocate memory for output nodes\n");
      return false;
    }
  }

  // compute input features
  if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
            "unable to compute features\n");
    return false;
  }

  if (char_net_ != NULL) {
    if (char_net_->FeedForward(net_input_, net_output_) == false) {
      fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
              "unable to run feed-forward\n");
      return false;
    }
  } else {
    return false;
  }
  Fold();
  return true;
}

// return the cost of being a char
int ConvNetCharClassifier::CharCost(CharSamp *char_samp) {
  if (RunNets(char_samp) == false) {
    return 0;
  }
  return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
}
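
// Note (an inference from this file, not a documented guarantee): net output
// 0 appears to be reserved for the "not a character" class, so
// 1.0f - net_output_[0] is read as the probability that the sample is a
// character before CubeUtils::Prob2Cost() converts it to a cost.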

// classifies a charsamp and returns an alternate list
// of chars sorted by char costs
CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
  // run the needed nets
  if (RunNets(char_samp) == false) {
    return NULL;
  }

  int class_cnt = char_set_->ClassCount();

  // create an altlist
  CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
  if (alt_list == NULL) {
    fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::Classify): "
            "returning empty CharAltList\n");
    return NULL;
  }

  for (int out = 1; out < class_cnt; out++) {
    int cost = CubeUtils::Prob2Cost(net_output_[out]);
    alt_list->Insert(out, cost);
  }

  return alt_list;
}
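
// Note: the loop above starts at out = 1, skipping net output 0. This is
// consistent with CharCost() treating output 0 as the "not a character"
// activation rather than a real character class (again an inference from
// this file, not a documented guarantee).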

// Set an external net (for training purposes)
void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
  if (char_net_ != NULL) {
    delete char_net_;
    char_net_ = NULL;
  }
  char_net_ = char_net;
}

// This function will return true if the file does not exist,
// but will fail if it did not pass the sanity checks
bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
                                            const string &lang,
                                            LangModel *lang_mod) {
  fold_set_cnt_ = 0;
  string fold_file_name;
  fold_file_name = data_file_path + lang;
  fold_file_name += ".cube.fold";

  // folding sets are optional
  FILE *fp = fopen(fold_file_name.c_str(), "rb");
  if (fp == NULL) {
    return true;
  }
  fclose(fp);

  string fold_sets_str;
  if (!CubeUtils::ReadFileToString(fold_file_name.c_str(),
                                   &fold_sets_str)) {
    return false;
  }

  // split into lines
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
  fold_set_cnt_ = str_vec.size();

  fold_sets_ = new int *[fold_set_cnt_];
  if (fold_sets_ == NULL) {
    return false;
  }
  fold_set_len_ = new int[fold_set_cnt_];
  if (fold_set_len_ == NULL) {
    fold_set_cnt_ = 0;
    return false;
  }

  for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
    reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
        &str_vec[fold_set]);

    // if all or all but one character are invalid, invalidate this set
    if (str_vec[fold_set].length() <= 1) {
      fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
              "invalidating folding set %d\n", fold_set);
      fold_set_len_[fold_set] = 0;
      fold_sets_[fold_set] = NULL;
      continue;
    }

    string_32 str32;
    CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
    fold_set_len_[fold_set] = str32.length();
    fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
    if (fold_sets_[fold_set] == NULL) {
      fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): "
              "could not allocate folding set\n");
      fold_set_cnt_ = fold_set;
      return false;
    }
    for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
      fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
    }
  }
  return true;
}
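
// Illustrative (hypothetical) contents of a <lang>.cube.fold file, based on
// the parsing above: each line is one folding set, given as a UTF-8 string of
// characters whose activations should be folded together. For example, a line
// reading "o0O" would fold the Latin letters 'o' and 'O' and the digit '0'
// into one activation group.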

// Init the classifier provided a data-path and a language string
bool ConvNetCharClassifier::Init(const string &data_file_path,
                                 const string &lang,
                                 LangModel *lang_mod) {
  if (init_) {
    return true;
  }

  // load the nets if any. This function will return true if the net file
  // does not exist, but will fail if the net did not pass the sanity checks
  if (!LoadNets(data_file_path, lang)) {
    return false;
  }

  // load the folding sets if any. This function will return true if the
  // file does not exist, but will fail if it did not pass the sanity checks
  if (!LoadFoldingSets(data_file_path, lang, lang_mod)) {
    return false;
  }

  init_ = true;
  return true;
}

// Load the classifier's Neural Nets
// This function will return true if the net file does not exist,
// but will fail if the net did not pass the sanity checks
bool ConvNetCharClassifier::LoadNets(const string &data_file_path,
                                     const string &lang) {
  string char_net_file;

  // add the lang identifier
  char_net_file = data_file_path + lang;
  char_net_file += ".cube.nn";

  // neural network is optional
  FILE *fp = fopen(char_net_file.c_str(), "rb");
  if (fp == NULL) {
    return true;
  }
  fclose(fp);

  // load main net
  char_net_ = tesseract::NeuralNet::FromFile(char_net_file.c_str());
  if (char_net_ == NULL) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
            "could not load %s\n", char_net_file.c_str());
    return false;
  }

  // validate net
  if (char_net_->in_cnt() != feat_extract_->FeatureCnt()) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
            "could not validate net %s\n", char_net_file.c_str());
    return false;
  }

  // alloc net i/o buffers
  int feat_cnt = char_net_->in_cnt();
  int class_cnt = char_set_->ClassCount();

  if (char_net_->out_cnt() != class_cnt) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
            "output count (%d) and class count (%d) are not equal\n",
            char_net_->out_cnt(), class_cnt);
    return false;
  }

  // allocate i/p and o/p buffers if needed
  if (net_input_ == NULL) {
    net_input_ = new float[feat_cnt];
    if (net_input_ == NULL) {
      return false;
    }

    net_output_ = new float[class_cnt];
    if (net_output_ == NULL) {
      return false;
    }
  }

  return true;
}
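
// Note on data files (derived from LoadNets() and LoadFoldingSets() above):
// the classifier looks for <data_file_path><lang>.cube.nn (the serialized
// NeuralNet) and <data_file_path><lang>.cube.fold (the folding sets). Both
// files are optional at load time; a missing file makes the corresponding
// load function return true, though RunNets() will later fail if no net was
// actually loaded.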
}  // tesseract