48 if (char_net_ !=
NULL) {
53 if (net_input_ !=
NULL) {
58 if (net_output_ !=
NULL) {
80 void ConvNetCharClassifier::Fold() {
85 for (
int class_id = 0; class_id < class_cnt; class_id++) {
90 for (
int ch = 0; ch < upper_form32.length(); ch++) {
91 if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
92 upper_form32[ch] = towupper(upper_form32[ch]);
99 upper_form32.c_str()));
100 if (upper_class_id != -1 && class_id != upper_class_id) {
101 float max_out =
MAX(net_output_[class_id], net_output_[upper_class_id]);
102 net_output_[class_id] = max_out;
103 net_output_[upper_class_id] = max_out;
112 for (
int fold_set = 0; fold_set <
fold_set_cnt_; fold_set++) {
115 float max_prob = net_output_[
fold_sets_[fold_set][0]];
117 if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
118 max_prob = net_output_[fold_sets_[fold_set][ch]];
121 for (
int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
122 net_output_[fold_sets_[fold_set][ch]] =
MAX(max_prob * kFoldingRatio,
123 net_output_[fold_sets_[fold_set][ch]]);
130 bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
131 if (char_net_ ==
NULL) {
132 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
133 "NeuralNet is NULL\n");
136 int feat_cnt = char_net_->
in_cnt();
140 if (net_input_ ==
NULL) {
141 net_input_ =
new float[feat_cnt];
142 if (net_input_ ==
NULL) {
143 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
144 "unable to allocate memory for input nodes\n");
148 net_output_ =
new float[class_cnt];
149 if (net_output_ ==
NULL) {
150 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
151 "unable to allocate memory for output nodes\n");
158 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
159 "unable to compute features\n");
163 if (char_net_ !=
NULL) {
164 if (char_net_->
FeedForward(net_input_, net_output_) ==
false) {
165 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
166 "unable to run feed-forward\n");
178 if (RunNets(char_samp) ==
false) {
188 if (RunNets(char_samp) ==
false) {
196 if (alt_list ==
NULL) {
197 fprintf(stderr,
"Cube WARNING (ConvNetCharClassifier::Classify): "
198 "returning emtpy CharAltList\n");
202 for (
int out = 1; out < class_cnt; out++) {
204 alt_list->
Insert(out, cost);
212 if (char_net_ !=
NULL) {
216 char_net_ = char_net;
221 bool ConvNetCharClassifier::LoadFoldingSets(
const string &data_file_path,
225 string fold_file_name;
226 fold_file_name = data_file_path +
lang;
227 fold_file_name +=
".cube.fold";
230 FILE *fp = fopen(fold_file_name.c_str(),
"rb");
236 string fold_sets_str;
243 vector<string> str_vec;
245 fold_set_cnt_ = str_vec.size();
248 if (fold_sets_ ==
NULL) {
252 if (fold_set_len_ ==
NULL) {
257 for (
int fold_set = 0; fold_set <
fold_set_cnt_; fold_set++) {
258 reinterpret_cast<TessLangModel *
>(lang_mod)->RemoveInvalidCharacters(
262 if (str_vec[fold_set].length() <= 1) {
263 fprintf(stderr,
"Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
264 "invalidating folding set %d\n", fold_set);
265 fold_set_len_[fold_set] = 0;
266 fold_sets_[fold_set] =
NULL;
272 fold_set_len_[fold_set] = str32.length();
273 fold_sets_[fold_set] =
new int[fold_set_len_[fold_set]];
274 if (fold_sets_[fold_set] ==
NULL) {
275 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): "
276 "could not allocate folding set\n");
277 fold_set_cnt_ = fold_set;
280 for (
int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
288 bool ConvNetCharClassifier::Init(
const string &data_file_path,
290 LangModel *lang_mod) {
297 if (!LoadNets(data_file_path, lang)) {
303 if (!LoadFoldingSets(data_file_path, lang, lang_mod)) {
314 bool ConvNetCharClassifier::LoadNets(
const string &data_file_path,
315 const string &lang) {
316 string char_net_file;
319 char_net_file = data_file_path +
lang;
320 char_net_file +=
".cube.nn";
323 FILE *fp = fopen(char_net_file.c_str(),
"rb");
331 if (char_net_ ==
NULL) {
332 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::LoadNets): "
333 "could not load %s\n", char_net_file.c_str());
339 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::LoadNets): "
340 "could not validate net %s\n", char_net_file.c_str());
345 int feat_cnt = char_net_->
in_cnt();
348 if (char_net_->
out_cnt() != class_cnt) {
349 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::LoadNets): "
350 "output count (%d) and class count (%d) are not equal\n",
351 char_net_->
out_cnt(), class_cnt);
356 if (net_input_ ==
NULL) {
357 net_input_ =
new float[feat_cnt];
358 if (net_input_ ==
NULL) {
362 net_output_ =
new float[class_cnt];
363 if (net_output_ ==
NULL) {