Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
char_samp_set.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: char_samp_set.cpp
3  * Description: Implementation of a Character Sample Set Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <stdlib.h>
21 #include <string>
22 #include "char_samp_set.h"
23 #include "cached_file.h"
24 
25 namespace tesseract {
26 
28  cnt_ = 0;
29  samp_buff_ = NULL;
30  own_samples_ = false;
31 }
32 
34  Cleanup();
35 }
36 
37 // free buffers and init vars
38 void CharSampSet::Cleanup() {
39  if (samp_buff_ != NULL) {
40  // only free samples if owned by class
41  if (own_samples_ == true) {
42  for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) {
43  if (samp_buff_[samp_idx] != NULL) {
44  delete samp_buff_[samp_idx];
45  }
46  }
47  }
48  delete []samp_buff_;
49  }
50  cnt_ = 0;
51  samp_buff_ = NULL;
52 }
53 
54 // add a new sample
55 bool CharSampSet::Add(CharSamp *char_samp) {
56  if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) {
57  // create an extended buffer
58  CharSamp **new_samp_buff =
59  reinterpret_cast<CharSamp **>(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]);
60  if (new_samp_buff == NULL) {
61  return false;
62  }
63  // copy old contents
64  if (cnt_ > 0) {
65  memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
66  delete []samp_buff_;
67  }
68  samp_buff_ = new_samp_buff;
69  }
70  samp_buff_[cnt_++] = char_samp;
71  return true;
72 }
73 
74 // load char samples from file
75 bool CharSampSet::LoadCharSamples(FILE *fp) {
76  // free existing
77  Cleanup();
78  // samples are created here and owned by the class
79  own_samples_ = true;
80  // start loading char samples
81  while (feof(fp) == 0) {
82  CharSamp *new_samp = CharSamp::FromCharDumpFile(fp);
83  if (new_samp != NULL) {
84  if (Add(new_samp) == false) {
85  return false;
86  }
87  }
88  }
89  return true;
90 }
91 
92 // creates a CharSampSet object from file
94  FILE *fp;
95  unsigned int val32;
96  // open the file
97  fp = fopen(file_name.c_str(), "rb");
98  if (fp == NULL) {
99  return NULL;
100  }
101  // read and verify marker
102  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
103  return NULL;
104  }
105  if (val32 != 0xfefeabd0) {
106  return NULL;
107  }
108  // create an object
109  CharSampSet *samp_set = new CharSampSet();
110  if (samp_set == NULL) {
111  return NULL;
112  }
113  if (samp_set->LoadCharSamples(fp) == false) {
114  delete samp_set;
115  samp_set = NULL;
116  }
117  fclose(fp);
118  return samp_set;
119 }
120 
121 // Create a new Char Dump file
122 FILE *CharSampSet::CreateCharDumpFile(string file_name) {
123  FILE *fp;
124  unsigned int val32;
125  // create the file
126  fp = fopen(file_name.c_str(), "wb");
127  if (!fp) {
128  return NULL;
129  }
130  // read and verify marker
131  val32 = 0xfefeabd0;
132  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
133  return NULL;
134  }
135  return fp;
136 }
137 
138 // Enumerate the Samples in the set one-by-one calling the enumertor's
139  // EnumCharSamp method for each sample
140 bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
141  CachedFile *fp_in;
142  unsigned int val32;
143  long i64_size,
144  i64_pos;
145  // open the file
146  fp_in = new CachedFile(file_name);
147  if (fp_in == NULL) {
148  return false;
149  }
150  i64_size = fp_in->Size();
151  if (i64_size < 1) {
152  return false;
153  }
154  // read and verify marker
155  if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) {
156  return false;
157  }
158  if (val32 != 0xfefeabd0) {
159  return false;
160  }
161  // start loading char samples
162  while (fp_in->eof() == false) {
163  CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in);
164  i64_pos = fp_in->Tell();
165  if (new_samp != NULL) {
166  bool ret_flag = (enum_obj)->EnumCharSamp(new_samp,
167  (100.0f * i64_pos / i64_size));
168  delete new_samp;
169  if (ret_flag == false) {
170  break;
171  }
172  }
173  }
174  delete fp_in;
175  return true;
176 }
177 
178 } // namespace tesseract