Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
cube_utils.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: cube_utils.h
3  * Description: Declaration of the Cube Utilities Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  *(C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0(the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // The CubeUtils class provides miscellaneous utility and helper functions
21 // to the rest of the Cube Engine
22 
23 #ifndef CUBE_UTILS_H
24 #define CUBE_UTILS_H
25 
26 #include <vector>
27 #include <string>
28 
29 #include "allheaders.h"
30 #include "const.h"
31 #include "char_set.h"
32 #include "char_samp.h"
33 #include "img.h"
34 
35 namespace tesseract {
36 class CubeUtils {  // Utility class: apart from the constructor and destructor, every member function declared here is static.
37  public:
38  CubeUtils();
39  ~CubeUtils();
40 
41  // Converts a probability value to a cost by getting the -log() of the
42  // probability value to a known base
43  static int Prob2Cost(double prob_val);
44  // Converts a cost to probability by getting the exp(-normalized cost)
45  static double Cost2Prob(int cost);
46  // Computes the length of a 32-bit char buffer
47  static int StrLen(const char_32 *str);
48  // Compares two 32-bit char buffers
49  static int StrCmp(const char_32 *str1, const char_32 *str2);  // NOTE(review): sign convention of the result is not stated in this header -- confirm against the definition.
50  // Duplicates a 32-bit char buffer
51  static char_32 *StrDup(const char_32 *str);  // NOTE(review): presumably a newly allocated copy owned by the caller -- confirm how it must be released.
52  // Creates a CharSamp from an IMAGE and a bounding box
53  static CharSamp *CharSampleFromImg(IMAGE *img,
54  int left, int top, int wid, int hgt);
55  // Creates a CharSamp from a Pix and a bounding box
56  static CharSamp *CharSampleFromPix(Pix *pix,
57  int left, int top, int wid, int hgt);
58  // Creates an IMAGE from a CharSamp
59  static IMAGE *ImageFromCharSample(CharSamp *char_samp);
60  // Creates a Pix from a CharSamp
61  static Pix *PixFromCharSample(CharSamp *char_samp);
62  // Reads the contents of a file into a string
63  static bool ReadFileToString(const string &file_name, string *str);  // NOTE(review): the bool result presumably signals success -- confirm.
64  // Splits a string into a vector of strings using any of the specified delimiters
65  static void SplitStringUsing(const string &str, const string &delims,
66  vector<string> *str_vec);
67  // UTF-8 to UTF-32 conversion functions (and the reverse direction)
68  static void UTF8ToUTF32(const char *utf8_str, string_32 *str32);
69  static void UTF32ToUTF8(const char_32 *utf32_str, string *str);
70  // Returns true if input word has either 1) all-one-case, or 2)
71  // first character upper-case, and remaining characters lower-case.
72  // If char_set is not NULL, uses tesseract's unicharset functions
73  // to determine case properties. Otherwise, uses C-locale-dependent
74  // functions, which may be unreliable on non-ASCII characters.
75  static bool IsCaseInvariant(const char_32 *str32, CharSet *char_set);
76  // Returns char_32 pointer to the lower-case-transformed version of
77  // the input string or NULL on error. If char_set is NULL returns NULL.
78  // Return array must be freed by caller.
79  static char_32 *ToLower(const char_32 *str32, CharSet *char_set);
80  // Returns char_32 pointer to the upper-case-transformed version of
81  // the input string or NULL on error. If char_set is NULL returns NULL.
82  // Return array must be freed by caller.
83  static char_32 *ToUpper(const char_32 *str32, CharSet *char_set);
84  private:  // Overloaded helpers taking an IMAGE or a Pix plus a box (left, top, wid, hgt) and returning unsigned char *.
85  static unsigned char *GetImageData(IMAGE *img,
86  int left, int top, int wid, int hgt);
87  static unsigned char *GetImageData(Pix *pix,
88  int left, int top, int wid, int hgt);  // NOTE(review): buffer layout and ownership are not visible in this header -- TODO confirm against the definition.
89 };
90 } // namespace tesseract
91 #endif // CUBE_UTILS_H