Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
strngs.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: strngs.h (Formerly strings.h)
3  * Description: STRING class definition.
4  * Author: Ray Smith
5  * Created: Fri Feb 15 09:15:01 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef STRNGS_H
21 #define STRNGS_H
22 
23 #include <stdio.h>
24 #include <string.h>
25 #include "platform.h"
26 #include "memry.h"
27 
28 // STRING_IS_PROTECTED means that string[index] = X is invalid
29 // because you have to go through strings interface to modify it.
30 // This allows the string to ensure internal integrity and maintain
31 // its own string length. Unfortunately this is not possible because
32 // STRINGS are used as direct-manipulation data buffers for things
33 // like length arrays and many places cast away the const on string()
34 // to mutate the string. Turning this off means that internally we
35 // cannot assume we know the strlen.
36 #define STRING_IS_PROTECTED 0
37 
38 template <typename T> class GenericVector;
39 
41 {
42  public:
43  STRING();
44  STRING(const STRING &string);
45  STRING(const char *string);
46  ~STRING ();
47 
48  // Writes to the given file. Returns false in case of error.
49  bool Serialize(FILE* fp) const;
50  // Reads from the given file. Returns false in case of error.
51  // If swap is true, assumes a big/little-endian swap is needed.
52  bool DeSerialize(bool swap, FILE* fp);
53 
54  BOOL8 contains(const char c) const;
55  inT32 length() const;
56  inT32 size() const { return length(); }  // Alias for length(): forwards the call unchanged.
57  const char *string() const;
58 
59  inline char* strdup() const {  // Returns a copy of the character data in a buffer obtained with new char[] (so it pairs with delete[], not free()).
60  inT32 len = length() + 1;  // length() plus one; presumably the extra byte is for the '\0' terminator -- confirm against length().
61  return strncpy(new char[len], GetCStr(), len);  // strncpy writes exactly len bytes and returns its destination, i.e. the new buffer.
62  }
63 
64 #if STRING_IS_PROTECTED
65  const char &operator[] (inT32 index) const;
66  // len is number of chars in s to insert starting at index in this string
67  void insert_range(inT32 index, const char*s, int len);
68  void erase_range(inT32 index, int len);
69 #else
70  char &operator[] (inT32 index) const;
71 #endif
72  void split(const char c, GenericVector<STRING> *splited);
73  void truncate_at(inT32 index);
74 
75  BOOL8 operator== (const STRING & string) const;
76  BOOL8 operator!= (const STRING & string) const;
77  BOOL8 operator!= (const char *string) const;
78 
79  STRING & operator= (const char *string);
80  STRING & operator= (const STRING & string);
81 
82  STRING operator+ (const STRING & string) const;
83  STRING operator+ (const char ch) const;
84 
85  STRING & operator+= (const char *string);
86  STRING & operator+= (const STRING & string);
87  STRING & operator+= (const char ch);
88 
89  // Assignment for strings which are not null-terminated.
90  void assign(const char *cstr, int len);
91 
92  // Appends the given string and int (as a %d) to this.
93  // += cannot be used for ints as there is a char += operator that would
94  // be ambiguous, and ints usually need a string before or between them
95  // anyway.
96  void add_str_int(const char* str, int number);
97 
98  // Ensures capacity for at least min_capacity while keeping the buffer pointer encapsulated.
99  inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); }  // Forwards to ensure_cstr() and discards the char* it returns.
100 
101  private:
102  typedef struct STRING_HEADER {  // Bookkeeping record embedded at the start of the string's storage.
103  // How much space was allocated in the string buffer for char data.
104  int capacity_;
105 
106  // used_ is how much of the capacity is currently in use,
107  // including the '\0' terminator.
108  //
109  // If used_ == 0 the string is NULL (not even the '\0' is stored);
110  // else if used_ > 0 it equals strlen() + 1 (because it counts the '\0');
111  // else (used_ < 0) the string is not NULL but its length must be recomputed.
112  // That last state is set when encapsulation is violated because
113  // an API returned a mutable string.
114  //
115  // capacity_ - used_ = excess capacity into which the string can grow
116  // without reallocating.
117  mutable int used_;  // mutable: FixHeader() is const yet is documented as making used_ non-negative.
118  } STRING_HEADER;
119 
120  // To preserve the behavior of the old serialization, we only have space
121  // for one pointer in this structure. So we are embedding a data structure
122  // at the start of the storage that will hold additional state variables,
123  // then storing the actual string contents immediately after.
124  STRING_HEADER* data_;
125 
126  // Returns the header part of the storage, which is data_ itself.
127  inline STRING_HEADER* GetHeader() {
128  return data_;
129  }
130  inline const STRING_HEADER* GetHeader() const {  // Const overload: read-only pointer to the same header (data_).
131  return data_;
132  }
133 
134  // Returns the string data part of the storage: the bytes that begin sizeof(STRING_HEADER) past data_.
135  inline char* GetCStr() {
136  return ((char *)data_) + sizeof(STRING_HEADER);  // Cast to char* first so the offset is counted in bytes.
137  };  // NOTE(review): the ';' after the body is a redundant empty declaration.
138 
139  inline const char* GetCStr() const {  // Const overload: read-only pointer to the bytes that follow the header.
140  return ((const char *)data_) + sizeof(STRING_HEADER);  // Byte offset past the embedded STRING_HEADER.
141  };  // NOTE(review): the ';' after the body is a redundant empty declaration.
142  inline bool InvariantOk() const {  // Consistency check between the header's used_ field and the stored characters.
143 #if STRING_IS_PROTECTED
144  return (GetHeader()->used_ == 0) ?  // used_ == 0 must coincide with string() being NULL;
145  (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1));  // otherwise used_ must equal strlen + 1.
146 #else
147  return true;  // Unprotected build: performs no check and always reports success.
148 #endif
149  }
150 
151  // Ensure string has requested capacity as optimization
152  // to avoid unnecessary reallocations.
153  // The return value is a cstr buffer with at least requested capacity
154  char* ensure_cstr(inT32 min_capacity);
155 
156  void FixHeader() const; // make used_ non-negative, even if const
157 
158  char* AllocData(int used, int capacity);
159  void DiscardData();
160 };
161 #endif