Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::CharSet Class Reference

#include <char_set.h>

List of all members.

Public Member Functions

 CharSet ()
 ~CharSet ()
bool SharedUnicharset ()
int ClassID (const char_32 *str) const
int ClassID (char_32 ch) const
int UnicharID (const char_32 *str) const
int UnicharID (char_32 ch) const
const char_32 * ClassString (int class_id) const
int ClassCount () const
UNICHARSET * InternalUnicharset ()

Static Public Member Functions

static CharSet * Create (TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)

Detailed Description

Definition at line 42 of file char_set.h.


Constructor & Destructor Documentation

tesseract::CharSet::CharSet ( )

Definition at line 28 of file char_set.cpp.

{
  // Start with every hash bin marked as empty.
  memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
  // Nothing has been loaded yet: no unichar map, no class strings.
  unicharset_map_ = NULL;
  class_strings_ = NULL;
  class_cnt_ = 0;
  // Stays false until loading succeeds (Create sets it to true).
  init_ = false;
}
tesseract::CharSet::~CharSet ( )

Definition at line 38 of file char_set.cpp.

{
  // Free each class string, then the table that holds them.
  if (class_strings_ != NULL) {
    int cls = 0;
    while (cls < class_cnt_) {
      // delete on a null pointer is a no-op, so unset entries are harmless.
      delete class_strings_[cls];
      ++cls;
    }
    delete []class_strings_;
    class_strings_ = NULL;
  }
  // Free the class-id -> unichar-id map (may be NULL).
  delete []unicharset_map_;
}

Member Function Documentation

int tesseract::CharSet::ClassCount ( ) const
inline

Definition at line 111 of file char_set.h.

{
  // Current value of the class counter; it is the exclusive upper bound
  // that ClassString() accepts for a class id.
  return class_cnt_;
}
int tesseract::CharSet::ClassID ( const char_32 * str) const
inline

Definition at line 54 of file char_set.h.

{
  // Look the string up in its hash bin; each bin entry is a class id.
  const int bucket = Hash(str);
  const int bucket_size = hash_bin_size_[bucket];
  // An empty bin simply skips the loop and falls through to "not found".
  for (int slot = 0; slot < bucket_size; ++slot) {
    const int candidate = hash_bins_[bucket][slot];
    if (class_strings_[candidate]->compare(str) == 0) {
      return candidate;
    }
  }
  // No class string in this bin matches str.
  return -1;
}
int tesseract::CharSet::ClassID ( char_32  ch) const
inline

Definition at line 65 of file char_set.h.

{
  // Look the character up in its hash bin; each bin entry is a class id.
  const int bucket = Hash(ch);
  const int bucket_size = hash_bin_size_[bucket];
  // An empty bin simply skips the loop and falls through to "not found".
  for (int slot = 0; slot < bucket_size; ++slot) {
    const int candidate = hash_bins_[bucket][slot];
    // Match only class strings that consist of exactly this one character.
    if ((*class_strings_[candidate])[0] == ch &&
        class_strings_[candidate]->length() == 1) {
      return candidate;
    }
  }
  // No single-character class string equals ch.
  return -1;
}
const char_32* tesseract::CharSet::ClassString ( int  class_id) const
inline

Definition at line 104 of file char_set.h.

{
  // Valid class ids are 0 .. class_cnt_ - 1; anything else yields NULL.
  const bool in_range = (class_id >= 0) && (class_id < class_cnt_);
  if (!in_range) {
    return NULL;
  }
  // Expose the stored string's character buffer as a char_32 array.
  return reinterpret_cast<const char_32 *>(class_strings_[class_id]->c_str());
}
CharSet * tesseract::CharSet::Create ( TessdataManager * tessdata_manager,
UNICHARSET * tess_unicharset 
)
static

Definition at line 54 of file char_set.cpp.

{
  // Builds a CharSet from the data reachable through tessdata_manager.
  //
  // If a Cube unicharset entry exists it is loaded into the new object's
  // own cube_unicharset_ and the supported-character list is loaded with
  // tess_unicharset; otherwise the list is loaded from the tesseract
  // unicharset entry and tess_unicharset itself becomes the object's
  // unicharset.
  //
  // Returns the new CharSet on success (init_ set to true), or NULL on any
  // failure. Every failure path frees the partially built object and
  // returns a real null pointer. NOTE(review): the caller presumably owns
  // the returned object -- confirm against callers.
  CharSet *char_set = new CharSet();
  if (char_set == NULL) {
    return NULL;
  }
  // First look for Cube's unicharset; if not there, use tesseract's.
  // The second seek only happens when the first one fails.
  const bool cube_unicharset_exists =
      tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
  if (!cube_unicharset_exists &&
      !tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) {
    fprintf(stderr, "Cube ERROR (CharSet::Create): could not find "
            "either cube or tesseract unicharset\n");
    delete char_set;  // do not leak the object on failure
    return NULL;
  }
  FILE *charset_fp = tessdata_manager->GetDataFilePtr();
  if (!charset_fp) {
    fprintf(stderr, "Cube ERROR (CharSet::Create): could not load "
            "a unicharset\n");
    delete char_set;  // do not leak the object on failure
    return NULL;
  }
  // If we found a cube unicharset separate from tesseract's, load it and
  // map its unichars to tesseract's; if only one unicharset exists,
  // just load it.
  bool loaded;
  if (cube_unicharset_exists) {
    char_set->cube_unicharset_.load_from_file(charset_fp);
    // Seek back to the start of the cube unicharset entry so the
    // supported-character list is read from its beginning.
    loaded = tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
    loaded = loaded && char_set->LoadSupportedCharList(
        tessdata_manager->GetDataFilePtr(), tess_unicharset);
    char_set->unicharset_ = &char_set->cube_unicharset_;
  } else {
    loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
    char_set->unicharset_ = tess_unicharset;
  }
  if (!loaded) {
    delete char_set;
    return NULL;
  }
  char_set->init_ = true;
  return char_set;
}
UNICHARSET* tesseract::CharSet::InternalUnicharset ( )
inline

Definition at line 121 of file char_set.h.

{
  // The unicharset selected by Create(): either this object's own
  // cube_unicharset_ or the tesseract unicharset passed to Create().
  return unicharset_;
}
bool tesseract::CharSet::SharedUnicharset ( )
inline

Definition at line 48 of file char_set.h.

{
  // True when there is no class-id -> unichar-id map, i.e. UnicharID()
  // returns class ids unchanged.
  const bool has_no_map = (unicharset_map_ == NULL);
  return has_no_map;
}
int tesseract::CharSet::UnicharID ( const char_32 * str) const
inline

Definition at line 80 of file char_set.h.

{
  // Resolve the string to a class id first; unknown strings are invalid.
  const int class_id = ClassID(str);
  if (class_id == INVALID_UNICHAR_ID) {
    return INVALID_UNICHAR_ID;
  }
  // Without a map, the class id is used directly as the unichar id.
  if (!unicharset_map_) {
    return class_id;
  }
  // Otherwise translate the class id through the map.
  return unicharset_map_[class_id];
}
int tesseract::CharSet::UnicharID ( char_32  ch) const
inline

Definition at line 92 of file char_set.h.

{
  // Resolve the character to a class id first; unknown ones are invalid.
  const int class_id = ClassID(ch);
  if (class_id == INVALID_UNICHAR_ID) {
    return INVALID_UNICHAR_ID;
  }
  // Without a map, the class id is used directly as the unichar id.
  if (!unicharset_map_) {
    return class_id;
  }
  // Otherwise translate the class id through the map.
  return unicharset_map_[class_id];
}

The documentation for this class was generated from the following files: