Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::CharSamp Class Reference

#include <char_samp.h>

Inheritance diagram for tesseract::CharSamp:
tesseract::Bmp8

List of all members.

Public Member Functions

 CharSamp ()
 CharSamp (int wid, int hgt)
 CharSamp (int left, int top, int wid, int hgt)
 ~CharSamp ()
unsigned short Left () const
unsigned short Right () const
unsigned short Top () const
unsigned short Bottom () const
unsigned short Page () const
unsigned short NormTop () const
unsigned short NormBottom () const
unsigned short NormAspectRatio () const
unsigned short FirstChar () const
unsigned short LastChar () const
char_32 Label () const
char_32 * StrLabel () const
string stringLabel () const
void SetLeft (unsigned short left)
void SetTop (unsigned short top)
void SetPage (unsigned short page)
void SetLabel (char_32 label)
void SetLabel (const char_32 *label32)
void SetLabel (string str)
void SetNormTop (unsigned short norm_top)
void SetNormBottom (unsigned short norm_bottom)
void SetNormAspectRatio (unsigned short norm_aspect_ratio)
void SetFirstChar (unsigned short first_char)
void SetLastChar (unsigned short last_char)
bool Save2CharDumpFile (FILE *fp) const
CharSamp * Crop ()
ConComp ** Segment (int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
CharSamp * Scale (int wid, int hgt, bool isotropic=true)
CharSamp * Clone () const
bool ComputeFeatures (int conv_grid_size, float *features)
int LabelLen () const
- Public Member Functions inherited from tesseract::Bmp8
 Bmp8 (unsigned short wid, unsigned short hgt)
 ~Bmp8 ()
bool Clear ()
unsigned short Width () const
unsigned short Stride () const
unsigned short Height () const
unsigned char * RawData () const
bool ScaleFrom (Bmp8 *bmp, bool isotropic=true)
bool Deslant ()
bool HorizontalDeslant (double *deslant_angle)
bool IsIdentical (Bmp8 *pBmp) const
ConComp ** FindConComps (int *concomp_cnt, int min_size) const
float ForegroundRatio () const
float MeanHorizontalHistogramEntropy () const
int * HorizontalHistogram () const

Static Public Member Functions

static CharSamp * FromCharDumpFile (CachedFile *fp)
static CharSamp * FromCharDumpFile (FILE *fp)
static CharSamp * FromCharDumpFile (unsigned char **raw_data)
static CharSamp * FromRawData (int left, int top, int wid, int hgt, unsigned char *data)
static CharSamp * FromConComps (ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
static int AuxFeatureCnt ()
static int LabelLen (const char_32 *label32)

Additional Inherited Members

- Protected Member Functions inherited from tesseract::Bmp8
bool LoadFromCharDumpFile (CachedFile *fp)
bool LoadFromCharDumpFile (FILE *fp)
bool LoadFromCharDumpFile (unsigned char **raw_data)
bool LoadFromRawData (unsigned char *data)
bool SaveBmp2CharDumpFile (FILE *fp) const
bool IsBlankColumn (int x) const
bool IsBlankRow (int y) const
void Crop (int *xst_src, int *yst_src, int *wid, int *hgt)
void Copy (int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
- Protected Attributes inherited from tesseract::Bmp8
unsigned short wid_
unsigned short hgt_
unsigned char ** line_buff_
- Static Protected Attributes inherited from tesseract::Bmp8
static const int kConCompAllocChunk = 16
static const int kDeslantAngleCount

Detailed Description

Definition at line 39 of file char_samp.h.


Constructor & Destructor Documentation

tesseract::CharSamp::CharSamp ( )

Definition at line 29 of file char_samp.cpp.

: Bmp8(0, 0) {
  // Default sample: 0x0 bitmap at the origin with no label.
  left_ = 0;
  top_ = 0;
  label32_ = NULL;
  // -1 is presumably the "no page assigned" marker -- TODO confirm.
  page_ = -1;
  // Give the word-context fields defined values: Save2CharDumpFile(),
  // ComputeFeatures() and Clone() read them even when no setter was called.
  first_char_ = 0;
  last_char_ = 0;
  norm_top_ = 0;
  norm_bottom_ = 0;
  norm_aspect_ratio_ = 0;
}
tesseract::CharSamp::CharSamp ( int  wid,
int  hgt 
)

Definition at line 37 of file char_samp.cpp.

: Bmp8(wid, hgt) {
  // Sample with a wid x hgt bitmap placed at the origin, with no label.
  left_ = 0;
  top_ = 0;
  label32_ = NULL;
  // -1 is presumably the "no page assigned" marker -- TODO confirm.
  page_ = -1;
  // Give the word-context fields defined values: Save2CharDumpFile(),
  // ComputeFeatures() and Clone() read them even when no setter was called.
  first_char_ = 0;
  last_char_ = 0;
  norm_top_ = 0;
  norm_bottom_ = 0;
  norm_aspect_ratio_ = 0;
}
tesseract::CharSamp::CharSamp ( int  left,
int  top,
int  wid,
int  hgt 
)

Definition at line 45 of file char_samp.cpp.

: Bmp8(wid, hgt)
, left_(left)
, top_(top) {
  // Sample with a wid x hgt bitmap positioned at (left, top), with no label.
  label32_ = NULL;
  // -1 is presumably the "no page assigned" marker -- TODO confirm.
  page_ = -1;
  // Give the word-context fields defined values: Save2CharDumpFile(),
  // ComputeFeatures() and Clone() read them even when no setter was called.
  first_char_ = 0;
  last_char_ = 0;
  norm_top_ = 0;
  norm_bottom_ = 0;
  norm_aspect_ratio_ = 0;
}
tesseract::CharSamp::~CharSamp ( )

Definition at line 53 of file char_samp.cpp.

{
  // Release the label buffer; delete[] on a null pointer is a no-op,
  // so no explicit guard is needed.
  delete [] label32_;
  label32_ = NULL;
}

Member Function Documentation

static int tesseract::CharSamp::AuxFeatureCnt ( )
inline static

Definition at line 138 of file char_samp.h.

{
  // Fixed count; ComputeFeatures() appends five word-context values
  // after the bitmap values.
  return 5;
}
unsigned short tesseract::CharSamp::Bottom ( ) const
inline

Definition at line 49 of file char_samp.h.

{
  // Top coordinate plus the bitmap height.
  return top_ + hgt_;
}
CharSamp * tesseract::CharSamp::Clone ( ) const

Definition at line 575 of file char_samp.cpp.

{
  // Create a sample with the same position and bitmap size as this one.
  // The caller receives a new heap object (allocated with new).
  CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
  // Copy the label and all meta-data. The page is copied too, matching what
  // Scale() does; previously the clone was left with the constructor default.
  samp->SetLabel(label32_);
  samp->SetPage(page_);
  samp->SetFirstChar(first_char_);
  samp->SetLastChar(last_char_);
  samp->SetNormTop(norm_top_);
  samp->SetNormBottom(norm_bottom_);
  samp->SetNormAspectRatio(norm_aspect_ratio_);
  // Copy the whole bitmap into the clone.
  Copy(0, 0, wid_, hgt_, samp);
  return samp;
}
bool tesseract::CharSamp::ComputeFeatures ( int  conv_grid_size,
float *  features 
)

Definition at line 656 of file char_samp.cpp.

{
  // Work on a copy of this sample scaled to conv_grid_size x conv_grid_size.
  CharSamp *grid_samp = Scale(conv_grid_size, conv_grid_size);
  if (grid_samp == NULL) {
    return false;
  }
  const unsigned char *pixels = grid_samp->RawData();
  const int pixel_cnt = conv_grid_size * conv_grid_size;
  // Bitmap features: one value per pixel, stored as 255 minus the pixel value.
  int idx = 0;
  while (idx < pixel_cnt) {
    features[idx] = 255.0f - (1.0f * pixels[idx]);
    idx++;
  }
  // Word context features, appended right after the bitmap values.
  // The caller's buffer must therefore hold pixel_cnt + 5 floats.
  features[idx++] = FirstChar();
  features[idx++] = LastChar();
  features[idx++] = NormTop();
  features[idx++] = NormBottom();
  features[idx++] = NormAspectRatio();
  delete grid_samp;
  return true;
}
CharSamp * tesseract::CharSamp::Crop ( )

Definition at line 358 of file char_samp.cpp.

{
  // Get the dimensions of the cropped image; Bmp8::Crop updates the four
  // values in place.
  int cropped_left = 0;
  int cropped_top = 0;
  int cropped_wid = wid_;
  int cropped_hgt = hgt_;
  Bmp8::Crop(&cropped_left, &cropped_top,
             &cropped_wid, &cropped_hgt);
  // Nothing left after cropping: no sample to return.
  if (cropped_wid == 0 || cropped_hgt == 0) {
    return NULL;
  }
  // Create the cropped char samp, positioned relative to this sample's origin.
  CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
                                        top_ + cropped_top,
                                        cropped_wid, cropped_hgt);
  cropped_samp->SetLabel(label32_);
  // Keep the page with the sample, as Scale() does; previously the cropped
  // sample was left with the constructor default.
  cropped_samp->SetPage(page_);
  cropped_samp->SetFirstChar(first_char_);
  cropped_samp->SetLastChar(last_char_);
  // The following 3 fields may/should be reset by the calling function
  // using context information, i.e., location of character box
  // w.r.t. the word bounding box.
  cropped_samp->SetNormAspectRatio(255 *
      cropped_wid / (cropped_wid + cropped_hgt));
  cropped_samp->SetNormTop(0);
  cropped_samp->SetNormBottom(255);
  // Copy the cropped region of the bitmap into the new sample.
  Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
  return cropped_samp;
}
unsigned short tesseract::CharSamp::FirstChar ( ) const
inline

Definition at line 54 of file char_samp.h.

{
  // Plain accessor for first_char_.
  return first_char_;
}
CharSamp * tesseract::CharSamp::FromCharDumpFile ( CachedFile *  fp)
static

Reimplemented from tesseract::Bmp8.

Definition at line 82 of file char_samp.cpp.

{
  // Reads one sample from a char dump stream: marker, label length, label,
  // eight 16-bit fields, then the bitmap (via LoadFromCharDumpFile).
  // Returns a new CharSamp owned by the caller, or NULL on any failure.
  unsigned short left;
  unsigned short top;
  unsigned short page;
  unsigned short first_char;
  unsigned short last_char;
  unsigned short norm_top;
  unsigned short norm_bottom;
  unsigned short norm_aspect_ratio;
  unsigned int val32;
  char_32 *label32 = NULL;
  // read and check 32 bit marker
  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
    return NULL;
  }
  if (val32 != 0xabd0fefe) {
    return NULL;
  }
  // read label length
  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
    return NULL;
  }
  // the label is not null terminated in the file
  if (val32 > 0) {
    // A length of 0xffffffff would make val32 + 1 wrap to 0 and yield a
    // zero-sized buffer; treat it as a corrupt record.
    if (val32 + 1 == 0) {
      return NULL;
    }
    label32 = new char_32[val32 + 1];
    if (label32 == NULL) {
      return NULL;
    }
    // read label
    if (fp->Read(label32, val32 * sizeof(*label32)) !=
        (val32 * sizeof(*label32))) {
      delete [] label32;
      return NULL;
    }
    // null terminate
    label32[val32] = 0;
  }
  // Read coordinates and word-context fields in file order. || stops at the
  // first short read, so the read sequence is unchanged.
  if (fp->Read(&page, sizeof(page)) != sizeof(page) ||
      fp->Read(&left, sizeof(left)) != sizeof(left) ||
      fp->Read(&top, sizeof(top)) != sizeof(top) ||
      fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char) ||
      fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char) ||
      fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top) ||
      fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom) ||
      fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
          sizeof(norm_aspect_ratio)) {
    // The label buffer is not yet owned by any object; free it here.
    delete [] label32;
    return NULL;
  }
  // create the object
  CharSamp *char_samp = new CharSamp();
  if (char_samp == NULL) {
    delete [] label32;
    return NULL;
  }
  // init; from here on the sample owns label32 and its destructor frees it
  char_samp->label32_ = label32;
  char_samp->page_ = page;
  char_samp->left_ = left;
  char_samp->top_ = top;
  char_samp->first_char_ = first_char;
  char_samp->last_char_ = last_char;
  char_samp->norm_top_ = norm_top;
  char_samp->norm_bottom_ = norm_bottom;
  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
  // load the Bmp8 part
  if (char_samp->LoadFromCharDumpFile(fp) == false) {
    delete char_samp;
    return NULL;
  }
  return char_samp;
}
CharSamp * tesseract::CharSamp::FromCharDumpFile ( FILE *  fp)
static

Reimplemented from tesseract::Bmp8.

Definition at line 172 of file char_samp.cpp.

{
  // Reads one sample from a char dump FILE: marker, label length, label,
  // eight 16-bit fields, then the bitmap (via LoadFromCharDumpFile).
  // Returns a new CharSamp owned by the caller, or NULL on any failure.
  unsigned short left;
  unsigned short top;
  unsigned short page;
  unsigned short first_char;
  unsigned short last_char;
  unsigned short norm_top;
  unsigned short norm_bottom;
  unsigned short norm_aspect_ratio;
  unsigned int val32;
  char_32 *label32 = NULL;
  // read and check 32 bit marker
  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    return NULL;
  }
  if (val32 != 0xabd0fefe) {
    return NULL;
  }
  // read label length
  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    return NULL;
  }
  // the label is not null terminated in the file
  if (val32 > 0) {
    // A length of 0xffffffff would make val32 + 1 wrap to 0 and yield a
    // zero-sized buffer; treat it as a corrupt record.
    if (val32 + 1 == 0) {
      return NULL;
    }
    label32 = new char_32[val32 + 1];
    if (label32 == NULL) {
      return NULL;
    }
    // read label
    if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
        (val32 * sizeof(*label32))) {
      delete [] label32;
      return NULL;
    }
    // null terminate
    label32[val32] = 0;
  }
  // Read coordinates and word-context fields in file order. || stops at the
  // first short read, so the read sequence is unchanged.
  if (fread(&page, 1, sizeof(page), fp) != sizeof(page) ||
      fread(&left, 1, sizeof(left), fp) != sizeof(left) ||
      fread(&top, 1, sizeof(top), fp) != sizeof(top) ||
      fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) ||
      fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) ||
      fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) ||
      fread(&norm_bottom, 1, sizeof(norm_bottom), fp) !=
          sizeof(norm_bottom) ||
      fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
          sizeof(norm_aspect_ratio)) {
    // The label buffer is not yet owned by any object; free it here.
    delete [] label32;
    return NULL;
  }
  // create the object
  CharSamp *char_samp = new CharSamp();
  if (char_samp == NULL) {
    delete [] label32;
    return NULL;
  }
  // init; from here on the sample owns label32 and its destructor frees it
  char_samp->label32_ = label32;
  char_samp->page_ = page;
  char_samp->left_ = left;
  char_samp->top_ = top;
  char_samp->first_char_ = first_char;
  char_samp->last_char_ = last_char;
  char_samp->norm_top_ = norm_top;
  char_samp->norm_bottom_ = norm_bottom;
  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
  // load the Bmp8 part; on failure release the half-built sample instead of
  // leaking it (the other FromCharDumpFile overloads already do this)
  if (char_samp->LoadFromCharDumpFile(fp) == false) {
    delete char_samp;
    return NULL;
  }
  return char_samp;
}
CharSamp * tesseract::CharSamp::FromCharDumpFile ( unsigned char **  raw_data)
static

Definition at line 590 of file char_samp.cpp.

{
  // Parses one sample from an in-memory char dump and, on success, advances
  // *raw_data_ptr past the consumed bytes. No buffer length is available
  // here, so the caller must supply a complete record.
  unsigned char *cursor = *raw_data_ptr;
  unsigned int val32;
  // 32 bit marker
  memcpy(&val32, cursor, sizeof(val32));
  cursor += sizeof(val32);
  if (val32 != 0xabd0fefe) {
    return NULL;
  }
  // label length
  memcpy(&val32, cursor, sizeof(val32));
  cursor += sizeof(val32);
  // The label is stored without a terminator; add one after copying.
  char_32 *label32 = NULL;
  if (val32 > 0) {
    label32 = new char_32[val32 + 1];
    if (label32 == NULL) {
      return NULL;
    }
    memcpy(label32, cursor, val32 * sizeof(*label32));
    cursor += (val32 * sizeof(*label32));
    label32[val32] = 0;
  }
  // create the object; it takes ownership of the label buffer
  CharSamp *char_samp = new CharSamp();
  if (char_samp == NULL) {
    return NULL;
  }
  char_samp->label32_ = label32;
  // Fixed-size fields are copied straight into the members, in file order.
  memcpy(&char_samp->page_, cursor, sizeof(char_samp->page_));
  cursor += sizeof(char_samp->page_);
  memcpy(&char_samp->left_, cursor, sizeof(char_samp->left_));
  cursor += sizeof(char_samp->left_);
  memcpy(&char_samp->top_, cursor, sizeof(char_samp->top_));
  cursor += sizeof(char_samp->top_);
  memcpy(&char_samp->first_char_, cursor, sizeof(char_samp->first_char_));
  cursor += sizeof(char_samp->first_char_);
  memcpy(&char_samp->last_char_, cursor, sizeof(char_samp->last_char_));
  cursor += sizeof(char_samp->last_char_);
  memcpy(&char_samp->norm_top_, cursor, sizeof(char_samp->norm_top_));
  cursor += sizeof(char_samp->norm_top_);
  memcpy(&char_samp->norm_bottom_, cursor, sizeof(char_samp->norm_bottom_));
  cursor += sizeof(char_samp->norm_bottom_);
  memcpy(&char_samp->norm_aspect_ratio_, cursor,
         sizeof(char_samp->norm_aspect_ratio_));
  cursor += sizeof(char_samp->norm_aspect_ratio_);
  // Bmp8 part; the loader advances the cursor itself.
  if (!char_samp->LoadFromCharDumpFile(&cursor)) {
    delete char_samp;
    return NULL;
  }
  // Publish the new read position only on success.
  (*raw_data_ptr) = cursor;
  return char_samp;
}
CharSamp * tesseract::CharSamp::FromConComps ( ConComp **  concomp_array,
int  strt_concomp,
int  seg_flags_size,
int *  seg_flags,
bool *  left_most,
bool *  right_most,
int  word_hgt 
)
static

Definition at line 467 of file char_samp.cpp.

{
// Builds a CharSamp from the connected components at indices
// [strt_concomp, strt_concomp + seg_flags_size) of concomp_array. When
// seg_flags is non-NULL, only components whose flag is non-zero are used.
// On success *left_most and *right_most are written and a new CharSamp is
// returned; NULL is returned when no component is selected.
// word_hgt is not referenced anywhere in this body.
int concomp;
int end_concomp;
int concomp_cnt = 0;
end_concomp = strt_concomp + seg_flags_size;
// determine ID range
bool once = false;
int min_id = -1;
int max_id = -1;
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
if (!once) {
min_id = concomp_array[concomp]->ID();
max_id = concomp_array[concomp]->ID();
once = true;
} else {
// presumably widens [min_id, max_id] to include this ID -- UpdateRange is
// defined elsewhere; confirm
UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
}
concomp_cnt++;
}
}
if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
return NULL;
}
// alloc memo for computing leftmost and right most attributes
// (one flag per ID in [min_id, max_id], indexed by ID - min_id)
int id_cnt = max_id - min_id + 1;
bool *id_exist = new bool[id_cnt];
bool *left_most_exist = new bool[id_cnt];
bool *right_most_exist = new bool[id_cnt];
// NOTE(review): this early return does not free whichever of the three
// arrays were allocated
if (!id_exist || !left_most_exist || !right_most_exist)
return NULL;
memset(id_exist, 0, id_cnt * sizeof(*id_exist));
memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
// find the dimensions of the charsamp
once = false;
int left = -1;
int right = -1;
int top = -1;
int bottom = -1;
int unq_ids = 0;
int unq_left_most = 0;
int unq_right_most = 0;
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
if (!once) {
// first selected component seeds the bounding box
left = concomp_array[concomp]->Left();
right = concomp_array[concomp]->Right();
top = concomp_array[concomp]->Top();
bottom = concomp_array[concomp]->Bottom();
once = true;
} else {
// presumably grows the box to cover this component -- confirm against
// the UpdateRange overload taking two values
UpdateRange(concomp_array[concomp]->Left(),
concomp_array[concomp]->Right(), &left, &right);
UpdateRange(concomp_array[concomp]->Top(),
concomp_array[concomp]->Bottom(), &top, &bottom);
}
// count unq ids, unq left most and right mosts ids
int concomp_id = concomp_array[concomp]->ID() - min_id;
if (!id_exist[concomp_id]) {
id_exist[concomp_id] = true;
unq_ids++;
}
if (concomp_array[concomp]->LeftMost()) {
if (left_most_exist[concomp_id] == false) {
left_most_exist[concomp_id] = true;
unq_left_most++;
}
}
if (concomp_array[concomp]->RightMost()) {
if (right_most_exist[concomp_id] == false) {
right_most_exist[concomp_id] = true;
unq_right_most++;
}
}
}
}
// the per-ID flags are only needed for the counts above
delete []id_exist;
delete []left_most_exist;
delete []right_most_exist;
if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
return NULL;
}
// true when every distinct ID had at least one component flagged
// LeftMost() / RightMost() respectively
(*left_most) = (unq_left_most >= unq_ids);
(*right_most) = (unq_right_most >= unq_ids);
// create the char sample object
// (bounding box is inclusive, hence the + 1 on width and height)
CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
if (!samp) {
return NULL;
}
// set the foreground pixels
// (each component point is written as 0 at its box-relative position)
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
ConCompPt *pt_ptr = concomp_array[concomp]->Head();
while (pt_ptr) {
samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
pt_ptr = pt_ptr->Next();
}
}
}
return samp;
}
CharSamp * tesseract::CharSamp::FromRawData ( int  left,
int  top,
int  wid,
int  hgt,
unsigned char *  data 
)
static

Definition at line 283 of file char_samp.cpp.

{
  // Allocate a sample of the requested geometry, then fill its bitmap
  // from the caller's buffer.
  CharSamp *samp = new CharSamp(left, top, wid, hgt);
  if (samp == NULL) {
    return NULL;
  }
  if (samp->LoadFromRawData(data)) {
    return samp;
  }
  // Loading failed: release the partially built sample.
  delete samp;
  return NULL;
}
char_32 tesseract::CharSamp::Label ( ) const
inline

Definition at line 56 of file char_samp.h.

{
  // A single-character label is returned as that character; a missing
  // label or one of any other length yields 0.
  if (label32_ != NULL && LabelLen() == 1) {
    return label32_[0];
  }
  return 0;
}
int tesseract::CharSamp::LabelLen ( ) const
inline

Definition at line 140 of file char_samp.h.

{
  // Delegate to the static overload with this sample's label.
  return LabelLen(label32_);
}
static int tesseract::CharSamp::LabelLen ( const char_32 *  label32)
inline static

Definition at line 141 of file char_samp.h.

{
  // Number of characters before the 0 terminator; 0 for a NULL pointer.
  if (label32 == NULL) {
    return 0;
  }
  // Test index 0 as well: the previous pre-increment loop skipped it, so an
  // empty string was read past its terminator.
  int len = 0;
  while (label32[len] != 0) {
    ++len;
  }
  return len;
}
unsigned short tesseract::CharSamp::LastChar ( ) const
inline

Definition at line 55 of file char_samp.h.

{
  // Plain accessor for last_char_.
  return last_char_;
}
unsigned short tesseract::CharSamp::Left ( ) const
inline

Definition at line 46 of file char_samp.h.

{
  // Plain accessor for left_.
  return left_;
}
unsigned short tesseract::CharSamp::NormAspectRatio ( ) const
inline

Definition at line 53 of file char_samp.h.

{
  // Plain accessor for norm_aspect_ratio_.
  return norm_aspect_ratio_;
}
unsigned short tesseract::CharSamp::NormBottom ( ) const
inline

Definition at line 52 of file char_samp.h.

{
  // Plain accessor for norm_bottom_.
  return norm_bottom_;
}
unsigned short tesseract::CharSamp::NormTop ( ) const
inline

Definition at line 51 of file char_samp.h.

{
  // Plain accessor for norm_top_.
  return norm_top_;
}
unsigned short tesseract::CharSamp::Page ( ) const
inline

Definition at line 50 of file char_samp.h.

{
  // Plain accessor for page_.
  return page_;
}
unsigned short tesseract::CharSamp::Right ( ) const
inline

Definition at line 47 of file char_samp.h.

{
  // Left coordinate plus the bitmap width.
  return left_ + wid_;
}
bool tesseract::CharSamp::Save2CharDumpFile ( FILE *  fp) const

Definition at line 298 of file char_samp.cpp.

{
  // Record layout: marker, label length, label characters (no terminator),
  // eight fixed-size fields, then the bitmap. Any short write aborts with
  // false.
  unsigned int val32 = 0xabd0fefe;
  // 32 bit marker
  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    return false;
  }
  // label length
  val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    return false;
  }
  // label characters
  if (label32_ != NULL) {
    const size_t label_bytes = val32 * sizeof(*label32_);
    if (fwrite(label32_, 1, label_bytes, fp) != label_bytes) {
      return false;
    }
  }
  // Coordinates and word-context fields; || stops at the first failed write,
  // so nothing further is written after an error.
  if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_) ||
      fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_) ||
      fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_) ||
      fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
          sizeof(first_char_) ||
      fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_) ||
      fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_) ||
      fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
          sizeof(norm_bottom_) ||
      fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
          sizeof(norm_aspect_ratio_)) {
    return false;
  }
  // Bitmap part; its result is the overall result.
  return SaveBmp2CharDumpFile(fp);
}
CharSamp * tesseract::CharSamp::Scale ( int  wid,
int  hgt,
bool  isotropic = true 
)

Definition at line 261 of file char_samp.cpp.

{
  // Build a wid x hgt sample and fill its bitmap by scaling this one.
  CharSamp *result = new CharSamp(wid, hgt);
  if (result == NULL) {
    return NULL;
  }
  if (!result->ScaleFrom(this, isotropic)) {
    delete result;
    return NULL;
  }
  // Carry over position, page, label and word-context fields unchanged.
  result->SetLabel(label32_);
  result->left_ = left_;
  result->top_ = top_;
  result->page_ = page_;
  result->first_char_ = first_char_;
  result->last_char_ = last_char_;
  result->norm_top_ = norm_top_;
  result->norm_bottom_ = norm_bottom_;
  result->norm_aspect_ratio_ = norm_aspect_ratio_;
  return result;
}
ConComp ** tesseract::CharSamp::Segment ( int *  seg_cnt,
bool  right_2_left,
int  max_hist_wnd,
int  min_con_comp_size 
) const

Definition at line 392 of file char_samp.cpp.

{
  // Splits the bitmap into connected components, splits each component
  // further with ConComp::Segment, drops segments under 2 pixels in both
  // width and height, and returns the remaining segments sorted in reading
  // order. *segment_cnt receives the number of returned segments; NULL is
  // returned when there are none.
  // init
  (*segment_cnt) = 0;
  int concomp_cnt = 0;
  int seg_cnt = 0;
  // find the concomps of the image
  ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
  if (concomp_cnt <= 0 || !concomp_array) {
    if (concomp_array)
      delete []concomp_array;
    return NULL;
  }
  ConComp **seg_array = NULL;
  // segment each concomp further using vertical histogram
  for (int concomp = 0; concomp < concomp_cnt; concomp++) {
    int concomp_seg_cnt = 0;
    // segment the concomp
    ConComp **concomp_seg_array = NULL;
    ConComp **concomp_alloc_seg =
        concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
    // no segments, add the whole concomp
    if (concomp_alloc_seg == NULL) {
      concomp_seg_cnt = 1;
      // aliases one slot of concomp_array; must never be delete[]d itself
      concomp_seg_array = concomp_array + concomp;
    } else {
      // delete the original concomp, we no longer need it
      concomp_seg_array = concomp_alloc_seg;
      delete concomp_array[concomp];
    }
    // add the resulting segments
    for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
      // too small of a segment: ignore
      if (concomp_seg_array[seg_idx]->Width() < 2 &&
          concomp_seg_array[seg_idx]->Height() < 2) {
        delete concomp_seg_array[seg_idx];
      } else {
        // add the new segment
        // extend the segment array by one chunk whenever it is full
        if ((seg_cnt % kConCompAllocChunk) == 0) {
          ConComp **temp_segm_array =
              new ConComp *[seg_cnt + kConCompAllocChunk];
          if (temp_segm_array == NULL) {
            fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not "
                    "allocate additional connected components\n");
            // Free only the array ConComp::Segment allocated (delete[] of
            // NULL is a no-op). concomp_seg_array may point into
            // concomp_array, which is freed on the next line.
            delete []concomp_alloc_seg;
            delete []concomp_array;
            delete []seg_array;
            return NULL;
          }
          if (seg_cnt > 0) {
            memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
            delete []seg_array;
          }
          seg_array = temp_segm_array;
        }
        seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
      }
    }  // segment
    if (concomp_alloc_seg != NULL) {
      delete []concomp_alloc_seg;
    }
  }  // concomp
  delete []concomp_array;
  // sort the concomps from Left2Right or Right2Left, based on the reading order
  // NOTE(review): the comparator arguments were missing from this listing;
  // restored as the ConComp comparers -- confirm the names against con_comp.h.
  if (seg_cnt > 0 && seg_array != NULL) {
    qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
        ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
  }
  (*segment_cnt) = seg_cnt;
  return seg_array;
}
void tesseract::CharSamp::SetFirstChar ( unsigned short  first_char)
inline

Definition at line 104 of file char_samp.h.

{
  // Plain mutator for first_char_.
  first_char_ = first_char;
}
void tesseract::CharSamp::SetLabel ( char_32  label)
inline

Definition at line 68 of file char_samp.h.

{
  // Replace any existing label with a one-character, 0-terminated string.
  // delete[] on a null pointer is a no-op, so no guard is needed.
  delete [] label32_;
  label32_ = new char_32[2];
  if (label32_ == NULL) {
    return;
  }
  label32_[0] = label;
  label32_[1] = 0;
}
void tesseract::CharSamp::SetLabel ( const char_32 *  label32)
inline

Definition at line 78 of file char_samp.h.

{
  // Replaces the label with a private copy of label32 (NULL clears it).
  // The copy is made BEFORE the old buffer is freed, so the call is safe
  // even when label32 points into the current label, e.g.
  // SetLabel(StrLabel()).
  char_32 *new_label = NULL;
  if (label32 != NULL) {
    // remove a leading byte order mark, if any
    if (label32[0] == 0xfeff) {
      label32++;
    }
    int len = LabelLen(label32);
    new_label = new char_32[len + 1];
    if (new_label != NULL) {
      memcpy(new_label, label32, len * sizeof(*label32));
      new_label[len] = 0;
    }
  }
  // delete[] on a null pointer is a no-op
  delete [] label32_;
  label32_ = new_label;
}
void tesseract::CharSamp::SetLabel ( string  str)

Definition at line 71 of file char_samp.cpp.

{
  // Drop the current label first (delete[] of NULL is a no-op).
  delete [] label32_;
  label32_ = NULL;
  // Convert the UTF-8 input to UTF-32 and store it through the
  // pointer-taking overload.
  string_32 utf32_label;
  CubeUtils::UTF8ToUTF32(str.c_str(), &utf32_label);
  const char_32 *utf32_chars =
      reinterpret_cast<const char_32 *>(utf32_label.c_str());
  SetLabel(utf32_chars);
}
void tesseract::CharSamp::SetLastChar ( unsigned short  last_char)
inline

Definition at line 107 of file char_samp.h.

{
  // Plain mutator for last_char_.
  last_char_ = last_char;
}
void tesseract::CharSamp::SetLeft ( unsigned short  left)
inline

Definition at line 65 of file char_samp.h.

{
  // Plain mutator for left_.
  left_ = left;
}
void tesseract::CharSamp::SetNormAspectRatio ( unsigned short  norm_aspect_ratio)
inline

Definition at line 101 of file char_samp.h.

{
  // Plain mutator for norm_aspect_ratio_.
  norm_aspect_ratio_ = norm_aspect_ratio;
}
void tesseract::CharSamp::SetNormBottom ( unsigned short  norm_bottom)
inline

Definition at line 98 of file char_samp.h.

{
  // Plain mutator for norm_bottom_.
  norm_bottom_ = norm_bottom;
}
void tesseract::CharSamp::SetNormTop ( unsigned short  norm_top)
inline

Definition at line 97 of file char_samp.h.

{
  // Plain mutator for norm_top_.
  norm_top_ = norm_top;
}
void tesseract::CharSamp::SetPage ( unsigned short  page)
inline

Definition at line 67 of file char_samp.h.

{
  // Plain mutator for page_.
  page_ = page;
}
void tesseract::CharSamp::SetTop ( unsigned short  top)
inline

Definition at line 66 of file char_samp.h.

{
  // Plain mutator for top_.
  top_ = top;
}
string tesseract::CharSamp::stringLabel ( ) const

Definition at line 61 of file char_samp.cpp.

{
  // UTF-8 rendering of the label; an empty string when no label is set.
  string utf8_label;
  if (label32_ == NULL) {
    return utf8_label;
  }
  string_32 utf32_label(label32_);
  CubeUtils::UTF32ToUTF8(utf32_label.c_str(), &utf8_label);
  return utf8_label;
}
char_32* tesseract::CharSamp::StrLabel ( ) const
inline

Definition at line 62 of file char_samp.h.

{
  // Returns the internal label buffer itself, not a copy; may be NULL.
  return label32_;
}
unsigned short tesseract::CharSamp::Top ( ) const
inline

Definition at line 48 of file char_samp.h.

{
  // Plain accessor for top_.
  return top_;
}

The documentation for this class was generated from the following files: