Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::BeamSearch Class Reference

#include <beam_search.h>

List of all members.

Public Member Functions

 BeamSearch (CubeRecoContext *cntxt, bool word_mode=true)
 ~BeamSearch ()
WordAltListSearch (SearchObject *srch_obj, LangModel *lang_mod=NULL)
SearchNodeBestNode () const
char_32Alt (int alt) const
CharSamp ** BackTrack (SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
CharSamp ** BackTrack (SearchObject *srch_obj, SearchNode *node, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
int SizeCost (SearchObject *srch_obj, SearchNode *node, char_32 **str32=NULL) const
int WordUnigramCost (char_32 *str32, WordUnigrams *word_unigrams) const
int ColCnt () const
SearchColumnColumn (int col_idx) const
int BestPresortedNodeIndex () const

Detailed Description

Definition at line 44 of file beam_search.h.


Constructor & Destructor Documentation

tesseract::BeamSearch::BeamSearch ( CubeRecoContext cntxt,
bool  word_mode = true 
)
explicit

Definition at line 27 of file beam_search.cpp.

{
cntxt_ = cntxt;
seg_pt_cnt_ = 0;
col_cnt_ = 1;
col_ = NULL;
word_mode_ = word_mode;
}
tesseract::BeamSearch::~BeamSearch ( )

Definition at line 47 of file beam_search.cpp.

{
Cleanup();
}

Member Function Documentation

char_32 * tesseract::BeamSearch::Alt ( int  alt) const

Definition at line 298 of file beam_search.cpp.

{
// get the last column of the lattice
if (col_cnt_ <= 0)
return NULL;
SearchColumn *srch_col = col_[col_cnt_ - 1];
if (!srch_col)
return NULL;
// point to the last node in the selected path
if (alt >= srch_col->NodeCount() || srch_col->Nodes() == NULL) {
return NULL;
}
SearchNode *srch_node = srch_col->Nodes()[alt];
if (!srch_node)
return NULL;
// get string
char_32 *str32 = srch_node->PathString();
if (!str32)
return NULL;
return str32;
}
CharSamp ** tesseract::BeamSearch::BackTrack ( SearchObject srch_obj,
int  node_index,
int *  char_cnt,
char_32 **  str32,
Boxa **  char_boxes 
) const

Definition at line 328 of file beam_search.cpp.

{
// get the last column of the lattice
if (col_cnt_ <= 0)
return NULL;
SearchColumn *srch_col = col_[col_cnt_ - 1];
if (!srch_col)
return NULL;
// point to the last node in the selected path
if (node_index >= srch_col->NodeCount() || !srch_col->Nodes())
return NULL;
SearchNode *srch_node = srch_col->Nodes()[node_index];
if (!srch_node)
return NULL;
return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
}
CharSamp ** tesseract::BeamSearch::BackTrack ( SearchObject srch_obj,
SearchNode node,
int *  char_cnt,
char_32 **  str32,
Boxa **  char_boxes 
) const

Definition at line 352 of file beam_search.cpp.

{
if (!srch_node)
return NULL;
if (str32) {
if (*str32)
delete [](*str32); // clear existing value
*str32 = srch_node->PathString();
if (!*str32)
return NULL;
}
if (char_boxes && *char_boxes) {
boxaDestroy(char_boxes); // clear existing value
}
CharSamp **chars;
chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
if (!chars && str32)
delete []*str32;
return chars;
}
SearchNode * tesseract::BeamSearch::BestNode ( ) const

Definition at line 286 of file beam_search.cpp.

{
if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1])
return NULL;
int node_cnt = col_[col_cnt_ - 1]->NodeCount();
SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
if (node_cnt < 1 || !srch_nodes || !srch_nodes[0])
return NULL;
return srch_nodes[0];
}
int tesseract::BeamSearch::BestPresortedNodeIndex ( ) const
inline

Definition at line 81 of file beam_search.h.

{
return best_presorted_node_idx_;
};
int tesseract::BeamSearch::ColCnt ( ) const
inline

Definition at line 76 of file beam_search.h.

{ return col_cnt_; }
SearchColumn * tesseract::BeamSearch::Column ( int  col_idx) const

Definition at line 279 of file beam_search.cpp.

{
if (col < 0 || col >= col_cnt_ || !col_)
return NULL;
return col_[col];
}
WordAltList * tesseract::BeamSearch::Search ( SearchObject srch_obj,
LangModel lang_mod = NULL 
)

Definition at line 98 of file beam_search.cpp.

{
// verifications
if (!lang_mod)
lang_mod = cntxt_->LangMod();
if (!lang_mod) {
fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
"LangModel\n");
return NULL;
}
// free existing state
Cleanup();
// get seg pt count
seg_pt_cnt_ = srch_obj->SegPtCnt();
if (seg_pt_cnt_ < 0) {
return NULL;
}
col_cnt_ = seg_pt_cnt_ + 1;
// disregard suspicious cases
if (seg_pt_cnt_ > 128) {
fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is "
"suspiciously high; bailing out\n");
return NULL;
}
// alloc memory for columns
col_ = new SearchColumn *[col_cnt_];
if (!col_) {
fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
"SearchColumn array\n");
return NULL;
}
memset(col_, 0, col_cnt_ * sizeof(*col_));
// for all possible segments
for (int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
// create a search column
col_[end_seg - 1] = new SearchColumn(end_seg - 1,
cntxt_->Params()->BeamWidth());
if (!col_[end_seg - 1]) {
fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
"SearchColumn for column %d\n", end_seg - 1);
return NULL;
}
// for all possible start segments
int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
for (int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
int parent_nodes_cnt;
SearchNode **parent_nodes;
// for the root segment, we do not have a parent
if (strt_seg == 0) {
parent_nodes_cnt = 1;
parent_nodes = NULL;
} else {
// for all the existing nodes in the starting column
parent_nodes_cnt = col_[strt_seg - 1]->NodeCount();
parent_nodes = col_[strt_seg - 1]->Nodes();
}
// run the shape recognizer
CharAltList *char_alt_list = srch_obj->RecognizeSegment(strt_seg - 1,
end_seg - 1);
// for all the possible parents
for (int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
// point to the parent node
SearchNode *parent_node = !parent_nodes ? NULL
: parent_nodes[parent_idx];
LangModEdge *lm_parent_edge = !parent_node ? lang_mod->Root()
: parent_node->LangModelEdge();
// compute the cost of not having spaces within the segment range
int contig_cost = srch_obj->NoSpaceCost(strt_seg - 1, end_seg - 1);
// In phrase mode, compute the cost of not having a space before
// this character
int no_space_cost = 0;
if (!word_mode_ && strt_seg > 0) {
no_space_cost = srch_obj->NoSpaceCost(strt_seg - 1);
}
// if the no space cost is low enough
if ((contig_cost + no_space_cost) < MIN_PROB_COST) {
// Add the children nodes
CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
lm_parent_edge, char_alt_list,
contig_cost + no_space_cost);
}
// In phrase mode and if not starting at the root
if (!word_mode_ && strt_seg > 0) { // parent_node must be non-NULL
// consider starting a new word for nodes that are valid EOW
if (parent_node->LangModelEdge()->IsEOW()) {
// get the space cost
int space_cost = srch_obj->SpaceCost(strt_seg - 1);
// if the space cost is low enough
if ((contig_cost + space_cost) < MIN_PROB_COST) {
// Restart the language model and add nodes as children to the
// space node.
CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL,
char_alt_list, contig_cost + space_cost);
}
}
}
} // parent
} // strt_seg
// prune the column nodes
col_[end_seg - 1]->Prune();
// Free the column hash table. No longer needed
col_[end_seg - 1]->FreeHashTable();
} // end_seg
WordAltList *alt_list = CreateWordAltList(srch_obj);
return alt_list;
}
int tesseract::BeamSearch::SizeCost ( SearchObject srch_obj,
SearchNode node,
char_32 **  str32 = NULL 
) const

Definition at line 472 of file beam_search.cpp.

{
CharSamp **chars = NULL;
int char_cnt = 0;
if (!node)
return 0;
// Backtrack to get string and character segmentation
chars = BackTrack(srch_obj, node, &char_cnt, str32, NULL);
if (!chars)
return WORST_COST;
int size_cost = (cntxt_->SizeModel() == NULL) ? 0 :
cntxt_->SizeModel()->Cost(chars, char_cnt);
delete []chars;
return size_cost;
}
int tesseract::BeamSearch::WordUnigramCost ( char_32 str32,
WordUnigrams word_unigrams 
) const

The documentation for this class was generated from the following files: