Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
tess_lang_mod_edge.cpp
Go to the documentation of this file.
1
/**********************************************************************
 * File:        tess_lang_mod_edge.cpp
 * Description: Implementation of the Tesseract Language Model Edge Class
 * Author:      Ahmad Abdulkader
 * Created:     2008
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/
19
20
#include "
tess_lang_mod_edge.h
"
21
#include "
const.h
"
22
#include "
unichar.h
"
23
24
25
26
namespace
tesseract
{
27
// OOD constructor
28
TessLangModEdge::TessLangModEdge
(
CubeRecoContext
*cntxt,
int
class_id) {
29
root_ =
false
;
30
cntxt_ = cntxt;
31
dawg_ =
NULL
;
32
start_edge_ = 0;
33
end_edge_ = 0;
34
edge_mask_ = 0;
35
class_id_ = class_id;
36
str_ = cntxt_->
CharacterSet
()->
ClassString
(class_id);
37
path_cost_ = Cost();
38
}
39
40
// leading, trailing punc constructor and single byte UTF char
41
TessLangModEdge::TessLangModEdge
(
CubeRecoContext
*cntxt,
42
const
Dawg
*dawg,
EDGE_REF
edge_idx,
int
class_id) {
43
root_ =
false
;
44
cntxt_ = cntxt;
45
dawg_ = dawg;
46
start_edge_ = edge_idx;
47
end_edge_ = edge_idx;
48
edge_mask_ = 0;
49
class_id_ = class_id;
50
str_ = cntxt_->
CharacterSet
()->
ClassString
(class_id);
51
path_cost_ = Cost();
52
}
53
54
// dict constructor: multi byte UTF char
55
TessLangModEdge::TessLangModEdge
(
CubeRecoContext
*cntxt,
const
Dawg
*dawg,
56
EDGE_REF
start_edge_idx,
EDGE_REF
end_edge_idx,
57
int
class_id) {
58
root_ =
false
;
59
cntxt_ = cntxt;
60
dawg_ = dawg;
61
start_edge_ = start_edge_idx;
62
end_edge_ = end_edge_idx;
63
edge_mask_ = 0;
64
class_id_ = class_id;
65
str_ = cntxt_->
CharacterSet
()->
ClassString
(class_id);
66
path_cost_ = Cost();
67
}
68
69
char
*
TessLangModEdge::Description
()
const
{
70
char
*char_ptr =
new
char
[256];
71
if
(!char_ptr) {
72
return
NULL
;
73
}
74
75
char
dawg_str[256];
76
char
edge_str[32];
77
if
(dawg_ == (
Dawg
*)
DAWG_OOD
) {
78
strcpy(dawg_str,
"OOD"
);
79
}
else
if
(dawg_ == (
Dawg
*)
DAWG_NUMBER
) {
80
strcpy(dawg_str,
"NUM"
);
81
}
else
if
(dawg_->
permuter
() == SYSTEM_DAWG_PERM) {
82
strcpy(dawg_str,
"Main"
);
83
}
else
if
(dawg_->
permuter
() == USER_DAWG_PERM) {
84
strcpy(dawg_str,
"User"
);
85
}
else
if
(dawg_->
permuter
() == DOC_DAWG_PERM) {
86
strcpy(dawg_str,
"Doc"
);
87
}
else
{
88
strcpy(dawg_str,
"N/A"
);
89
}
90
91
sprintf(edge_str,
"%d"
, static_cast<int>(start_edge_));
92
if
(
IsLeadingPuncEdge
(edge_mask_)) {
93
strcat(edge_str,
"-LP"
);
94
}
95
if
(
IsTrailingPuncEdge
(edge_mask_)) {
96
strcat(edge_str,
"-TP"
);
97
}
98
sprintf(char_ptr,
"%s(%s)%s, Wtd Dawg Cost=%d"
,
99
dawg_str, edge_str,
IsEOW
() ?
"-EOW-"
:
""
, path_cost_);
100
101
return
char_ptr;
102
}
103
104
int
TessLangModEdge::CreateChildren
(
CubeRecoContext
*cntxt,
105
const
Dawg
*dawg,
106
NODE_REF
parent_node,
107
LangModEdge
**edge_array) {
108
int
edge_cnt = 0;
109
NodeChildVector
vec;
110
dawg->
unichar_ids_of
(parent_node, &vec);
// find all children of the parent
111
for
(
int
i = 0; i < vec.
size
(); ++i) {
112
const
NodeChild
&child = vec[i];
113
if
(child.
unichar_id
== INVALID_UNICHAR_ID)
continue
;
114
edge_array[edge_cnt] =
115
new
TessLangModEdge
(cntxt, dawg, child.
edge_ref
, child.
unichar_id
);
116
if
(edge_array[edge_cnt] !=
NULL
) edge_cnt++;
117
}
118
return
edge_cnt;
119
}
120
}
mnt
data
src
tesseract-ocr
cube
tess_lang_mod_edge.cpp
Generated on Thu Nov 1 2012 20:19:48 for Tesseract by
1.8.1