Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
lang_model.h
Go to the documentation of this file.
1
/**********************************************************************
2
* File: lang_model.h
3
* Description: Declaration of the Language Model Base Class
4
* Author: Ahmad Abdulkader
5
* Created: 2007
6
*
7
* (C) Copyright 2008, Google Inc.
8
** Licensed under the Apache License, Version 2.0 (the "License");
9
** you may not use this file except in compliance with the License.
10
** You may obtain a copy of the License at
11
** http://www.apache.org/licenses/LICENSE-2.0
12
** Unless required by applicable law or agreed to in writing, software
13
** distributed under the License is distributed on an "AS IS" BASIS,
14
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
** See the License for the specific language governing permissions and
16
** limitations under the License.
17
*
18
**********************************************************************/
19
20
// The LangModel class abstracts a state machine that is modeled as a Trie
21
// structure. The state machine models the language being recognized by the OCR
22
// Engine.
23
// This is an abstract class that is to be inherited by any language model
24
25
#ifndef LANG_MODEL_H
26
#define LANG_MODEL_H
27
28
#include "
lang_mod_edge.h
"
29
#include "
char_altlist.h
"
30
#include "
char_set.h
"
31
#include "
tuning_params.h
"
32
33
namespace
tesseract
{
34
class
LangModel
{
35
public
:
36
LangModel
() {
37
ood_enabled_
=
true
;
38
numeric_enabled_
=
true
;
39
word_list_enabled_
=
true
;
40
punc_enabled_
=
true
;
41
}
42
virtual
~LangModel
() {}
43
44
// Returns an edge pointer to the Root
45
virtual
LangModEdge
*
Root
() = 0;
46
// Returns the edges that fan-out of the specified edge and their count
47
virtual
LangModEdge
**
GetEdges
(
CharAltList
*alt_list,
48
LangModEdge
*parent_edge,
49
int
*edge_cnt) = 0;
50
// Returns is a sequence of 32-bit characters are valid within this language
51
// model or net. And EndOfWord flag is specified. If true, the sequence has
52
// to end on a valid word. The function also optionally returns the list
53
// of language model edges traversed to parse the string
54
virtual
bool
IsValidSequence
(
const
char_32
*str,
bool
eow_flag,
55
LangModEdge
**edge_array =
NULL
) = 0;
56
virtual
bool
IsLeadingPunc
(
char_32
ch) = 0;
57
virtual
bool
IsTrailingPunc
(
char_32
ch) = 0;
58
virtual
bool
IsDigit
(
char_32
ch) = 0;
59
60
// accessor functions
61
inline
bool
OOD
() {
return
ood_enabled_
; }
62
inline
bool
Numeric
() {
return
numeric_enabled_
; }
63
inline
bool
WordList
() {
return
word_list_enabled_
; }
64
inline
bool
Punc
() {
return
punc_enabled_
; }
65
inline
void
SetOOD
(
bool
ood) {
ood_enabled_
= ood; }
66
inline
void
SetNumeric
(
bool
numeric) {
numeric_enabled_
= numeric; }
67
inline
void
SetWordList
(
bool
word_list) {
word_list_enabled_
= word_list; }
68
inline
void
SetPunc
(
bool
punc_enabled) {
punc_enabled_
= punc_enabled; }
69
70
protected
:
71
bool
ood_enabled_
;
72
bool
numeric_enabled_
;
73
bool
word_list_enabled_
;
74
bool
punc_enabled_
;
75
};
76
}
77
78
#endif // LANG_MODEL_H
mnt
data
src
tesseract-ocr
cube
lang_model.h
Generated on Thu Nov 1 2012 20:19:48 for Tesseract by
1.8.1