Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
baseapi.h
Go to the documentation of this file.
1 
2 // File: baseapi.h
3 // Description: Simple API for calling tesseract.
4 // Author: Ray Smith
5 // Created: Fri Oct 06 15:35:01 PDT 2006
6 //
7 // (C) Copyright 2006, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_API_BASEAPI_H__
21 #define TESSERACT_API_BASEAPI_H__
22 
23 #include <stdio.h>
24 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
25 // complexity of includes here. Use forward declarations wherever possible
26 // and hide includes of complex types in baseapi.cpp.
27 #include "platform.h"
28 #include "apitypes.h"
29 #include "thresholder.h"
30 #include "unichar.h"
31 #include "tesscallback.h"
32 #include "publictypes.h"
33 #include "pageiterator.h"
34 #include "resultiterator.h"
35 
36 template <typename T> class GenericVector;
37 class PAGE_RES;
38 class PAGE_RES_IT;
39 class ParagraphModel;
40 class BlamerBundle;
41 class BLOCK_LIST;
42 class DENORM;
43 class IMAGE;
44 class MATRIX;
45 class PBLOB;
46 class ROW;
47 class STRING;
48 class WERD;
49 struct Pix;
50 struct Box;
51 struct Pixa;
52 struct Boxa;
53 class ETEXT_DESC;
54 struct OSResults;
55 class TBOX;
56 class UNICHARSET;
57 
58 // From oldlist.h
59 // TODO(antonova): remove when oldlist is deprecated.
60 struct list_rec;
61 typedef list_rec *LIST;
62 
63 #define MAX_NUM_INT_FEATURES 512
67 struct TBLOB;
68 
69 namespace tesseract {
70 
71 class CubeRecoContext;
72 class Dawg;
73 class Dict;
74 class EquationDetect;
75 class LTRResultIterator;
76 class MutableIterator;
77 class Tesseract;
78 class Trie;
79 class Wordrec;
80 
81 typedef int (Dict::*DictFunc)(void* void_dawg_args,
82  UNICHAR_ID unichar_id, bool word_end) const;
83 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
84  const char* context,
85  int context_bytes,
86  const char* character,
87  int character_bytes);
88 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
89  const LIST &best_choices,
90  const UNICHARSET &unicharset,
91  BlamerBundle *blamer_bundle);
93 
103  public:
104  TessBaseAPI();
105  virtual ~TessBaseAPI();
106 
110  static const char* Version();
111 
116  void SetInputName(const char* name);
117 
119  void SetOutputName(const char* name);
120 
136  bool SetVariable(const char* name, const char* value);
137  bool SetDebugVariable(const char* name, const char* value);
138 
143  bool GetIntVariable(const char *name, int *value) const;
144  bool GetBoolVariable(const char *name, bool *value) const;
145  bool GetDoubleVariable(const char *name, double *value) const;
146 
151  const char *GetStringVariable(const char *name) const;
152 
156  void PrintVariables(FILE *fp) const;
157 
161  bool GetVariableAsString(const char *name, STRING *val);
162 
201  int Init(const char* datapath, const char* language, OcrEngineMode mode,
202  char **configs, int configs_size,
203  const GenericVector<STRING> *vars_vec,
204  const GenericVector<STRING> *vars_values,
205  bool set_only_non_debug_params);
206  int Init(const char* datapath, const char* language, OcrEngineMode oem) {
207  return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
208  }
209  int Init(const char* datapath, const char* language) {
210  return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
211  }
212 
221  const char* GetInitLanguagesAsString() const;
222 
228  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
229 
233  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
234 
241  int InitLangMod(const char* datapath, const char* language);
242 
247  void InitForAnalysePage();
248 
255  void ReadConfigFile(const char* filename);
257  void ReadDebugConfigFile(const char* filename);
258 
264  void SetPageSegMode(PageSegMode mode);
265 
267  PageSegMode GetPageSegMode() const;
268 
286  char* TesseractRect(const unsigned char* imagedata,
287  int bytes_per_pixel, int bytes_per_line,
288  int left, int top, int width, int height);
289 
294  void ClearAdaptiveClassifier();
295 
302  /* @{ */
303 
313  void SetImage(const unsigned char* imagedata, int width, int height,
314  int bytes_per_pixel, int bytes_per_line);
315 
326  void SetImage(const Pix* pix);
327 
332  void SetSourceResolution(int ppi);
333 
339  void SetRectangle(int left, int top, int width, int height);
340 
348  void SetThresholder(ImageThresholder* thresholder) {
349  if (thresholder_ != NULL)
350  delete thresholder_;
351  thresholder_ = thresholder;
352  ClearResults();
353  }
354 
360  Pix* GetThresholdedImage();
361 
367  Boxa* GetRegions(Pixa** pixa);
368 
376  Boxa* GetTextlines(Pixa** pixa, int** blockids);
377 
386  Boxa* GetStrips(Pixa** pixa, int** blockids);
387 
393  Boxa* GetWords(Pixa** pixa);
394 
403  Boxa* GetConnectedComponents(Pixa** cc);
404 
413  Boxa* GetComponentImages(PageIteratorLevel level,
414  bool text_only,
415  Pixa** pixa, int** blockids);
416 
423  int GetThresholdedImageScaleFactor() const;
424 
430  void DumpPGM(const char* filename);
431 
443  PageIterator* AnalyseLayout();
444 
451  int Recognize(ETEXT_DESC* monitor);
452 
459  int RecognizeForChopTest(ETEXT_DESC* monitor);
460 
477  bool ProcessPages(const char* filename,
478  const char* retry_config, int timeout_millisec,
479  STRING* text_out);
480 
492  bool ProcessPage(Pix* pix, int page_index, const char* filename,
493  const char* retry_config, int timeout_millisec,
494  STRING* text_out);
495 
504  ResultIterator* GetIterator();
505 
514  MutableIterator* GetMutableIterator();
515 
520  char* GetUTF8Text();
521 
527  char* GetHOCRText(int page_number);
535  char* GetBoxText(int page_number);
541  char* GetUNLVText();
543  int MeanTextConf();
550  int* AllWordConfidences();
551 
562  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
563 
570  void Clear();
571 
578  void End();
579 
586  int IsValidWord(const char *word);
587 
588  bool GetTextDirection(int* out_offset, float* out_slope);
589 
591  void SetDictFunc(DictFunc f);
592 
596  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
597 
599  void SetFillLatticeFunc(FillLatticeFunc f);
600 
605  bool DetectOS(OSResults*);
606 
608  void GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm,
609  INT_FEATURE_ARRAY int_features,
610  int* num_features, int* FeatureOutlineIndex);
611 
616  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
617  int right, int bottom);
618 
623  void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
624  int num_max_matches,
625  int* unichar_ids,
626  float* ratings,
627  int* num_matches_returned);
628 
630  const char* GetUnichar(int unichar_id);
631 
633  const Dawg *GetDawg(int i) const;
634 
636  int NumDawgs() const;
637 
639  static ROW *MakeTessOCRRow(float baseline, float xheight,
640  float descender, float ascender);
641 
643  static TBLOB *MakeTBLOB(Pix *pix);
644 
650  static void NormalizeTBLOB(TBLOB *tblob, ROW *row,
651  bool numeric_mode, DENORM *denorm);
652 
653  Tesseract* const tesseract() const {
654  return tesseract_;
655  }
656 
657  OcrEngineMode const oem() const {
658  return last_oem_requested_;
659  }
660 
661  void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
662 
664  CubeRecoContext *GetCubeRecoContext() const;
665 
666  void set_min_orientation_margin(double margin);
667 
672  void GetBlockTextOrientations(int** block_orientation,
673  bool** vertical_writing);
674 
676  BLOCK_LIST* FindLinesCreateBlockList();
677 
683  static void DeleteBlockList(BLOCK_LIST* block_list);
684  /* @} */
685 
686  protected:
687 
689  TESS_LOCAL bool InternalSetImage();
690 
695  TESS_LOCAL virtual void Threshold(Pix** pix);
696 
701  TESS_LOCAL int FindLines();
702 
704  TESS_LOCAL void ClearResults();
705 
711  TESS_LOCAL LTRResultIterator* GetLTRIterator();
712 
719  TESS_LOCAL int TextLength(int* blob_count);
720 
722  /* @{ */
723 
728  TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
729  int length,
730  float baseline,
731  float xheight,
732  float descender,
733  float ascender);
734 
736  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
737  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result);
738 
740  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
741 
746  TESS_LOCAL static int TesseractExtractResult(char** text,
747  int** lengths,
748  float** costs,
749  int** x0,
750  int** y0,
751  int** x1,
752  int** y1,
753  PAGE_RES* page_res);
754 
755  TESS_LOCAL const PAGE_RES* GetPageRes() const {
756  return page_res_;
757  };
758  /* @} */
759 
760  protected:
761  Tesseract* tesseract_;
766  BLOCK_LIST* block_list_;
775 
780  /* @{ */
787  /* @} */
788 
789 };
790 
791 } // namespace tesseract.
792 
793 #endif // TESSERACT_API_BASEAPI_H__