Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tordmain.h File Reference
#include <time.h>
#include "params.h"
#include "ocrblock.h"
#include "blobs.h"
#include "blobbox.h"
#include "notdll.h"

Go to the source code of this file.

Namespaces

namespace  tesseract

Functions

void make_blocks_from_blobs (TBLOB *tessblobs, const char *filename, ICOORD page_tr, BOOL8 do_shift, BLOCK_LIST *blocks)
void SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob)
void assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
void textord_page (ICOORD page_tr, BLOCK_LIST *blocks, TO_BLOCK_LIST *land_blocks, TO_BLOCK_LIST *port_blocks, tesseract::Tesseract *)
void tweak_row_baseline (ROW *row, double blshift_maxshift, double blshift_xfraction)
inT32 blob_y_order (void *item1, void *item2)

Function Documentation

void assign_blobs_to_blocks2 ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 156 of file tordmain.cpp.

{
  // For every BLOCK in `blocks`, build a TO_BLOCK wrapping it, move the
  // block's blobs across as BLOBNBOXes (recording a stroke width for each
  // from `pix`), and append the TO_BLOCK to `port_blocks`.
  BLOCK_IT src_block_it(blocks);
  C_BLOB_IT cblob_it;     // walks the source C_BLOB lists
  BLOBNBOX_IT bbox_it;    // appends to the destination BLOBNBOX lists
  TO_BLOCK_IT dest_block_it(port_blocks);

  src_block_it.mark_cycle_pt();
  while (!src_block_it.cycled_list()) {
    BLOCK *src_block = src_block_it.data();
    TO_BLOCK *dest_block = new TO_BLOCK(src_block);

    // Accepted outlines: block->blob_list() ==> dest_block->blobs.
    bbox_it.set_to_list(&dest_block->blobs);
    cblob_it.set_to_list(src_block->blob_list());
    for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list();
         cblob_it.forward()) {
      // extract() takes the C_BLOB out of the source list before wrapping it.
      BLOBNBOX *wrapped = new BLOBNBOX(cblob_it.extract());
      SetBlobStrokeWidth(pix, wrapped);
      bbox_it.add_after_then_move(wrapped);
    }

    // Rejected outlines: block->reject_blobs() ==> dest_block->noise_blobs.
    // Keeping them in a separate list lets later stages reconsider them,
    // sort them back into rows and recover outlines rejected by mistake.
    bbox_it.set_to_list(&dest_block->noise_blobs);
    cblob_it.set_to_list(src_block->reject_blobs());
    for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list();
         cblob_it.forward()) {
      BLOBNBOX *wrapped = new BLOBNBOX(cblob_it.extract());
      SetBlobStrokeWidth(pix, wrapped);
      bbox_it.add_after_then_move(wrapped);
    }

    dest_block_it.add_after_then_move(dest_block);
    src_block_it.forward();
  }
}
inT32 blob_y_order ( void *  item1,
void *  item2 
)

Definition at line 793 of file tordmain.cpp.

{
  // Comparator over two BLOBNBOX* entries passed by address: each argument
  // points at a BLOBNBOX pointer, so dereference once to reach the blob.
  BLOBNBOX *first = *(BLOBNBOX **) item1;
  BLOBNBOX *second = *(BLOBNBOX **) item2;
  const TBOX &first_box = first->bounding_box();
  const TBOX &second_box = second->bounding_box();

  // Primary key: bottom edge, the larger bottom ordering first.
  if (first_box.bottom() > second_box.bottom())
    return -1;
  if (first_box.bottom() < second_box.bottom())
    return 1;

  // Tie on bottom: fall back to the left edge, the smaller left ordering
  // first.
  if (first_box.left() < second_box.left())
    return -1;
  if (first_box.left() > second_box.left())
    return 1;

  return 0;  // same bottom and same left
}
void make_blocks_from_blobs ( TBLOB *  tessblobs,
const char *  filename,
ICOORD  page_tr,
BOOL8  do_shift,
BLOCK_LIST *  blocks 
)
void SetBlobStrokeWidth ( Pix *  pix,
BLOBNBOX *  blob 
)

Definition at line 56 of file tordmain.cpp.

{
  // Estimates the horizontal and vertical stroke widths of `blob` from a
  // distance function computed over the blob's bounding-box region of `pix`,
  // and stores the results on the blob via set_horz_stroke_width() and
  // set_vert_stroke_width().
  //
  //   pix   image the blob was found in; only read here.
  //   blob  blob whose bounding_box() selects the region and which receives
  //         the computed widths.

  // Cut the blob rectangle into a Pix.
  int pix_height = pixGetHeight(pix);
  const TBOX& box = blob->bounding_box();
  int width = box.width();
  int height = box.height();
  // The y origin is flipped (pix_height - top) when building the clip box.
  // NOTE(review): presumably TBOX y grows upwards while Pix rows grow
  // downwards -- confirm.
  Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(),
                                width, height);
  Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, NULL);
  boxDestroy(&blob_pix_box);
  // The result is read one byte per pixel below (GET_DATA_BYTE).
  Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
  pixDestroy(&pix_blob);

  // Compute the stroke widths.
  uinT32* data = pixGetData(dist_pix);
  int wpl = pixGetWpl(dist_pix);

  // Horizontal width of stroke.
  STATS h_stats(0, width + 1);
  for (int y = 0; y < height; ++y) {
    uinT32* pixels = data + y*wpl;
    // prev_pixel / pixel / next_pixel slide along the row; inside the loop
    // `pixel` is the value at column x - 1.
    int prev_pixel = 0;
    int pixel = GET_DATA_BYTE(pixels, 0);
    for (int x = 1; x < width; ++x) {
      int next_pixel = GET_DATA_BYTE(pixels, x);
      // We are looking for a pixel that is equal to its vertical neighbours,
      // yet greater than its left neighbour.
      if (prev_pixel < pixel &&
          (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
          (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
        if (pixel > next_pixel) {
          // Single local max, so an odd width.
          h_stats.add(pixel * 2 - 1, 1);
        } else if (pixel == next_pixel && x + 1 < width &&
                   pixel > GET_DATA_BYTE(pixels, x + 1)) {
          // Double local max, so an even width.
          h_stats.add(pixel * 2, 1);
        }
      }
      prev_pixel = pixel;
      pixel = next_pixel;
    }
  }

  // Vertical width of stroke.
  STATS v_stats(0, height + 1);
  for (int x = 0; x < width; ++x) {
    // Same sliding window, down the column; inside the loop `pixel` is the
    // value at row y - 1 and `pixels` points at row y.
    int prev_pixel = 0;
    int pixel = GET_DATA_BYTE(data, x);
    for (int y = 1; y < height; ++y) {
      uinT32* pixels = data + y*wpl;
      int next_pixel = GET_DATA_BYTE(pixels, x);
      // We are looking for a pixel that is equal to its horizontal neighbours,
      // yet greater than its upper neighbour.
      if (prev_pixel < pixel &&
          (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
          (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
        if (pixel > next_pixel) {
          // Single local max, so an odd width.
          v_stats.add(pixel * 2 - 1, 1);
        } else if (pixel == next_pixel && y + 1 < height &&
                   pixel > GET_DATA_BYTE(pixels + wpl, x)) {
          // Double local max, so an even width.
          v_stats.add(pixel * 2, 1);
        }
      }
      prev_pixel = pixel;
      pixel = next_pixel;
    }
  }
  pixDestroy(&dist_pix);

  // Store the horizontal and vertical width in the blob, keeping both
  // widths if there is enough information, otherwise only the one with
  // the most samples.
  // If there are insufficient samples, store zero, rather than using
  // 2*area/perimeter, as the numbers that gives do not match the numbers
  // from the distance method.
  // Every branch writes both widths, so the direction that is not kept is
  // explicitly zero rather than whatever the blob held before.
  const int min_samples = (width + height) / 4;
  if (h_stats.get_total() >= min_samples) {
    blob->set_horz_stroke_width(h_stats.ile(0.5f));
    if (v_stats.get_total() >= min_samples)
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
    else
      blob->set_vert_stroke_width(0.0f);
  } else {
    if (v_stats.get_total() >= min_samples ||
        v_stats.get_total() > h_stats.get_total()) {
      blob->set_horz_stroke_width(0.0f);
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
    } else {
      // Neither direction has enough samples and horizontal has at least as
      // many as vertical: use it only if it has more than two samples.
      blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f)
                                                          : 0.0f);
      blob->set_vert_stroke_width(0.0f);
    }
  }
}
void textord_page ( ICOORD  page_tr,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  land_blocks,
TO_BLOCK_LIST *  port_blocks,
tesseract::Tesseract *  
)
void tweak_row_baseline ( ROW *  row,
double  blshift_maxshift,
double  blshift_xfraction 
)

Definition at line 680 of file tordmain.cpp.

{
  // Rebuilds row->baseline as a new QSPLINE. Blobs whose bottom lies within
  // blshift_maxshift x-heights of the current baseline, and whose height is
  // more than blshift_xfraction x-heights, get their own segment with a and b
  // zero and c set to the blob's bottom; elsewhere the existing baseline
  // segments are copied across.
  WERD_IT words(row->word_list());

  // Count the blobs in the row; this bounds the number of segments needed.
  inT32 total_blobs = 0;
  for (words.mark_cycle_pt(); !words.cycled_list(); words.forward())
    total_blobs += words.data()->cblob_list()->length();
  if (total_blobs == 0)
    return;  // no blobs, leave the baseline untouched

  // Work buffers: one start x per segment plus a closing x, and three
  // coefficients per segment.
  const inT32 max_segments = total_blobs + row->baseline.segments;
  inT32 *seg_starts =
      (inT32 *) alloc_mem((max_segments + 1) * sizeof(inT32));
  double *seg_coeffs =
      (double *) alloc_mem(max_segments * 3 * sizeof(double));

  inT32 src_seg = 0;  // index into the existing baseline's segments
  inT32 out_seg = 0;  // index of the segment currently being written
  seg_starts[0] = row->baseline.xcoords[0];

  C_BLOB_IT blobs;
  for (words.mark_cycle_pt(); !words.cycled_list(); words.forward()) {
    blobs.set_to_list(words.data()->cblob_list());
    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
      TBOX box = blobs.data()->bounding_box();
      float mid_x = (box.left() + box.right()) / 2.0;

      // Distance of the blob bottom from the baseline, in x-heights.
      float offset = box.bottom() - row->base_line(mid_x);
      offset = (offset < 0 ? -offset : offset) / row->x_height();

      if (offset < blshift_maxshift &&
          box.height() / row->x_height() > blshift_xfraction) {
        // Give this blob its own segment sitting on the blob's bottom.
        if (seg_starts[out_seg] >= mid_x)
          seg_starts[out_seg] = box.left();
        double *flat = seg_coeffs + out_seg * 3;
        flat[0] = 0;
        flat[1] = 0;
        flat[2] = box.bottom();
        ++out_seg;
        seg_starts[out_seg] = box.right() + 1;
      } else if (seg_starts[out_seg] <= mid_x) {
        // Copy every existing segment that ends at or before the blob
        // centre, skipping those that end at or before the current start.
        while (row->baseline.xcoords[src_seg + 1] <= mid_x
               && src_seg < row->baseline.segments - 1) {
          if (row->baseline.xcoords[src_seg + 1] > seg_starts[out_seg]) {
            double *copied = seg_coeffs + out_seg * 3;
            copied[0] = row->baseline.quadratics[src_seg].a;
            copied[1] = row->baseline.quadratics[src_seg].b;
            copied[2] = row->baseline.quadratics[src_seg].c;
            ++out_seg;
            seg_starts[out_seg] = row->baseline.xcoords[src_seg + 1];
          }
          ++src_seg;
        }
        // Then the existing segment the loop stopped on.
        double *copied = seg_coeffs + out_seg * 3;
        copied[0] = row->baseline.quadratics[src_seg].a;
        copied[1] = row->baseline.quadratics[src_seg].b;
        copied[2] = row->baseline.quadratics[src_seg].c;
        ++out_seg;
        seg_starts[out_seg] = row->baseline.xcoords[src_seg + 1];
      }
    }
  }

  // Skip existing segments that end at or before the last start written.
  while (src_seg < row->baseline.segments
         && row->baseline.xcoords[src_seg + 1] <= seg_starts[out_seg])
    ++src_seg;

  // Copy whatever is left of the existing baseline.
  while (src_seg < row->baseline.segments) {
    double *copied = seg_coeffs + out_seg * 3;
    copied[0] = row->baseline.quadratics[src_seg].a;
    copied[1] = row->baseline.quadratics[src_seg].b;
    copied[2] = row->baseline.quadratics[src_seg].c;
    ++out_seg;
    ++src_seg;
    seg_starts[out_seg] = row->baseline.xcoords[src_seg];
  }

  // Replace the row's baseline with the rebuilt spline, then release the
  // work buffers.
  row->baseline = QSPLINE(out_seg, seg_starts, seg_coeffs);
  free_mem(seg_starts);
  free_mem(seg_coeffs);
}