Struct leptess::LepTess

source ·
pub struct LepTess { /* private fields */ }
Expand description

High level wrapper for Tesseract and Leptonica

Examples

Full page OCR

let mut lt = leptess::LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
lt.set_image("./tests/di.png");
println!("{}", lt.get_utf8_text().unwrap());

OCR on a specific region of the image

lt.set_rectangle(10, 10, 200, 60);
println!("{}", lt.get_utf8_text().unwrap());

Iterate bounding boxes for each recognized word

let boxes = lt.get_component_boxes(
    leptess::capi::TessPageIteratorLevel_RIL_WORD,
    true,
).unwrap();

for b in &boxes {
    println!("{:?}", b);
}

Implementations§

source§

impl LepTess

source

pub fn new( data_path: Option<&str>, lang: &str ) -> Result<LepTess, TessInitError>

source

pub fn set_image(&mut self, img_uri: impl AsRef<Path>) -> Result<(), PixError>

Set image to use for OCR.

source

pub fn set_image_from_mem(&mut self, img: &[u8]) -> Result<(), PixError>

Set the source image from an in-memory file.

Only TIFF files are supported on Windows. More file formats are supported on other operating systems.

source

pub fn get_source_y_resolution(&mut self) -> i32

source

pub fn get_image_dimensions(&self) -> Option<(u32, u32)>

source

pub fn set_source_resolution(&mut self, res: i32)

Override image resolution. Can be used to suppress “Warning: Invalid resolution 0 dpi.” output.

source

pub fn set_fallback_source_resolution(&mut self, res: i32)

Override image resolution if not detected

source

pub fn recognize(&mut self) -> i32

source

pub fn set_rectangle(&mut self, left: i32, top: i32, width: i32, height: i32)

Restrict OCR to a specific region of the image.

source

pub fn set_rectangle_from_box(&mut self, b: &Box)

Restrict OCR to a specific region of the image using a leptonica Box struct.

source

pub fn get_utf8_text(&mut self) -> Result<String, Utf8Error>

Extract text from the currently selected region of the image. By default, this is the full page, but the region can be changed through the set_rectangle API.

Example
let mut lt = leptess::LepTess::new(None, "eng").unwrap();
lt.set_image("./tests/di.png");
println!("{}", lt.get_utf8_text().unwrap());
source

pub fn get_hocr_text(&mut self, page: c_int) -> Result<String, Utf8Error>

Extract text from image as HTML with bounding box attributes.

source

pub fn get_alto_text(&mut self, page: c_int) -> Result<String, Utf8Error>

Extract text from image as XML-formatted string with Alto markup.

source

pub fn get_tsv_text(&mut self, page: c_int) -> Result<String, Utf8Error>

Extract text from image as TSV-formatted string.

source

pub fn get_lstm_box_text(&mut self, page: c_int) -> Result<String, Utf8Error>

Returns a box file for LSTM training from the internal data structures. Constructs coordinates in the original image - not just the rectangle.

source

pub fn get_word_str_box_text( &mut self, page: c_int ) -> Result<String, Utf8Error>

Extract text from image as a string formatted in the same way as a Tesseract WordStr box file used in training.

source

pub fn mean_text_conf(&self) -> i32

source

pub fn get_regions(&self) -> Option<Boxa>

source

pub fn get_component_boxes( &self, level: TessPageIteratorLevel, text_only: bool ) -> Option<Boxa>

Get the components of the given level (block, text line, word, etc.) as a Leptonica-style Boxa, in reading order. If text_only is true, then only text components are returned.

Example
Get word bounding boxes
let mut lt = leptess::LepTess::new(None, "eng").unwrap();
lt.set_image("./tests/di.png");
let boxes = lt.get_component_boxes(
    leptess::capi::TessPageIteratorLevel_RIL_WORD,
    true,
).unwrap();

for b in &boxes {
    println!("{:?}", b);
}
source

pub fn set_variable( &mut self, name: Variable, value: &str ) -> Result<(), TessSetVariableError>

Set the value of an internal Tesseract parameter.

Example
let mut lt = leptess::LepTess::new(None, "eng").unwrap();
lt.set_variable(leptess::Variable::TesseditCharBlacklist, "xyz").unwrap();

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.