pub struct LepTess { /* private fields */ }
Expand description
High-level wrapper for Tesseract and Leptonica.
Examples
Full page OCR
let mut lt = leptess::LepTess::new(Some("./tests/tessdata"), "eng").unwrap();
lt.set_image("./tests/di.png");
println!("{}", lt.get_utf8_text().unwrap());
OCR on a specific region of the image
lt.set_rectangle(10, 10, 200, 60);
println!("{}", lt.get_utf8_text().unwrap());
Iterate bounding boxes for each recognized word
let boxes = lt.get_component_boxes(
leptess::capi::TessPageIteratorLevel_RIL_WORD,
true,
).unwrap();
for b in &boxes {
println!("{:?}", b);
}
Implementations§
source§impl LepTess
impl LepTess
pub fn new( data_path: Option<&str>, lang: &str ) -> Result<LepTess, TessInitError>
sourcepub fn set_image(&mut self, img_uri: impl AsRef<Path>) -> Result<(), PixError>
pub fn set_image(&mut self, img_uri: impl AsRef<Path>) -> Result<(), PixError>
Set image to use for OCR.
sourcepub fn set_image_from_mem(&mut self, img: &[u8]) -> Result<(), PixError>
pub fn set_image_from_mem(&mut self, img: &[u8]) -> Result<(), PixError>
Set the source image from an in-memory file.
Only TIFF files are supported on Windows. More file formats are supported on other operating systems.
pub fn get_source_y_resolution(&mut self) -> i32
pub fn get_image_dimensions(&self) -> Option<(u32, u32)>
sourcepub fn set_source_resolution(&mut self, res: i32)
pub fn set_source_resolution(&mut self, res: i32)
Override image resolution. Can be used to suppress “Warning: Invalid resolution 0 dpi.” output.
sourcepub fn set_fallback_source_resolution(&mut self, res: i32)
pub fn set_fallback_source_resolution(&mut self, res: i32)
Override image resolution if not detected
pub fn recognize(&mut self) -> i32
sourcepub fn set_rectangle(&mut self, left: i32, top: i32, width: i32, height: i32)
pub fn set_rectangle(&mut self, left: i32, top: i32, width: i32, height: i32)
Restrict OCR to a specific region of the image.
sourcepub fn set_rectangle_from_box(&mut self, b: &Box)
pub fn set_rectangle_from_box(&mut self, b: &Box)
Restrict OCR to a specific region of the image using a Leptonica Box struct.
sourcepub fn get_utf8_text(&mut self) -> Result<String, Utf8Error>
pub fn get_utf8_text(&mut self) -> Result<String, Utf8Error>
Extract text from the currently selected region of the image. By default, this is the full page, but it can be changed through the set_rectangle API.
Example
let mut lt = leptess::LepTess::new(None, "eng").unwrap();
lt.set_image("./tests/di.png");
println!("{}", lt.get_utf8_text().unwrap());
sourcepub fn get_hocr_text(&mut self, page: c_int) -> Result<String, Utf8Error>
pub fn get_hocr_text(&mut self, page: c_int) -> Result<String, Utf8Error>
Extract text from image as HTML with bounding box attributes.
sourcepub fn get_alto_text(&mut self, page: c_int) -> Result<String, Utf8Error>
pub fn get_alto_text(&mut self, page: c_int) -> Result<String, Utf8Error>
Extract text from image as XML-formatted string with Alto markup.
sourcepub fn get_tsv_text(&mut self, page: c_int) -> Result<String, Utf8Error>
pub fn get_tsv_text(&mut self, page: c_int) -> Result<String, Utf8Error>
Extract text from image as TSV-formatted string.
sourcepub fn get_lstm_box_text(&mut self, page: c_int) -> Result<String, Utf8Error>
pub fn get_lstm_box_text(&mut self, page: c_int) -> Result<String, Utf8Error>
Returns a box file for LSTM training from the internal data structures. Constructs coordinates in the original image - not just the rectangle.
sourcepub fn get_word_str_box_text(
&mut self,
page: c_int
) -> Result<String, Utf8Error>
pub fn get_word_str_box_text( &mut self, page: c_int ) -> Result<String, Utf8Error>
Extract text from image as a string formatted in the same way as a Tesseract WordStr box file used in training.
pub fn mean_text_conf(&self) -> i32
pub fn get_regions(&self) -> Option<Boxa>
sourcepub fn get_component_boxes(
&self,
level: TessPageIteratorLevel,
text_only: bool
) -> Option<Boxa>
pub fn get_component_boxes( &self, level: TessPageIteratorLevel, text_only: bool ) -> Option<Boxa>
Get the components of the given level (block, textline, word, etc.) as a Leptonica-style Boxa, in reading order. If text_only is true, then only text components are returned.
Example
Get word bounding boxes
let mut lt = leptess::LepTess::new(None, "eng").unwrap();
lt.set_image("./tests/di.png");
let boxes = lt.get_component_boxes(
leptess::capi::TessPageIteratorLevel_RIL_WORD,
true,
).unwrap();
for b in &boxes {
println!("{:?}", b);
}
sourcepub fn set_variable(
&mut self,
name: Variable,
value: &str
) -> Result<(), TessSetVariableError>
pub fn set_variable( &mut self, name: Variable, value: &str ) -> Result<(), TessSetVariableError>
Set the value of an internal Tesseract parameter.
Example
let mut lt = leptess::LepTess::new(None, "eng").unwrap();
lt.set_variable(leptess::Variable::TesseditCharBlacklist, "xyz").unwrap();