Source code for pdftools_sdk.ocr.engine

from __future__ import annotations
import io
from typing import List, Iterator, Tuple, Optional, Any, TYPE_CHECKING, Callable
from ctypes import *
from datetime import datetime
from numbers import Number
from pdftools_sdk.internal import _lib
from pdftools_sdk.internal.utils import _string_to_utf16, _utf16_to_string
from pdftools_sdk.internal.streams import _StreamDescriptor, _NativeStream
from pdftools_sdk.internal.native_base import _NativeBase
from pdftools_sdk.internal.native_object import _NativeObject

import pdftools_sdk.internal

class Engine(_NativeObject):
    """
    OCR engine

    Most OCR processing operations need an OCR engine. Instances are obtained
    through the static factory :meth:`pdftools_sdk.ocr.engine.Engine.create`.

    An engine may be reused for several files, but it can only process one
    file at a time.

    Note that some OCR engines must be disposed in the same thread in which
    they were created, and that for some OCR engines only a single instance
    can exist per process.
    """

    @staticmethod
    def create(name: str) -> Engine:
        """
        Create a new OCR engine

        The `name` argument may optionally be followed by "@" and engine
        creation parameters, e.g. "service@http://localhost:7982/".

        Args:
            name (str):
                The engine name and optional creation parameters.

        Returns:
            pdftools_sdk.ocr.engine.Engine:
                The newly created engine instance.

        Raises:
            TypeError:
                `name` is not a `str`.

            pdftools_sdk.generic_error.GenericError:
                The engine could not be created, e.g. because the engine is not available.

            ValueError:
                The `name` argument is invalid.
        """
        if not isinstance(name, str):
            raise TypeError(f"Expected type {str.__name__}, but got {type(name).__name__}.")

        # Declare the native signature before calling into the library.
        native_create = _lib.PdfToolsOcr_Engine_CreateW
        native_create.argtypes = [c_wchar_p]
        native_create.restype = c_void_p

        handle = native_create(_string_to_utf16(name))
        if handle is None:
            # A null handle signals failure; surface the native error.
            _NativeBase._throw_last_error(False)
        return Engine._create_dynamic_type(handle)

    @property
    def languages(self) -> Optional[str]:
        """
        The OCR language settings

        The languages used for recognition; setting them improves detection
        accuracy. For some engines it is crucial that the languages are set
        correctly: ABBYY FineReader, for example, only detects characters
        used in the specified languages.

        Several languages are given as a comma-separated list,
        e.g. "German,English".

        Default value: `None` (engine default)

        Returns:
            Optional[str]
        """
        native_get = _lib.PdfToolsOcr_Engine_GetLanguagesW
        native_get.argtypes = [c_void_p, POINTER(c_wchar), c_size_t]
        native_get.restype = c_size_t

        # First call without a buffer only queries the required size.
        length = native_get(self._handle, None, 0)
        if length == 0:
            # Size 0: either an error is pending or there is no value.
            # NOTE(review): presumably _throw_last_error() raises only when
            # an error is actually pending - confirm in _NativeBase.
            _NativeBase._throw_last_error()
            return None

        # Second call fills a buffer of the reported size.
        buffer = create_unicode_buffer(length)
        native_get(self._handle, buffer, c_size_t(length))
        return _utf16_to_string(buffer, length)

    @languages.setter
    def languages(self, val: Optional[str]) -> None:
        """
        The OCR language settings

        The languages used for recognition; setting them improves detection
        accuracy. For some engines it is crucial that the languages are set
        correctly: ABBYY FineReader, for example, only detects characters
        used in the specified languages.

        Several languages are given as a comma-separated list,
        e.g. "German,English".

        Default value: `None` (engine default)

        Args:
            val (Optional[str]):
                property value

        Raises:
            TypeError:
                `val` is neither a `str` nor `None`.

            ValueError:
                The languages argument is invalid.
        """
        if not (val is None or isinstance(val, str)):
            raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.")

        native_set = _lib.PdfToolsOcr_Engine_SetLanguagesW
        native_set.argtypes = [c_void_p, c_wchar_p]
        native_set.restype = c_bool

        succeeded = native_set(self._handle, _string_to_utf16(val))
        if not succeeded:
            _NativeBase._throw_last_error(False)

    @property
    def parameters(self) -> Optional[str]:
        """
        Engine-specific parameters

        Engine-specific parameters can be used to optimize performance or to
        activate optional recognition features.

        They are given as semicolon-separated key=value pairs,
        e.g. "PredefinedProfile=Default;Profile=/path/to/profile.ini".
        Which parameters are available depends on the OCR engine.

        Commonly supported parameters include:

        - "PredefinedProfile": Name of a predefined recognition profile (default: "Default").
        - "Profile": Path to a custom profile file. Applied after the predefined profile.

        Parameter keys are case-sensitive.

        Default value: `None`

        Returns:
            Optional[str]
        """
        native_get = _lib.PdfToolsOcr_Engine_GetParametersW
        native_get.argtypes = [c_void_p, POINTER(c_wchar), c_size_t]
        native_get.restype = c_size_t

        # First call without a buffer only queries the required size.
        length = native_get(self._handle, None, 0)
        if length == 0:
            # Size 0: either an error is pending or there is no value.
            # NOTE(review): presumably _throw_last_error() raises only when
            # an error is actually pending - confirm in _NativeBase.
            _NativeBase._throw_last_error()
            return None

        # Second call fills a buffer of the reported size.
        buffer = create_unicode_buffer(length)
        native_get(self._handle, buffer, c_size_t(length))
        return _utf16_to_string(buffer, length)

    @parameters.setter
    def parameters(self, val: Optional[str]) -> None:
        """
        Engine-specific parameters

        Engine-specific parameters can be used to optimize performance or to
        activate optional recognition features.

        They are given as semicolon-separated key=value pairs,
        e.g. "PredefinedProfile=Default;Profile=/path/to/profile.ini".
        Which parameters are available depends on the OCR engine.

        Commonly supported parameters include:

        - "PredefinedProfile": Name of a predefined recognition profile (default: "Default").
        - "Profile": Path to a custom profile file. Applied after the predefined profile.

        Parameter keys are case-sensitive.

        Default value: `None`

        Args:
            val (Optional[str]):
                property value

        Raises:
            TypeError:
                `val` is neither a `str` nor `None`.

            ValueError:
                The parameters argument is invalid.
        """
        if not (val is None or isinstance(val, str)):
            raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.")

        native_set = _lib.PdfToolsOcr_Engine_SetParametersW
        native_set.argtypes = [c_void_p, c_wchar_p]
        native_set.restype = c_bool

        succeeded = native_set(self._handle, _string_to_utf16(val))
        if not succeeded:
            _NativeBase._throw_last_error(False)

    def __enter__(self):
        """Enter a `with` block; the engine itself is the managed object."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Close the native engine when leaving a `with` block.

        Nothing is closed if the handle has already been cleared. Otherwise
        the native close function is called and, whether or not it succeeds,
        the handle is reset to `None` afterwards.
        """
        native_close = _lib.PdfToolsOcr_Engine_Close
        native_close.argtypes = [c_void_p]
        native_close.restype = c_bool

        if self._handle is None:
            return

        try:
            closed = native_close(self._handle)
            if not closed:
                super()._throw_last_error()
        finally:
            # Invalidate the handle even when closing reported a failure.
            self._handle = None

    @staticmethod
    def _create_dynamic_type(handle):
        """Wrap a native handle in an `Engine` instance."""
        return Engine._from_handle(handle)

    @classmethod
    def _from_handle(cls, handle):
        """
        Internal factory method for constructing an instance using an internal handle.

        The instance is allocated with `__new__`, so the public constructor
        (`__init__`) is bypassed, and is then set up via `_initialize`.
        """
        engine = Engine.__new__(cls)  # Bypass __init__
        engine._initialize(handle)
        return engine

    def _initialize(self, handle):
        """Delegate handle initialization to the base class."""
        super()._initialize(handle)