from __future__ import annotations
import io
from typing import List, Iterator, Tuple, Optional, Any, TYPE_CHECKING, Callable
from ctypes import *
from datetime import datetime
from numbers import Number
from pdftools_sdk.internal import _lib
from pdftools_sdk.internal.utils import _string_to_utf16, _utf16_to_string
from pdftools_sdk.internal.streams import _StreamDescriptor, _NativeStream
from pdftools_sdk.internal.native_base import _NativeBase
from pdftools_sdk.internal.native_object import _NativeObject
import pdftools_sdk.internal
# (Sphinx "[docs]" source-link marker removed: extraction residue, not part of the module.)
class Engine(_NativeObject):
    """
    OCR engine

    An OCR engine is required for most OCR processing operations.
    Engines can be created using the static method :meth:`pdftools_sdk.ocr.engine.Engine.create` .

    OCR engines can be reused to process multiple files.
    However, one OCR engine can only be used to process one file at a time.
    Note that some OCR engines must be disposed in the same thread where they have been created.

    Note that for some OCR engines, only one instance can be created per process.

    Instances are context managers: leaving a ``with`` block closes the
    native engine handle (see :meth:`__exit__`).
    """

    @staticmethod
    def create(name: str) -> Engine:
        """
        Create a new OCR engine

        Optionally the `name` argument may be followed by "@" and engine creation
        parameters, e.g. "service@http://localhost:7982/"

        Args:
            name (str):
                The engine name and optional creation parameters.

        Returns:
            pdftools_sdk.ocr.engine.Engine:
                The newly created engine instance.

        Raises:
            TypeError:
                The `name` argument is not a `str`.

            pdftools_sdk.generic_error.GenericError:
                The engine could not be created, e.g. because the engine is not available.

            ValueError:
                The `name` argument is invalid.
        """
        if not isinstance(name, str):
            raise TypeError(f"Expected type {str.__name__}, but got {type(name).__name__}.")

        _lib.PdfToolsOcr_Engine_CreateW.argtypes = [c_wchar_p]
        _lib.PdfToolsOcr_Engine_CreateW.restype = c_void_p
        ret_val = _lib.PdfToolsOcr_Engine_CreateW(_string_to_utf16(name))
        # ctypes converts a NULL ``c_void_p`` result to None, which signals failure here.
        if ret_val is None:
            # NOTE(review): presumably always raises when called with False - confirm in _NativeBase.
            _NativeBase._throw_last_error(False)
        return Engine._create_dynamic_type(ret_val)

    @property
    def languages(self) -> Optional[str]:
        """
        The OCR language settings

        Set the languages used for recognition to improve detection accuracy.
        Note that for some engines it is crucial to set the used languages correctly.
        For example, ABBYY FineReader will only detect characters used in the specified languages.
        Multiple languages can be specified as a comma-separated list, e.g. "German,English".

        Default value: `None` (engine default)

        Returns:
            Optional[str]
        """
        _lib.PdfToolsOcr_Engine_GetLanguagesW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t]
        _lib.PdfToolsOcr_Engine_GetLanguagesW.restype = c_size_t
        # First call without a buffer only asks for the size needed to hold the value.
        ret_val_size = _lib.PdfToolsOcr_Engine_GetLanguagesW(self._handle, None, 0)
        if ret_val_size == 0:
            # A size of 0 is either an error or "no value"; None is returned
            # whenever the error check below does not raise.
            _NativeBase._throw_last_error()
            return None
        # Second call fills a buffer of exactly the reported size.
        ret_val = create_unicode_buffer(ret_val_size)
        _lib.PdfToolsOcr_Engine_GetLanguagesW(self._handle, ret_val, c_size_t(ret_val_size))
        return _utf16_to_string(ret_val, ret_val_size)

    @languages.setter
    def languages(self, val: Optional[str]) -> None:
        """
        The OCR language settings

        Set the languages used for recognition to improve detection accuracy.
        Note that for some engines it is crucial to set the used languages correctly.
        For example, ABBYY FineReader will only detect characters used in the specified languages.
        Multiple languages can be specified as a comma-separated list, e.g. "German,English".

        Default value: `None` (engine default)

        Args:
            val (Optional[str]):
                property value

        Raises:
            TypeError:
                The value is neither a `str` nor `None`.

            ValueError:
                The languages argument is invalid.
        """
        if val is not None and not isinstance(val, str):
            raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.")

        _lib.PdfToolsOcr_Engine_SetLanguagesW.argtypes = [c_void_p, c_wchar_p]
        _lib.PdfToolsOcr_Engine_SetLanguagesW.restype = c_bool
        # The native setter reports failure by returning false.
        if not _lib.PdfToolsOcr_Engine_SetLanguagesW(self._handle, _string_to_utf16(val)):
            _NativeBase._throw_last_error(False)

    @property
    def parameters(self) -> Optional[str]:
        """
        Engine-specific parameters

        OCR engine-specific parameters can be set to optimize performance
        or activate optional recognition features.

        Parameters are specified as semicolon-separated key=value pairs,
        e.g. "PredefinedProfile=Default;Profile=/path/to/profile.ini".

        The available parameters depend on the OCR engine.
        Commonly supported parameters include:

        - "PredefinedProfile": Name of a predefined recognition profile (default: "Default").
        - "Profile": Path to a custom profile file. Applied after the predefined profile.

        Parameter keys are case-sensitive.

        Default value: `None`

        Returns:
            Optional[str]
        """
        _lib.PdfToolsOcr_Engine_GetParametersW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t]
        _lib.PdfToolsOcr_Engine_GetParametersW.restype = c_size_t
        # First call without a buffer only asks for the size needed to hold the value.
        ret_val_size = _lib.PdfToolsOcr_Engine_GetParametersW(self._handle, None, 0)
        if ret_val_size == 0:
            # A size of 0 is either an error or "no value"; None is returned
            # whenever the error check below does not raise.
            _NativeBase._throw_last_error()
            return None
        # Second call fills a buffer of exactly the reported size.
        ret_val = create_unicode_buffer(ret_val_size)
        _lib.PdfToolsOcr_Engine_GetParametersW(self._handle, ret_val, c_size_t(ret_val_size))
        return _utf16_to_string(ret_val, ret_val_size)

    @parameters.setter
    def parameters(self, val: Optional[str]) -> None:
        """
        Engine-specific parameters

        OCR engine-specific parameters can be set to optimize performance
        or activate optional recognition features.

        Parameters are specified as semicolon-separated key=value pairs,
        e.g. "PredefinedProfile=Default;Profile=/path/to/profile.ini".

        The available parameters depend on the OCR engine.
        Commonly supported parameters include:

        - "PredefinedProfile": Name of a predefined recognition profile (default: "Default").
        - "Profile": Path to a custom profile file. Applied after the predefined profile.

        Parameter keys are case-sensitive.

        Default value: `None`

        Args:
            val (Optional[str]):
                property value

        Raises:
            TypeError:
                The value is neither a `str` nor `None`.

            ValueError:
                The parameters argument is invalid.
        """
        if val is not None and not isinstance(val, str):
            raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.")

        _lib.PdfToolsOcr_Engine_SetParametersW.argtypes = [c_void_p, c_wchar_p]
        _lib.PdfToolsOcr_Engine_SetParametersW.restype = c_bool
        # The native setter reports failure by returning false.
        if not _lib.PdfToolsOcr_Engine_SetParametersW(self._handle, _string_to_utf16(val)):
            _NativeBase._throw_last_error(False)

    def __enter__(self) -> Engine:
        """Enter the context manager; the engine itself is the managed resource."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """
        Close the native engine when leaving a ``with`` block.

        Does nothing if the handle has already been invalidated. Exceptions
        from the ``with`` body are not suppressed (the method returns `None`).
        """
        _lib.PdfToolsOcr_Engine_Close.argtypes = [c_void_p]
        _lib.PdfToolsOcr_Engine_Close.restype = c_bool
        if self._handle is not None:
            try:
                if not _lib.PdfToolsOcr_Engine_Close(self._handle):
                    super()._throw_last_error()
            finally:
                # Invalidate the handle even if closing failed, so it is never closed twice.
                self._handle = None

    @staticmethod
    def _create_dynamic_type(handle):
        """Wrap a native handle in an `Engine` instance."""
        return Engine._from_handle(handle)

    @classmethod
    def _from_handle(cls, handle):
        """
        Internal factory method for constructing an instance using an internal handle.
        This method creates an instance of the class by bypassing the public constructor.
        """
        instance = Engine.__new__(cls)  # Bypass __init__
        instance._initialize(handle)
        return instance

    def _initialize(self, handle):
        """Store the native handle by delegating to the base class initializer."""
        super()._initialize(handle)