from __future__ import annotations
import io
from typing import List, Iterator, Tuple, Optional, Any, TYPE_CHECKING, Callable
from ctypes import *
from datetime import datetime
from numbers import Number
from pdftools_sdk.internal import _lib
from pdftools_sdk.internal.utils import _string_to_utf16, _utf16_to_string
from pdftools_sdk.internal.streams import _StreamDescriptor, _NativeStream
from pdftools_sdk.internal.native_base import _NativeBase
from pdftools_sdk.internal.native_object import _NativeObject
import pdftools_sdk.internal
if TYPE_CHECKING:
    # Seen by static type checkers only: gives the property annotations below
    # real classes to resolve against without importing these modules at runtime
    # (presumably to avoid import cycles between the option modules -- confirm).
    from pdftools_sdk.ocr.image_options import ImageOptions
    from pdftools_sdk.ocr.text_options import TextOptions
    from pdftools_sdk.ocr.page_options import PageOptions
else:
    # At runtime the names are bound to the dotted paths of the classes as plain
    # strings. The properties that return these types import the real classes
    # locally, right before they are needed.
    ImageOptions = "pdftools_sdk.ocr.image_options.ImageOptions"
    TextOptions = "pdftools_sdk.ocr.text_options.TextOptions"
    PageOptions = "pdftools_sdk.ocr.page_options.PageOptions"
# Python wrapper around the native PdfToolsOcr_OcrOptions object.
class OcrOptions(_NativeObject):
    """
    The options for OCR processing

    This class aggregates all OCR processing options including resolution settings,
    image processing, text processing and page processing.

    Every property forwards to the matching ``PdfToolsOcr_OcrOptions_*`` function of
    the native library, passing the wrapped native handle (``self._handle``).
    """

    def __init__(self):
        """
        Create a new native OCR options object and wrap its handle.

        If the native constructor returns a null handle, the failure is reported
        through ``_NativeBase._throw_last_error(False)``.
        """
        _lib.PdfToolsOcr_OcrOptions_New.argtypes = []
        _lib.PdfToolsOcr_OcrOptions_New.restype = c_void_p
        ret_val = _lib.PdfToolsOcr_OcrOptions_New()
        # ctypes converts a NULL ``c_void_p`` result to ``None``.
        if ret_val is None:
            _NativeBase._throw_last_error(False)
        super()._initialize(ret_val)

    @property
    def dpi(self) -> float:
        """
        The default resolution in DPI used for OCR

        Each page's optimal OCR resolution is determined automatically, such that all images
        and text can be recognized. The default resolution is chosen if it is within the range
        of optimal resolutions.

        The range should be within the resolutions supported by the OCR engine.
        Most OCR engines are optimized for resolutions around 300 DPI.

        Default value: 300.0

        Returns:
            float
        """
        _lib.PdfToolsOcr_OcrOptions_GetDpi.argtypes = [c_void_p]
        _lib.PdfToolsOcr_OcrOptions_GetDpi.restype = c_double
        ret_val = _lib.PdfToolsOcr_OcrOptions_GetDpi(self._handle)
        # -1.0 is handled as a possible error marker: the library's last error is
        # consulted before the value is returned.
        # NOTE(review): presumably a no-op when no error is pending -- confirm in _NativeBase.
        if ret_val == -1.0:
            _NativeBase._throw_last_error()
        return ret_val

    @dpi.setter
    def dpi(self, val: float) -> None:
        """
        The default resolution in DPI used for OCR

        Each page's optimal OCR resolution is determined automatically, such that all images
        and text can be recognized. The default resolution is chosen if it is within the range
        of optimal resolutions.

        The range should be within the resolutions supported by the OCR engine.
        Most OCR engines are optimized for resolutions around 300 DPI.

        Default value: 300.0

        Args:
            val (float):
                property value

        Raises:
            TypeError:
                `val` is not an instance of `numbers.Number`.

            ValueError:
                The DPI value is invalid.
        """
        if not isinstance(val, Number):
            raise TypeError(f"Expected type {Number.__name__}, but got {type(val).__name__}.")
        _lib.PdfToolsOcr_OcrOptions_SetDpi.argtypes = [c_void_p, c_double]
        _lib.PdfToolsOcr_OcrOptions_SetDpi.restype = c_bool
        # A false result from the native setter signals failure.
        if not _lib.PdfToolsOcr_OcrOptions_SetDpi(self._handle, val):
            _NativeBase._throw_last_error(False)

    @property
    def min_dpi(self) -> float:
        """
        The minimum resolution in DPI used for OCR

        Default value: 200.0

        Returns:
            float
        """
        _lib.PdfToolsOcr_OcrOptions_GetMinDpi.argtypes = [c_void_p]
        _lib.PdfToolsOcr_OcrOptions_GetMinDpi.restype = c_double
        ret_val = _lib.PdfToolsOcr_OcrOptions_GetMinDpi(self._handle)
        # -1.0 is handled as a possible error marker (see the last-error check).
        if ret_val == -1.0:
            _NativeBase._throw_last_error()
        return ret_val

    @min_dpi.setter
    def min_dpi(self, val: float) -> None:
        """
        The minimum resolution in DPI used for OCR

        Default value: 200.0

        Args:
            val (float):
                property value

        Raises:
            TypeError:
                `val` is not an instance of `numbers.Number`.

            ValueError:
                The minimum DPI value is invalid.
        """
        if not isinstance(val, Number):
            raise TypeError(f"Expected type {Number.__name__}, but got {type(val).__name__}.")
        _lib.PdfToolsOcr_OcrOptions_SetMinDpi.argtypes = [c_void_p, c_double]
        _lib.PdfToolsOcr_OcrOptions_SetMinDpi.restype = c_bool
        # A false result from the native setter signals failure.
        if not _lib.PdfToolsOcr_OcrOptions_SetMinDpi(self._handle, val):
            _NativeBase._throw_last_error(False)

    @property
    def max_dpi(self) -> float:
        """
        The maximum resolution in DPI used for OCR

        Default value: 400.0

        Returns:
            float
        """
        _lib.PdfToolsOcr_OcrOptions_GetMaxDpi.argtypes = [c_void_p]
        _lib.PdfToolsOcr_OcrOptions_GetMaxDpi.restype = c_double
        ret_val = _lib.PdfToolsOcr_OcrOptions_GetMaxDpi(self._handle)
        # -1.0 is handled as a possible error marker (see the last-error check).
        if ret_val == -1.0:
            _NativeBase._throw_last_error()
        return ret_val

    @max_dpi.setter
    def max_dpi(self, val: float) -> None:
        """
        The maximum resolution in DPI used for OCR

        Default value: 400.0

        Args:
            val (float):
                property value

        Raises:
            TypeError:
                `val` is not an instance of `numbers.Number`.

            ValueError:
                The maximum DPI value is invalid.
        """
        if not isinstance(val, Number):
            raise TypeError(f"Expected type {Number.__name__}, but got {type(val).__name__}.")
        _lib.PdfToolsOcr_OcrOptions_SetMaxDpi.argtypes = [c_void_p, c_double]
        _lib.PdfToolsOcr_OcrOptions_SetMaxDpi.restype = c_bool
        # A false result from the native setter signals failure.
        if not _lib.PdfToolsOcr_OcrOptions_SetMaxDpi(self._handle, val):
            _NativeBase._throw_last_error(False)

    @property
    def process_embedded_files(self) -> bool:
        """
        Whether to process embedded files recursively

        If enabled, embedded PDF files are also processed with OCR.
        The default is to copy all embedded files as-is.

        Default value: `False`

        Returns:
            bool
        """
        _lib.PdfToolsOcr_OcrOptions_GetProcessEmbeddedFiles.argtypes = [c_void_p]
        _lib.PdfToolsOcr_OcrOptions_GetProcessEmbeddedFiles.restype = c_bool
        ret_val = _lib.PdfToolsOcr_OcrOptions_GetProcessEmbeddedFiles(self._handle)
        # False is also the documented default value, so a false result alone is
        # ambiguous; the library's last error is consulted before returning it.
        # NOTE(review): presumably a no-op when no error is pending -- confirm in _NativeBase.
        if not ret_val:
            _NativeBase._throw_last_error()
        return ret_val

    @process_embedded_files.setter
    def process_embedded_files(self, val: bool) -> None:
        """
        Whether to process embedded files recursively

        If enabled, embedded PDF files are also processed with OCR.
        The default is to copy all embedded files as-is.

        Default value: `False`

        Args:
            val (bool):
                property value

        Raises:
            TypeError:
                `val` is not a `bool`.
        """
        if not isinstance(val, bool):
            raise TypeError(f"Expected type {bool.__name__}, but got {type(val).__name__}.")
        _lib.PdfToolsOcr_OcrOptions_SetProcessEmbeddedFiles.argtypes = [c_void_p, c_bool]
        _lib.PdfToolsOcr_OcrOptions_SetProcessEmbeddedFiles.restype = c_bool
        # A false result from the native setter signals failure.
        if not _lib.PdfToolsOcr_OcrOptions_SetProcessEmbeddedFiles(self._handle, val):
            _NativeBase._throw_last_error(False)

    @property
    def image_options(self) -> ImageOptions:
        """
        The options for image processing

        Options controlling how images in the PDF are processed during OCR.

        Returns:
            pdftools_sdk.ocr.image_options.ImageOptions
        """
        # Imported locally: at module level the name is only a placeholder at runtime.
        from pdftools_sdk.ocr.image_options import ImageOptions

        _lib.PdfToolsOcr_OcrOptions_GetImageOptions.argtypes = [c_void_p]
        _lib.PdfToolsOcr_OcrOptions_GetImageOptions.restype = c_void_p
        ret_val = _lib.PdfToolsOcr_OcrOptions_GetImageOptions(self._handle)
        # ctypes converts a NULL ``c_void_p`` result to ``None``.
        if ret_val is None:
            _NativeBase._throw_last_error(False)
        return ImageOptions._create_dynamic_type(ret_val)

    @property
    def text_options(self) -> TextOptions:
        """
        The options for text processing

        Options controlling how existing text is processed during OCR.

        Returns:
            pdftools_sdk.ocr.text_options.TextOptions
        """
        # Imported locally: at module level the name is only a placeholder at runtime.
        from pdftools_sdk.ocr.text_options import TextOptions

        _lib.PdfToolsOcr_OcrOptions_GetTextOptions.argtypes = [c_void_p]
        _lib.PdfToolsOcr_OcrOptions_GetTextOptions.restype = c_void_p
        ret_val = _lib.PdfToolsOcr_OcrOptions_GetTextOptions(self._handle)
        # ctypes converts a NULL ``c_void_p`` result to ``None``.
        if ret_val is None:
            _NativeBase._throw_last_error(False)
        return TextOptions._create_dynamic_type(ret_val)

    @property
    def page_options(self) -> PageOptions:
        """
        The options for page processing

        Options controlling page-level OCR processing and tagging.

        Returns:
            pdftools_sdk.ocr.page_options.PageOptions
        """
        # Imported locally: at module level the name is only a placeholder at runtime.
        from pdftools_sdk.ocr.page_options import PageOptions

        _lib.PdfToolsOcr_OcrOptions_GetPageOptions.argtypes = [c_void_p]
        _lib.PdfToolsOcr_OcrOptions_GetPageOptions.restype = c_void_p
        ret_val = _lib.PdfToolsOcr_OcrOptions_GetPageOptions(self._handle)
        # ctypes converts a NULL ``c_void_p`` result to ``None``.
        if ret_val is None:
            _NativeBase._throw_last_error(False)
        return PageOptions._create_dynamic_type(ret_val)

    @staticmethod
    def _create_dynamic_type(handle):
        """
        Internal: wrap an existing native handle in an `OcrOptions` instance.
        """
        return OcrOptions._from_handle(handle)

    @classmethod
    def _from_handle(cls, handle):
        """
        Internal factory method for constructing an instance using an internal handle.

        This method creates an instance of the class by bypassing the public constructor.
        """
        instance = OcrOptions.__new__(cls)  # Bypass __init__
        instance._initialize(handle)
        return instance

    def _initialize(self, handle):
        """
        Internal: hand the native handle to the base class initialisation.
        """
        super()._initialize(handle)