# Source code for pdftools_sdk.ocr.ocr_options

from __future__ import annotations
import io
from typing import List, Iterator, Tuple, Optional, Any, TYPE_CHECKING, Callable
from ctypes import *
from datetime import datetime
from numbers import Number
from pdftools_sdk.internal import _lib
from pdftools_sdk.internal.utils import _string_to_utf16, _utf16_to_string
from pdftools_sdk.internal.streams import _StreamDescriptor, _NativeStream
from pdftools_sdk.internal.native_base import _NativeBase
from pdftools_sdk.internal.native_object import _NativeObject

import pdftools_sdk.internal

if TYPE_CHECKING:
    from pdftools_sdk.ocr.image_options import ImageOptions
    from pdftools_sdk.ocr.text_options import TextOptions
    from pdftools_sdk.ocr.page_options import PageOptions

else:
    ImageOptions = "pdftools_sdk.ocr.image_options.ImageOptions"
    TextOptions = "pdftools_sdk.ocr.text_options.TextOptions"
    PageOptions = "pdftools_sdk.ocr.page_options.PageOptions"


class OcrOptions(_NativeObject):
    """
    The options for OCR processing

    Bundles every OCR processing option in one object: the resolution
    settings as well as the image, text and page processing options.
    """

    def __init__(self):
        """
        Create a new native OCR options object and bind this instance to it.
        """
        create = _lib.PdfToolsOcr_OcrOptions_New
        create.argtypes = []
        create.restype = c_void_p

        handle = create()
        if handle is None:
            # A null handle means the native constructor failed.
            _NativeBase._throw_last_error(False)
        super()._initialize(handle)

    @property
    def dpi(self) -> float:
        """
        The default resolution in DPI used for OCR

        The optimal OCR resolution of each page is determined automatically,
        such that all images and text can be recognized. The default
        resolution is chosen if it lies within the range of optimal
        resolutions. The range should be within the resolutions supported by
        the OCR engine; most OCR engines are optimized for resolutions around
        300 DPI.

        Default value: 300.0

        Returns:
            float
        """
        getter = _lib.PdfToolsOcr_OcrOptions_GetDpi
        getter.argtypes = [c_void_p]
        getter.restype = c_double

        dpi_value = getter(self._handle)
        if dpi_value == -1.0:
            # -1.0 is the value that triggers the last-error check.
            _NativeBase._throw_last_error()
        return dpi_value

    @dpi.setter
    def dpi(self, val: float) -> None:
        """
        The default resolution in DPI used for OCR

        The optimal OCR resolution of each page is determined automatically,
        such that all images and text can be recognized. The default
        resolution is chosen if it lies within the range of optimal
        resolutions. The range should be within the resolutions supported by
        the OCR engine; most OCR engines are optimized for resolutions around
        300 DPI.

        Default value: 300.0

        Args:
            val (float): property value

        Raises:
            TypeError: `val` is not an instance of `numbers.Number`.
            ValueError: The DPI value is invalid.
        """
        if not isinstance(val, Number):
            raise TypeError(f"Expected type {Number.__name__}, but got {type(val).__name__}.")

        setter = _lib.PdfToolsOcr_OcrOptions_SetDpi
        setter.argtypes = [c_void_p, c_double]
        setter.restype = c_bool

        succeeded = setter(self._handle, val)
        if not succeeded:
            _NativeBase._throw_last_error(False)

    @property
    def min_dpi(self) -> float:
        """
        The minimum resolution in DPI used for OCR

        Default value: 200.0

        Returns:
            float
        """
        getter = _lib.PdfToolsOcr_OcrOptions_GetMinDpi
        getter.argtypes = [c_void_p]
        getter.restype = c_double

        lower_bound = getter(self._handle)
        if lower_bound == -1.0:
            # -1.0 is the value that triggers the last-error check.
            _NativeBase._throw_last_error()
        return lower_bound

    @min_dpi.setter
    def min_dpi(self, val: float) -> None:
        """
        The minimum resolution in DPI used for OCR

        Default value: 200.0

        Args:
            val (float): property value

        Raises:
            TypeError: `val` is not an instance of `numbers.Number`.
            ValueError: The minimum DPI value is invalid.
        """
        if not isinstance(val, Number):
            raise TypeError(f"Expected type {Number.__name__}, but got {type(val).__name__}.")

        setter = _lib.PdfToolsOcr_OcrOptions_SetMinDpi
        setter.argtypes = [c_void_p, c_double]
        setter.restype = c_bool

        succeeded = setter(self._handle, val)
        if not succeeded:
            _NativeBase._throw_last_error(False)

    @property
    def max_dpi(self) -> float:
        """
        The maximum resolution in DPI used for OCR

        Default value: 400.0

        Returns:
            float
        """
        getter = _lib.PdfToolsOcr_OcrOptions_GetMaxDpi
        getter.argtypes = [c_void_p]
        getter.restype = c_double

        upper_bound = getter(self._handle)
        if upper_bound == -1.0:
            # -1.0 is the value that triggers the last-error check.
            _NativeBase._throw_last_error()
        return upper_bound

    @max_dpi.setter
    def max_dpi(self, val: float) -> None:
        """
        The maximum resolution in DPI used for OCR

        Default value: 400.0

        Args:
            val (float): property value

        Raises:
            TypeError: `val` is not an instance of `numbers.Number`.
            ValueError: The maximum DPI value is invalid.
        """
        if not isinstance(val, Number):
            raise TypeError(f"Expected type {Number.__name__}, but got {type(val).__name__}.")

        setter = _lib.PdfToolsOcr_OcrOptions_SetMaxDpi
        setter.argtypes = [c_void_p, c_double]
        setter.restype = c_bool

        succeeded = setter(self._handle, val)
        if not succeeded:
            _NativeBase._throw_last_error(False)

    @property
    def process_embedded_files(self) -> bool:
        """
        Whether to process embedded files recursively

        When enabled, embedded PDF files are processed with OCR as well.
        The default is to copy all embedded files as-is.

        Default value: `False`

        Returns:
            bool
        """
        getter = _lib.PdfToolsOcr_OcrOptions_GetProcessEmbeddedFiles
        getter.argtypes = [c_void_p]
        getter.restype = c_bool

        enabled = getter(self._handle)
        if not enabled:
            # The last-error check runs on every falsy result.
            # NOTE(review): presumably `_throw_last_error()` only raises when
            # an error is actually pending, since `False` is also a
            # legitimate value here - confirm in `_NativeBase`.
            _NativeBase._throw_last_error()
        return enabled

    @process_embedded_files.setter
    def process_embedded_files(self, val: bool) -> None:
        """
        Whether to process embedded files recursively

        When enabled, embedded PDF files are processed with OCR as well.
        The default is to copy all embedded files as-is.

        Default value: `False`

        Args:
            val (bool): property value

        Raises:
            TypeError: `val` is not a `bool`.
        """
        if not isinstance(val, bool):
            raise TypeError(f"Expected type {bool.__name__}, but got {type(val).__name__}.")

        setter = _lib.PdfToolsOcr_OcrOptions_SetProcessEmbeddedFiles
        setter.argtypes = [c_void_p, c_bool]
        setter.restype = c_bool

        succeeded = setter(self._handle, val)
        if not succeeded:
            _NativeBase._throw_last_error(False)

    @property
    def image_options(self) -> ImageOptions:
        """
        The options for image processing

        Options controlling how images in the PDF are processed during OCR.

        Returns:
            pdftools_sdk.ocr.image_options.ImageOptions
        """
        # Imported here because the module-level import is only done under
        # TYPE_CHECKING.
        from pdftools_sdk.ocr.image_options import ImageOptions

        getter = _lib.PdfToolsOcr_OcrOptions_GetImageOptions
        getter.argtypes = [c_void_p]
        getter.restype = c_void_p

        native_handle = getter(self._handle)
        if native_handle is None:
            _NativeBase._throw_last_error(False)
        return ImageOptions._create_dynamic_type(native_handle)

    @property
    def text_options(self) -> TextOptions:
        """
        The options for text processing

        Options controlling how existing text is processed during OCR.

        Returns:
            pdftools_sdk.ocr.text_options.TextOptions
        """
        # Imported here because the module-level import is only done under
        # TYPE_CHECKING.
        from pdftools_sdk.ocr.text_options import TextOptions

        getter = _lib.PdfToolsOcr_OcrOptions_GetTextOptions
        getter.argtypes = [c_void_p]
        getter.restype = c_void_p

        native_handle = getter(self._handle)
        if native_handle is None:
            _NativeBase._throw_last_error(False)
        return TextOptions._create_dynamic_type(native_handle)

    @property
    def page_options(self) -> PageOptions:
        """
        The options for page processing

        Options controlling page-level OCR processing and tagging.

        Returns:
            pdftools_sdk.ocr.page_options.PageOptions
        """
        # Imported here because the module-level import is only done under
        # TYPE_CHECKING.
        from pdftools_sdk.ocr.page_options import PageOptions

        getter = _lib.PdfToolsOcr_OcrOptions_GetPageOptions
        getter.argtypes = [c_void_p]
        getter.restype = c_void_p

        native_handle = getter(self._handle)
        if native_handle is None:
            _NativeBase._throw_last_error(False)
        return PageOptions._create_dynamic_type(native_handle)

    @staticmethod
    def _create_dynamic_type(handle):
        """Wrap `handle` in an `OcrOptions` instance via `_from_handle`."""
        return OcrOptions._from_handle(handle)

    @classmethod
    def _from_handle(cls, handle):
        """
        Internal factory method for constructing an instance from an
        internal handle.

        The instance is allocated with `__new__`, so the public constructor
        (and the native object creation it performs) is bypassed.
        """
        wrapper = OcrOptions.__new__(cls)  # Bypass __init__
        wrapper._initialize(handle)
        return wrapper

    def _initialize(self, handle):
        """Delegate handle initialization to the base class."""
        super()._initialize(handle)