Pdftools SDK
Loading...
Searching...
No Matches
PdfTools_PdfToolsOcr.h
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * File: PdfTools_PdfToolsOcr.h
4 *
5 * Description: Sub Header file for Pdftools SDK
6 *
7 * Author: PDF Tools AG
8 *
9 * Copyright: Copyright (C) 2023 - 2026 PDF Tools AG, Switzerland
10 * All rights reserved.
11 *
12 * Notice: By downloading and using this artifact, you accept PDF Tools AG's
13 * [license agreement](https://www.pdf-tools.com/license-agreement/),
14 * [privacy policy](https://www.pdf-tools.com/privacy-policy/),
15 * and allow PDF Tools AG to track your usage data.
16 *
17 *****************************************************************************/
18
19#ifndef PDFTOOLS_PDFTOOLSOCR_H__
20#define PDFTOOLS_PDFTOOLSOCR_H__
21
22#ifndef PDFTOOLS_CALL
23#if defined(WIN32)
24#define PDFTOOLS_CALL __stdcall
25#else
26#define PDFTOOLS_CALL
27#endif
28#endif
29
30#include "PdfTools_Types.h"
32
33#ifdef __cplusplus
34extern "C"
35{
36#endif
37
38#ifdef _UNICODE
39#define PdfToolsOcr_Engine_Create PdfToolsOcr_Engine_CreateW
40
41#define PdfToolsOcr_Engine_GetLanguages PdfToolsOcr_Engine_GetLanguagesW
42#define PdfToolsOcr_Engine_SetLanguages PdfToolsOcr_Engine_SetLanguagesW
43#define PdfToolsOcr_Engine_GetParameters PdfToolsOcr_Engine_GetParametersW
44#define PdfToolsOcr_Engine_SetParameters PdfToolsOcr_Engine_SetParametersW
45
46#define TPdfToolsOcr_Processor_Warning TPdfToolsOcr_Processor_WarningW
47#define PdfToolsOcr_Processor_AddWarningHandler PdfToolsOcr_Processor_AddWarningHandlerW
48#define PdfToolsOcr_Processor_RemoveWarningHandler PdfToolsOcr_Processor_RemoveWarningHandlerW
49
50#else
51#define PdfToolsOcr_Engine_Create PdfToolsOcr_Engine_CreateA
52
53#define PdfToolsOcr_Engine_GetLanguages PdfToolsOcr_Engine_GetLanguagesA
54#define PdfToolsOcr_Engine_SetLanguages PdfToolsOcr_Engine_SetLanguagesA
55#define PdfToolsOcr_Engine_GetParameters PdfToolsOcr_Engine_GetParametersA
56#define PdfToolsOcr_Engine_SetParameters PdfToolsOcr_Engine_SetParametersA
57
58#define TPdfToolsOcr_Processor_Warning TPdfToolsOcr_Processor_WarningA
59#define PdfToolsOcr_Processor_AddWarningHandler PdfToolsOcr_Processor_AddWarningHandlerA
60#define PdfToolsOcr_Processor_RemoveWarningHandler PdfToolsOcr_Processor_RemoveWarningHandlerA
61
62#endif
63
82typedef void(PDFTOOLS_CALL* TPdfToolsOcr_Processor_WarningA)(void* pContext, const char* szMessage,
83 TPdfToolsOcr_WarningCategory iCategory, int iPageNo,
84 const char* szContext);
103typedef void(PDFTOOLS_CALL* TPdfToolsOcr_Processor_WarningW)(void* pContext, const WCHAR* szMessage,
104 TPdfToolsOcr_WarningCategory iCategory, int iPageNo,
105 const WCHAR* szContext);
106
107/******************************************************************************
108 * ImageOptions
109 *****************************************************************************/
119
193 BOOL bRotateScan);
234 BOOL bDeskewScan);
278 TPdfToolsOcr_ImageOptions* pImageOptions, BOOL bRemoveOnlyInvisibleOcrText);
279
280/******************************************************************************
281 * TextOptions
282 *****************************************************************************/
292
358 TPdfToolsOcr_TextSkipMode iSkipMode);
391 TPdfToolsOcr_UnicodeSource iUnicodeSource);
392
393/******************************************************************************
394 * PageOptions
395 *****************************************************************************/
405
471 TPdfToolsOcr_TaggingMode iTagging);
472
473/******************************************************************************
474 * OcrOptions
475 *****************************************************************************/
485
576 double dMinDpi);
614 double dMaxDpi);
655 BOOL bProcessEmbeddedFiles);
704
705/******************************************************************************
706 * Engine
707 *****************************************************************************/
750
783 size_t nBufferSize);
816 size_t nBufferSize);
843 const char* szLanguages);
870 const WCHAR* szLanguages);
909 size_t nBufferSize);
948 size_t nBufferSize);
981 const char* szParameters);
1014 const WCHAR* szParameters);
1015
1027/******************************************************************************
1028 * Processor
1029 *****************************************************************************/
1043 void* pContext,
1058 void* pContext,
1073 TPdfToolsOcr_Processor* pProcessor, void* pContext, TPdfToolsOcr_Processor_WarningA pFunction);
1087 TPdfToolsOcr_Processor* pProcessor, void* pContext, TPdfToolsOcr_Processor_WarningW pFunction);
1088
1098
1167 TPdfToolsOcr_Engine* pEngine, const TPdfToolsSys_StreamDescriptor* pOutStreamDesc,
1168 TPdfToolsOcr_OcrOptions* pOptions, TPdfToolsPdf_OutputOptions* pOutOptions);
1169
1170#ifdef __cplusplus
1171}
1172#endif
1173
1174#endif /* PDFTOOLS_PDFTOOLSOCR_H__ */
PDFTOOLS_EXPORT TPdfToolsPdf_Document *PDFTOOLS_CALL PdfToolsOcr_Processor_Process(TPdfToolsOcr_Processor *pProcessor, TPdfToolsPdf_Document *pDocument, TPdfToolsOcr_Engine *pEngine, const TPdfToolsSys_StreamDescriptor *pOutStreamDesc, TPdfToolsOcr_OcrOptions *pOptions, TPdfToolsPdf_OutputOptions *pOutOptions)
Apply OCR to a PDF document.
PDFTOOLS_EXPORT TPdfToolsOcr_ImageOptions *PDFTOOLS_CALL PdfToolsOcr_ImageOptions_New(void)
PDFTOOLS_EXPORT size_t PDFTOOLS_CALL PdfToolsOcr_Engine_GetLanguagesA(TPdfToolsOcr_Engine *pEngine, char *pBuffer, size_t nBufferSize)
The OCR language settings.
PDFTOOLS_EXPORT TPdfToolsOcr_TextOptions *PDFTOOLS_CALL PdfToolsOcr_OcrOptions_GetTextOptions(TPdfToolsOcr_OcrOptions *pOcrOptions)
The options for text processing. Options controlling how existing text is processed during OCR.
PDFTOOLS_EXPORT TPdfToolsOcr_PageOptions *PDFTOOLS_CALL PdfToolsOcr_PageOptions_New(void)
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Engine_SetParametersW(TPdfToolsOcr_Engine *pEngine, const WCHAR *szParameters)
Engine-specific parameters.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Processor_RemoveWarningHandlerA(TPdfToolsOcr_Processor *pProcessor, void *pContext, TPdfToolsOcr_Processor_WarningA pFunction)
Removes event handler.
PDFTOOLS_EXPORT TPdfToolsOcr_ImageOptions *PDFTOOLS_CALL PdfToolsOcr_OcrOptions_GetImageOptions(TPdfToolsOcr_OcrOptions *pOcrOptions)
The options for image processing. Options controlling how images in the PDF are processed during OCR.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_TextOptions_SetUnicodeSource(TPdfToolsOcr_TextOptions *pTextOptions, TPdfToolsOcr_UnicodeSource iUnicodeSource)
Define additional ToUnicode sources in addition to OCR processing. Default value: ePdfToolsOcr_Unicode...
PDFTOOLS_EXPORT TPdfToolsOcr_Engine *PDFTOOLS_CALL PdfToolsOcr_Engine_CreateW(const WCHAR *szName)
Create a new OCR engine. Optionally, the szName argument may be followed by "@" and engine creation par...
PDFTOOLS_EXPORT TPdfToolsOcr_OcrOptions *PDFTOOLS_CALL PdfToolsOcr_OcrOptions_New(void)
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_OcrOptions_SetMinDpi(TPdfToolsOcr_OcrOptions *pOcrOptions, double dMinDpi)
The minimum resolution in DPI used for OCR. Default value: 200.0.
PDFTOOLS_EXPORT double PDFTOOLS_CALL PdfToolsOcr_OcrOptions_GetMaxDpi(TPdfToolsOcr_OcrOptions *pOcrOptions)
The maximum resolution in DPI used for OCR. Default value: 400.0.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_TextOptions_SetSkipMode(TPdfToolsOcr_TextOptions *pTextOptions, TPdfToolsOcr_TextSkipMode iSkipMode)
Define text that can be skipped from text OCR processing. Default value: ePdfToolsOcr_TextSkipMode_Non...
PDFTOOLS_EXPORT TPdfToolsOcr_Engine *PDFTOOLS_CALL PdfToolsOcr_Engine_CreateA(const char *szName)
Create a new OCR engine. Optionally, the szName argument may be followed by "@" and engine creation par...
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_TextOptions_SetMode(TPdfToolsOcr_TextOptions *pTextOptions, TPdfToolsOcr_TextProcessingMode iMode)
The mode for processing text. Default value: ePdfToolsOcr_TextProcessingMode_None.
PDFTOOLS_EXPORT double PDFTOOLS_CALL PdfToolsOcr_OcrOptions_GetMinDpi(TPdfToolsOcr_OcrOptions *pOcrOptions)
The minimum resolution in DPI used for OCR. Default value: 200.0.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_PageOptions_SetMode(TPdfToolsOcr_PageOptions *pPageOptions, TPdfToolsOcr_PageProcessingMode iMode)
The mode for processing pages. Default value: ePdfToolsOcr_PageProcessingMode_None.
void(PDFTOOLS_CALL * TPdfToolsOcr_Processor_WarningW)(void *pContext, const WCHAR *szMessage, TPdfToolsOcr_WarningCategory iCategory, int iPageNo, const WCHAR *szContext)
Event for warnings occurring during OCR processing. Non-critical issues during processing are reported...
Definition PdfTools_PdfToolsOcr.h:103
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Engine_SetParametersA(TPdfToolsOcr_Engine *pEngine, const char *szParameters)
Engine-specific parameters.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_ImageOptions_GetDeskewScan(TPdfToolsOcr_ImageOptions *pImageOptions)
Whether to deskew scanned pages.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Engine_SetLanguagesA(TPdfToolsOcr_Engine *pEngine, const char *szLanguages)
The OCR language settings.
PDFTOOLS_EXPORT TPdfToolsOcr_TextSkipMode PDFTOOLS_CALL PdfToolsOcr_TextOptions_GetSkipMode(TPdfToolsOcr_TextOptions *pTextOptions)
Define text that can be skipped from text OCR processing. Default value: ePdfToolsOcr_TextSkipMode_Non...
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_ImageOptions_SetRotateScan(TPdfToolsOcr_ImageOptions *pImageOptions, BOOL bRotateScan)
Whether to rotate scanned pages based on detected orientation.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Engine_Close(TPdfToolsOcr_Engine *pObject)
Close object.
PDFTOOLS_EXPORT TPdfToolsOcr_TextProcessingMode PDFTOOLS_CALL PdfToolsOcr_TextOptions_GetMode(TPdfToolsOcr_TextOptions *pTextOptions)
The mode for processing text. Default value: ePdfToolsOcr_TextProcessingMode_None.
PDFTOOLS_EXPORT double PDFTOOLS_CALL PdfToolsOcr_OcrOptions_GetDpi(TPdfToolsOcr_OcrOptions *pOcrOptions)
The default resolution in DPI used for OCR.
PDFTOOLS_EXPORT TPdfToolsOcr_ImageProcessingMode PDFTOOLS_CALL PdfToolsOcr_ImageOptions_GetMode(TPdfToolsOcr_ImageOptions *pImageOptions)
The mode for processing images. Default value: ePdfToolsOcr_ImageProcessingMode_None.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Engine_SetLanguagesW(TPdfToolsOcr_Engine *pEngine, const WCHAR *szLanguages)
The OCR language settings.
PDFTOOLS_EXPORT TPdfToolsOcr_PageOptions *PDFTOOLS_CALL PdfToolsOcr_OcrOptions_GetPageOptions(TPdfToolsOcr_OcrOptions *pOcrOptions)
The options for page processing. Options controlling page-level OCR processing and tagging.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_OcrOptions_SetProcessEmbeddedFiles(TPdfToolsOcr_OcrOptions *pOcrOptions, BOOL bProcessEmbeddedFiles)
Whether to process embedded files recursively.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_ImageOptions_SetRemoveOnlyInvisibleOcrText(TPdfToolsOcr_ImageOptions *pImageOptions, BOOL bRemoveOnlyInvisibleOcrText)
Whether to remove only invisible OCR text (text rendering mode 3).
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_OcrOptions_SetDpi(TPdfToolsOcr_OcrOptions *pOcrOptions, double dDpi)
The default resolution in DPI used for OCR.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_ImageOptions_GetRemoveOnlyInvisibleOcrText(TPdfToolsOcr_ImageOptions *pImageOptions)
Whether to remove only invisible OCR text (text rendering mode 3).
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_ImageOptions_SetDeskewScan(TPdfToolsOcr_ImageOptions *pImageOptions, BOOL bDeskewScan)
Whether to deskew scanned pages.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Processor_RemoveWarningHandlerW(TPdfToolsOcr_Processor *pProcessor, void *pContext, TPdfToolsOcr_Processor_WarningW pFunction)
Removes event handler.
PDFTOOLS_EXPORT TPdfToolsOcr_TaggingMode PDFTOOLS_CALL PdfToolsOcr_PageOptions_GetTagging(TPdfToolsOcr_PageOptions *pPageOptions)
The mode for processing tagging information. Default value: ePdfToolsOcr_TaggingMode_Auto.
PDFTOOLS_EXPORT TPdfToolsOcr_TextOptions *PDFTOOLS_CALL PdfToolsOcr_TextOptions_New(void)
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_OcrOptions_SetMaxDpi(TPdfToolsOcr_OcrOptions *pOcrOptions, double dMaxDpi)
The maximum resolution in DPI used for OCR. Default value: 400.0.
PDFTOOLS_EXPORT TPdfToolsOcr_Processor *PDFTOOLS_CALL PdfToolsOcr_Processor_New(void)
PDFTOOLS_EXPORT size_t PDFTOOLS_CALL PdfToolsOcr_Engine_GetLanguagesW(TPdfToolsOcr_Engine *pEngine, WCHAR *pBuffer, size_t nBufferSize)
The OCR language settings.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Processor_AddWarningHandlerA(TPdfToolsOcr_Processor *pProcessor, void *pContext, TPdfToolsOcr_Processor_WarningA pFunction)
Adds event handler.
PDFTOOLS_EXPORT size_t PDFTOOLS_CALL PdfToolsOcr_Engine_GetParametersA(TPdfToolsOcr_Engine *pEngine, char *pBuffer, size_t nBufferSize)
Engine-specific parameters.
PDFTOOLS_EXPORT TPdfToolsOcr_PageProcessingMode PDFTOOLS_CALL PdfToolsOcr_PageOptions_GetMode(TPdfToolsOcr_PageOptions *pPageOptions)
The mode for processing pages. Default value: ePdfToolsOcr_PageProcessingMode_None.
#define PDFTOOLS_CALL
Definition PdfTools_PdfToolsOcr.h:26
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_OcrOptions_GetProcessEmbeddedFiles(TPdfToolsOcr_OcrOptions *pOcrOptions)
Whether to process embedded files recursively.
PDFTOOLS_EXPORT TPdfToolsOcr_UnicodeSource PDFTOOLS_CALL PdfToolsOcr_TextOptions_GetUnicodeSource(TPdfToolsOcr_TextOptions *pTextOptions)
Define additional ToUnicode sources in addition to OCR processing. Default value: ePdfToolsOcr_Unicode...
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_PageOptions_SetTagging(TPdfToolsOcr_PageOptions *pPageOptions, TPdfToolsOcr_TaggingMode iTagging)
The mode for processing tagging information. Default value: ePdfToolsOcr_TaggingMode_Auto.
PDFTOOLS_EXPORT size_t PDFTOOLS_CALL PdfToolsOcr_Engine_GetParametersW(TPdfToolsOcr_Engine *pEngine, WCHAR *pBuffer, size_t nBufferSize)
Engine-specific parameters.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_Processor_AddWarningHandlerW(TPdfToolsOcr_Processor *pProcessor, void *pContext, TPdfToolsOcr_Processor_WarningW pFunction)
Adds event handler.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_ImageOptions_SetMode(TPdfToolsOcr_ImageOptions *pImageOptions, TPdfToolsOcr_ImageProcessingMode iMode)
The mode for processing images. Default value: ePdfToolsOcr_ImageProcessingMode_None.
void(PDFTOOLS_CALL * TPdfToolsOcr_Processor_WarningA)(void *pContext, const char *szMessage, TPdfToolsOcr_WarningCategory iCategory, int iPageNo, const char *szContext)
Event for warnings occurring during OCR processing. Non-critical issues during processing are reported...
Definition PdfTools_PdfToolsOcr.h:82
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsOcr_ImageOptions_GetRotateScan(TPdfToolsOcr_ImageOptions *pImageOptions)
Whether to rotate scanned pages based on detected orientation.
#define PDFTOOLS_EXPORT
Definition PdfTools_Platform.h:143
unsigned short WCHAR
Definition PdfTools_Platform.h:95
TPdfToolsOcr_UnicodeSource
Flags specifying additional sources for Unicode mapping.
Definition PdfTools_Types.h:1925
TPdfToolsOcr_TextProcessingMode
The mode for processing text in OCR.
Definition PdfTools_Types.h:1875
TPdfToolsOcr_WarningCategory
The category of OCR warnings.
Definition PdfTools_Types.h:2009
TPdfToolsOcr_TaggingMode
The mode for PDF tagging/structure generation.
Definition PdfTools_Types.h:1985
TPdfToolsOcr_ImageProcessingMode
The mode for processing images in OCR.
Definition PdfTools_Types.h:1839
#define BOOL
Definition PdfTools_Types.h:23
TPdfToolsOcr_PageProcessingMode
The mode for processing pages in OCR.
Definition PdfTools_Types.h:1956
TPdfToolsOcr_TextSkipMode
Flags specifying which text to skip during OCR processing.
Definition PdfTools_Types.h:1902
Options for OCR processing of images. These options control how images in the PDF document are process...
The options for OCR processing. This class aggregates all OCR processing options including resolution ...
Options for OCR processing of pages. These options control page-level OCR processing and tagging.
Process PDF documents with OCR.
Options for OCR processing of text. These options control how existing text in the PDF document is pro...
The PDF document. PDF documents are either opened using PdfToolsPdf_Document_Open or the result of an ...
The parameters for document-level features of output PDFs. Output options are used in many operations ...
Structure that groups a set of callbacks that model streams.
Definition PdfTools_PdfToolsSys.h:49