Pdftools SDK
Loading...
Searching...
No Matches
PdfTools_PdfToolsExtraction.h
Go to the documentation of this file.
1/******************************************************************************
2 *
3 * File: PdfTools_PdfToolsExtraction.h
4 *
5 * Description: Sub Header file for Pdftools SDK
6 *
7 * Author: PDF Tools AG
8 *
9 * Copyright: Copyright (C) 2023 - 2025 PDF Tools AG, Switzerland
10 * All rights reserved.
11 *
12 * Notice: By downloading and using this artifact, you accept PDF Tools AG's
13 * [license agreement](https://www.pdf-tools.com/license-agreement/),
14 * [privacy policy](https://www.pdf-tools.com/privacy-policy/),
15 * and allow PDF Tools AG to track your usage data.
16 *
17 *****************************************************************************/
18
19#ifndef PDFTOOLS_PDFTOOLSEXTRACTION_H__
20#define PDFTOOLS_PDFTOOLSEXTRACTION_H__
21
22#ifndef PDFTOOLS_CALL
23#if defined(WIN32)
24#define PDFTOOLS_CALL __stdcall
25#else
26#define PDFTOOLS_CALL
27#endif
28#endif
29
30#include "PdfTools_Types.h"
32
33#ifdef __cplusplus
34extern "C"
35{
36#endif
37
38#ifdef _UNICODE
39#else
40#endif
41
42/******************************************************************************
43 * TextOptions
44 *****************************************************************************/
54
136 TPdfToolsExtraction_TextOptions* pTextOptions, const double* pAdvanceWidth);
224 TPdfToolsExtraction_TextOptions* pTextOptions, double dWordSeparationFactor);
225
226/******************************************************************************
227 * Extractor
228 *****************************************************************************/
238
277 const TPdfToolsSys_StreamDescriptor* pOutStreamDesc, TPdfToolsExtraction_TextOptions* pOptions,
278 const int* pFirstPage, const int* pLastPage);
279
280#ifdef __cplusplus
281}
282#endif
283
284#endif /* PDFTOOLS_PDFTOOLSEXTRACTION_H__ */
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetAdvanceWidth(TPdfToolsExtraction_TextOptions *pTextOptions, const double *pAdvanceWidth)
PDFTOOLS_EXPORT TPdfToolsExtraction_TextExtractionFormat PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetExtractionFormat(TPdfToolsExtraction_TextOptions *pTextOptions)
Format of the extracted text.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetAdvanceWidth(TPdfToolsExtraction_TextOptions *pTextOptions, double *pAdvanceWidth)
PDFTOOLS_EXPORT TPdfToolsExtraction_Extractor *PDFTOOLS_CALL PdfToolsExtraction_Extractor_New(void)
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_Extractor_ExtractText(TPdfToolsExtraction_Extractor *pExtractor, TPdfToolsPdf_Document *pInDoc, const TPdfToolsSys_StreamDescriptor *pOutStreamDesc, TPdfToolsExtraction_TextOptions *pOptions, const int *pFirstPage, const int *pLastPage)
Extract text from a PDF document.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetExtractionFormat(TPdfToolsExtraction_TextOptions *pTextOptions, TPdfToolsExtraction_TextExtractionFormat iExtractionFormat)
Format of the extracted text.
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetLineHeight(TPdfToolsExtraction_TextOptions *pTextOptions, double *pLineHeight)
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetLineHeight(TPdfToolsExtraction_TextOptions *pTextOptions, const double *pLineHeight)
PDFTOOLS_EXPORT TPdfToolsExtraction_TextOptions *PDFTOOLS_CALL PdfToolsExtraction_TextOptions_New(void)
#define PDFTOOLS_CALL
Definition PdfTools_PdfToolsExtraction.h:26
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetWordSeparationFactor(TPdfToolsExtraction_TextOptions *pTextOptions, double dWordSeparationFactor)
PDFTOOLS_EXPORT double PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetWordSeparationFactor(TPdfToolsExtraction_TextOptions *pTextOptions)
#define PDFTOOLS_EXPORT
Definition PdfTools_Platform.h:143
TPdfToolsExtraction_TextExtractionFormat
Definition PdfTools_Types.h:1782
#define BOOL
Definition PdfTools_Types.h:23
Options for text extraction. This class specifies the details of text extraction.
The PDF document. PDF documents are either opened using PdfToolsPdf_Document_Open or the result of an ...
Structure that groups a set of callbacks that model streams.
Definition PdfTools_PdfToolsSys.h:49