Pdftools SDK
Loading...
Searching...
No Matches
Macros | Functions
PdfTools_PdfToolsExtraction.h File Reference
#include "PdfTools_Types.h"
#include "PdfTools_PdfToolsSys.h"

Go to the source code of this file.

Macros

#define PDFTOOLS_CALL
 

Functions

PDFTOOLS_EXPORT TPdfToolsExtraction_TextOptions *PDFTOOLS_CALL PdfToolsExtraction_TextOptions_New (void)
 
PDFTOOLS_EXPORT TPdfToolsExtraction_TextExtractionFormat PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetExtractionFormat (TPdfToolsExtraction_TextOptions *pTextOptions)
 Format of the extracted text.
 
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetExtractionFormat (TPdfToolsExtraction_TextOptions *pTextOptions, TPdfToolsExtraction_TextExtractionFormat iExtractionFormat)
 Format of the extracted text.
 
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetAdvanceWidth (TPdfToolsExtraction_TextOptions *pTextOptions, double *pAdvanceWidth)
 
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetAdvanceWidth (TPdfToolsExtraction_TextOptions *pTextOptions, const double *pAdvanceWidth)
 
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetLineHeight (TPdfToolsExtraction_TextOptions *pTextOptions, double *pLineHeight)
 
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetLineHeight (TPdfToolsExtraction_TextOptions *pTextOptions, const double *pLineHeight)
 
PDFTOOLS_EXPORT double PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetWordSeparationFactor (TPdfToolsExtraction_TextOptions *pTextOptions)
 
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetWordSeparationFactor (TPdfToolsExtraction_TextOptions *pTextOptions, double dWordSeparationFactor)
 
PDFTOOLS_EXPORT TPdfToolsExtraction_Extractor *PDFTOOLS_CALL PdfToolsExtraction_Extractor_New (void)
 
PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_Extractor_ExtractText (TPdfToolsExtraction_Extractor *pExtractor, TPdfToolsPdf_Document *pInDoc, const TPdfToolsSys_StreamDescriptor *pOutStreamDesc, TPdfToolsExtraction_TextOptions *pOptions, const int *pFirstPage, const int *pLastPage)
 Extract text from a PDF document.
 

Macro Definition Documentation

◆ PDFTOOLS_CALL

#define PDFTOOLS_CALL

Function Documentation

◆ PdfToolsExtraction_Extractor_ExtractText()

PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_Extractor_ExtractText ( TPdfToolsExtraction_Extractor * pExtractor,
TPdfToolsPdf_Document * pInDoc,
const TPdfToolsSys_StreamDescriptor * pOutStreamDesc,
TPdfToolsExtraction_TextOptions * pOptions,
const int * pFirstPage,
const int * pLastPage )

Extract text from a PDF document.

Parameters
[in,out] pExtractor: Acts as a handle to the native object of type TPdfToolsExtraction_Extractor.
[in,out] pInDoc: The input PDF document.
[in,out] pOutStreamDesc: The output stream to which the extracted text is written.
[in,out] pOptions: The option object that controls the text extraction.
[in] pFirstPage: Optional parameter denoting the index of the first page from which text is extracted. This index is one-based. If set, the number must be in the range of 1 (first page) to PdfToolsPdf_Document_GetPageCount (last page). If not set (NULL), 1 is used.
[in] pLastPage: Optional parameter denoting the index of the last page from which text is extracted. This index is one-based. If set, the number must be in the range of 1 (first page) to PdfToolsPdf_Document_GetPageCount (last page). If not set (NULL), PdfToolsPdf_Document_GetPageCount is used.
Returns
TRUE if the operation is successful; FALSE if there is an error.
Note
A return value of FALSE indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage. Possible error codes:

◆ PdfToolsExtraction_Extractor_New()

PDFTOOLS_EXPORT TPdfToolsExtraction_Extractor *PDFTOOLS_CALL PdfToolsExtraction_Extractor_New ( void )
Returns
Handle to the newly created native object.

NULL if there is an error.

Note
A return value of NULL indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage.

◆ PdfToolsExtraction_TextOptions_GetAdvanceWidth()

PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetAdvanceWidth ( TPdfToolsExtraction_TextOptions * pTextOptions,
double * pAdvanceWidth )

The horizontal space in a PDF that corresponds to a character in monospaced text output.

If NULL, the horizontal space is 7.2pt.

Default value: NULL

Parameters
[in,out] pTextOptions: Acts as a handle to the native object of type TPdfToolsExtraction_TextOptions.
[out] pAdvanceWidth: Retrieved value.
Returns
FALSE if either an error occurred or the [out] argument returns NULL. To determine if an error has occurred, check the error code as described in the note section below.
Note
An error occurred if FALSE was returned and the error code returned by PdfTools_GetLastError differs from ePdfTools_Error_Success. Get the error message with PdfTools_GetLastErrorMessage.

◆ PdfToolsExtraction_TextOptions_GetExtractionFormat()

PDFTOOLS_EXPORT TPdfToolsExtraction_TextExtractionFormat PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetExtractionFormat ( TPdfToolsExtraction_TextOptions * pTextOptions)

Format of the extracted text.

Specifies the format of the extracted text.

Default value: ePdfToolsExtraction_TextExtractionFormat_DocumentOrder

Parameters
[in,out] pTextOptions: Acts as a handle to the native object of type TPdfToolsExtraction_TextOptions.
Returns
Retrieved value.

May indicate an error in certain scenarios. For further information see the note section below.

Note
A return value of 0 indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage.

◆ PdfToolsExtraction_TextOptions_GetLineHeight()

PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetLineHeight ( TPdfToolsExtraction_TextOptions * pTextOptions,
double * pLineHeight )

The vertical space in a PDF that triggers a new line in monospaced text output.

If NULL, no extra blank lines are added in the text output.

Default value: NULL

Parameters
[in,out] pTextOptions: Acts as a handle to the native object of type TPdfToolsExtraction_TextOptions.
[out] pLineHeight: Retrieved value.
Returns
FALSE if either an error occurred or the [out] argument returns NULL. To determine if an error has occurred, check the error code as described in the note section below.
Note
An error occurred if FALSE was returned and the error code returned by PdfTools_GetLastError differs from ePdfTools_Error_Success. Get the error message with PdfTools_GetLastErrorMessage.

◆ PdfToolsExtraction_TextOptions_GetWordSeparationFactor()

PDFTOOLS_EXPORT double PDFTOOLS_CALL PdfToolsExtraction_TextOptions_GetWordSeparationFactor ( TPdfToolsExtraction_TextOptions * pTextOptions)

This parameter defines a factor multiplied by the width of the space character to determine word boundaries. If the distance between two characters exceeds this calculated value, it is recognized as a word separation.

Default value: 0.3

Parameters
[in,out] pTextOptions: Acts as a handle to the native object of type TPdfToolsExtraction_TextOptions.
Returns
Retrieved value.

May indicate an error in certain scenarios. For further information see the note section below.

Note
A return value of -1.0 indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage. Possible error codes:

◆ PdfToolsExtraction_TextOptions_New()

PDFTOOLS_EXPORT TPdfToolsExtraction_TextOptions *PDFTOOLS_CALL PdfToolsExtraction_TextOptions_New ( void )
Returns
Handle to the newly created native object.

NULL if there is an error.

Note
A return value of NULL indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage.

◆ PdfToolsExtraction_TextOptions_SetAdvanceWidth()

PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetAdvanceWidth ( TPdfToolsExtraction_TextOptions * pTextOptions,
const double * pAdvanceWidth )

The horizontal space in a PDF that corresponds to a character in monospaced text output.

If NULL, the horizontal space is 7.2pt.

Default value: NULL

Parameters
[in,out] pTextOptions: Acts as a handle to the native object of type TPdfToolsExtraction_TextOptions.
[in] pAdvanceWidth: Set value.
Returns
TRUE if the operation is successful; FALSE if there is an error.
Note
A return value of FALSE indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage.

◆ PdfToolsExtraction_TextOptions_SetExtractionFormat()

PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetExtractionFormat ( TPdfToolsExtraction_TextOptions * pTextOptions,
TPdfToolsExtraction_TextExtractionFormat iExtractionFormat )

Format of the extracted text.

Specifies the format of the extracted text.

Default value: ePdfToolsExtraction_TextExtractionFormat_DocumentOrder

Parameters
[in,out] pTextOptions: Acts as a handle to the native object of type TPdfToolsExtraction_TextOptions.
[in] iExtractionFormat: Set value.
Returns
TRUE if the operation is successful; FALSE if there is an error.
Note
A return value of FALSE indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage.

◆ PdfToolsExtraction_TextOptions_SetLineHeight()

PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetLineHeight ( TPdfToolsExtraction_TextOptions * pTextOptions,
const double * pLineHeight )

The vertical space in a PDF that triggers a new line in monospaced text output.

If NULL, no extra blank lines are added in the text output.

Default value: NULL

Parameters
[in,out] pTextOptions: Acts as a handle to the native object of type TPdfToolsExtraction_TextOptions.
[in] pLineHeight: Set value.
Returns
TRUE if the operation is successful; FALSE if there is an error.
Note
A return value of FALSE indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage.

◆ PdfToolsExtraction_TextOptions_SetWordSeparationFactor()

PDFTOOLS_EXPORT BOOL PDFTOOLS_CALL PdfToolsExtraction_TextOptions_SetWordSeparationFactor ( TPdfToolsExtraction_TextOptions * pTextOptions,
double dWordSeparationFactor )

This parameter defines a factor multiplied by the width of the space character to determine word boundaries. If the distance between two characters exceeds this calculated value, it is recognized as a word separation.

Default value: 0.3

Parameters
[in,out] pTextOptions: Acts as a handle to the native object of type TPdfToolsExtraction_TextOptions.
[in] dWordSeparationFactor: Set value.
Returns
TRUE if the operation is successful; FALSE if there is an error.
Note
A return value of FALSE indicates that an error occurred. Retrieve the specific error code by calling PdfTools_GetLastError, and get the error message with PdfTools_GetLastErrorMessage. Possible error codes: