Source code for pdftools_toolbox.pdf.content.text_fragment

from __future__ import annotations
import io
from typing import List, Iterator, Tuple, Optional, Any, TYPE_CHECKING, Callable
from ctypes import *
from datetime import datetime
from numbers import Number
from pdftools_toolbox.internal import _lib
from pdftools_toolbox.internal.utils import _string_to_utf16, _utf16_to_string
from pdftools_toolbox.internal.streams import _StreamDescriptor, _NativeStream
from pdftools_toolbox.internal.native_base import _NativeBase
from pdftools_toolbox.internal.native_object import _NativeObject

import pdftools_toolbox.internal
import pdftools_toolbox.pdf.content.glyph

if TYPE_CHECKING:
    from pdftools_toolbox.geometry.real.rectangle import Rectangle
    from pdftools_toolbox.geometry.real.affine_transform import AffineTransform
    from pdftools_toolbox.pdf.content.stroke import Stroke
    from pdftools_toolbox.pdf.content.fill import Fill
    from pdftools_toolbox.pdf.content.writing_mode import WritingMode
    from pdftools_toolbox.pdf.content.font import Font
    from pdftools_toolbox.pdf.content.glyph import Glyph

else:
    Rectangle = "pdftools_toolbox.geometry.real.rectangle.Rectangle"
    AffineTransform = "pdftools_toolbox.geometry.real.affine_transform.AffineTransform"
    Stroke = "pdftools_toolbox.pdf.content.stroke.Stroke"
    Fill = "pdftools_toolbox.pdf.content.fill.Fill"
    WritingMode = "pdftools_toolbox.pdf.content.writing_mode.WritingMode"
    Font = "pdftools_toolbox.pdf.content.font.Font"
    Glyph = "pdftools_toolbox.pdf.content.glyph.Glyph"


[docs] class TextFragment(_NativeObject, list): """ Text Fragment A :class:`pdftools_toolbox.pdf.content.text_element.TextElement` contains an arbitrary number of text fragments. Text can be partitioned arbibrarily into fragments without respecting word boundaries or reading order. A text fragment provides iteration over all contained :class:`pdftools_toolbox.pdf.content.glyph.Glyph` s. Removing, clearing, adding, and sub-ranges are not supported. While iterating, a :class:`pdftools_toolbox.corrupt_error.CorruptError` may be generated if the text fragment contains glyphs with inconsistent Unicode information. """ @property def bounding_box(self) -> Rectangle: """ the bounding box This is a rectangle that encompasses all parts of the text fragment. Returns: pdftools_toolbox.geometry.real.rectangle.Rectangle Raises: StateError: the object has already been closed """ from pdftools_toolbox.geometry.real.rectangle import Rectangle _lib.PtxPdfContent_TextFragment_GetBoundingBox.argtypes = [c_void_p, POINTER(Rectangle)] _lib.PtxPdfContent_TextFragment_GetBoundingBox.restype = c_bool ret_val = Rectangle() if not _lib.PtxPdfContent_TextFragment_GetBoundingBox(self._handle, byref(ret_val)): _NativeBase._throw_last_error(False) return ret_val @property def transform(self) -> AffineTransform: """ the transform to be applied to the bounding box rectangle Use this transform matrix to compute the actual location of the text fragment's bounding box. Returns: pdftools_toolbox.geometry.real.affine_transform.AffineTransform Raises: StateError: the object has already been closed """ from pdftools_toolbox.geometry.real.affine_transform import AffineTransform _lib.PtxPdfContent_TextFragment_GetTransform.argtypes = [c_void_p, POINTER(AffineTransform)] _lib.PtxPdfContent_TextFragment_GetTransform.restype = c_bool ret_val = AffineTransform() if not _lib.PtxPdfContent_TextFragment_GetTransform(self._handle, byref(ret_val)): _NativeBase._throw_last_error(False) return ret_val @property def text(self) -> Optional[str]: """ The string painted by this text fragment Returns: Optional[str] Raises: StateError: the object has already been closed """ _lib.PtxPdfContent_TextFragment_GetTextW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t] _lib.PtxPdfContent_TextFragment_GetTextW.restype = c_size_t ret_val_size = _lib.PtxPdfContent_TextFragment_GetTextW(self._handle, None, 0) if ret_val_size == 0: _NativeBase._throw_last_error() return None ret_val = create_unicode_buffer(ret_val_size) _lib.PtxPdfContent_TextFragment_GetTextW(self._handle, ret_val, c_size_t(ret_val_size)) return _utf16_to_string(ret_val, ret_val_size) @property def stroke(self) -> Optional[Stroke]: """ This text fragment's parameters for stroking the text or `None` if the text is not stroked Returns: Optional[pdftools_toolbox.pdf.content.stroke.Stroke] Raises: StateError: the object has already been closed """ from pdftools_toolbox.pdf.content.stroke import Stroke _lib.PtxPdfContent_TextFragment_GetStroke.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetStroke.restype = c_void_p ret_val = _lib.PtxPdfContent_TextFragment_GetStroke(self._handle) if ret_val is None: _NativeBase._throw_last_error() return None return Stroke._create_dynamic_type(ret_val) @property def fill(self) -> Optional[Fill]: """ This text fragment's parameters for filling the text or `None` if the text is not filled Returns: Optional[pdftools_toolbox.pdf.content.fill.Fill] Raises: StateError: the object has already been closed """ from pdftools_toolbox.pdf.content.fill import Fill _lib.PtxPdfContent_TextFragment_GetFill.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetFill.restype = c_void_p ret_val = _lib.PtxPdfContent_TextFragment_GetFill(self._handle) if ret_val is None: _NativeBase._throw_last_error() return None return Fill._create_dynamic_type(ret_val) @property def font_size(self) -> float: """ The font size Returns: float Raises: StateError: the object has already been closed """ _lib.PtxPdfContent_TextFragment_GetFontSize.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetFontSize.restype = c_double ret_val = _lib.PtxPdfContent_TextFragment_GetFontSize(self._handle) if ret_val == 0.0: _NativeBase._throw_last_error() return ret_val @property def character_spacing(self) -> float: """ The additional spacing between glyphs When the glyph for each character in the text is rendered, the character spacing is added to the horizontal or vertical component of the glyph's displacement, depending on the writing mode. It is subject to scaling by the horizontal scaling if the writing mode is horizontal. Returns: float Raises: StateError: the object has already been closed """ _lib.PtxPdfContent_TextFragment_GetCharacterSpacing.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetCharacterSpacing.restype = c_double ret_val = _lib.PtxPdfContent_TextFragment_GetCharacterSpacing(self._handle) if ret_val == -1.0: _NativeBase._throw_last_error() return ret_val @property def word_spacing(self) -> float: """ The additional spacing between words Word spacing works the same way as character spacing, but applies only to the space character, code 32. Returns: float Raises: StateError: the object has already been closed """ _lib.PtxPdfContent_TextFragment_GetWordSpacing.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetWordSpacing.restype = c_double ret_val = _lib.PtxPdfContent_TextFragment_GetWordSpacing(self._handle) if ret_val == -1.0: _NativeBase._throw_last_error() return ret_val @property def horizontal_scaling(self) -> float: """ The horizontal scaling factor The horizontal scaling parameter adjusts the width of glyphs by stretching or compressing them in the horizontal direction. Its value is specified relative to the normal width of the glyphs, with 1 being the normal width. Returns: float Raises: StateError: the object has already been closed """ _lib.PtxPdfContent_TextFragment_GetHorizontalScaling.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetHorizontalScaling.restype = c_double ret_val = _lib.PtxPdfContent_TextFragment_GetHorizontalScaling(self._handle) if ret_val == 0.0: _NativeBase._throw_last_error() return ret_val @property def rise(self) -> float: """ The rise of the baseline The text rise specifies the distance to move the baseline up or down from its default location. Positive values of text rise move the baseline up. Adjustments to the baseline are useful for drawing superscripts or subscripts. Returns: float Raises: StateError: the object has already been closed """ _lib.PtxPdfContent_TextFragment_GetRise.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetRise.restype = c_double ret_val = _lib.PtxPdfContent_TextFragment_GetRise(self._handle) if ret_val == -1.0: _NativeBase._throw_last_error() return ret_val @property def writing_mode(self) -> WritingMode: """ The writing direction This is the writing mode for the text fragment. It applies to all contained :class:`pdftools_toolbox.pdf.content.glyph.Glyph` s. Returns: pdftools_toolbox.pdf.content.writing_mode.WritingMode Raises: StateError: the object has already been closed """ from pdftools_toolbox.pdf.content.writing_mode import WritingMode _lib.PtxPdfContent_TextFragment_GetWritingMode.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetWritingMode.restype = c_int ret_val = _lib.PtxPdfContent_TextFragment_GetWritingMode(self._handle) if ret_val == 0: _NativeBase._throw_last_error() return WritingMode(ret_val) @property def font(self) -> Font: """ The font The returned :class:`pdftools_toolbox.pdf.content.font.Font` can only be used for extraction purposes. Specifically, using this object in `pdftools_toolbox.pdf.content.text_generator.TextGenerator.__init__` or in :attr:`pdftools_toolbox.pdf.content.text_generator.TextGenerator.font` results in a :class:`ValueError` error. Returns: pdftools_toolbox.pdf.content.font.Font Raises: StateError: the object has already been closed """ from pdftools_toolbox.pdf.content.font import Font _lib.PtxPdfContent_TextFragment_GetFont.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetFont.restype = c_void_p ret_val = _lib.PtxPdfContent_TextFragment_GetFont(self._handle) if ret_val is None: _NativeBase._throw_last_error(False) return Font._create_dynamic_type(ret_val) def __len__(self) -> int: _lib.PtxPdfContent_TextFragment_GetCount.argtypes = [c_void_p] _lib.PtxPdfContent_TextFragment_GetCount.restype = c_int ret_val = _lib.PtxPdfContent_TextFragment_GetCount(self._handle) if ret_val == -1: _NativeBase._throw_last_error() return ret_val
[docs] def clear(self) -> None: raise NotImplementedError("Clear method is not supported in TextFragment.")
def __delitem__(self, index: int) -> None: if index < 0: # Handle negative indexing index += len(self) self.remove(index)
[docs] def remove(self, index: int) -> None: if not isinstance(index, int): raise TypeError(f"Expected type {int.__name__}, but got {type(index).__name__}.") _lib.PtxPdfContent_TextFragment_Remove.argtypes = [c_void_p, c_int] _lib.PtxPdfContent_TextFragment_Remove.restype = c_bool if not _lib.PtxPdfContent_TextFragment_Remove(self._handle, index): _NativeBase._throw_last_error(False)
[docs] def extend(self, items: TextFragment) -> None: if not isinstance(items, TextFragment): raise TypeError(f"Expected type {TextFragment.__name__}, but got {type(items).__name__}.") raise NotImplementedError("Extend method is not supported in TextFragment.")
[docs] def insert(self, index: int, value: Any) -> None: raise NotImplementedError("Insert method is not supported in TextFragment.")
[docs] def pop(self, index: int = -1) -> Any: raise NotImplementedError("Pop method is not supported in TextFragment.")
[docs] def copy(self) -> TextFragment: raise NotImplementedError("Copy method is not supported in TextFragment.")
[docs] def sort(self, key=None, reverse=False) -> None: raise NotImplementedError("Sort method is not supported in TextFragment.")
[docs] def reverse(self) -> None: raise NotImplementedError("Reverse method is not supported in TextFragment.")
def __getitem__(self, index: Union[int, slice]) -> Union[Any, List[Any]]: from pdftools_toolbox.pdf.content.glyph import Glyph if isinstance(index, slice): raise NotImplementedError("Slicing is not implemented.") if not isinstance(index, int): raise TypeError(f"Expected type {int.__name__}, but got {type(index).__name__}.") if index < 0: # Handle negative indexing index += len(self) _lib.PtxPdfContent_TextFragment_Get.argtypes = [c_void_p, c_int] _lib.PtxPdfContent_TextFragment_Get.restype = c_void_p ret_val = _lib.PtxPdfContent_TextFragment_Get(self._handle, index) if ret_val is None: _NativeBase._throw_last_error(False) return Glyph._create_dynamic_type(ret_val) def __setitem__(self, index: int, value: Any) -> None: raise NotImplementedError("Setting elements is not supported in TextFragment.")
[docs] def append(self, value: Glyph) -> None: raise NotImplementedError("Append method is not supported in TextFragment.")
[docs] def index(self, value: Glyph, start: int = 0, stop: Optional[int] = None) -> int: from pdftools_toolbox.pdf.content.glyph import Glyph if not isinstance(value, Glyph): raise TypeError(f"Expected type {Glyph.__name__}, but got {type(value).__name__}.") if not isinstance(start, int): raise TypeError(f"Expected type {int.__name__}, but got {type(start).__name__}.") if stop is not None and not isinstance(stop, int): raise TypeError(f"Expected type {int.__name__} or None, but got {type(stop).__name__}.") length = len(self) if start < 0: start += length if stop is None: stop = length elif stop < 0: stop += length for i in range(max(start, 0), min(stop, length)): if self[i] == value: return i raise ValueError(f"{value} is not in the list.")
def __iter__(self): self._iter_index = 0 # Initialize the index for iteration return self def __next__(self): if self._iter_index < len(self): # Check if there are more items to iterate over item = self.__getitem__(self._iter_index) # Get the item at the current index self._iter_index += 1 # Move to the next index return item else: raise StopIteration # Signal that iteration is complete @staticmethod def _create_dynamic_type(handle): return TextFragment._from_handle(handle) @classmethod def _from_handle(cls, handle): """ Internal factory method for constructing an instance using an internal handle. This method creates an instance of the class by bypassing the public constructor. """ instance = TextFragment.__new__(cls) # Bypass __init__ instance._initialize(handle) return instance def _initialize(self, handle): super()._initialize(handle)