Source code for pdftools_toolbox.pdf.metadata

from __future__ import annotations
import io
from typing import List, Iterator, Tuple, Optional, Any, TYPE_CHECKING, Callable
from ctypes import *
from datetime import datetime
from numbers import Number
from pdftools_toolbox.internal import _lib
from pdftools_toolbox.internal.utils import _string_to_utf16, _utf16_to_string
from pdftools_toolbox.internal.streams import _StreamDescriptor, _NativeStream
from pdftools_toolbox.internal.native_base import _NativeBase
from pdftools_toolbox.internal.native_object import _NativeObject

import pdftools_toolbox.internal

if TYPE_CHECKING:
    from pdftools_toolbox.pdf.document import Document
    from pdftools_toolbox.sys.date import _Date
    from pdftools_toolbox.string_map import StringMap

else:
    Document = "pdftools_toolbox.pdf.document.Document"
    _Date = "pdftools_toolbox.sys.date._Date"
    StringMap = "pdftools_toolbox.string_map.StringMap"


[docs] class Metadata(_NativeObject): """ Represents the metadata of a document or an object in a document. For document level metadata, all changes are reflected in both, XMP metadata and document info dictionary depending on the conformance of the document. """
[docs] @staticmethod def create(target_document: Document, xmp: io.IOBase) -> Metadata: """ Create a new metadata object The newly created metadata object is associated with the target document but not (yet) used as the document metadata. The object can be used either as document metadata using :attr:`pdftools_toolbox.pdf.document.Document.metadata` or as page metadata using :attr:`pdftools_toolbox.pdf.page.Page.metadata` . Args: targetDocument (pdftools_toolbox.pdf.document.Document): the output document with which the returned object is associated xmp (io.IOBase): A stream containing an XMP file or `None` to create an empty metadata object. Returns: pdftools_toolbox.pdf.metadata.Metadata: the newly created metadata object Raises: ValueError: if the `targetDocument` argument has already been closed ValueError: if the `targetDocument` argument is read-only OSError: if the `xmp` stream could not be read pdftools_toolbox.corrupt_error.CorruptError: if the `xmp` stream is corrupt """ from pdftools_toolbox.pdf.document import Document if not isinstance(target_document, Document): raise TypeError(f"Expected type {Document.__name__}, but got {type(target_document).__name__}.") if not isinstance(xmp, io.IOBase): raise TypeError(f"Expected type {io.IOBase.__name__}, but got {type(xmp).__name__}.") _lib.PtxPdf_Metadata_Create.argtypes = [c_void_p, POINTER(pdftools_toolbox.internal.streams._StreamDescriptor)] _lib.PtxPdf_Metadata_Create.restype = c_void_p ret_val = _lib.PtxPdf_Metadata_Create(target_document._handle, _StreamDescriptor(xmp)) if ret_val is None: _NativeBase._throw_last_error(False) return Metadata._create_dynamic_type(ret_val)
[docs] @staticmethod def copy(target_document: Document, metadata: Metadata) -> Metadata: """ Copy a metadata object Copy a metadata object from an input document to the given `targetDocument`. The returned object is associated with the target document but not (yet) used as the document metadata. The object can be used either as document metadata using :attr:`pdftools_toolbox.pdf.document.Document.metadata` or as page metadata using :attr:`pdftools_toolbox.pdf.page.Page.metadata` . Args: targetDocument (pdftools_toolbox.pdf.document.Document): the output document with which the returned object is associated metadata (pdftools_toolbox.pdf.metadata.Metadata): the metadata of a different document Returns: pdftools_toolbox.pdf.metadata.Metadata: a metadata object with the same content, but associated with the current document. Raises: OSError: Error reading from the source document or writing to the target document pdftools_toolbox.corrupt_error.CorruptError: The source document is corrupt pdftools_toolbox.conformance_error.ConformanceError: The conformance level of the source document is not compatible with the conformance level of the target document. ValueError: if the `targetDocument` argument has already been closed ValueError: if the `targetDocument` argument is read-only ValueError: if the `metadata` object has already been closed ValueError: if the `metadata` object is not associated with an input document """ from pdftools_toolbox.pdf.document import Document if not isinstance(target_document, Document): raise TypeError(f"Expected type {Document.__name__}, but got {type(target_document).__name__}.") if not isinstance(metadata, Metadata): raise TypeError(f"Expected type {Metadata.__name__}, but got {type(metadata).__name__}.") _lib.PtxPdf_Metadata_Copy.argtypes = [c_void_p, c_void_p] _lib.PtxPdf_Metadata_Copy.restype = c_void_p ret_val = _lib.PtxPdf_Metadata_Copy(target_document._handle, metadata._handle) if ret_val is None: _NativeBase._throw_last_error(False) return Metadata._create_dynamic_type(ret_val)
@property def title(self) -> Optional[str]: """ The title of the document or resource. This property corresponds to the "dc:title" entry in the XMP metadata and to the "Title" entry in the document information dictionary. Returns: Optional[str] Raises: StateError: if the metadata have already been closed """ _lib.PtxPdf_Metadata_GetTitleW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t] _lib.PtxPdf_Metadata_GetTitleW.restype = c_size_t ret_val_size = _lib.PtxPdf_Metadata_GetTitleW(self._handle, None, 0) if ret_val_size == 0: _NativeBase._throw_last_error() return None ret_val = create_unicode_buffer(ret_val_size) _lib.PtxPdf_Metadata_GetTitleW(self._handle, ret_val, c_size_t(ret_val_size)) return _utf16_to_string(ret_val, ret_val_size) @title.setter def title(self, val: Optional[str]) -> None: """ The title of the document or resource. This property corresponds to the "dc:title" entry in the XMP metadata and to the "Title" entry in the document information dictionary. Args: val (Optional[str]): property value Raises: StateError: if the metadata have already been closed OperationError: the document is read-only """ if val is not None and not isinstance(val, str): raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.") _lib.PtxPdf_Metadata_SetTitleW.argtypes = [c_void_p, c_wchar_p] _lib.PtxPdf_Metadata_SetTitleW.restype = c_bool if not _lib.PtxPdf_Metadata_SetTitleW(self._handle, _string_to_utf16(val)): _NativeBase._throw_last_error(False) @property def author(self) -> Optional[str]: """ The name of the person who created the document or resource. This property corresponds to the "dc:creator" entry in the XMP metadata and to the "Author" entry in the document information dictionary. Returns: Optional[str] Raises: StateError: if the metadata have already been closed """ _lib.PtxPdf_Metadata_GetAuthorW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t] _lib.PtxPdf_Metadata_GetAuthorW.restype = c_size_t ret_val_size = _lib.PtxPdf_Metadata_GetAuthorW(self._handle, None, 0) if ret_val_size == 0: _NativeBase._throw_last_error() return None ret_val = create_unicode_buffer(ret_val_size) _lib.PtxPdf_Metadata_GetAuthorW(self._handle, ret_val, c_size_t(ret_val_size)) return _utf16_to_string(ret_val, ret_val_size) @author.setter def author(self, val: Optional[str]) -> None: """ The name of the person who created the document or resource. This property corresponds to the "dc:creator" entry in the XMP metadata and to the "Author" entry in the document information dictionary. Args: val (Optional[str]): property value Raises: StateError: if the metadata have already been closed OperationError: the document is read-only """ if val is not None and not isinstance(val, str): raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.") _lib.PtxPdf_Metadata_SetAuthorW.argtypes = [c_void_p, c_wchar_p] _lib.PtxPdf_Metadata_SetAuthorW.restype = c_bool if not _lib.PtxPdf_Metadata_SetAuthorW(self._handle, _string_to_utf16(val)): _NativeBase._throw_last_error(False) @property def subject(self) -> Optional[str]: """ The subject of the document or resource. This property corresponds to the "dc:description" entry in the XMP metadata and to the "Subject" entry in the document information dictionary. Returns: Optional[str] Raises: StateError: if the metadata have already been closed """ _lib.PtxPdf_Metadata_GetSubjectW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t] _lib.PtxPdf_Metadata_GetSubjectW.restype = c_size_t ret_val_size = _lib.PtxPdf_Metadata_GetSubjectW(self._handle, None, 0) if ret_val_size == 0: _NativeBase._throw_last_error() return None ret_val = create_unicode_buffer(ret_val_size) _lib.PtxPdf_Metadata_GetSubjectW(self._handle, ret_val, c_size_t(ret_val_size)) return _utf16_to_string(ret_val, ret_val_size) @subject.setter def subject(self, val: Optional[str]) -> None: """ The subject of the document or resource. This property corresponds to the "dc:description" entry in the XMP metadata and to the "Subject" entry in the document information dictionary. Args: val (Optional[str]): property value Raises: StateError: if the metadata have already been closed OperationError: the document is read-only """ if val is not None and not isinstance(val, str): raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.") _lib.PtxPdf_Metadata_SetSubjectW.argtypes = [c_void_p, c_wchar_p] _lib.PtxPdf_Metadata_SetSubjectW.restype = c_bool if not _lib.PtxPdf_Metadata_SetSubjectW(self._handle, _string_to_utf16(val)): _NativeBase._throw_last_error(False) @property def keywords(self) -> Optional[str]: """ Keywords associated with the document or resource. Keywords can be separated by: - carriage return / line feed - comma - semicolon - tab - double space This property corresponds to the "pdf:Keywords" entry in the XMP metadata and to the "Keywords" entry in the document information dictionary. Setting this property also sets the XMP property dc:subject accordingly. Returns: Optional[str] Raises: StateError: if the metadata have already been closed """ _lib.PtxPdf_Metadata_GetKeywordsW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t] _lib.PtxPdf_Metadata_GetKeywordsW.restype = c_size_t ret_val_size = _lib.PtxPdf_Metadata_GetKeywordsW(self._handle, None, 0) if ret_val_size == 0: _NativeBase._throw_last_error() return None ret_val = create_unicode_buffer(ret_val_size) _lib.PtxPdf_Metadata_GetKeywordsW(self._handle, ret_val, c_size_t(ret_val_size)) return _utf16_to_string(ret_val, ret_val_size) @keywords.setter def keywords(self, val: Optional[str]) -> None: """ Keywords associated with the document or resource. Keywords can be separated by: - carriage return / line feed - comma - semicolon - tab - double space This property corresponds to the "pdf:Keywords" entry in the XMP metadata and to the "Keywords" entry in the document information dictionary. Setting this property also sets the XMP property dc:subject accordingly. Args: val (Optional[str]): property value Raises: StateError: if the metadata have already been closed OperationError: the document is read-only """ if val is not None and not isinstance(val, str): raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.") _lib.PtxPdf_Metadata_SetKeywordsW.argtypes = [c_void_p, c_wchar_p] _lib.PtxPdf_Metadata_SetKeywordsW.restype = c_bool if not _lib.PtxPdf_Metadata_SetKeywordsW(self._handle, _string_to_utf16(val)): _NativeBase._throw_last_error(False) @property def creator(self) -> Optional[str]: """ The original application that created the document. The name of the first known tool used to create the document or resource. This property corresponds to the "xmp:CreatorTool" entry in the XMP metadata and to the "Creator" entry in the document information dictionary. Returns: Optional[str] Raises: StateError: if the metadata have already been closed """ _lib.PtxPdf_Metadata_GetCreatorW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t] _lib.PtxPdf_Metadata_GetCreatorW.restype = c_size_t ret_val_size = _lib.PtxPdf_Metadata_GetCreatorW(self._handle, None, 0) if ret_val_size == 0: _NativeBase._throw_last_error() return None ret_val = create_unicode_buffer(ret_val_size) _lib.PtxPdf_Metadata_GetCreatorW(self._handle, ret_val, c_size_t(ret_val_size)) return _utf16_to_string(ret_val, ret_val_size) @creator.setter def creator(self, val: Optional[str]) -> None: """ The original application that created the document. The name of the first known tool used to create the document or resource. This property corresponds to the "xmp:CreatorTool" entry in the XMP metadata and to the "Creator" entry in the document information dictionary. Args: val (Optional[str]): property value Raises: StateError: if the metadata have already been closed OperationError: the document is read-only """ if val is not None and not isinstance(val, str): raise TypeError(f"Expected type {str.__name__} or None, but got {type(val).__name__}.") _lib.PtxPdf_Metadata_SetCreatorW.argtypes = [c_void_p, c_wchar_p] _lib.PtxPdf_Metadata_SetCreatorW.restype = c_bool if not _lib.PtxPdf_Metadata_SetCreatorW(self._handle, _string_to_utf16(val)): _NativeBase._throw_last_error(False) @property def producer(self) -> Optional[str]: """ The application that created the PDF If the document was converted to PDF from another format, the name of the PDF processor that converted it to PDF. This property corresponds to the "pdf:Producer" entry in the XMP metadata and to the "Producer" entry in the document information dictionary. Returns: Optional[str] Raises: StateError: if the metadata have already been closed """ _lib.PtxPdf_Metadata_GetProducerW.argtypes = [c_void_p, POINTER(c_wchar), c_size_t] _lib.PtxPdf_Metadata_GetProducerW.restype = c_size_t ret_val_size = _lib.PtxPdf_Metadata_GetProducerW(self._handle, None, 0) if ret_val_size == 0: _NativeBase._throw_last_error() return None ret_val = create_unicode_buffer(ret_val_size) _lib.PtxPdf_Metadata_GetProducerW(self._handle, ret_val, c_size_t(ret_val_size)) return _utf16_to_string(ret_val, ret_val_size) @property def creation_date(self) -> Optional[datetime]: """ The date and time the document or resource was originally created. This property corresponds to the "xmp:CreateDate" entry in the XMP metadata and to the "CreationDate" entry in the document information dictionary. Returns: Optional[datetime] Raises: StateError: if the metadata have already been closed """ from pdftools_toolbox.sys.date import _Date _lib.PtxPdf_Metadata_GetCreationDate.argtypes = [c_void_p, POINTER(_Date)] _lib.PtxPdf_Metadata_GetCreationDate.restype = c_bool ret_val = _Date() if not _lib.PtxPdf_Metadata_GetCreationDate(self._handle, byref(ret_val)): _NativeBase._throw_last_error() return None return ret_val._to_datetime() @creation_date.setter def creation_date(self, val: Optional[datetime]) -> None: """ The date and time the document or resource was originally created. This property corresponds to the "xmp:CreateDate" entry in the XMP metadata and to the "CreationDate" entry in the document information dictionary. Args: val (Optional[datetime]): property value Raises: StateError: if the metadata have already been closed OperationError: the document is read-only """ from pdftools_toolbox.sys.date import _Date if val is not None and not isinstance(val, datetime): raise TypeError(f"Expected type {datetime.__name__} or None, but got {type(val).__name__}.") _lib.PtxPdf_Metadata_SetCreationDate.argtypes = [c_void_p, POINTER(_Date)] _lib.PtxPdf_Metadata_SetCreationDate.restype = c_bool if not _lib.PtxPdf_Metadata_SetCreationDate(self._handle, _Date._from_datetime(val)): _NativeBase._throw_last_error(False) @property def modification_date(self) -> Optional[datetime]: """ The date and time the document or resource was most recently modified. This property corresponds to the "xmp:ModifyDate" entry in the XMP metadata and to the "ModDate" entry in the document information dictionary. Returns: Optional[datetime] Raises: pdftools_toolbox.corrupt_error.CorruptError: The date is corrupt. StateError: if the metadata have already been closed """ from pdftools_toolbox.sys.date import _Date _lib.PtxPdf_Metadata_GetModificationDate.argtypes = [c_void_p, POINTER(_Date)] _lib.PtxPdf_Metadata_GetModificationDate.restype = c_bool ret_val = _Date() if not _lib.PtxPdf_Metadata_GetModificationDate(self._handle, byref(ret_val)): _NativeBase._throw_last_error() return None return ret_val._to_datetime() @property def xmp(self) -> io.IOBase: """ The XMP metadata The XMP metadata stream or `None` if there is none. The stream is read-only. To set the XMP stream of a metadata object use the method Document.CreateMetadata instead. Returns: io.IOBase Raises: StateError: if the metadata have already been closed """ _lib.PtxPdf_Metadata_GetXmp.argtypes = [c_void_p, POINTER(pdftools_toolbox.internal.streams._StreamDescriptor)] _lib.PtxPdf_Metadata_GetXmp.restype = c_bool ret_val = _StreamDescriptor() if not _lib.PtxPdf_Metadata_GetXmp(self._handle, byref(ret_val)): _NativeBase._throw_last_error(False) return _NativeStream(ret_val) @property def custom_entries(self) -> StringMap: """ The custom entries in the document information dictionary The standard entries "Title", "Author", "Subject", "Keywords", "CreationDate", "ModDate", "Creator", "Producer" and "Trapped" are not included in the map. Any attempt to set a standard entry through this map will result in an error. To get or set standard entries use the corresponding properties instead. Note: The document information dictionary has been superseded by XMP metadata and is deprecated in PDF 2.0. Returns: pdftools_toolbox.string_map.StringMap Raises: StateError: if the metadata have already been closed """ from pdftools_toolbox.string_map import StringMap _lib.PtxPdf_Metadata_GetCustomEntries.argtypes = [c_void_p] _lib.PtxPdf_Metadata_GetCustomEntries.restype = c_void_p ret_val = _lib.PtxPdf_Metadata_GetCustomEntries(self._handle) if ret_val is None: _NativeBase._throw_last_error(False) return StringMap._create_dynamic_type(ret_val) @staticmethod def _create_dynamic_type(handle): return Metadata._from_handle(handle) @classmethod def _from_handle(cls, handle): """ Internal factory method for constructing an instance using an internal handle. This method creates an instance of the class by bypassing the public constructor. """ instance = Metadata.__new__(cls) # Bypass __init__ instance._initialize(handle) return instance def _initialize(self, handle): super()._initialize(handle)