from __future__ import annotations
import io
from typing import List, Iterator, Tuple, Optional, Any, TYPE_CHECKING, Callable
from ctypes import *
from datetime import datetime
from numbers import Number
from pdftools_toolbox.internal import _lib
from pdftools_toolbox.internal.utils import _string_to_utf16, _utf16_to_string
from pdftools_toolbox.internal.streams import _StreamDescriptor, _NativeStream
from pdftools_toolbox.internal.native_base import _NativeBase
from pdftools_toolbox.internal.native_object import _NativeObject
import pdftools_toolbox.internal
if TYPE_CHECKING:
    # Only static type checkers take this branch: they get the real classes
    # so the annotations in this module resolve properly.
    from pdftools_toolbox.pdf.document import Document
    from pdftools_toolbox.pdf.content.image_type import ImageType
    from pdftools_toolbox.geometry.real.rectangle import Rectangle
    from pdftools_toolbox.geometry.integer.size import Size
    from pdftools_toolbox.pdf.content.color_space import ColorSpace
else:
    # At runtime the names are bound to their fully qualified dotted paths
    # instead of the classes. Code that needs the real class imports it
    # locally at the point of use.
    # NOTE(review): presumably this avoids circular imports between the
    # pdftools_toolbox sub-packages -- confirm.
    Document = "pdftools_toolbox.pdf.document.Document"
    ImageType = "pdftools_toolbox.pdf.content.image_type.ImageType"
    Rectangle = "pdftools_toolbox.geometry.real.rectangle.Rectangle"
    Size = "pdftools_toolbox.geometry.integer.size.Size"
    ColorSpace = "pdftools_toolbox.pdf.content.color_space.ColorSpace"
# [docs]  (Sphinx "view source" link marker left over from HTML extraction; not part of the module)
class Image(_NativeObject):
    """
    Python wrapper around a native image object.

    Every public member forwards to one of the ``PtxPdfContent_Image_*``
    functions of the native library, passing the handle stored on this
    instance. Instances are not built through a public constructor; they are
    obtained from :meth:`create` or from an existing native handle via
    :meth:`_from_handle`.
    """

    @staticmethod
    def create(target_document: Document, stream: io.IOBase) -> Image:
        """
        Create an image object from image data.

        Supported formats are:

        - BMP
        - DIB
        - JPEG
        - JPEG2000
        - JBIG2
        - PNG
        - GIF

        The returned image object is not yet painted on any page, but it is associated with the given target document.

        Args:
            target_document (pdftools_toolbox.pdf.document.Document):
                the output document with which the returned object is associated
            stream (io.IOBase):
                the image data stream

        Returns:
            pdftools_toolbox.pdf.content.image.Image:
                the newly created image object

        Raises:
            TypeError:
                if `target_document` is not a `Document` or `stream` is not an
                `io.IOBase` (this includes `None`)
            OSError:
                Error reading from the image or writing to the document
            pdftools_toolbox.unknown_format_error.UnknownFormatError:
                The image data has an unknown format
            pdftools_toolbox.corrupt_error.CorruptError:
                The image data is corrupt
            ValueError:
                if the `target_document` argument has already been closed
            ValueError:
                if the `target_document` argument is read-only
        """
        # Module level only binds the dotted-path string at runtime, so the
        # real class is imported here for the isinstance check.
        from pdftools_toolbox.pdf.document import Document

        if not isinstance(target_document, Document):
            raise TypeError(f"Expected type {Document.__name__}, but got {type(target_document).__name__}.")
        if not isinstance(stream, io.IOBase):
            raise TypeError(f"Expected type {io.IOBase.__name__}, but got {type(stream).__name__}.")

        _lib.PtxPdfContent_Image_Create.argtypes = [c_void_p, POINTER(pdftools_toolbox.internal.streams._StreamDescriptor)]
        _lib.PtxPdfContent_Image_Create.restype = c_void_p
        ret_val = _lib.PtxPdfContent_Image_Create(target_document._handle, _StreamDescriptor(stream))
        # ctypes maps a NULL c_void_p result to None.
        if ret_val is None:
            _NativeBase._throw_last_error(False)
        return Image._create_dynamic_type(ret_val)

    def redact(self, rect: Rectangle) -> None:
        """
        Redact rectangular part of the image

        Redacts a part of the image specified by a rectangle, by changing the content of the image.
        This is not an annotation, the image data is changed and there will be no way to get the original data from the image itself.
        The content is changed by setting all pixels to the same color.
        This color, in general, is black, but that depends on the color space of the image.

        Args:
            rect (pdftools_toolbox.geometry.real.rectangle.Rectangle):
                Defines rectangular part of the image which is to be redacted.
                If the rectangle is not completely within the image boundaries, only the part that is within the boundaries will be redacted.

        Raises:
            TypeError:
                if `rect` is not a `Rectangle`
            ValueError:
                if the `rect` argument is invalid
        """
        from pdftools_toolbox.geometry.real.rectangle import Rectangle

        if not isinstance(rect, Rectangle):
            raise TypeError(f"Expected type {Rectangle.__name__}, but got {type(rect).__name__}.")

        _lib.PtxPdfContent_Image_Redact.argtypes = [c_void_p, POINTER(Rectangle)]
        _lib.PtxPdfContent_Image_Redact.restype = c_bool
        if not _lib.PtxPdfContent_Image_Redact(self._handle, rect):
            _NativeBase._throw_last_error(False)

    @property
    def default_image_type(self) -> ImageType:
        """
        Default extracted image type.

        The default image type that will be extracted, based on the way that the image data is compressed and stored in the PDF file.
        The type of the output image is :attr:`pdftools_toolbox.pdf.content.image_type.ImageType.JPEG` for embedded JPEG and JPEG2000 images.
        In all other cases the image type will be :attr:`pdftools_toolbox.pdf.content.image_type.ImageType.TIFF` .

        Returns:
            pdftools_toolbox.pdf.content.image_type.ImageType

        Raises:
            StateError:
                if the image has already been closed
        """
        from pdftools_toolbox.pdf.content.image_type import ImageType

        _lib.PtxPdfContent_Image_GetDefaultImageType.argtypes = [c_void_p]
        _lib.PtxPdfContent_Image_GetDefaultImageType.restype = c_int
        ret_val = _lib.PtxPdfContent_Image_GetDefaultImageType(self._handle)
        # NOTE(review): unlike the pointer/bool-returning calls, the error
        # helper is invoked without `False` here -- presumably so that a
        # legitimate 0 result does not raise; confirm against _NativeBase.
        if ret_val == 0:
            _NativeBase._throw_last_error()
        return ImageType(ret_val)

    @property
    def size(self) -> Size:
        """
        The size of the image in samples.

        Samples are often also called pixels.

        Returns:
            pdftools_toolbox.geometry.integer.size.Size

        Raises:
            StateError:
                if the image has already been closed
        """
        from pdftools_toolbox.geometry.integer.size import Size

        _lib.PtxPdfContent_Image_GetSize.argtypes = [c_void_p, POINTER(Size)]
        _lib.PtxPdfContent_Image_GetSize.restype = c_bool
        # The native call fills this structure in place.
        ret_val = Size()
        if not _lib.PtxPdfContent_Image_GetSize(self._handle, byref(ret_val)):
            _NativeBase._throw_last_error(False)
        return ret_val

    @property
    def samples(self) -> List[int]:
        """
        The raw content of the image.

        The samples (pixels) are given in order, top to bottom,
        left to right. Each sample is given component by component.
        There is no padding between components or samples, except
        that each row of sample data begins on a byte boundary.
        If the number of data bits per row is not a multiple of 8,
        the end of the row is padded with extra bits to fill out the
        last byte. Padding bits should be ignored.

        Most often, each component is 8 bits, so there's no packing/unpacking
        or alignment/padding. Components with 2 or 4 bits are very rare.

        If the image is compressed, it will be decompressed in order
        to get the samples. For very large images, this may take some
        time.

        When setting samples, the original compression type of the image does not change.
        Compression from the raw samples typically takes significantly longer than decompression.
        Therefore, setting for large images might be perceived as slow.
        None of the image parameters can be changed, so when setting samples, the size of the array must match that of the original image.

        Returns:
            List[int]

        Raises:
            StateError:
                if the image has already been closed
        """
        _lib.PtxPdfContent_Image_GetSamples.argtypes = [c_void_p, POINTER(c_ubyte), c_size_t]
        _lib.PtxPdfContent_Image_GetSamples.restype = c_size_t

        # The restype is unsigned, so a native "-1" failure value arrives as
        # the largest size_t value and never as Python's -1. Compare against
        # that value, otherwise the failure goes unnoticed and the buffer
        # allocation below is attempted with an absurd size.
        error_size = c_size_t(-1).value

        # First call with a NULL buffer only queries the required byte count.
        ret_val_size = _lib.PtxPdfContent_Image_GetSamples(self._handle, None, 0)
        if ret_val_size == error_size:
            _NativeBase._throw_last_error(False)

        # Second call copies the sample bytes into a buffer of that size.
        ret_val = (c_ubyte * ret_val_size)()
        copied = _lib.PtxPdfContent_Image_GetSamples(self._handle, ret_val, c_size_t(ret_val_size))
        if copied == error_size:
            _NativeBase._throw_last_error(False)
        return list(ret_val)

    @samples.setter
    def samples(self, val: List[int]) -> None:
        """
        The raw content of the image.

        The samples (pixels) are given in order, top to bottom,
        left to right. Each sample is given component by component.
        There is no padding between components or samples, except
        that each row of sample data begins on a byte boundary.
        If the number of data bits per row is not a multiple of 8,
        the end of the row is padded with extra bits to fill out the
        last byte. Padding bits should be ignored.

        Most often, each component is 8 bits, so there's no packing/unpacking
        or alignment/padding. Components with 2 or 4 bits are very rare.

        If the image is compressed, it will be decompressed in order
        to get the samples. For very large images, this may take some
        time.

        When setting samples, the original compression type of the image does not change.
        Compression from the raw samples typically takes significantly longer than decompression.
        Therefore, setting for large images might be perceived as slow.
        None of the image parameters can be changed, so when setting samples, the size of the array must match that of the original image.

        Args:
            val (List[int]):
                property value

        Raises:
            TypeError:
                if `val` is not a list or contains a non-int element
            StateError:
                if the image has already been closed
        """
        if not isinstance(val, list):
            raise TypeError(f"Expected type {list.__name__}, but got {type(val).__name__}.")
        if not all(isinstance(c, int) for c in val):
            raise TypeError(f"All elements in {val} must be {int}")

        _lib.PtxPdfContent_Image_SetSamples.argtypes = [c_void_p, POINTER(c_ubyte), c_size_t]
        _lib.PtxPdfContent_Image_SetSamples.restype = c_bool
        # The list is copied into a temporary unsigned-byte array for the call.
        if not _lib.PtxPdfContent_Image_SetSamples(self._handle, (c_ubyte * len(val))(*val), len(val)):
            _NativeBase._throw_last_error(False)

    @property
    def bits_per_component(self) -> int:
        """
        the number of bits per component.

        The number of bits used to represent each color component.
        Only a single value may be specified; the number of bits is the same for all color components.
        Valid values are 1, 2, 4, and 8.

        Returns:
            int

        Raises:
            StateError:
                if the image has already been closed
        """
        _lib.PtxPdfContent_Image_GetBitsPerComponent.argtypes = [c_void_p]
        _lib.PtxPdfContent_Image_GetBitsPerComponent.restype = c_int
        ret_val = _lib.PtxPdfContent_Image_GetBitsPerComponent(self._handle)
        # 0 is not among the documented valid values, so it is treated as a
        # possible error indicator.
        if ret_val == 0:
            _NativeBase._throw_last_error()
        return ret_val

    @property
    def color_space(self) -> ColorSpace:
        """
        the color space in which image samples are specified.

        Returns:
            pdftools_toolbox.pdf.content.color_space.ColorSpace

        Raises:
            StateError:
                if the image has already been closed
        """
        from pdftools_toolbox.pdf.content.color_space import ColorSpace

        _lib.PtxPdfContent_Image_GetColorSpace.argtypes = [c_void_p]
        _lib.PtxPdfContent_Image_GetColorSpace.restype = c_void_p
        ret_val = _lib.PtxPdfContent_Image_GetColorSpace(self._handle)
        # ctypes maps a NULL c_void_p result to None.
        if ret_val is None:
            _NativeBase._throw_last_error(False)
        return ColorSpace._create_dynamic_type(ret_val)

    @staticmethod
    def _create_dynamic_type(handle):
        """Wrap a native handle in an `Image` instance."""
        return Image._from_handle(handle)

    @classmethod
    def _from_handle(cls, handle):
        """
        Internal factory method for constructing an instance using an internal handle.

        This method creates an instance of the class by bypassing the public constructor.
        """
        instance = Image.__new__(cls)  # Bypass __init__
        instance._initialize(handle)
        return instance

    def _initialize(self, handle):
        """Delegate handle initialization to the base class."""
        super()._initialize(handle)