Source code for scitacean.thumbnail

# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 SciCat Project (https://github.com/SciCatProject/scitacean)
"""Thumbnail type for encoding images."""

from __future__ import annotations

import base64
import mimetypes
import os
import re
from collections.abc import Callable
from dataclasses import dataclass
from typing import Any

from pydantic import GetCoreSchemaHandler
from pydantic_core import core_schema


[docs] @dataclass(init=False, kw_only=True, slots=True) class Thumbnail: """Encodes an image to be used as a thumbnail in SciCat. Thumbnails are *small* images used, e.g., in attachments. In SciCat, they are base64-encoded strings and have a size limit. This class handles the encoding but does not enforce a size limit or content type. Currently, data is stored in encoded form. This means that creating a thumbnail object in a SciCat download is cheap. But creating one from local data and throwing it away without uploading it has some small overhead. Examples -------- Given some raw bytes from a PNG, create a thumbnail using .. code-block:: python from scitacean import Thumbnail data = ... # the bytes of the PNG thumbnail = Thumbnail(mime="image/png", data=data) Or load the data directly from a file: .. code-block:: python thumbnail = Thumbnail.load_file("file_path.png") Access the raw bytes: .. code-block:: python data: bytes = thumbnail.decoded_data() """ mime: str | None """Complete MIME type in the form ``type/subtype``.""" _encoded_data: str
[docs] def __init__( self, mime: str | None, data: bytes | None = None, _encoded_data: str | None = None, ) -> None: """Create a new thumbnail object. Parameters ---------- mime: The MIME type of the thumbnail. Must be a string of the form ``image/png``. Parameters are not allowed. data: The raw bytes of the thumbnail. Will be encoded automatically. _encoded_data: Primarily for internal use. Base64-encoded data of the thumbnail. Mutually exclusive with ``data``. """ if data is None: if _encoded_data is None: raise TypeError("No thumbnail data specified") self._encoded_data = _encoded_data else: if _encoded_data is None: self._encoded_data = base64.b64encode(data).decode("utf-8") else: raise TypeError("Only only of data and _encoded_data may be given") self.mime = mime
[docs] @classmethod def load_file(cls, path: os.PathLike[str] | str) -> Thumbnail: """Construct a thumbnail from data loaded from a file. Parameters ---------- path: The path to the file. Returns ------- : A new thumbnail with MIME type guessed from the file and data loaded from disk. """ with open(path, "rb") as f: data = f.read() encoded_data = base64.b64encode(data).decode("utf-8") return Thumbnail(mime=mimetypes.guess_type(path)[0], _encoded_data=encoded_data)
[docs] @classmethod def parse(cls, encoded: str | Thumbnail, /) -> Thumbnail: """Construct a thumbnail from a string as used by SciCat. Parameters ---------- encoded: A string containing a MIME content-header and the thumbnail in base64 encoding. Or an existing ``Thumbnail`` instance which is copied on return. Returns ------- : A new thumbnail with MIME type and data extracted from the string. See Also -------- Thumbnail.serialize: The inverse operation. """ if isinstance(encoded, Thumbnail): return Thumbnail(mime=encoded.mime, _encoded_data=encoded._encoded_data) if (match := _MESSAGE_REGEX.match(encoded)) is None: mime = None encoded_data = encoded else: mime = match[1] encoded_data = match[2] return Thumbnail(mime=mime, _encoded_data=encoded_data)
[docs] def serialize(self) -> str: """Format the thumbnail into a string in the format expected by SciCat. Returns ------- : A string containing the MIME content-header and the thumbnail in base64 encoding. See Also -------- Thumbnail.parse: The inverse operation. """ mime_str = f"data:{self.mime};base64," if self.mime is not None else "" return mime_str + self.encoded_data()
@property def mime_type(self) -> str | None: """The MIME type, i.e., the first part of ``type/subtype``.""" if self.mime is None: return None return self.mime.split("/", 1)[0] @property def mime_subtype(self) -> str | None: """The MIME subtype, i.e., the second part of ``type/subtype``.""" if self.mime is None: return None return self.mime.split("/", 1)[1]
[docs] def encoded_data(self) -> str: """Return the base64-encoded data of the thumbnail.""" return self._encoded_data
[docs] def decoded_data(self) -> bytes: """Return the raw bytes of the thumbnail.""" return base64.b64decode(self._encoded_data)
def __str__(self) -> str: return f"Thumbnail({self.mime}, len:{len(self.encoded_data())}B)" def __repr__(self) -> str: return f"Thumbnail(mime={self.mime}, data={self.decoded_data()!r})" def _repr_mimebundle_( self, include: Any = None, exclude: Any = None ) -> dict[str, bytes | str]: def decoded() -> bytes: return self.decoded_data() repr_fns: dict[str, Callable[[], bytes | str]] = { "image/png": decoded, "image/jpeg": decoded, "image/svg+xml": decoded, "application/pdf": decoded, "text/html": lambda: f"<img src={self.serialize()}>", "text/plain": self.__str__, } if include is not None: repr_fns = {k: v for k, v in repr_fns.items() if k in include} if exclude is not None: repr_fns = {k: v for k, v in repr_fns.items() if k not in exclude} if self.mime in repr_fns: return {self.mime: repr_fns[self.mime]()} return { mime: fn() for mime in {"text/html", "text/plain"} if (fn := repr_fns.get(mime)) } @classmethod def __get_pydantic_core_schema__( cls, _source_type: Any, _handler: GetCoreSchemaHandler ) -> core_schema.CoreSchema: return core_schema.no_info_after_validator_function( cls.parse, core_schema.union_schema( [core_schema.is_instance_schema(Thumbnail), core_schema.str_schema()] ), serialization=core_schema.plain_serializer_function_ser_schema( cls.serialize, info_arg=False, return_schema=core_schema.str_schema() ), )
# A regex that matches encoded thumbnails with header. # Expected form: data:image/png;base64,the-data _MESSAGE_REGEX = re.compile( r"^(?:data:)?" # optional data: prefix "(?:([^/]+/[^;]+)(?:;.*)?,)?" # MIME content-header "(.*)$" # data )