pyrit.models.DataTypeSerializer#

class DataTypeSerializer[source]#

Bases: ABC

Abstract base class for data type serializers.

Responsible for reading and saving multi-modal data types to local disk or Azure Storage Account.

__init__()#

Methods

__init__()

data_on_disk()

Indicate whether the data is stored on disk.

get_data_filename([file_name])

Generate or retrieve a unique filename for the data file.

get_extension(file_path)

Get the file extension from the file path.

get_mime_type(file_path)

Get the MIME type of the file path.

get_sha256()

Compute SHA256 hash for this serializer's current value.

read_data()

Read data from storage.

read_data_base64()

Read data from storage and return it as a base64 string.

save_b64_image(data[, output_filename])

Save a base64-encoded image to storage.

save_data(data[, output_filename])

Save data to storage.

save_formatted_audio(data[, num_channels, ...])

Save PCM16 or similarly formatted audio data to storage.

Attributes

category: str#
abstract data_on_disk() → bool[source]#

Indicate whether the data is stored on disk.

Returns:

True when data is persisted on disk.

Return type:

bool

data_sub_directory: str#
data_type: Literal['text', 'image_path', 'audio_path', 'video_path', 'binary_path', 'url', 'reasoning', 'error', 'function_call', 'tool_call', 'function_call_output']#
file_extension: str#
async get_data_filename(file_name: str | None = None) → Path | str[source]#

Generate or retrieve a unique filename for the data file.

Parameters:

file_name (Optional[str]) – Optional file name override.

Returns:

Full storage path for the generated data file.

Return type:

Union[Path, str]

Raises:
  • TypeError – If the serializer is not configured for on-disk data.

  • RuntimeError – If required data subdirectory information is missing.

static get_extension(file_path: str) → str | None[source]#

Get the file extension from the file path.

Parameters:

file_path (str) – Input file path.

Returns:

File extension (including dot) or None if unavailable.

Return type:

str | None

static get_mime_type(file_path: str) → str | None[source]#

Get the MIME type of the file path.

Parameters:

file_path (str) – Input file path.

Returns:

MIME type if detectable; otherwise None.

Return type:

str | None

async get_sha256() → str[source]#

Compute SHA256 hash for this serializer’s current value.

Returns:

Hex digest of the computed SHA256 hash.

Return type:

str

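Raises:

(The exception entries for get_sha256 are missing from this rendering; consult the method's source docstring.)
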
async read_data() → bytes[source]#

Read data from storage.

Returns:

The data read from storage.

Return type:

bytes

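Raises:

(The exception entries for read_data are missing from this rendering; consult the method's source docstring.)
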
async read_data_base64() → str[source]#

Read data from storage and return it as a base64 string.

Returns:

Base64-encoded data.

Return type:

str

async save_b64_image(data: str | bytes, output_filename: str | None = None) → None[source]#

Save a base64-encoded image to storage.

Parameters:
  • data (str | bytes) – Base64-encoded image data, as a string or bytes.

  • output_filename (optional, str) – Filename to store the image as. Defaults to a UUID if not provided.

async save_data(data: bytes, output_filename: str | None = None) → None[source]#

Save data to storage.

Parameters:
  • data (bytes) – The data to be saved.

  • output_filename (optional, str) – Filename to store the data as. Defaults to a UUID if not provided.

async save_formatted_audio(data: bytes, num_channels: int = 1, sample_width: int = 2, sample_rate: int = 16000, output_filename: str | None = None) → None[source]#

Save PCM16 or similarly formatted audio data to storage.

Parameters:
  • data (bytes) – Audio data (PCM16 or similarly formatted) to be saved.

  • output_filename (optional, str) – filename to store audio as. Defaults to UUID if not provided

  • num_channels (optional, int) – number of channels in audio data. Defaults to 1

  • sample_width (optional, int) – sample width in bytes. Defaults to 2

  • sample_rate (optional, int) – sample rate in Hz. Defaults to 16000

value: str#