# Source code for pyrit.prompt_converter.prompt_converter

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import abc
import asyncio
import re
from dataclasses import dataclass
from typing import get_args

from pyrit.models import Identifier, PromptDataType



# Result type returned by converters.
@dataclass
class ConverterResult:
    """Outcome of a prompt conversion: the converted value together with its data type."""

    #: Converted value produced by the conversion.
    output_text: str
    #: Data type describing how ``output_text`` should be interpreted.
    output_type: PromptDataType

    def __str__(self) -> str:
        # Rendered as "<type>: <text>".
        kind, payload = self.output_type, self.output_text
        return f"{kind}: {payload}"




# Abstract base class for prompt converters.
class PromptConverter(abc.ABC, Identifier):
    """
    Base class for converters that transform prompts into a different representation or format.

    Subclasses implement ``convert_async``, ``input_supported`` and ``output_supported``. Token-scoped
    conversion (``convert_tokens_async``), the identifier dictionary and the supported-type listings are
    provided here on top of those three methods.
    """

    def __init__(self) -> None:
        """
        Initializes the prompt converter.
        """
        super().__init__()

    @abc.abstractmethod
    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text") -> ConverterResult:
        """
        Converts the given prompt into the target format supported by the converter.

        Args:
            prompt (str): The prompt to be converted.
            input_type (PromptDataType): The type of input data.

        Returns:
            ConverterResult: The result containing the converted output and its type.
        """

    @abc.abstractmethod
    def input_supported(self, input_type: PromptDataType) -> bool:
        """
        Checks if the input type is supported by the converter.

        Args:
            input_type (PromptDataType): The input type to check.

        Returns:
            bool: True if the input type is supported, False otherwise.
        """

    @abc.abstractmethod
    def output_supported(self, output_type: PromptDataType) -> bool:
        """
        Checks if the output type is supported by the converter.

        Args:
            output_type (PromptDataType): The output type to check.

        Returns:
            bool: True if the output type is supported, False otherwise.
        """

    async def convert_tokens_async(
        self, *, prompt: str, input_type: PromptDataType = "text", start_token: str = "⟪", end_token: str = "⟫"
    ) -> ConverterResult:
        """
        Converts substrings within a prompt that are enclosed by specified start and end tokens. If no
        start/end pair is found, the entire prompt is converted with ``convert_async`` instead.

        Enclosed substrings may span multiple lines. Each one is converted independently (and concurrently)
        as ``"text"``, and the converted output replaces the substring together with its surrounding tokens.

        Args:
            prompt (str): The input prompt containing text to be converted.
            input_type (PromptDataType): The type of input data. Defaults to "text".
            start_token (str): The token indicating the start of a substring to be converted. Defaults to "⟪" which is
                relatively distinct.
            end_token (str): The token indicating the end of a substring to be converted. Defaults to "⟫" which is
                relatively distinct.

        Returns:
            ConverterResult: The result of converting the whole prompt when no token pair is present; otherwise
            a ``"text"`` result holding the prompt with every enclosed substring replaced by its conversion.

        Raises:
            ValueError: If tokens are present but ``input_type`` is not "text", or if token pairs were found
                but the prompt contains a different number of start and end tokens.
        """
        if input_type != "text" and (start_token in prompt or end_token in prompt):
            raise ValueError("Input type must be text when start or end tokens are present.")

        # Non-greedy so adjacent pairs stay separate; DOTALL so an enclosed region may contain newlines.
        pattern = re.escape(start_token) + "(.*?)" + re.escape(end_token)
        found = list(re.finditer(pattern, prompt, flags=re.DOTALL))

        if not found:
            # No token pair found, convert the entire prompt
            return await self.convert_async(prompt=prompt, input_type=input_type)

        if prompt.count(start_token) != prompt.count(end_token):
            raise ValueError("Uneven number of start tokens and end tokens.")

        converted_parts = await asyncio.gather(*(self._replace_text_match(hit.group(1)) for hit in found))

        # Rebuild the prompt in one pass from the original match positions. Substituting by position (rather
        # than searching the already-modified string) guarantees that text produced by an earlier conversion
        # can never be mistaken for a later token-delimited region.
        pieces: list[str] = []
        cursor = 0
        for hit, converted in zip(found, converted_parts):
            pieces.append(prompt[cursor : hit.start()])
            pieces.append(converted.output_text)
            cursor = hit.end()
        pieces.append(prompt[cursor:])

        return ConverterResult(output_text="".join(pieces), output_type="text")

    async def _replace_text_match(self, match: str) -> ConverterResult:
        """
        Converts a single token-enclosed substring as text.

        Args:
            match (str): The substring found between a start and end token (tokens excluded).

        Returns:
            ConverterResult: The conversion result for the substring.
        """
        return await self.convert_async(prompt=match, input_type="text")

    def get_identifier(self) -> dict:
        """
        Returns an identifier dictionary for the converter.

        Returns:
            dict: The identifier dictionary, holding the concrete class name under ``__type__`` and the
            defining module under ``__module__``.
        """
        converter_class = self.__class__
        return {
            "__type__": converter_class.__name__,
            "__module__": converter_class.__module__,
        }

    @property
    def supported_input_types(self) -> list[PromptDataType]:
        """
        Returns a list of supported input types for the converter.

        Returns:
            list[PromptDataType]: Every ``PromptDataType`` value for which ``input_supported`` returns True.
        """
        return [data_type for data_type in get_args(PromptDataType) if self.input_supported(data_type)]

    @property
    def supported_output_types(self) -> list[PromptDataType]:
        """
        Returns a list of supported output types for the converter.

        Returns:
            list[PromptDataType]: Every ``PromptDataType`` value for which ``output_supported`` returns True.
        """
        return [data_type for data_type in get_args(PromptDataType) if self.output_supported(data_type)]