Source code for pyrit.prompt_converter.repeat_token_converter

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import re
from typing import Literal, Optional

from pyrit.models import PromptDataType
from pyrit.prompt_converter import ConverterResult, PromptConverter



[docs]
class RepeatTokenConverter(PromptConverter):
    """
    Repeats a specified token a specified number of times in addition to a given prompt.

    Based on:
    https://dropbox.tech/machine-learning/bye-bye-bye-evolution-of-repeated-token-attacks-on-chatgpt-models

    Supported insertion modes:
        - "split":
            The prompt text will be split on the first occurrence of (.?!) punctuation,
            and repeated tokens will be inserted at the location of the split.
        - "prepend":
            Repeated tokens will be inserted before the prompt text.
        - "append":
            Repeated tokens will be inserted after the prompt text.
        - "repeat":
            The prompt text will be ignored, and the result will only contain repeated tokens.
    """


[docs]
    def __init__(
        self,
        *,
        token_to_repeat: str,
        times_to_repeat: int,
        token_insert_mode: Optional[Literal["split", "prepend", "append", "repeat"]] = None,
    ) -> None:
        """
        Initializes the converter with the specified token, number of repetitions, and insertion mode.

        Args:
            token_to_repeat (str): The string to be repeated.
            times_to_repeat (int): The number of times the string will be repeated.
            token_insert_mode (str, optional): The mode of insertion for the repeated token.
                Can be "split", "prepend", "append", or "repeat".
        """
        self.token_to_repeat = " " + token_to_repeat.strip()
        self.times_to_repeat = times_to_repeat
        if not token_insert_mode:
            token_insert_mode = "split"

        match token_insert_mode:
            case "split":
                # function to split prompt on first punctuation (.?! only), preserve punctuation, 2 parts max.
                def insert(text: str) -> list:
                    parts = re.split(r"(\?|\.|\!)", text, maxsplit=1)
                    if len(parts) == 3:  # if split mode with no punctuation
                        return [parts[0] + parts[1], parts[2]]
                    return ["", text]

                self.insert = insert
            case "prepend":

                def insert(text: str) -> list:
                    return ["", text]

                self.insert = insert
            case "append":

                def insert(text: str) -> list:
                    return [text, ""]

                self.insert = insert
            case "repeat":

                def insert(text: str) -> list:
                    return ["", ""]

                self.insert = insert



[docs]
    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text") -> ConverterResult:
        """
        Converts the given prompt by repeating the specified token a specified number of times.
        """
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")
        prompt_parts = self.insert(prompt)

        return ConverterResult(
            output_text=f"{prompt_parts[0]}{self.token_to_repeat * self.times_to_repeat}{prompt_parts[1]}",
            output_type="text",
        )



[docs]
    def input_supported(self, input_type: PromptDataType) -> bool:
        return input_type == "text"



[docs]
    def output_supported(self, output_type: PromptDataType) -> bool:
        return output_type == "text"