Source code for pyrit.prompt_converter.unicode_replacement_converter

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Optional

from pyrit.prompt_converter.text_selection_strategy import WordSelectionStrategy
from pyrit.prompt_converter.word_level_converter import WordLevelConverter


class UnicodeReplacementConverter(WordLevelConverter):
    """
    Converts a prompt to its unicode escape representation.

    Every character of a converted word is replaced by a ``\\uXXXX`` escape
    (four lowercase hex digits). Characters outside the Basic Multilingual
    Plane are written as a UTF-16 surrogate pair, i.e. two consecutive
    ``\\uXXXX`` escapes, so that every escape stays exactly four digits wide.
    """

    def __init__(
        self,
        *,
        encode_spaces: bool = False,
        word_selection_strategy: Optional[WordSelectionStrategy] = None,
    ):
        """
        Initializes the converter with the specified selection strategy.

        Args:
            encode_spaces (bool): If True, words are joined with the literal escape
                ``\\u0020`` instead of the separator used by the base class.
            word_selection_strategy (Optional[WordSelectionStrategy]): Strategy for selecting
                which words to convert. If None, all words will be converted.
        """
        super().__init__(word_selection_strategy=word_selection_strategy)
        self.encode_spaces = encode_spaces

    async def convert_word_async(self, word: str) -> str:
        """
        Replaces every character of ``word`` with its ``\\uXXXX`` escape.

        Args:
            word (str): The word to convert.

        Returns:
            str: The concatenated escapes; an empty string for an empty word.
        """
        escapes = []
        for ch in word:
            code_point = ord(ch)
            if code_point > 0xFFFF:
                # A 5- or 6-digit "\u1f600" would be read back as U+1F60 followed
                # by a literal "0", so split the code point into a UTF-16
                # surrogate pair and emit two well-formed 4-digit escapes.
                offset = code_point - 0x10000
                high_surrogate = 0xD800 + (offset >> 10)
                low_surrogate = 0xDC00 + (offset & 0x3FF)
                escapes.append(f"\\u{high_surrogate:04x}\\u{low_surrogate:04x}")
            else:
                escapes.append(f"\\u{code_point:04x}")
        return "".join(escapes)

    def join_words(self, words: list[str]) -> str:
        """
        Joins the processed words back into a single string.

        Args:
            words (list[str]): The words to join.

        Returns:
            str: The words joined with the literal ``\\u0020`` escape when
            ``encode_spaces`` is set; otherwise whatever the base class
            ``join_words`` returns.
        """
        if self.encode_spaces:
            return "\\u0020".join(words)
        return super().join_words(words)