Source code for pyrit.prompt_converter.unicode_replacement_converter
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from typing import Optional
from pyrit.identifiers import ConverterIdentifier
from pyrit.prompt_converter.text_selection_strategy import WordSelectionStrategy
from pyrit.prompt_converter.word_level_converter import WordLevelConverter
[docs]
class UnicodeReplacementConverter(WordLevelConverter):
    r"""
    Converts a prompt to its unicode escape representation.

    Every character of each converted word is replaced by a backslash escape of its
    code point: ``\uXXXX`` (four hex digits) for code points up to U+FFFF and
    ``\UXXXXXXXX`` (eight hex digits) for code points above U+FFFF.
    """

    def __init__(
        self,
        *,
        encode_spaces: bool = False,
        word_selection_strategy: Optional[WordSelectionStrategy] = None,
    ):
        """
        Initialize the converter with the specified selection strategy.

        Args:
            encode_spaces (bool): If True, spaces in the prompt will be replaced with unicode representation.
            word_selection_strategy (Optional[WordSelectionStrategy]): Strategy for selecting which words to convert.
                If None, all words will be converted.
        """
        super().__init__(word_selection_strategy=word_selection_strategy)
        self.encode_spaces = encode_spaces

    def _build_identifier(self) -> ConverterIdentifier:
        """
        Build identifier with unicode replacement parameters.

        The parent's converter-specific parameters (if any) are extended with the
        ``encode_spaces`` setting.

        Returns:
            ConverterIdentifier: The identifier for this converter.
        """
        parent_params = super()._build_identifier().converter_specific_params or {}
        # Build a fresh dict so the mapping held by the parent's identifier is not mutated.
        params = {**parent_params, "encode_spaces": self.encode_spaces}
        return self._create_identifier(converter_specific_params=params)

    @staticmethod
    def _escape_code_point(code_point: int) -> str:
        r"""
        Render a single code point as a backslash escape.

        Args:
            code_point (int): The Unicode code point to render.

        Returns:
            str: ``\uXXXX`` for code points up to U+FFFF, ``\UXXXXXXXX`` otherwise.
        """
        # A four-digit "\u" escape cannot hold a code point above U+FFFF: padding to
        # four digits would emit five or more, and a decoder would read only the first
        # four and treat the rest as literal text. Use the eight-digit form instead.
        if code_point > 0xFFFF:
            return f"\\U{code_point:08x}"
        return f"\\u{code_point:04x}"

    async def convert_word_async(self, word: str) -> str:
        """
        Convert a single word into the target format supported by the converter.

        Args:
            word (str): The word to be converted.

        Returns:
            str: The converted word, with every character replaced by its unicode escape.
        """
        return "".join(self._escape_code_point(ord(ch)) for ch in word)

    def join_words(self, words: list[str]) -> str:
        r"""
        Join a list of words into a single string, optionally encoding spaces as unicode.

        Args:
            words (list[str]): The list of words to join.

        Returns:
            str: The joined string. When ``encode_spaces`` is True the words are joined
                with the literal escape ``\u0020``; otherwise joining is delegated to the
                parent class.
        """
        if self.encode_spaces:
            return "\\u0020".join(words)
        return super().join_words(words)