Source code for pyrit.prompt_converter.unicode_replacement_converter

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import re
from typing import List, Optional, Union

from pyrit.prompt_converter.word_level_converter import WordLevelConverter


[docs] class UnicodeReplacementConverter(WordLevelConverter): """ Converts a prompt to its unicode representation. """
[docs] def __init__( self, *, encode_spaces: bool = False, indices: Optional[List[int]] = None, keywords: Optional[List[str]] = None, proportion: Optional[float] = None, regex: Optional[Union[str, re.Pattern]] = None, ): """ Initializes the converter with the specified selection parameters. This class allows for selection of words to convert based on various criteria. Only one selection parameter may be provided at a time (indices, keywords, proportion, or regex). If no selection parameter is provided, all words will be converted. Args: encode_spaces (bool): If True, spaces in the prompt will be replaced with unicode representation. indices (Optional[List[int]]): Specific indices of words to convert. keywords (Optional[List[str]]): Keywords to select words for conversion. proportion (Optional[float]): Proportion of randomly selected words to convert [0.0-1.0]. regex (Optional[Union[str, re.Pattern]]): Regex pattern to match words for conversion. """ super().__init__(indices=indices, keywords=keywords, proportion=proportion, regex=regex) self.encode_spaces = encode_spaces
[docs] async def convert_word_async(self, word: str) -> str: return "".join(f"\\u{ord(ch):04x}" for ch in word)
[docs] def join_words(self, words: list[str]) -> str: if self.encode_spaces: return "\\u0020".join(words) return super().join_words(words)