# Source code for pyrit.prompt_converter.bin_ascii_converter

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import binascii
import re
from typing import List, Literal, Optional, Union

from pyrit.prompt_converter.word_level_converter import WordLevelConverter


class BinAsciiConverter(WordLevelConverter):
    """
    Encode words with a binary-to-ASCII scheme.

    Three schemes are available: hexadecimal ("hex"), quoted-printable
    ("quoted-printable"), and UUencode ("UUencode").
    """

    # Closed set of scheme names accepted by ``encoding_func``.
    EncodingFunc = Literal[
        "hex",
        "quoted-printable",
        "UUencode",
    ]

    def __init__(
        self,
        *,
        encoding_func: EncodingFunc = "hex",
        indices: Optional[List[int]] = None,
        keywords: Optional[List[str]] = None,
        proportion: Optional[float] = None,
        regex: Optional[Union[str, re.Pattern]] = None,
        word_split_separator: Optional[str] = " ",
    ) -> None:
        """
        Set up the converter and validate the requested encoding scheme.

        The word-selection arguments are forwarded unchanged to the parent
        class initializer.

        Args:
            encoding_func (str): Name of the scheme to apply. One of "hex",
                "quoted-printable", or "UUencode". Defaults to "hex".
            indices (Optional[List[int]]): Specific indices of words to convert.
            keywords (Optional[List[str]]): Keywords to select words for conversion.
            proportion (Optional[float]): Proportion of randomly selected words
                to convert [0.0-1.0].
            regex (Optional[Union[str, re.Pattern]]): Regex pattern to match
                words for conversion.
            word_split_separator (Optional[str]): Separator used to split words
                in the input text. Defaults to " ".

        Raises:
            ValueError: If ``encoding_func`` is not one of the supported names.
        """
        super().__init__(
            indices=indices,
            keywords=keywords,
            proportion=proportion,
            regex=regex,
            word_split_separator=word_split_separator,
        )

        # Literal types are not enforced at runtime, so check explicitly.
        supported_schemes = ("hex", "quoted-printable", "UUencode")
        if encoding_func not in supported_schemes:
            raise ValueError(
                f"Invalid encoding_func '{encoding_func}'. "
                "Must be one of: 'hex', 'quoted-printable', 'UUencode'"
            )

        self._encoding_func = encoding_func

    async def convert_word_async(self, word: str) -> str:
        """
        Encode a single word with the scheme chosen at construction time.

        Args:
            word (str): The word to encode.

        Returns:
            str: The encoded word. Hex output is upper-case.

        Raises:
            ValueError: If the stored scheme name is not a supported one.
        """
        scheme = self._encoding_func

        if scheme == "hex":
            utf8_bytes = word.encode("utf-8")
            return utf8_bytes.hex().upper()

        if scheme == "quoted-printable":
            quoted = binascii.b2a_qp(word.encode("utf-8"))
            return quoted.decode("ascii")

        if scheme == "UUencode":
            return self._uuencode_chunk(word)

        # Defensive: __init__ validates the name, so this is only reachable
        # if the attribute was changed afterwards.
        raise ValueError(f"Unsupported encoding function: {scheme}")

    def _uuencode_chunk(self, text: str) -> str:
        """
        UUencode text, feeding the encoder 45 bytes of UTF-8 at a time.

        Each slice is encoded by a separate ``binascii.b2a_uu`` call; the
        resulting pieces are concatenated and trailing newlines are removed.

        Args:
            text (str): The text to encode.

        Returns:
            str: The UUencoded text, or an empty string for empty input.
        """
        raw = text.encode("utf-8")
        encoded_lines = [
            binascii.b2a_uu(raw[start : start + 45]).decode("ascii")
            for start in range(0, len(raw), 45)
        ]
        return "".join(encoded_lines).rstrip("\n")

    def join_words(self, words: list[str]) -> str:
        """
        Reassemble encoded words into a single string.

        When ``self._mode`` is "all", the words are joined with a
        scheme-specific separator; in every other case joining is delegated
        to the parent class.

        Args:
            words (list[str]): The encoded words to join.

        Returns:
            str: The joined string.
        """
        if self._mode == "all":
            scheme = self._encoding_func
            separators = (
                ("hex", "20"),  # 20 is the hex representation of a space
                ("quoted-printable", "=20"),  # =20 is the quoted-printable space
                # NOTE(review): UUencoded words are concatenated with nothing
                # between them, so word boundaries do not appear in the
                # output - confirm this is intended.
                ("UUencode", ""),
            )
            for name, glue in separators:
                if scheme == name:
                    return glue.join(words)

        return super().join_words(words=words)