Source code for pyrit.prompt_converter.binary_converter
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import annotations
from enum import Enum
from typing import Optional
from pyrit.identifiers import ConverterIdentifier
from pyrit.prompt_converter.text_selection_strategy import WordSelectionStrategy
from pyrit.prompt_converter.word_level_converter import WordLevelConverter
[docs]
class BinaryConverter(WordLevelConverter):
    """
    Transforms input text into its binary representation with configurable bits per character (8, 16, or 32).

    Each character is rendered as the base-2 form of its Unicode code point, zero-padded to the
    configured width. Characters inside a word are separated by a single space, and converted
    words are joined with the binary encoding of the space character.

    The configured width is a minimum: a code point that needs more bits than configured is not
    truncated, it is emitted at its natural (longer) width.
    """

    class BitsPerChar(Enum):
        """The number of bits per character for binary conversion."""

        BITS_8 = 8  #: 8 bits per character, suitable for ASCII characters.
        BITS_16 = 16  #: 16 bits per character, suitable for Unicode characters.
        BITS_32 = 32  #: 32 bits per character, suitable for extended Unicode characters.

    def __init__(
        self,
        *,
        bits_per_char: BinaryConverter.BitsPerChar = BitsPerChar.BITS_16,
        word_selection_strategy: Optional[WordSelectionStrategy] = None,
    ) -> None:
        """
        Initialize the converter with the specified bits per character and selection strategy.

        Args:
            bits_per_char (BinaryConverter.BitsPerChar): Number of bits to use for each character (8, 16, or 32).
                Default is 16 bits.
            word_selection_strategy (Optional[WordSelectionStrategy]): Strategy for selecting which words to convert.
                If None, all words will be converted.

        Raises:
            TypeError: If ``bits_per_char`` is not an instance of BinaryConverter.BitsPerChar Enum.
        """
        super().__init__(word_selection_strategy=word_selection_strategy)

        # Reject raw ints such as 8 or 16: the rest of the class reads ``.value`` from the enum member.
        if not isinstance(bits_per_char, BinaryConverter.BitsPerChar):
            raise TypeError("bits_per_char must be an instance of BinaryConverter.BitsPerChar Enum.")

        self.bits_per_char = bits_per_char

    def _build_identifier(self) -> ConverterIdentifier:
        """
        Build identifier with binary converter parameters.

        The parameters reported by the parent class are extended with a ``bits_per_char`` entry
        holding the configured integer width.

        Returns:
            ConverterIdentifier: The identifier for this converter.
        """
        inherited_params = super()._build_identifier().converter_specific_params
        # Work on a copy so the dict held by the parent's identifier is never mutated in place.
        params = dict(inherited_params or {})
        params["bits_per_char"] = self.bits_per_char.value
        return self._create_identifier(converter_specific_params=params)

    def _encode_char(self, char: str) -> str:
        """Return the zero-padded base-2 string for the code point of a single character."""
        # The width in the format spec is a minimum, so larger code points come out longer.
        return format(ord(char), f"0{self.bits_per_char.value}b")

    async def convert_word_async(self, word: str) -> str:
        """
        Convert a single word into space-separated binary groups, one group per character.

        Args:
            word (str): The word to be converted.

        Returns:
            str: The converted word. An empty word yields an empty string.
        """
        return " ".join(self._encode_char(char) for char in word)

    def join_words(self, words: list[str]) -> str:
        """
        Join the converted words with the binary representation of a space.

        Args:
            words (list[str]): The list of converted words.

        Returns:
            str: The final joined string with spaces in binary format.
        """
        # Pad the separator with plain spaces so every binary group stays whitespace-delimited.
        separator = f" {self._encode_char(' ')} "
        return separator.join(words)