# Source code for pyrit.prompt_converter.token_smuggling.sneaky_bits_smuggler_converter

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
from typing import Literal, Optional, Tuple

from pyrit.prompt_converter.token_smuggling.base import SmugglerConverter

logger = logging.getLogger(__name__)


class SneakyBitsSmugglerConverter(SmugglerConverter):
    """
    Encodes and decodes text using a bit-level approach.

    Uses two invisible Unicode characters:
        - ``zero_char`` (default: U+2062) to represent binary 0.
        - ``one_char`` (default: U+2064) to represent binary 1.

    Replicates functionality detailed in:
        - https://embracethered.com/blog/posts/2025/sneaky-bits-and-ascii-smuggler/
    """

    def __init__(
        self,
        action: Literal["encode", "decode"] = "encode",
        zero_char: Optional[str] = None,
        one_char: Optional[str] = None,
    ):
        """
        Initializes the converter with options for encoding/decoding in Sneaky Bits mode.

        Args:
            action (Literal["encode", "decode"]): The action to perform. It is forwarded
                unchanged to the base class constructor.
            zero_char (Optional[str]): Character to represent binary 0 (default: U+2062).
            one_char (Optional[str]): Character to represent binary 1 (default: U+2064).

        Raises:
            ValueError: If ``zero_char`` or ``one_char`` is an empty string, or if both
                resolve to the same value (the encoding would not be reversible).
                NOTE(review): the base class presumably also raises ``ValueError`` for
                an unsupported ``action`` -- confirm against ``SmugglerConverter``.
        """
        super().__init__(action=action)
        self.zero_char = zero_char if zero_char is not None else "\u2062"  # Invisible Times
        self.one_char = one_char if one_char is not None else "\u2064"  # Invisible Plus

        # An empty marker would silently drop bits from the encoded output, and two
        # identical markers make 0 and 1 indistinguishable when decoding.
        if not self.zero_char or not self.one_char:
            raise ValueError("zero_char and one_char must be non-empty strings.")
        if self.zero_char == self.one_char:
            raise ValueError("zero_char and one_char must be different characters.")

    def encode_message(self, message: str) -> Tuple[str, str]:
        """
        Encodes the message using Sneaky Bits mode.

        The message is first converted to its UTF-8 byte sequence. Each byte is then
        emitted as 8 bits, most significant bit first, with every bit replaced by an
        invisible character (``self.zero_char`` for 0 and ``self.one_char`` for 1).

        Args:
            message (str): The message to encode.

        Returns:
            Tuple[str, str]: A tuple where the first element is a bit summary (always
            empty in this implementation) and the second element is the encoded
            message containing the invisible bits.
        """
        # Index 0 -> marker for a 0 bit, index 1 -> marker for a 1 bit.
        bit_markers = (self.zero_char, self.one_char)
        encoded_text = "".join(
            bit_markers[(byte >> shift) & 1]
            for byte in message.encode("utf-8")
            for shift in range(7, -1, -1)  # MSB first
        )

        # For Sneaky Bits, we do not provide a code point summary.
        bit_summary = ""
        logger.info(
            "Sneaky Bits encoding complete: %d characters encoded into %d invisible bits.",
            len(message),
            len(encoded_text),
        )
        return bit_summary, encoded_text

    def decode_message(self, message: str) -> str:
        """
        Decodes the message encoded using Sneaky Bits mode.

        Only characters equal to ``self.zero_char`` or ``self.one_char`` are kept; every
        other character in ``message`` is ignored. The remaining bits are grouped into
        8-bit chunks (most significant bit first), turned back into bytes and decoded
        as UTF-8. Trailing bits that do not fill a whole byte are dropped with a
        warning, and invalid UTF-8 is decoded with replacement characters after
        logging an error.

        Because the input is inspected one character at a time, markers longer than a
        single character are never matched by this method.

        Args:
            message (str): The message encoded with Sneaky Bits.

        Returns:
            str: The decoded original message.
        """
        zero_char, one_char = self.zero_char, self.one_char

        # Keep only the marker characters, already mapped to their bit value.
        bits = [1 if ch == one_char else 0 for ch in message if ch == zero_char or ch == one_char]

        leftover = len(bits) % 8
        if leftover:
            logger.warning("Encoded bit string length is not a multiple of 8. Ignoring incomplete trailing bits.")
            bits = bits[: len(bits) - leftover]

        decoded_bytes = bytearray()
        for start in range(0, len(bits), 8):
            value = 0
            for bit in bits[start : start + 8]:
                value = (value << 1) | bit
            decoded_bytes.append(value)

        try:
            decoded_text = decoded_bytes.decode("utf-8")
        except UnicodeDecodeError:
            # Best effort: return what can be recovered rather than failing outright.
            decoded_text = decoded_bytes.decode("utf-8", errors="replace")
            logger.error("Decoded byte sequence is not valid UTF-8; some characters may be replaced.")

        logger.info(
            "Sneaky Bits decoding complete: Decoded text length is %d characters.",
            len(decoded_text),
        )
        return decoded_text