# Source code for pyrit.prompt_converter.audio_white_noise_converter

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import io
import logging
from typing import Any, Literal

import numpy as np
from scipy.io import wavfile

from pyrit.models import PromptDataType, data_serializer_factory
from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter

logger = logging.getLogger(__name__)



[docs]
class AudioWhiteNoiseConverter(PromptConverter):
    """
    Adds white noise to an audio file.

    Gaussian white noise is generated and mixed into the original signal at a
    level controlled by the noise_scale parameter. The audio is read and
    written as WAV data, one channel at a time. The output keeps the original
    sample rate, sample dtype, channel count, and number of samples.
    """

    # Data types accepted as input. convert_async raises ValueError when
    # input_supported() returns False for the requested type (presumably that
    # check is driven by this tuple -- confirm against PromptConverter).
    SUPPORTED_INPUT_TYPES = ("audio_path",)
    # Data types this converter produces.
    SUPPORTED_OUTPUT_TYPES = ("audio_path",)

    #: Accepted audio formats for conversion.
    AcceptedAudioFormats = Literal["wav"]


[docs]
    def __init__(
        self,
        *,
        output_format: AcceptedAudioFormats = "wav",
        noise_scale: float = 0.02,
    ) -> None:
        """
        Initialize the converter with the white noise parameters.

        Args:
            output_format (str): The format of the audio file, defaults to "wav".
            noise_scale (float): Controls the amplitude of the added noise, expressed
                as a fraction of the signal's maximum possible value. For int16 audio
                the noise amplitude will be noise_scale * 32767. Must be greater than 0
                and at most 1.0. Defaults to 0.02.

        Raises:
            ValueError: If noise_scale is not in (0, 1].
        """
        if noise_scale <= 0 or noise_scale > 1.0:
            raise ValueError("noise_scale must be between 0 (exclusive) and 1.0 (inclusive).")
        self._output_format = output_format
        self._noise_scale = noise_scale


    def _add_noise(self, data: np.ndarray[Any, Any]) -> np.ndarray[Any, Any]:
        """
        Add white noise to a 1-D audio signal.

        Args:
            data: 1-D numpy array of audio samples.

        Returns:
            numpy array with white noise added, same length and dtype as input.
        """
        float_data = data.astype(np.float64)

        # Determine the amplitude range based on dtype
        if np.issubdtype(data.dtype, np.integer):
            info = np.iinfo(data.dtype)
            max_val = float(info.max)
        else:
            max_val = 1.0

        noise = np.random.normal(0, self._noise_scale * max_val, size=data.shape)
        noisy = float_data + noise

        # Clip to valid range
        if np.issubdtype(data.dtype, np.integer):
            noisy = np.clip(noisy, info.min, info.max)

        return np.asarray(noisy)


[docs]
    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "audio_path") -> ConverterResult:
        """
        Add white noise to the audio file at ``prompt`` and store the result.

        The file is loaded through a data serializer, decoded as WAV, passed
        through ``_add_noise`` (channel by channel when there is more than one
        channel), cast back to the dtype it was read with, re-encoded as WAV at
        the same sample rate, and saved through the same serializer.

        Args:
            prompt (str): File path to the audio file to be converted.
            input_type (PromptDataType): The type of input data.

        Returns:
            ConverterResult: The result containing the converted audio file path.

        Raises:
            ValueError: If the input type is not supported.
            Exception: Any error raised while reading, processing or saving the
                audio is logged and re-raised unchanged.
        """
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")
        try:
            # One serializer both loads the source audio and stores the result.
            serializer = data_serializer_factory(
                category="prompt-memory-entries",
                data_type="audio_path",
                extension=self._output_format,
                value=prompt,
            )
            raw_wav = await serializer.read_data()

            # Decode the WAV payload from memory.
            sample_rate, samples = wavfile.read(io.BytesIO(raw_wav))
            source_dtype = samples.dtype

            # Mono data is processed directly; otherwise every column (channel)
            # is processed on its own and the columns are stitched back together.
            if samples.ndim == 1:
                mixed = self._add_noise(samples)
            else:
                mixed = np.column_stack([self._add_noise(samples[:, idx]) for idx in range(samples.shape[1])])
            noisy_data = mixed.astype(source_dtype)

            # Encode the noisy samples as WAV into an in-memory buffer.
            wav_buffer = io.BytesIO()
            wavfile.write(wav_buffer, sample_rate, noisy_data)

            # Persist the encoded bytes; the serializer's value is the output path.
            await serializer.save_data(data=wav_buffer.getvalue())
            audio_serializer_file = str(serializer.value)
            logger.info(
                "White noise (scale=%.4f) added to [%s], saved to [%s]",
                self._noise_scale,
                prompt,
                audio_serializer_file,
            )

        except Exception as exc:
            logger.error("Failed to add white noise: %s", str(exc))
            raise
        return ConverterResult(output_text=audio_serializer_file, output_type=input_type)