Source code for pyrit.prompt_converter.audio_white_noise_converter
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import io
import logging
from typing import Any, Literal
import numpy as np
from scipy.io import wavfile
from pyrit.models import PromptDataType, data_serializer_factory
from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter
logger = logging.getLogger(__name__)
[docs]
class AudioWhiteNoiseConverter(PromptConverter):
    """
    Adds white noise to an audio file.

    Gaussian white noise is generated and mixed into the original signal at a
    level controlled by the noise_scale parameter. The output preserves the
    original sample rate, sample dtype (bit depth), channel count, and number
    of samples.
    """

    SUPPORTED_INPUT_TYPES = ("audio_path",)
    SUPPORTED_OUTPUT_TYPES = ("audio_path",)

    #: Accepted audio formats for conversion.
    AcceptedAudioFormats = Literal["wav"]

    def __init__(
        self,
        *,
        output_format: AcceptedAudioFormats = "wav",
        noise_scale: float = 0.02,
    ) -> None:
        """
        Initialize the converter with the white noise parameters.

        Args:
            output_format (str): The format of the audio file, defaults to "wav".
            noise_scale (float): Controls the level of the added noise, expressed
                as a fraction of the sample type's maximum value. It is used as the
                standard deviation of the Gaussian noise, so for int16 audio the
                standard deviation is noise_scale * 32767; for floating-point audio
                it is noise_scale * 1.0. Must be greater than 0 and at most 1.0.
                Defaults to 0.02.

        Raises:
            ValueError: If noise_scale is not in (0, 1] (this includes NaN).
        """
        # Written as a positive range check so that NaN, for which every
        # comparison is False, is rejected instead of slipping through.
        if not (0 < noise_scale <= 1.0):
            raise ValueError("noise_scale must be between 0 (exclusive) and 1.0 (inclusive).")
        self._output_format = output_format
        self._noise_scale = noise_scale

    def _add_noise(self, data: np.ndarray[Any, Any]) -> np.ndarray[Any, Any]:
        """
        Add white noise to a 1-D audio signal.

        Args:
            data: 1-D numpy array of audio samples.

        Returns:
            numpy array with white noise added, same length and dtype as input.
        """
        # Integer samples are scaled relative to the dtype's maximum value;
        # any other dtype is treated as having a full scale of 1.0.
        info = np.iinfo(data.dtype) if np.issubdtype(data.dtype, np.integer) else None
        full_scale = float(info.max) if info is not None else 1.0

        # Mix in float64 so the sum cannot wrap around in a narrow integer type.
        noise = np.random.normal(0, self._noise_scale * full_scale, size=data.shape)
        noisy = data.astype(np.float64) + noise

        if info is not None:
            # Clip to the representable range, then round to the nearest integer.
            # A bare float -> int cast truncates toward zero, which would collapse
            # every value in (-1, 1) to 0 and bias all samples toward zero.
            noisy = np.rint(np.clip(noisy, info.min, info.max))

        # Hand back the caller's dtype, as the docstring promises.
        return noisy.astype(data.dtype)

    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "audio_path") -> ConverterResult:
        """
        Convert the given audio file by adding white noise.

        Args:
            prompt (str): File path to the audio file to be converted.
            input_type (PromptDataType): The type of input data.

        Returns:
            ConverterResult: The result containing the converted audio file path.

        Raises:
            ValueError: If the input type is not supported.
            Exception: If there is an error during the conversion process.
        """
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")

        try:
            # Create serializer to read audio data
            audio_serializer = data_serializer_factory(
                category="prompt-memory-entries", data_type="audio_path", extension=self._output_format, value=prompt
            )
            audio_bytes = await audio_serializer.read_data()

            # Decode the WAV bytes in memory
            sample_rate, data = wavfile.read(io.BytesIO(audio_bytes))
            original_dtype = data.dtype

            # Apply white noise to each channel independently. Channels are
            # processed one at a time, in order, so the sequence of random draws
            # is stable for a given RNG state.
            if data.ndim == 1:
                noisy_data = self._add_noise(data)
            else:
                noisy_data = np.column_stack([self._add_noise(data[:, ch]) for ch in range(data.shape[1])])
            # Guard the output dtype so the written file keeps the input's sample
            # format; this is a no-op when the dtype already matches.
            noisy_data = noisy_data.astype(original_dtype, copy=False)

            # Write the processed data as a new WAV file
            output_bytes_io = io.BytesIO()
            wavfile.write(output_bytes_io, sample_rate, noisy_data)

            # Save the converted bytes using the serializer; its `value` is then
            # reported as the output location (presumably the newly saved file's
            # path -- confirm against the serializer implementation).
            await audio_serializer.save_data(data=output_bytes_io.getvalue())
            audio_serializer_file = str(audio_serializer.value)
            logger.info(
                "White noise (scale=%.4f) added to [%s], saved to [%s]",
                self._noise_scale,
                prompt,
                audio_serializer_file,
            )
        except Exception as e:
            # Log for diagnostics, then let the caller see the original exception.
            logger.error("Failed to add white noise: %s", str(e))
            raise

        return ConverterResult(output_text=audio_serializer_file, output_type=input_type)