Source code for pyrit.prompt_target.openai.openai_chat_audio_config
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from dataclasses import dataclass
from typing import Any, Literal
# Voices supported by OpenAI Chat Completions API audio output.
# OpenAI SDK: openai/types/chat/chat_completion_audio_param.py voice field
# SDK Literal includes: alloy, ash, ballad, coral, echo, sage, shimmer, verse, marin, cedar
# SDK docstring also lists: fable, nova, onyx (we include these for completeness)
# Note: SDK uses Union[str, Literal[...]] so any string is accepted by the API.
ChatAudioVoice = Literal[
"alloy", "ash", "ballad", "coral", "echo", "fable", "nova", "onyx", "sage", "shimmer", "verse", "marin", "cedar"
]
# Audio output formats supported by OpenAI Chat Completions API.
# OpenAI SDK: openai/types/chat/chat_completion_audio_param.py format field
# defines format: Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]]
ChatAudioFormat = Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]
@dataclass
class OpenAIChatAudioConfig:
"""
Configuration for audio output from OpenAI Chat Completions API.
When provided to OpenAIChatTarget, this enables audio output from models
that support it (e.g., gpt-4o-audio-preview).
Note: This is specific to the Chat Completions API. The Responses API does not
support audio input or output. For real-time audio, use RealtimeTarget instead.
"""
# The voice to use for audio output. Supported voices are:
voice: ChatAudioVoice
# The audio format for the response. Supported formats are:
audio_format: ChatAudioFormat = "wav"
# If True, historical user messages that contain both audio and text will only send
# the text (transcript) to reduce bandwidth and token usage. The current (last) user
# message will still include audio. Defaults to True.
prefer_transcript_for_history: bool = True
    def to_extra_body_parameters(self) -> dict[str, Any]:
        """
        Convert the config to the extra_body_parameters format for the OpenAI API.

        Returns:
            dict: Parameters to include in the request body to enable audio output.
        """
        return {
            "modalities": ["text", "audio"],
            "audio": {"voice": self.voice, "format": self.audio_format},
        }
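# --- Usage sketch (illustrative; not part of the original module) ---
# Constructs the config and prints the request-body parameters it produces. How the
# config is passed to OpenAIChatTarget is not shown here; consult that class for the
# actual keyword argument.
if __name__ == "__main__":
    config = OpenAIChatAudioConfig(voice="alloy", audio_format="mp3")
    print(config.to_extra_body_parameters())
    # {'modalities': ['text', 'audio'], 'audio': {'voice': 'alloy', 'format': 'mp3'}}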