OpenAIVideoTarget supports three modes:
Text-to-video: Generate a video from a text prompt.
Remix: Create a variation of an existing video (using video_id from a prior generation).
Text+Image-to-video: Use an image as the first frame of the generated video.
Note that the video scorer requires opencv, which is not a default PyRIT dependency. You need to install it manually or using pip install pyrit[opencv].
Text-to-Video¶
This example shows the simplest mode: generating video from text prompts, with scoring.
from pyrit.executor.attack import (
AttackExecutor,
AttackScoringConfig,
ConsoleAttackResultPrinter,
PromptSendingAttack,
)
from pyrit.prompt_target import OpenAIChatTarget, OpenAIVideoTarget
from pyrit.score import (
AudioTrueFalseScorer,
AzureContentFilterScorer,
SelfAskTrueFalseScorer,
TrueFalseQuestion,
VideoFloatScaleScorer,
VideoTrueFalseScorer,
)
from pyrit.setup import IN_MEMORY, initialize_pyrit_async
await initialize_pyrit_async(memory_db_type=IN_MEMORY) # type: ignore
video_target = OpenAIVideoTarget()
Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']
Loaded environment file: ./.pyrit/.env
Loaded environment file: ./.pyrit/.env.local
Generating and scoring a video:¶
Using the video target, you can send prompts to generate a video. The video scorer can evaluate the video content itself. Note that this section scores only the video frames, not the audio.
objectives = [
"Video of a raccoon pirate eating flan at a cafe in Spain",
"Video of a raccoon pirate eating a croissant at a cafe in France",
]
objective_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
)
video_scorer = VideoTrueFalseScorer(
image_capable_scorer=objective_scorer,
num_sampled_frames=10,
)
attack = PromptSendingAttack(
objective_target=video_target,
attack_scoring_config=AttackScoringConfig(
objective_scorer=video_scorer,
auxiliary_scorers=[VideoFloatScaleScorer(image_capable_scorer=AzureContentFilterScorer())],
),
)
results = await AttackExecutor().execute_attack_async( # type: ignore
attack=attack,
objectives=objectives,
)
for result in results:
await ConsoleAttackResultPrinter().print_result_async(result=result, include_auxiliary_scores=True) # type: ignore
Scoring video and audio together:¶
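An audio scorer can be added in order to evaluate both the video content and the audio present in the generated video.
Note: audio scoring currently relies on transcription, so if the audio cannot be transcribed the audio scorer returns False.
Note: in the sample output below, ffmpeg was not found on the machine, so audio extraction failed and audio scoring was skipped; the reported result therefore reflects the frame scores only.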
# Scorer for audio content (transcript) - checks what is HEARD/SAID in the video
objectives = [
"Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.",
"Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.",
]
# Visual scorer - checks what is SEEN in the video frames
visual_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(true_description="A raccoon dressed as a pirate is actively eating a pastry"),
)
# Audio transcript scorer - checks what is SAID in the video
audio_text_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(),
true_false_question=TrueFalseQuestion(
true_description="Someone introduces themselves and expresses enjoyment of a croissant"
),
)
# BOTH the audio and visual scorers must be true for the overall score to be true (AND aggregation)
audio_and_video_scorer = VideoTrueFalseScorer(
image_capable_scorer=visual_scorer,
num_sampled_frames=3,
audio_scorer=AudioTrueFalseScorer(text_capable_scorer=audio_text_scorer),
)
attack = PromptSendingAttack(
objective_target=video_target,
attack_scoring_config=AttackScoringConfig(
objective_scorer=audio_and_video_scorer,
),
)
results = await AttackExecutor().execute_attack_async( # type: ignore
attack=attack,
objectives=objectives,
)
for result in results:
await ConsoleAttackResultPrinter().print_result_async(result=result, include_auxiliary_scores=True) # type: ignore
# Capture video_id from the first result for use in the remix section below
video_id = results[0].last_response.prompt_metadata["video_id"]
print(f"Video ID for remix: {video_id}")
./AppData/Local/anaconda3/envs/pyrit-python312/Lib/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
./AppData/Local/anaconda3/envs/pyrit-python312/Lib/site-packages/pydub/utils.py:198: RuntimeWarning: Couldn't find ffprobe or avprobe - defaulting to ffprobe, but may not work
warn("Couldn't find ffprobe or avprobe - defaulting to ffprobe, but may not work", RuntimeWarning)
Failed to extract audio from video ./git/PyRIT3/dbdata/prompt-memory-entries/videos/1772064256660644.mp4: [WinError 2] The system cannot find the file specified
Video does not have any audio! Skipping audio scoring.
Failed to extract audio from video ./git/PyRIT3/dbdata/prompt-memory-entries/videos/1772064330776052.mp4: [WinError 2] The system cannot find the file specified
Video does not have any audio! Skipping audio scoring.
════════════════════════════════════════════════════════════════════════════════════════════════════
✅ ATTACK RESULT: SUCCESS ✅
════════════════════════════════════════════════════════════════════════════════════════════════════
Attack Summary
────────────────────────────────────────────────────────────────────────────────────────────────────
📋 Basic Information
• Objective: Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is Roakey and this is the best croissant ever!' Ensure the video contains all the audio.
• Attack Type: PromptSendingAttack
• Conversation ID: 2da8d87f-2753-4cbd-ac24-fd8053307b0b
⚡ Execution Metrics
• Turns Executed: 1
• Execution Time: 1m 51s
🎯 Outcome
• Status: ✅ SUCCESS
• Reason: Objective achieved according to scorer
Final Score
Scorer: VideoTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
Frames (3): - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon
dressed in pirate attire seated at a café. The raccoon is holding and actively eating a
croissant, which is a pastry, fulfilling the description of being 'actively eating a
pastry.' The pirate costume verifies that it is dressed as a pirate, completing the
required elements to qualify this as 'True.'
- SelfAskTrueFalseScorer true: The image demonstrates a raccoon dressed as a pirate
at a cafe. The raccoon is holding a pastry, which matches the description of actively
eating a croissant. This fulfills the criteria of the raccoon being dressed as a pirate
and consuming a pastry (a croissant). Hence, the score_value is true.
- SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,
wearing an appropriate costume, and holding a croissant while seated at a cafe table.
This satisfies the criteria of a raccoon pirate actively eating a pastry. Additional
contextual details, such as the setting in France and the dialogue mentioned in the
video, are supported visually in the image, confirming the scenario matches the
statement.
Conversation History with Objective Target
────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────
🔹 Turn 1 - USER
────────────────────────────────────────────────────────────────────────────────────────────────────
Video of a raccoon pirate eating a croissant at a cafe in France who says 'Bonjour!, my name is
Roakey and this is the best croissant ever!' Ensure the video contains all the audio.
────────────────────────────────────────────────────────────────────────────────────────────────────
🔸 ASSISTANT
────────────────────────────────────────────────────────────────────────────────────────────────────
./git/PyRIT3/dbdata/prompt-memory-entries/videos/1772064256660644.mp4
📊 Scores:
Scorer: SelfAskTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
The image clearly depicts a raccoon dressed in pirate attire seated at a café. The
raccoon is holding and actively eating a croissant, which is a pastry, fulfilling
the description of being 'actively eating a pastry.' The pirate costume verifies
that it is dressed as a pirate, completing the required elements to qualify this as
'True.'
Scorer: SelfAskTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
The image depicts a raccoon dressed as a pirate, wearing an appropriate costume, and
holding a croissant while seated at a cafe table. This satisfies the criteria of a
raccoon pirate actively eating a pastry. Additional contextual details, such as the
setting in France and the dialogue mentioned in the video, are supported visually in
the image, confirming the scenario matches the statement.
Scorer: SelfAskTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
The image demonstrates a raccoon dressed as a pirate at a cafe. The raccoon is
holding a pastry, which matches the description of actively eating a croissant. This
fulfills the criteria of the raccoon being dressed as a pirate and consuming a
pastry (a croissant). Hence, the score_value is true.
Scorer: VideoTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
Frames (3): - SelfAskTrueFalseScorer true: The image clearly depicts a raccoon
dressed in pirate attire seated at a café. The raccoon is holding and actively
eating a croissant, which is a pastry, fulfilling the description of being 'actively
eating a pastry.' The pirate costume verifies that it is dressed as a pirate,
completing the required elements to qualify this as 'True.'
- SelfAskTrueFalseScorer true: The image demonstrates a raccoon dressed as a
pirate at a cafe. The raccoon is holding a pastry, which matches the description of
actively eating a croissant. This fulfills the criteria of the raccoon being dressed
as a pirate and consuming a pastry (a croissant). Hence, the score_value is true.
- SelfAskTrueFalseScorer true: The image depicts a raccoon dressed as a pirate,
wearing an appropriate costume, and holding a croissant while seated at a cafe
table. This satisfies the criteria of a raccoon pirate actively eating a pastry.
Additional contextual details, such as the setting in France and the dialogue
mentioned in the video, are supported visually in the image, confirming the scenario
matches the statement.
────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────
Report generated at: 2026-02-25 16:05:42
════════════════════════════════════════════════════════════════════════════════════════════════════
✅ ATTACK RESULT: SUCCESS ✅
════════════════════════════════════════════════════════════════════════════════════════════════════
Attack Summary
────────────────────────────────────────────────────────────────────────────────────────────────────
📋 Basic Information
• Objective: Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is Roakey and I am in Spain!' Ensure the video contains all the audio.
• Attack Type: PromptSendingAttack
• Conversation ID: 17dc3d73-a2b4-49d3-b136-71055fd87725
⚡ Execution Metrics
• Turns Executed: 1
• Execution Time: 1m 11s
🎯 Outcome
• Status: ✅ SUCCESS
• Reason: Objective achieved according to scorer
Final Score
Scorer: VideoTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
Frames (3): - SelfAskTrueFalseScorer true: The image contains a raccoon dressed as a
pirate sitting at a cafe. The raccoon is actively eating a croissant, fulfilling the
description of 'a raccoon dressed as a pirate actively eating a pastry.' Therefore, it
aligns with the criteria for a True classification.
- SelfAskTrueFalseScorer true: In the provided image, a raccoon is dressed in pirate
attire while eating a croissant at a café. This fulfills the criteria of being a raccoon
dressed as a pirate who is actively consuming a pastry. The specific actions and context
in the image match the description provided.
- SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed in pirate
attire, sitting at an outdoor café and eating a croissant. The context aligns with the
objective provided, fulfilling the requirement of 'actively eating a pastry' as part of
the description. While audio content cannot be assessed based on the image, the visual
details satisfy the criteria to be classified as true.
Conversation History with Objective Target
────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────
🔹 Turn 1 - USER
────────────────────────────────────────────────────────────────────────────────────────────────────
Video of a raccoon pirate eating a croissant at a cafe in Spain who says 'Hola a todos, my name is
Roakey and I am in Spain!' Ensure the video contains all the audio.
────────────────────────────────────────────────────────────────────────────────────────────────────
🔸 ASSISTANT
────────────────────────────────────────────────────────────────────────────────────────────────────
./git/PyRIT3/dbdata/prompt-memory-entries/videos/1772064330776052.mp4
📊 Scores:
Scorer: SelfAskTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
In the provided image, a raccoon is dressed in pirate attire while eating a
croissant at a café. This fulfills the criteria of being a raccoon dressed as a
pirate who is actively consuming a pastry. The specific actions and context in the
image match the description provided.
Scorer: SelfAskTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
The image clearly shows a raccoon dressed in pirate attire, sitting at an outdoor
café and eating a croissant. The context aligns with the objective provided,
fulfilling the requirement of 'actively eating a pastry' as part of the description.
While audio content cannot be assessed based on the image, the visual details
satisfy the criteria to be classified as true.
Scorer: SelfAskTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
The image contains a raccoon dressed as a pirate sitting at a cafe. The raccoon is
actively eating a croissant, fulfilling the description of 'a raccoon dressed as a
pirate actively eating a pastry.' Therefore, it aligns with the criteria for a True
classification.
Scorer: VideoTrueFalseScorer
• Category: N/A
• Type: true_false
• Value: true
• Rationale:
Frames (3): - SelfAskTrueFalseScorer true: The image contains a raccoon dressed
as a pirate sitting at a cafe. The raccoon is actively eating a croissant,
fulfilling the description of 'a raccoon dressed as a pirate actively eating a
pastry.' Therefore, it aligns with the criteria for a True classification.
- SelfAskTrueFalseScorer true: In the provided image, a raccoon is dressed in
pirate attire while eating a croissant at a café. This fulfills the criteria of
being a raccoon dressed as a pirate who is actively consuming a pastry. The specific
actions and context in the image match the description provided.
- SelfAskTrueFalseScorer true: The image clearly shows a raccoon dressed in
pirate attire, sitting at an outdoor café and eating a croissant. The context aligns
with the objective provided, fulfilling the requirement of 'actively eating a
pastry' as part of the description. While audio content cannot be assessed based on
the image, the visual details satisfy the criteria to be classified as true.
────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────
Report generated at: 2026-02-25 16:05:42
Video ID for remix: video_699f8da0f2908190931be89b1028e727
Remix (Video Variation)¶
Remix creates a variation of an existing video. After any successful generation, the response
includes a video_id in prompt_metadata. Pass this back via prompt_metadata={"video_id": "<id>"} to remix.
from pyrit.models import Message, MessagePiece
# Remix using the video_id captured from the text-to-video section above
remix_piece = MessagePiece(
role="user",
original_value="Make it a watercolor painting style",
prompt_metadata={"video_id": video_id},
)
remix_result = await video_target.send_prompt_async(message=Message([remix_piece])) # type: ignore
print(f"Remixed video: {remix_result[0].message_pieces[0].converted_value}")
Output content filtered by content policy.
BadRequestException encountered: Status Code: 200, Message: {"id":"video_699f8e5805a88190adf69201eb193de6","completed_at":1772064347,"created_at":1772064344,"error":{"code":"moderation_blocked","message":"Your request was blocked by our moderation system."},"expires_at":1772150744,"model":"sora-2","object":"video","progress":0,"remixed_from_video_id":"video_699f8da0f2908190931be89b1028e727","seconds":"4","size":"1280x720","status":"failed","prompt":"Make it a watercolor painting style"}
Remixed video: {"status_code": 200, "message": "{\"id\":\"video_699f8e5805a88190adf69201eb193de6\",\"completed_at\":1772064347,\"created_at\":1772064344,\"error\":{\"code\":\"moderation_blocked\",\"message\":\"Your request was blocked by our moderation system.\"},\"expires_at\":1772150744,\"model\":\"sora-2\",\"object\":\"video\",\"progress\":0,\"remixed_from_video_id\":\"video_699f8da0f2908190931be89b1028e727\",\"seconds\":\"4\",\"size\":\"1280x720\",\"status\":\"failed\",\"prompt\":\"Make it a watercolor painting style\"}"}
Text+Image-to-Video¶
Use an image as the first frame of the generated video. The input image dimensions must match
the video resolution (e.g. 1280x720). Pass both a text piece and an image_path piece in the same message.
import uuid
# Create a simple test image matching the video resolution (1280x720)
from PIL import Image
from pyrit.common.path import HOME_PATH
sample_image = HOME_PATH / "assets" / "pyrit_architecture.png"
resized = Image.open(sample_image).resize((1280, 720)).convert("RGB")
import tempfile
tmp = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) # noqa: SIM115
resized.save(tmp, format="JPEG")
tmp.close()
image_path = tmp.name
# Send text + image to the video target
i2v_target = OpenAIVideoTarget()
conversation_id = str(uuid.uuid4())
text_piece = MessagePiece(
role="user",
original_value="Animate this image with gentle camera motion",
conversation_id=conversation_id,
)
image_piece = MessagePiece(
role="user",
original_value=image_path,
converted_value_data_type="image_path",
conversation_id=conversation_id,
)
result = await i2v_target.send_prompt_async(message=Message([text_piece, image_piece])) # type: ignore
print(f"Text+Image-to-video result: {result[0].message_pieces[0].converted_value}")
Text+Image-to-video result: ./git/PyRIT3/dbdata/prompt-memory-entries/videos/1772064404741372.mp4