# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import asyncio
import logging
import uuid
from typing import Any, Optional, Sequence
from pyrit.common.utils import combine_dict
from pyrit.models import (
PromptRequestResponse,
SeedPrompt,
SeedPromptGroup,
)
from pyrit.models.filter_criteria import PromptConverterState, PromptFilterCriteria
from pyrit.orchestrator import (
Orchestrator,
OrchestratorResult,
OrchestratorResultStatus,
)
from pyrit.prompt_normalizer import PromptConverterConfiguration, PromptNormalizer
from pyrit.prompt_target import PromptChatTarget, PromptTarget
from pyrit.prompt_target.batch_helper import batch_task_async
from pyrit.score import Scorer
logger = logging.getLogger(__name__)
class PromptSendingOrchestrator(Orchestrator):
"""
This orchestrator takes a set of prompts, converts them using the list of PromptConverters,
    sends them to a target, and scores the responses with scorers (if provided).
"""
def __init__(
self,
objective_target: PromptTarget,
request_converter_configurations: Optional[list[PromptConverterConfiguration]] = None,
response_converter_configurations: Optional[list[PromptConverterConfiguration]] = None,
objective_scorer: Optional[Scorer] = None,
auxiliary_scorers: Optional[list[Scorer]] = None,
should_convert_prepended_conversation: bool = True,
batch_size: int = 10,
retries_on_objective_failure: int = 0,
verbose: bool = False,
) -> None:
"""
Args:
objective_target (PromptTarget): The target for sending prompts.
            request_converter_configurations (list[PromptConverterConfiguration], Optional): List of configurations
                for converters applied to each request before it is sent to the target. These are stacked in
                the order they are provided, e.g. the output of converter1 is the input of converter2.
            response_converter_configurations (list[PromptConverterConfiguration], Optional): List of configurations
                for converters applied to each response received from the target.
            objective_scorer (Scorer, Optional): A true/false scorer used to determine whether the objective
                was achieved. Default is None.
            auxiliary_scorers (list[Scorer], Optional): List of additional scorers applied to each response
                immediately after it is received. Default is None.
            should_convert_prepended_conversation (bool, Optional): Whether to apply the request converters to the
                prepended conversation before adding it to memory. Defaults to True.
batch_size (int, Optional): The (max) batch size for sending prompts. Defaults to 10.
Note: If providing max requests per minute on the prompt_target, this should be set to 1 to
ensure proper rate limit management.
retries_on_objective_failure (int, Optional): Number of retries to attempt if objective fails. Defaults to
0.
verbose (bool, Optional): Whether to log debug information. Defaults to False.
"""
super().__init__(verbose=verbose)
self._prompt_normalizer = PromptNormalizer()
if objective_scorer and objective_scorer.scorer_type != "true_false":
raise ValueError("Objective scorer must be a true/false scorer")
self._objective_scorer = objective_scorer or None
self._auxiliary_scorers = auxiliary_scorers or []
self._objective_target = objective_target
self._request_converter_configurations = request_converter_configurations or []
self._response_converter_configurations = response_converter_configurations or []
self._should_convert_prepended_conversation = should_convert_prepended_conversation
self._batch_size = batch_size
self._retries_on_objective_failure = retries_on_objective_failure
def set_skip_criteria(
self, *, skip_criteria: PromptFilterCriteria, skip_value_type: PromptConverterState = "original"
):
"""
Sets the skip criteria for the orchestrator.
        Prompts already in memory that match these criteria will not be sent to the target.
"""
self._prompt_normalizer.set_skip_criteria(skip_criteria=skip_criteria, skip_value_type=skip_value_type)
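    # A hedged sketch of setting skip criteria. The `labels` field on
    # PromptFilterCriteria is an assumption and may differ across versions;
    # "original" means matching is done against the unconverted prompt values.
    #
    #   criteria = PromptFilterCriteria(labels={"run": "baseline"})  # assumed field
    #   orchestrator.set_skip_criteria(skip_criteria=criteria, skip_value_type="original")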
async def _add_prepended_conversation_to_memory(
self,
prepended_conversation: Optional[list[PromptRequestResponse]],
conversation_id: str,
):
"""
Processes the prepended conversation by converting it if needed and adding it to memory.
Args:
prepended_conversation (Optional[list[PromptRequestResponse]]): The conversation to prepend
conversation_id (str): The conversation ID to use for the request pieces
"""
if not prepended_conversation:
return
if not isinstance(self._objective_target, PromptChatTarget):
raise ValueError("Prepended conversation can only be used with a PromptChatTarget")
await self._prompt_normalizer.add_prepended_conversation_to_memory(
prepended_conversation=prepended_conversation,
conversation_id=conversation_id,
should_convert=self._should_convert_prepended_conversation,
converter_configurations=self._request_converter_configurations,
orchestrator_identifier=self.get_identifier(),
)
async def _score_auxiliary_async(self, result: PromptRequestResponse) -> None:
"""
Scores the response using auxiliary scorers if they are configured.
Args:
result (PromptRequestResponse): The response to score
"""
if not self._auxiliary_scorers:
return
tasks = []
for piece in result.request_pieces:
if piece.role == "assistant":
for scorer in self._auxiliary_scorers:
tasks.append(scorer.score_async(request_response=piece))
if tasks:
await asyncio.gather(*tasks)
async def _score_objective_async(
self, result: PromptRequestResponse, objective: str
) -> tuple[OrchestratorResultStatus, Optional[Any]]:
"""
Scores the response using the objective scorer if configured.
Args:
result (PromptRequestResponse): The response to score
objective (str): The objective to score against
Returns:
            tuple[OrchestratorResultStatus, Optional[Any]]: A tuple containing the status and the objective score.
            If the objective_scorer returns a list of scores, the first score that is true will be returned as the
            objective score.
            Note: this behavior can be overridden by setting the objective_scorer to a CompositeScorer.
"""
if not self._objective_scorer:
return "unknown", None
status: OrchestratorResultStatus = "failure"
objective_score = None
first_failure_score = None
for piece in result.request_pieces:
if piece.role == "assistant":
objective_score_list = await self._objective_scorer.score_async(
request_response=piece,
task=objective,
)
# Find and save the first score that is true
for score in objective_score_list:
if score.get_value():
objective_score = score
status = "success"
break
elif first_failure_score is None:
first_failure_score = score
if status == "success":
break
# If no success was found, use the first failure score
if status == "failure" and first_failure_score is not None:
objective_score = first_failure_score
return status, objective_score
async def run_attack_async(
self,
*,
objective: str,
        seed_prompt: Optional[SeedPromptGroup] = None,
prepended_conversation: Optional[list[PromptRequestResponse]] = None,
memory_labels: Optional[dict[str, str]] = None,
    ) -> Optional[OrchestratorResult]:
"""
Runs the attack.
Args:
objective (str): The objective of the attack.
seed_prompt (SeedPromptGroup, Optional): The seed prompt group to start the conversation. By default the
objective is used.
prepended_conversation (list[PromptRequestResponse], Optional): The conversation to prepend to the attack.
Sent to objective target.
            memory_labels (dict[str, str], Optional): The memory labels to use for the attack.
        Returns:
            Optional[OrchestratorResult]: The result of the attack, or None if the prompts were skipped
                because they matched the configured skip criteria.
        """
conversation_id = ""
if not seed_prompt:
seed_prompt = SeedPromptGroup(prompts=[SeedPrompt(value=objective, data_type="text")])
status: OrchestratorResultStatus = "unknown"
objective_score = None
for _ in range(self._retries_on_objective_failure + 1):
conversation_id = str(uuid.uuid4())
await self._add_prepended_conversation_to_memory(prepended_conversation, conversation_id)
result = await self._prompt_normalizer.send_prompt_async(
seed_prompt_group=seed_prompt,
target=self._objective_target,
conversation_id=conversation_id,
request_converter_configurations=self._request_converter_configurations,
response_converter_configurations=self._response_converter_configurations,
labels=combine_dict(existing_dict=self._global_memory_labels, new_dict=memory_labels),
orchestrator_identifier=self.get_identifier(),
)
if not result:
# This can happen if we skipped the prompts
return None
await self._score_auxiliary_async(result)
status, objective_score = await self._score_objective_async(result, objective)
if status == "success":
break
return OrchestratorResult(
conversation_id=conversation_id,
objective=objective,
status=status,
objective_score=objective_score,
)
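    # Example call (a sketch; the prompt and objective strings are placeholders):
    # send a custom seed prompt while still scoring the response against the
    # stated objective.
    #
    #   seed = SeedPromptGroup(prompts=[SeedPrompt(value="<prompt text>", data_type="text")])
    #   result = await orchestrator.run_attack_async(
    #       objective="<objective text>",
    #       seed_prompt=seed,
    #   )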
async def run_attacks_async(
self,
*,
objectives: list[str],
seed_prompts: Optional[list[SeedPromptGroup]] = None,
prepended_conversations: Optional[list[list[PromptRequestResponse]]] = None,
memory_labels: Optional[dict[str, str]] = None,
) -> list[OrchestratorResult]:
"""
Runs multiple attacks in parallel using batch_size.
Args:
objectives (list[str]): List of objectives for the attacks.
seed_prompts (list[SeedPromptGroup], Optional): List of seed prompt groups to start the conversations.
If not provided, each objective will be used as its own seed prompt.
            prepended_conversations (list[list[PromptRequestResponse]], Optional): List of conversations to prepend
                to the attacks, one per objective. Sent to the objective target.
memory_labels (dict[str, str], Optional): The memory labels to use for the attacks.
Returns:
list[OrchestratorResult]: List of results from each attack.
"""
if not seed_prompts:
seed_prompts = [None] * len(objectives)
elif len(seed_prompts) != len(objectives):
raise ValueError("Number of seed prompts must match number of objectives")
if not prepended_conversations:
prepended_conversations = [None] * len(objectives)
elif len(prepended_conversations) != len(objectives):
raise ValueError("Number of prepended conversations must match number of objectives")
batch_items: list[Sequence[Any]] = [objectives, seed_prompts, prepended_conversations]
batch_item_keys = [
"objective",
"seed_prompt",
"prepended_conversation",
]
results = await batch_task_async(
prompt_target=self._objective_target,
batch_size=self._batch_size,
items_to_batch=batch_items,
task_func=self.run_attack_async,
task_arguments=batch_item_keys,
memory_labels=memory_labels,
)
return [result for result in results if result is not None]
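    # Example call (a sketch; the objective strings are placeholders): run several
    # attacks in batches of `batch_size` and collect their statuses.
    #
    #   results = await orchestrator.run_attacks_async(
    #       objectives=["<objective 1>", "<objective 2>"],
    #   )
    #   statuses = [r.status for r in results]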
async def _run_attacks_with_only_objectives_async(
self,
*,
objectives: list[str],
memory_labels: Optional[dict[str, str]] = None,
) -> list[OrchestratorResult]:
"""
        Runs multiple attacks in parallel using batch_size, sending only the objectives as seed prompts.
Args:
objectives (list[str]): List of objectives for the attacks.
memory_labels (dict[str, str], Optional): The memory labels to use for the attacks.
Returns:
list[OrchestratorResult]: List of results from each attack.
"""
batch_items = [
objectives,
]
batch_item_keys = [
"objective",
]
results = await batch_task_async(
prompt_target=self._objective_target,
batch_size=self._batch_size,
items_to_batch=batch_items,
task_func=self.run_attack_async,
task_arguments=batch_item_keys,
memory_labels=memory_labels,
)
return results