# Source code for pyrit.orchestrator.models.orchestrator_result

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
from typing import Annotated, Literal, Optional

from colorama import Fore, Style

from pyrit.common.display_response import display_image_response
from pyrit.memory import CentralMemory
from pyrit.models import Score

logger = logging.getLogger(__name__)


# Type alias for the status strings an OrchestratorResult may carry. The second
# Annotated argument is a plain string kept as metadata describing each value.
OrchestratorResultStatus = Annotated[
    Literal[
        "success",
        "failure",
        "pruned",
        "adversarial_generation",
        "in_progress",
        "error",
        "unknown",
    ],
    """The status of an orchestrator result.

    Completion States:
        - success: The orchestrator run is complete and achieved its objective.
        - failure: The orchestrator run is complete and failed to achieve its objective.
        - error: The orchestrator run is complete and encountered an error.
        - unknown: The orchestrator run is complete and it is unknown whether it achieved its objective.

    Intermediate States:
        - in_progress: The orchestrator is still running.

    Special States:
        - pruned: The conversation was pruned as part of an attack and not related to success/failure/unknown/error.
        - adversarial_generation: The conversation was used as part of adversarial generation and not related to
          success/failure/unknown/error.
    """,
]



# [docs]
class OrchestratorResult:
    """The result of an orchestrator.

    Attributes:
        conversation_id: ID used to look up the conversation in memory when printing.
        objective: The objective text echoed in the printed status banner.
        status: One of the ``OrchestratorResultStatus`` values; selects the banner wording.
        confidence: Value supplied by the caller (default ``0.1``); it is stored but not
            used in this class. NOTE(review): its scale/meaning is not visible here - confirm.
        objective_score: Optional score printed last as the "objective score" and excluded
            from the auxiliary score listing.
    """

    def __init__(
        self,
        conversation_id: str,
        objective: str,
        status: OrchestratorResultStatus = "in_progress",
        confidence: float = 0.1,
        objective_score: Optional[Score] = None,
    ):
        """Store the result fields and grab the central memory instance.

        Args:
            conversation_id (str): ID of the conversation this result refers to.
            objective (str): The objective associated with the run.
            status (OrchestratorResultStatus): Status of the run. Defaults to "in_progress".
            confidence (float): Caller-supplied confidence value. Defaults to 0.1.
            objective_score (Optional[Score]): Score tied to the objective, if any.
        """
        self.conversation_id = conversation_id
        self.objective = objective
        self.status = status
        self.objective_score = objective_score
        self.confidence = confidence

        self._memory = CentralMemory.get_memory_instance()

    async def print_conversation_async(self, *, include_auxiliary_scores: bool = False) -> None:
        """Prints the conversation stored under ``conversation_id``, including the scores.

        Output order: a status banner, then every message piece of the conversation
        (optionally followed by its auxiliary scores), then the objective score if set.
        If memory returns no messages, a single notice is printed and nothing else.

        Args:
            include_auxiliary_scores (bool): When True, also print each score stored in
                memory for a message piece, skipping the one whose id matches
                ``objective_score``. Defaults to False.
        """
        target_messages = self._memory.get_conversation(conversation_id=self.conversation_id)

        # Truthiness already covers both None and an empty sequence.
        if not target_messages:
            print("No conversation with the target")
            return

        if self.status == "success":
            print(
                f"{Style.BRIGHT}{Fore.RED}The orchestrator has completed the conversation and achieved "
                f"the objective: {self.objective}"
            )
        elif self.status == "failure":
            print(f"{Style.BRIGHT}{Fore.RED}The orchestrator has not achieved the objective: {self.objective}")
        else:
            print(
                f"{Style.BRIGHT}{Fore.RED}The orchestrator with objective: {self.objective} "
                f"has ended with status: {self.status}"
            )

        for message in target_messages:
            for piece in message.request_pieces:
                if piece.role == "user":
                    print(f"{Style.BRIGHT}{Fore.BLUE}{piece.role}:")
                    # Only show the original when a conversion actually changed the text.
                    if piece.converted_value != piece.original_value:
                        print(f"Original value: {piece.original_value}")
                    print(f"Converted value: {piece.converted_value}")
                else:
                    print(f"{Style.NORMAL}{Fore.YELLOW}{piece.role}: {piece.converted_value}")

                await display_image_response(piece)

                if include_auxiliary_scores:
                    auxiliary_scores = (
                        self._memory.get_scores_by_prompt_ids(prompt_request_response_ids=[str(piece.id)]) or []
                    )
                    for auxiliary_score in auxiliary_scores:
                        # The objective score is printed once at the end, so skip it here.
                        if not self.objective_score or auxiliary_score.id != self.objective_score.id:
                            print(
                                f"{Style.DIM}{Fore.WHITE}auxiliary score: {auxiliary_score} : "
                                f"{auxiliary_score.score_rationale}"
                            )

        if self.objective_score:
            print(
                f"{Style.NORMAL}{Fore.WHITE}objective score: {self.objective_score} : "
                f"{self.objective_score.score_rationale}"
            )

        # Restore default styling so the last colour used above does not leak into
        # whatever the caller prints next.
        print(Style.RESET_ALL, end="")