Source code for pyrit.score.true_false.true_false_scorer

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Optional

from pyrit.models import Score
from pyrit.models.prompt_request_response import PromptRequestResponse
from pyrit.score.scorer import Scorer
from pyrit.score.scorer_prompt_validator import ScorerPromptValidator
from pyrit.score.true_false.true_false_score_aggregator import (
    TrueFalseAggregatorFunc,
    TrueFalseScoreAggregator,
)


class TrueFalseScorer(Scorer):
    """
    Base class for scorers that return true/false binary scores.

    This scorer evaluates prompt responses and returns a single boolean score indicating
    whether the response meets a specific criterion. Multiple pieces in a request response
    are aggregated using a TrueFalseAggregatorFunc function
    (default: TrueFalseScoreAggregator.OR).
    """

    def __init__(
        self,
        *,
        validator: ScorerPromptValidator,
        score_aggregator: TrueFalseAggregatorFunc = TrueFalseScoreAggregator.OR,
    ) -> None:
        """
        Initialize the scorer.

        Args:
            validator (ScorerPromptValidator): Passed unchanged to the base Scorer initializer.
            score_aggregator (TrueFalseAggregatorFunc): Callable used by ``_score_async`` to
                combine the per-piece scores into a single result.
                Defaults to TrueFalseScoreAggregator.OR.
        """
        super().__init__(validator=validator)
        self._score_aggregator = score_aggregator

    def validate_return_scores(self, scores: list[Score]) -> None:
        """
        Check that a scoring result has the shape a true/false scorer must produce.

        Args:
            scores (list[Score]): The scores to validate.

        Raises:
            ValueError: If ``scores`` does not contain exactly one element, or if that
                element's ``score_value`` is not "true" or "false" (compared
                case-insensitively).
        """
        if len(scores) != 1:
            raise ValueError("TrueFalseScorer should return exactly one score.")

        if scores[0].score_value.lower() not in ("true", "false"):
            raise ValueError("TrueFalseScorer score value must be True or False.")

    async def _score_async(
        self, request_response: PromptRequestResponse, *, objective: Optional[str] = None
    ) -> list[Score]:
        """
        Score the given request response asynchronously.

        For TrueFalseScorer, multiple piece scores are aggregated into a single
        true/false score.

        Args:
            request_response (PromptRequestResponse): The prompt request response to score.
            objective (Optional[str]): The objective to evaluate against. Defaults to None.

        Returns:
            list[Score]: A list containing a single true/false Score object.

        Raises:
            ValueError: If no pieces were scored and the first request piece has neither
                an ``id`` nor an ``original_prompt_id`` to attach the fallback score to.
        """
        # Get individual scores for all supported pieces using base implementation logic
        score_list = await super()._score_async(request_response, objective=objective)

        if not score_list:
            # If no pieces matched (e.g., due to role filter), return False.
            # Use the first request piece's ID (or original_prompt_id as fallback).
            first_piece = request_response.request_pieces[0]
            piece_id = first_piece.id or first_piece.original_prompt_id
            if piece_id is None:
                raise ValueError("Cannot create score: request piece has no id or original_prompt_id")

            return_score = Score(
                score_value=str(False).lower(),
                score_value_description="No pieces to score after filtering; returning false.",
                score_type="true_false",
                score_category=None,
                score_metadata=None,
                score_rationale="No supported pieces (possibly filtered by role).",
                scorer_class_identifier=self.get_identifier(),
                prompt_request_response_id=piece_id,
                objective=objective,
            )
            return [return_score]

        # Use score aggregator to combine multiple piece scores into a single score
        result = self._score_aggregator(score_list)

        # Use the prompt_request_response_id from the first score
        return_score = Score(
            score_value=str(result.value).lower(),
            score_value_description=result.description,
            score_type="true_false",
            score_category=result.category,
            score_metadata=result.metadata,
            score_rationale=result.rationale,
            scorer_class_identifier=self.get_identifier(),
            prompt_request_response_id=score_list[0].prompt_request_response_id,
            objective=objective,
        )

        # The return value and its operand must sit on the same line; a bare
        # `return` followed by the list on the next line would return None.
        return [return_score]