Source code for pyrit.models.score
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, Literal, Optional, get_args
import uuid
ScoreType = Literal["true_false", "float_scale"]
class Score:
    id: uuid.UUID | str

    # The value the scorer ended up with; e.g. True (if true_false) or 0 (if float_scale)
    score_value: str

    # A free-text description of the score value
    score_value_description: str

    # The type of the scorer; e.g. "true_false" or "float_scale"
    score_type: ScoreType

    # The harm category being scored (e.g. "hate" or "violence")
    score_category: str

    # Extra data the scorer provides around the rationale of the score
    score_rationale: str

    # Custom metadata a scorer might use. This is left undefined other than for the
    # specific scorer that uses it.
    score_metadata: str

    # The identifier of the scorer class, including relevant information
    # e.g. {"scorer_name": "SelfAskScorer", "classifier": "current_events.yml"}
    scorer_class_identifier: Dict[str, str]

    # The prompt_request_response_id that this score is scoring.
    # Note that a scorer can generate an additional request; this is NOT that request's ID, but
    # the ID associated with what is being scored.
    prompt_request_response_id: uuid.UUID | str

    # Timestamp of when the score was created
    timestamp: datetime

    # The task the text is scored against (the original attacker model's objective)
    task: str

    def __init__(
        self,
        *,
        id: Optional[uuid.UUID | str] = None,
        score_value: str,
        score_value_description: str,
        score_type: ScoreType,
        score_category: str,
        score_rationale: str,
        score_metadata: str,
        scorer_class_identifier: Optional[Dict[str, str]] = None,
        prompt_request_response_id: str | uuid.UUID,
        timestamp: Optional[datetime] = None,
        task: Optional[str] = None,
    ):
        self.id = id if id else uuid.uuid4()
        self.timestamp = timestamp if timestamp else datetime.now()

        self.validate(score_type, score_value)
        self.score_value = score_value
        self.score_value_description = score_value_description

        if score_type not in get_args(ScoreType):
            raise ValueError(f"Score type {score_type} is not a valid score type.")
        self.score_type = score_type

        self.score_category = score_category
        self.score_rationale = score_rationale
        self.score_metadata = score_metadata
        self.scorer_class_identifier = scorer_class_identifier
        self.prompt_request_response_id = prompt_request_response_id
        self.task = task
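
    # A minimal construction sketch (comment only, not executed at import; the field
    # values below are hypothetical). All arguments are keyword-only, and score_value
    # is always stored as a string, even for float_scale scores:
    #
    #     score = Score(
    #         score_value="true",
    #         score_value_description="Harmful content was detected",
    #         score_type="true_false",
    #         score_category="hate",
    #         score_rationale="The response contains a direct insult.",
    #         score_metadata="",
    #         prompt_request_response_id=uuid.uuid4(),
    #     )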

    def get_value(self):
        """
        Returns the value of the score based on its type.

        If the score type is "true_false", it returns True if the score value is "true"
        (case-insensitive), otherwise it returns False.
        If the score type is "float_scale", it returns the score value as a float.

        Raises:
            ValueError: If the score type is unknown.

        Returns:
            The value of the score based on its type.
        """
        if self.score_type == "true_false":
            return self.score_value.lower() == "true"
        elif self.score_type == "float_scale":
            return float(self.score_value)

        raise ValueError(f"Unknown scorer type: {self.score_type}")

    def validate(self, scorer_type, score_value):
        if scorer_type == "true_false" and str(score_value).lower() not in ["true", "false"]:
            raise ValueError(f"True False scorers must have a score value of 'true' or 'false' not {score_value}")
        elif scorer_type == "float_scale":
            # Convert first, so a non-numeric value and an out-of-range value raise distinct errors.
            try:
                score = float(score_value)
            except ValueError:
                raise ValueError(f"Float scale scorers require a numeric score value. Got {score_value}")
            if not (0 <= score <= 1):
                raise ValueError(f"Float scale scorers must have a score value between 0 and 1. Got {score_value}")

    def __str__(self):
        if self.scorer_class_identifier:
            return f"{self.scorer_class_identifier['__type__']}: {self.score_category}: {self.score_value}"
        return f": {self.score_category}: {self.score_value}"

    __repr__ = __str__


@dataclass
class UnvalidatedScore:
    """
    Score is an object that validates all of its fields. However, we need a common
    data class that can store the raw score value before it is normalized and validated.
    """

    # The raw score value; it has no scale. E.g., in a Likert scale it could be 1-5.
    raw_score_value: str
    score_value_description: str
    score_type: ScoreType
    score_category: str
    score_rationale: str
    score_metadata: str
    scorer_class_identifier: Dict[str, str]
    prompt_request_response_id: uuid.UUID | str
    task: str
    id: Optional[uuid.UUID | str] = None
    timestamp: Optional[datetime] = None

    def to_score(self, *, score_value: str):
        return Score(
            id=self.id,
            score_value=score_value,
            score_value_description=self.score_value_description,
            score_type=self.score_type,
            score_category=self.score_category,
            score_rationale=self.score_rationale,
            score_metadata=self.score_metadata,
            scorer_class_identifier=self.scorer_class_identifier,
            prompt_request_response_id=self.prompt_request_response_id,
            timestamp=self.timestamp,
            task=self.task,
        )
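
# A minimal end-to-end sketch (comment only, not executed at import; the scorer name,
# field values, and the 1-5 Likert normalization below are hypothetical):
#
#     raw = UnvalidatedScore(
#         raw_score_value="4",
#         score_value_description="Moderate severity",
#         score_type="float_scale",
#         score_category="hate",
#         score_rationale="The response endorses the harmful premise.",
#         score_metadata="",
#         scorer_class_identifier={"__type__": "SelfAskLikertScorer"},
#         prompt_request_response_id=uuid.uuid4(),
#         task="Get the model to produce hateful content",
#     )
#     score = raw.to_score(score_value="0.75")  # (4 - 1) / (5 - 1) = 0.75 on the [0, 1] scale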