# Source code for pyrit.score.scorer_identifier
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import pyrit
# [docs]
@dataclass
class ScorerIdentifier:
    """
    Describe a scorer configuration so it can be serialized, hashed and compared.

    The fields below are the adjustable parameters of a scorer. Two identifiers
    whose hashable dictionaries are equal produce the same hash from
    ``compute_hash``.
    """

    # Scorer type name; serialized under the "__type__" key.
    type: str
    # Prompt templates; values longer than 100 characters are stored as a short digest.
    system_prompt_template: Optional[str] = None
    user_prompt_template: Optional[str] = None
    # Identifiers of nested scorers; each one is serialized recursively.
    sub_identifier: Optional[List[ScorerIdentifier]] = None
    # Passed through to the serialized form unchanged.
    target_info: Optional[Dict[str, Any]] = None
    score_aggregator: Optional[str] = None
    scorer_specific_params: Optional[Dict[str, Any]] = None
    # Defaults to the version of the pyrit package that is imported.
    pyrit_version: str = pyrit.__version__

    def compute_hash(self, hashable_dict: Optional[Dict[str, Any]] = None) -> str:
        """
        Return the SHA-256 hex digest of this configuration.

        Args:
            hashable_dict: An already-built hashable dictionary. When None, the
                dictionary is obtained from ``_to_hashable_dict()``.

        Returns:
            str: Hex digest of the canonical JSON encoding of the dictionary.
        """
        payload = self._to_hashable_dict() if hashable_dict is None else hashable_dict
        # Sorted keys and fixed separators make the JSON text, and so the digest, deterministic.
        canonical = json.dumps(payload, sort_keys=True, separators=(",", ":"))
        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

    def to_compact_dict(self) -> Dict[str, Any]:
        """
        Build the compact dictionary form of this identifier for storage.

        The result is the hashable dictionary (prompts longer than 100
        characters replaced by ``sha256:{hash[:16]}``, nested sub_identifiers
        compacted recursively) plus a ``"hash"`` entry holding the hash of that
        dictionary.

        Returns:
            Dict[str, Any]: The compact representation, including ``"hash"``.
        """
        compact = self._to_hashable_dict()
        # The digest is computed before the "hash" key is added, so it never covers itself.
        digest = self.compute_hash(hashable_dict=compact)
        compact["hash"] = digest
        return compact

    def _to_hashable_dict(self) -> Dict[str, Any]:
        """
        Build the dictionary that ``compute_hash`` hashes (no ``"hash"`` key).

        Prompts longer than 100 characters are replaced by
        ``sha256:{hash[:16]}``; nested sub_identifiers are converted with this
        same method.

        Returns:
            Dict[str, Any]: The dictionary representation without a hash.
        """

        def shorten(template: Optional[str]) -> Optional[str]:
            # None, empty and short templates are kept verbatim.
            if not template or len(template) <= 100:
                return template
            digest = hashlib.sha256(template.encode()).hexdigest()
            return f"sha256:{digest[:16]}"

        compact_system = shorten(self.system_prompt_template)
        compact_user = shorten(self.user_prompt_template)

        # Children are serialized without their own "hash" entry.
        nested: Any = (
            None
            if self.sub_identifier is None
            else [child._to_hashable_dict() for child in self.sub_identifier]
        )

        return {
            "__type__": self.type,
            "system_prompt_template": compact_system,
            "user_prompt_template": compact_user,
            "sub_identifier": nested,
            "target_info": self.target_info,
            "score_aggregator": self.score_aggregator,
            "scorer_specific_params": self.scorer_specific_params,
            "pyrit_version": self.pyrit_version,
        }