# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
Module for loading and validating harm definition YAML files.
Harm definitions provide the scale descriptions used for evaluating harm categories.
"""
import logging
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Union
import yaml
from pyrit.common.path import HARM_DEFINITION_PATH
logger = logging.getLogger(__name__)
[docs]
@dataclass
class ScaleDescription:
"""
A single scale description entry from a harm definition.
Args:
score_value: The score value (e.g., "1", "2", etc.)
description: The description for this score level.
"""
score_value: str
description: str
[docs]
@dataclass
class HarmDefinition:
"""
A harm definition loaded from a YAML file.
This class represents the structured content of a harm definition YAML file,
which includes the version, category name, and scale descriptions that define
how to score content for this harm category.
Args:
version: The version of the harm definition (e.g., "1.0").
category: The harm category name (e.g., "violence", "hate_speech").
scale_descriptions: List of scale descriptions defining score levels.
source_path: The path to the YAML file this was loaded from.
"""
version: str
category: str
scale_descriptions: List[ScaleDescription] = field(default_factory=list)
source_path: Optional[str] = field(default=None, kw_only=True)
[docs]
def get_scale_description(self, score_value: str) -> Optional[str]:
"""
Get the description for a specific score value.
Args:
score_value: The score value to look up (e.g., "1", "2").
Returns:
The description for the score value, or None if not found.
"""
for scale in self.scale_descriptions:
if scale.score_value == score_value:
return scale.description
return None
[docs]
@staticmethod
def validate_category(category: str, *, check_exists: bool = False) -> bool:
"""
Validate a harm category name.
Validates that the category name follows the naming convention (lowercase letters
and underscores only) and optionally checks if it exists in the standard
harm definitions.
Args:
category: The category name to validate.
check_exists: If True, also verify the category exists in
get_all_harm_definitions(). Defaults to False.
Returns:
True if the category is valid (and exists if check_exists is True),
False otherwise.
"""
# Check if category matches pattern: only lowercase letters and underscores
if not re.match(r"^[a-z_]+$", category):
return False
if check_exists:
all_definitions = get_all_harm_definitions()
if category not in all_definitions:
return False
return True
[docs]
@classmethod
def from_yaml(cls, harm_definition_path: Union[str, Path]) -> "HarmDefinition":
"""
Load and validate a harm definition from a YAML file.
The function first checks if the path is a simple filename (e.g., "violence.yaml")
and if so, looks for it in the standard HARM_DEFINITION_PATH directory.
Otherwise, it treats the path as a full or relative path.
Args:
harm_definition_path: Path to the harm definition YAML file.
Can be a simple filename like "violence.yaml" which will be resolved
relative to the standard harm_definition directory, or a full path.
Returns:
HarmDefinition: The loaded harm definition.
Raises:
FileNotFoundError: If the harm definition file does not exist.
ValueError: If the YAML file is invalid or missing required fields.
"""
path = Path(harm_definition_path)
# If it's just a filename (no directory separators), look in the standard directory
if path.parent == Path("."):
resolved_path = HARM_DEFINITION_PATH / path
else:
resolved_path = path
if not resolved_path.exists():
raise FileNotFoundError(
f"Harm definition file not found: {resolved_path}. "
f"Expected a YAML file in {HARM_DEFINITION_PATH} or a valid path."
)
try:
with open(resolved_path, "r", encoding="utf-8") as f:
data = yaml.safe_load(f)
except yaml.YAMLError as e:
raise ValueError(f"Invalid YAML in harm definition file {resolved_path}: {e}")
if not isinstance(data, dict):
raise ValueError(f"Harm definition file {resolved_path} must contain a YAML mapping/dictionary.")
# Validate required fields
if "version" not in data:
raise ValueError(f"Harm definition file {resolved_path} is missing required 'version' field.")
if "category" not in data:
raise ValueError(f"Harm definition file {resolved_path} is missing required 'category' field.")
if "scale_descriptions" not in data:
raise ValueError(f"Harm definition file {resolved_path} is missing required 'scale_descriptions' field.")
# Parse scale descriptions
scale_descriptions = []
for item in data["scale_descriptions"]:
if not isinstance(item, dict) or "score_value" not in item or "description" not in item:
raise ValueError(
f"Each scale_description in {resolved_path} must have 'score_value' and 'description' fields."
)
scale_descriptions.append(
ScaleDescription(
score_value=str(item["score_value"]),
description=str(item["description"]),
)
)
return cls(
version=str(data["version"]),
category=str(data["category"]),
scale_descriptions=scale_descriptions,
source_path=str(resolved_path),
)
[docs]
def get_all_harm_definitions() -> Dict[str, HarmDefinition]:
"""
Load all harm definitions from the standard harm_definition directory.
This function scans the HARM_DEFINITION_PATH directory for all YAML files
and loads each one as a HarmDefinition.
Returns:
Dict[str, HarmDefinition]: A dictionary mapping category names to their
HarmDefinition objects. The keys are the category names from the YAML files
(e.g., "violence", "hate_speech").
Raises:
ValueError: If any YAML file in the directory is invalid.
"""
harm_definitions: Dict[str, HarmDefinition] = {}
if not HARM_DEFINITION_PATH.exists():
logger.warning(f"Harm definition directory does not exist: {HARM_DEFINITION_PATH}")
return harm_definitions
for yaml_file in HARM_DEFINITION_PATH.glob("*.yaml"):
try:
harm_def = HarmDefinition.from_yaml(yaml_file)
harm_definitions[harm_def.category] = harm_def
except (FileNotFoundError, ValueError) as e:
logger.warning(f"Failed to load harm definition from {yaml_file}: {e}")
return harm_definitions