Source code for pyrit.datasets.harmbench_dataset
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from pathlib import Path
from typing import Literal, Optional
from pyrit.datasets.dataset_helper import FILE_TYPE_HANDLERS, fetch_examples
from pyrit.models import SeedDataset, SeedPrompt
[docs]
def fetch_harmbench_dataset(
    source: str = (
        "https://raw.githubusercontent.com/centerforaisafety/HarmBench/c0423b9/data/behavior_datasets/"
        "harmbench_behaviors_text_all.csv"
    ),
    source_type: Literal["public_url"] = "public_url",
    cache: bool = True,
    data_home: Optional[Path] = None,
) -> SeedDataset:
    """
    Fetch HarmBench examples and create a SeedDataset.

    Every fetched example must provide the keys ``Behavior`` and ``SemanticCategory``.
    Each example becomes one text ``SeedPrompt`` whose value is the behavior and whose
    single harm category is the example's semantic category.

    Args:
        source (str): The source from which to fetch examples. Defaults to the HarmBench repository.
            The text after the last "." is treated as the file type and must be a key of
            ``FILE_TYPE_HANDLERS``.
        source_type (Literal["public_url"]): The type of source ('public_url').
        cache (bool): Whether to cache the fetched examples. Defaults to True.
        data_home (Optional[Path]): Directory to store cached data. Defaults to None.

    Returns:
        SeedDataset: A SeedDataset containing the examples. Its ``harm_categories`` are the
        distinct semantic categories, listed in the order they first appear in the data.

    Raises:
        ValueError: If the file type derived from ``source`` is not supported, or if any
            example lacks one of the required keys.

    Note:
        For more information and access to the original dataset and related materials, visit:
        https://github.com/centerforaisafety/HarmBench
    """
    # Determine the file type from the source URL and reject unsupported ones before fetching.
    file_type = source.split(".")[-1]
    if file_type not in FILE_TYPE_HANDLERS:
        valid_types = ", ".join(FILE_TYPE_HANDLERS.keys())
        raise ValueError(f"Invalid file_type. Expected one of: {valid_types}.")

    # Required keys to validate each example
    required_keys = {"Behavior", "SemanticCategory"}

    # Fetch the examples using the provided `fetch_examples` function
    examples = fetch_examples(source, source_type, cache, data_home)

    # Validate every example up front so that no SeedPrompt is built from a partially valid dataset.
    prompt_data = []
    for example in examples:
        missing_keys = required_keys - example.keys()
        if missing_keys:
            # Sort so the error message is deterministic regardless of set iteration order.
            raise ValueError(f"Missing keys in example: {', '.join(sorted(missing_keys))}")
        prompt_data.append((example["Behavior"], example["SemanticCategory"]))

    # The trailing space after "chemical," matters: adjacent string literals are concatenated verbatim.
    description = (
        "A dataset of HarmBench examples containing various categories such as chemical, "
        "biological, illegal activities, etc."
    )

    seed_prompts = [
        SeedPrompt(
            value=behavior,
            data_type="text",
            name="HarmBench Examples",
            dataset_name="HarmBench Examples",
            harm_categories=[category],
            description=description,
        )
        for behavior, category in prompt_data
    ]

    # De-duplicate while preserving first-seen order (dicts keep insertion order), which gives a
    # reproducible category list, unlike iterating a set of strings.
    all_semantic_categories = list(dict.fromkeys(category for _, category in prompt_data))

    return SeedDataset(
        prompts=seed_prompts,
        harm_categories=all_semantic_categories,
    )