Source code for pyrit.datasets.harmbench_dataset

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from pathlib import Path
from typing import Literal, Optional

from pyrit.datasets.dataset_helper import FILE_TYPE_HANDLERS, fetch_examples
from pyrit.models import SeedPromptDataset
from pyrit.models.seed_prompt import SeedPrompt


def fetch_harmbench_dataset(
    source: str = (
        "https://raw.githubusercontent.com/centerforaisafety/HarmBench/c0423b9/data/behavior_datasets/"
        "harmbench_behaviors_text_all.csv"
    ),
    source_type: Literal["public_url"] = "public_url",
    cache: bool = True,
    data_home: Optional[Path] = None,
) -> SeedPromptDataset:
    """
    Fetch HarmBench examples and create a SeedPromptDataset.

    Every fetched example must provide the "Behavior" and "SemanticCategory" keys. Each "Behavior" value
    becomes the value of one text SeedPrompt. Every prompt is tagged with the list of all distinct
    "SemanticCategory" values found across the fetched examples, in order of first appearance.

    Args:
        source (str): The source from which to fetch examples. Defaults to the HarmBench repository.
        source_type (Literal["public_url"]): The type of source ('public_url').
        cache (bool): Whether to cache the fetched examples. Defaults to True.
        data_home (Optional[Path]): Directory to store cached data. Defaults to None.

    Returns:
        SeedPromptDataset: A SeedPromptDataset containing the examples.

    Raises:
        ValueError: If the text after the last "." in ``source`` is not a key of FILE_TYPE_HANDLERS,
            or if any example is missing one of the required keys.

    Note:
        For more information and access to the original dataset and related materials, visit:
        https://github.com/centerforaisafety/HarmBench
    """
    # Determine the file type from the text after the last "." of the source
    file_type = source.rsplit(".", 1)[-1]
    if file_type not in FILE_TYPE_HANDLERS:
        valid_types = ", ".join(FILE_TYPE_HANDLERS.keys())
        raise ValueError(f"Invalid file_type. Expected one of: {valid_types}.")

    # Required keys to validate each example
    required_keys = {"Behavior", "SemanticCategory"}

    prompts = []
    # A dict is used as an insertion-ordered set: iterating a plain set of strings yields a different
    # order from one interpreter run to the next, which made harm_categories non-reproducible.
    semantic_categories = {}

    # Fetch the examples using the provided `fetch_examples` function
    examples = fetch_examples(source, source_type, cache, data_home)

    # Validate each example and extract data
    for example in examples:
        missing_keys = required_keys - example.keys()
        if missing_keys:
            # Sorted so the error message is stable regardless of set iteration order
            raise ValueError(f"Missing keys in example: {', '.join(sorted(missing_keys))}")

        prompts.append(example["Behavior"])
        semantic_categories[example["SemanticCategory"]] = None

    # NOTE(review): every prompt receives the dataset-wide category list rather than only its own
    # category. This is kept as-is for backward compatibility -- confirm whether it is intended.
    seed_prompts = [
        SeedPrompt(
            value=behavior,
            data_type="text",
            name="HarmBench Examples",
            dataset_name="HarmBench Examples",
            # A fresh list per prompt so that prompts never share one mutable list object
            harm_categories=list(semantic_categories),
            description=(
                "A dataset of HarmBench examples containing various categories such as chemical, "
                "biological, illegal activities, etc."
            ),
        )
        for behavior in prompts
    ]

    return SeedPromptDataset(prompts=seed_prompts)