Source code for pyrit.datasets.llm_latent_adversarial_training_harmful_dataset
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from datasets import load_dataset

from pyrit.models import SeedDataset, SeedPrompt

def fetch_llm_latent_adversarial_training_harmful_dataset() -> SeedDataset:
"""
Fetch the LLM-LAT harmful dataset from Hugging Face.
This dataset contains prompts used to assess and analyze harmful behaviors in large
language models. It is sourced from the LLM-LAT/harmful-dataset repository.
Returns:
SeedDataset: A dataset containing harmful prompts for LLM testing.
"""
data = load_dataset("LLM-LAT/harmful-dataset", "default")
prompts = [item["prompt"] for item in data["train"]]
# Create SeedPrompt instances from each example in 'prompts'
seed_prompts = [
SeedPrompt(
value=prompt,
data_type="text",
name="LLM-LAT/harmful-dataset",
dataset_name="LLM-LAT/harmful-dataset",
description="This dataset contains prompts used to assess and analyze harmful behaviors in llm",
source="https://huggingface.co/datasets/LLM-LAT/harmful-dataset",
)
for prompt in prompts
]
seed_dataset = SeedDataset(prompts=seed_prompts)
return seed_dataset
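
# Example usage (a minimal sketch, not part of the PyRIT source above): fetch the
# dataset and inspect the first prompt. Assumes network access to Hugging Face;
# `SeedDataset.prompts` and `SeedPrompt.value` are the attributes populated by the
# function above.
if __name__ == "__main__":
    dataset = fetch_llm_latent_adversarial_training_harmful_dataset()
    print(f"Fetched {len(dataset.prompts)} prompts")
    print(dataset.prompts[0].value)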