Source code for pyrit.datasets.llm_latent_adversarial_training_harmful_dataset
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from datasets import load_dataset

from pyrit.models import SeedPromptDataset
from pyrit.models.seed_prompt import SeedPrompt
def fetch_llm_latent_adversarial_training_harmful_dataset() -> SeedPromptDataset:
    """Fetch the LLM-LAT/harmful-dataset from Hugging Face and wrap it in a SeedPromptDataset."""
    # Load the "default" configuration of the dataset; prompts live in the "train" split.
    data = load_dataset("LLM-LAT/harmful-dataset", "default")
    prompts = [item["prompt"] for item in data["train"]]

    # Create SeedPrompt instances from each example in 'prompts'
    seed_prompts = [
        SeedPrompt(
            value=prompt,
            data_type="text",
            name="LLM-LAT/harmful-dataset",
            dataset_name="LLM-LAT/harmful-dataset",
            description="This dataset contains prompts used to assess and analyze harmful behaviors in LLMs",
            source="https://huggingface.co/datasets/LLM-LAT/harmful-dataset",
        )
        for prompt in prompts
    ]

    seed_prompt_dataset = SeedPromptDataset(prompts=seed_prompts)
    return seed_prompt_dataset
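

Below is a minimal usage sketch, not part of the module itself. It assumes network access to Hugging Face and that `SeedPromptDataset` exposes the `prompts` list passed to its constructor, as the code above suggests.

# Example usage (illustrative sketch): fetch the dataset and inspect the
# first seed prompt. Requires the `pyrit` and `datasets` packages.
if __name__ == "__main__":
    dataset = fetch_llm_latent_adversarial_training_harmful_dataset()
    # Each entry is a SeedPrompt; `value` holds the raw prompt text.
    print(f"Loaded {len(dataset.prompts)} seed prompts")
    print(dataset.prompts[0].value)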