# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from datasets import load_dataset

from pyrit.models import SeedPromptDataset
from pyrit.models.seed_prompt import SeedPrompt


def fetch_forbidden_questions_dataset() -> SeedPromptDataset:
"""
Fetch Forbidden question dataset and return it as a SeedPromptDataset
Returns: SeedPromptDataset
Note: For more info
Paper - https://arxiv.org/abs/2308.03825
Github - https://github.com/verazuo/jailbreak_llms/
Website - https://jailbreak-llms.xinyueshen.me/
"""
data = load_dataset("TrustAIRLab/forbidden_question_set", "default")
authors = ["Xinyue Shen", "Zeyuan Chen", "Michael Backes", "Yun Shen", "Yang Zhang"]
seed_prompts = [
SeedPrompt(
value=item["question"],
data_type="text",
name="TrustAIRLab/forbidden_question_set",
dataset_name="TrustAIRLab/forbidden_question_set",
authors=authors,
            # harm_categories expects a list of strings, so wrap the single policy name
            harm_categories=[item["content_policy_name"]],
source="https://huggingface.co/datasets/TrustAIRLab/forbidden_question_set",
description="""This is the Forbidden Question Set dataset proposed in the ACM CCS 2024 paper
"Do Anything Now'': Characterizing and Evaluating In-The-Wild Jailbreak Prompts on Large Language Models.
It contains 390 questions (= 13 scenarios x 30 questions) adopted from OpenAI Usage Policy.
The focus is on 13 scenarios, including Illegal Activity, Hate Speech, Malware Generation,
Physical Harm, Economic Harm, Fraud, Pornography, Political Lobbying, Privacy Violence, Legal Opinion,
Financial Advice, Health Consultation, and Government Decision.""",
)
for item in data["train"]
]
seed_prompt_dataset = SeedPromptDataset(prompts=seed_prompts)
return seed_prompt_dataset
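

# Minimal usage sketch (an illustration, not part of the library): it assumes the
# `datasets` package is installed and the Hugging Face Hub is reachable; the
# `prompts`, `value`, and `harm_categories` attributes follow the fields used above.
if __name__ == "__main__":
    dataset = fetch_forbidden_questions_dataset()
    print(f"Fetched {len(dataset.prompts)} prompts")  # 13 scenarios x 30 questions = 390
    first = dataset.prompts[0]
    print(first.value)            # the forbidden question text
    print(first.harm_categories)  # e.g. ["Illegal Activity"]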