# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import ast
from pathlib import Path
from typing import List, Literal, Optional

from pyrit.datasets.dataset_helper import fetch_examples
from pyrit.models import SeedPromptDataset
from pyrit.models.seed_prompt import SeedPrompt
def fetch_aya_redteaming_dataset(
cache: bool = True,
data_home: Optional[Path] = None,
language: Literal["English", "Hindi", "French", "Spanish", "Arabic", "Russian", "Serbian", "Tagalog"] = "English",
harm_categories: Optional[
List[
Literal[
"Bullying & Harassment",
"Discrimination & Injustice",
"Graphic material",
"Harms of Representation Allocation and Quality of Service",
"Hate Speech",
"Non-consensual sexual content",
"Profanity",
"Self-Harm",
"Violence, Threats & Incitement",
]
]
] = None,
harm_scope: Optional[Literal["global", "local"]] = None,
) -> SeedPromptDataset:
"""
Fetch examples from the Aya Red-teaming dataset with optional filtering and create a SeedPromptDataset.
Args:
cache (bool): Whether to cache the fetched examples. Defaults to True.
data_home: Directory to store cached data. Defaults to None.
language: The language to fetch prompts for. Defaults to "English".
harm_categories: List of harm categories to filter the examples.
Defaults to None, which means all categories are included.
Otherwise, only prompts with at least one matching category are included.
harm_scope: Whether to fetch globally or locally harmful prompts.
Defaults to None, which means all examples are included.
Returns:
SeedPromptDataset: A SeedPromptDataset containing the filtered examples.
Note:
For more information and access to the original dataset and related materials, visit:
https://huggingface.co/datasets/CohereForAI/aya_redteaming/blob/main/README.md \n
Related paper: https://arxiv.org/abs/2406.18682 \n
The dataset license: Apache 2.0
Warning:
Due to the nature of these prompts, it may be advisable to consult your relevant legal
department before testing them with LLMs to ensure compliance and reduce potential risks.
"""
_lang = {
"English": "eng",
"Hindi": "hin",
"French": "fra",
"Spanish": "spa",
"Arabic": "arb",
"Russian": "rus",
"Serbian": "srp",
"Tagalog": "tgl",
}
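    # Download the raw JSONL for the requested language (optionally caching it locally).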
examples = fetch_examples(
source=f"https://huggingface.co/datasets/CohereForAI/aya_redteaming/raw/main/aya_{_lang[language]}.jsonl",
source_type="public_url",
cache=cache,
data_home=data_home,
)
seed_prompts = []
for example in examples:
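        # harm_category is stored in the JSONL as a stringified Python list,
        # so parse it back into a real list before filtering.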
categories = ast.literal_eval(example["harm_category"])
if harm_categories is None or any(cat in categories for cat in harm_categories):
if harm_scope is None or example["global_or_local"] == harm_scope:
seed_prompts.append(
SeedPrompt(
value=example["prompt"],
data_type="text",
name="Aya Red-teaming Examples",
dataset_name="Aya Red-teaming Examples",
harm_categories=categories,
source="https://huggingface.co/datasets/CohereForAI/aya_redteaming",
)
)
seed_prompt_dataset = SeedPromptDataset(prompts=seed_prompts)
return seed_prompt_dataset
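

# A minimal usage sketch (illustrative, not part of the original module): fetch the
# French prompts that carry at least one of the listed harm categories and are
# labelled as globally harmful.
if __name__ == "__main__":
    dataset = fetch_aya_redteaming_dataset(
        language="French",
        harm_categories=["Hate Speech", "Profanity"],
        harm_scope="global",
    )
    print(f"Fetched {len(dataset.prompts)} prompts; first: {dataset.prompts[0].value}")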