# Source code for pyrit.datasets.ccp_sensitive_prompts_dataset
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from datasets import load_dataset
from pyrit.models import SeedPromptDataset
from pyrit.models.seed_prompt import SeedPrompt
[docs]
def fetch_ccp_sensitive_prompts_dataset() -> SeedPromptDataset:
    """
    Load the CCP-sensitive-prompts collection and wrap it as a SeedPromptDataset.

    The prompts were gathered by Promptfoo and cover topics that are sensitive
    in China; certain models are likely to censor them.

    Each row of the "train" split becomes one text SeedPrompt: the row's
    "prompt" field is used as the prompt value and its "subject" field becomes
    the single entry of the prompt's harm categories.

    Returns:
        SeedPromptDataset: A dataset of CCP-sensitive prompts.

    Note:
        For more information, see https://huggingface.co/datasets/promptfoo/CCP-sensitive-prompts
    """
    # Metadata shared by every prompt produced from this dataset.
    dataset_label = "CCP-sensitive-prompts"
    summary = "Prompts covering topics sensitive to the CCP."
    origin_url = "https://huggingface.co/datasets/promptfoo/CCP-sensitive-prompts"

    def to_seed_prompt(record) -> SeedPrompt:
        # The list arguments are built fresh for each prompt so that no two
        # SeedPrompt objects share the same list instance.
        return SeedPrompt(
            value=record["prompt"],
            data_type="text",
            name="",
            dataset_name=dataset_label,
            harm_categories=[record["subject"]],
            description=summary,
            groups=["promptfoo"],
            source=origin_url,
        )

    records = load_dataset("promptfoo/CCP-sensitive-prompts", split="train")
    seed_prompts = [to_seed_prompt(record) for record in records]
    return SeedPromptDataset(prompts=seed_prompts)