Source code for pyrit.datasets.jailbreak.text_jailbreak

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import random
import threading
from pathlib import Path
from typing import Any, Optional

from pyrit.common.path import JAILBREAK_TEMPLATES_PATH
from pyrit.models import SeedPrompt

logger = logging.getLogger(__name__)


class TextJailBreak:
    """
    A class that manages jailbreak datasets (like DAN, etc.).

    An instance wraps exactly one jailbreak template (loaded from a path, looked up
    by file name, supplied as a string, or chosen at random) and renders it with a
    user prompt via ``get_jailbreak()``.
    """

    # Lazily built mapping of template filename -> every path carrying that name.
    # ``None`` means the templates directory has not been scanned yet.
    _template_cache: Optional[dict[str, list[Path]]] = None
    # Serializes the one-time construction of ``_template_cache``.
    _cache_lock: threading.Lock = threading.Lock()

    @classmethod
    def _scan_template_files(cls) -> dict[str, list[Path]]:
        """
        Scan the jailbreak templates directory for YAML files.

        Excludes files under the multi_parameter subdirectory. Groups results by
        filename so duplicate names across subdirectories are detectable.

        Returns:
            Dict[str, List[Path]]: Mapping of filename to list of matching paths.
        """
        result: dict[str, list[Path]] = {}
        for path in JAILBREAK_TEMPLATES_PATH.rglob("*.yaml"):
            # Only look at the components *below* the templates root; otherwise an
            # ancestor directory that happens to be called "multi_parameter" would
            # cause every template to be excluded.
            relative_parts = path.relative_to(JAILBREAK_TEMPLATES_PATH).parts
            if "multi_parameter" not in relative_parts:
                result.setdefault(path.name, []).append(path)
        return result

    @classmethod
    def _get_template_cache(cls) -> dict[str, list[Path]]:
        """
        Return the cached filename-to-path lookup, building it on first access.

        Thread-safe: uses a lock to prevent concurrent scans from racing.

        Returns:
            Dict[str, List[Path]]: Cached mapping of filename to list of matching paths.
        """
        if cls._template_cache is None:
            with cls._cache_lock:
                # Re-test after acquiring the lock: another thread may have filled
                # the cache while this one was waiting.
                if cls._template_cache is None:
                    cls._template_cache = cls._scan_template_files()
        return cls._template_cache

    @classmethod
    def _resolve_template_by_name(cls, template_file_name: str) -> Path:
        """
        Look up a single template file by name from the cached directory scan.

        Args:
            template_file_name (str): Exact filename (e.g. "aim.yaml") to find.

        Returns:
            Path: The resolved path to the template file.

        Raises:
            ValueError: If the file is not found or if multiple files share the same name.
        """
        paths = cls._get_template_cache().get(template_file_name)
        if not paths:
            raise ValueError(
                f"Template file '{template_file_name}' not found in jailbreak directory or its subdirectories"
            )
        if len(paths) > 1:
            raise ValueError(f"Multiple files named '{template_file_name}' found in jailbreak directory")
        return paths[0]

    @classmethod
    def _get_all_template_paths(cls) -> list[Path]:
        """
        Return a flat list of all cached template file paths.

        A new list is built on every call, so callers may reorder it freely
        without affecting the cache.

        Returns:
            List[Path]: All template paths (excluding multi_parameter), in no particular order.

        Raises:
            ValueError: If no templates are available.
        """
        cache = cls._get_template_cache()
        all_paths = [path for paths in cache.values() for path in paths]
        if not all_paths:
            raise ValueError("No YAML templates found in jailbreak directory (excluding multi_parameter subdirectory)")
        return all_paths

    def __init__(
        self,
        *,
        template_path: Optional[str] = None,
        template_file_name: Optional[str] = None,
        string_template: Optional[str] = None,
        random_template: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Initialize a Jailbreak instance with exactly one template source.

        Args:
            template_path (str, optional): Full path to a YAML template file.
            template_file_name (str, optional): Name of a template file in datasets/jailbreak directory.
            string_template (str, optional): A string template to use directly.
            random_template (bool, optional): Whether to use a random template from datasets/jailbreak.
            **kwargs: Additional parameters to apply to the template. The 'prompt' parameter will be preserved
                for later use in get_jailbreak().

        Raises:
            ValueError: If more than one template source is provided or if no template source is provided.
        """
        # Track the template source for error reporting
        self.template_source: str = "<unknown>"

        # Count how many template sources are provided. Falsy values (None, "",
        # False) count as "not provided".
        template_sources = [template_path, template_file_name, string_template, random_template]
        provided_sources = [source for source in template_sources if source]

        if len(provided_sources) != 1:
            raise ValueError(
                "Exactly one of template_path, template_file_name, string_template, or random_template must be provided"
            )

        if template_path:
            self.template = SeedPrompt.from_yaml_file(template_path)
            self.template_source = str(template_path)
        elif string_template:
            self.template = SeedPrompt(value=string_template, is_general_technique=True)
            self.template_source = "<string_template>"
        elif template_file_name:
            resolved_path = self._resolve_template_by_name(template_file_name)
            self.template = SeedPrompt.from_yaml_file(resolved_path)
            self.template_source = str(resolved_path)
        else:
            self._load_random_template()

        self._validate_required_kwargs(kwargs)
        self._apply_extra_kwargs(kwargs)

    def _load_random_template(self) -> None:
        """
        Select and load a random single-parameter jailbreak template.

        Visits the available templates in random order until one whose parameters
        are exactly ``["prompt"]`` is found, then validates it can render successfully.
        On success, sets ``self.template`` and ``self.template_source``.

        Raises:
            ValueError: If no templates are available, no single-parameter template is found
                after exhausting all candidates, or the chosen template has syntax errors.
        """
        all_paths = self._get_all_template_paths()
        random.shuffle(all_paths)

        for candidate_path in all_paths:
            candidate = SeedPrompt.from_yaml_file(candidate_path)
            if candidate.parameters != ["prompt"]:
                continue

            # Only commit the candidate to the instance once it has been accepted.
            self.template = candidate
            self.template_source = str(candidate_path)
            try:
                # Trial render with a dummy prompt to surface template errors early.
                self.template.render_template_value(prompt="test")
            except ValueError as e:
                raise ValueError(f"Invalid jailbreak template '{candidate_path}': {e}") from e
            return

        raise ValueError("No jailbreak template with a single 'prompt' parameter found among available templates.")

    def _validate_required_kwargs(self, kwargs: dict[str, Any]) -> None:
        """
        Verify that all template parameters (except 'prompt') are present in kwargs.

        Args:
            kwargs (dict): Keyword arguments supplied by the caller.

        Raises:
            ValueError: If any required template parameter is missing from kwargs.
        """
        parameters = self.template.parameters
        if parameters is None:
            logger.warning("Template '%s' has no parameter metadata defined.", self.template_source)
            parameters = []

        required_params = [p for p in parameters if p != "prompt"]
        missing_params = [p for p in required_params if p not in kwargs]
        if missing_params:
            raise ValueError(
                f"Template requires parameters that were not provided: {missing_params}. "
                f"Required parameters (excluding 'prompt'): {required_params}"
            )

    def _apply_extra_kwargs(self, kwargs: dict[str, Any]) -> None:
        """
        Apply additional keyword arguments to the template, preserving the prompt placeholder.

        The supplied dictionary is not modified.

        Args:
            kwargs (dict): Keyword arguments whose values are rendered into the template.
                The 'prompt' key is skipped so it remains available for later rendering.
        """
        if not kwargs:
            return

        # Work on a filtered copy instead of popping from the caller's dict.
        extra_kwargs = {key: value for key, value in kwargs.items() if key != "prompt"}
        self.template.value = self.template.render_template_value_silent(**extra_kwargs)

    @classmethod
    def get_jailbreak_templates(cls, num_templates: Optional[int] = None) -> list[str]:
        """
        Retrieve jailbreak template file names from the top level of JAILBREAK_TEMPLATES_PATH.

        Args:
            num_templates (int, optional): Number of distinct jailbreak templates to return,
                chosen at random. None (or 0) to get all.

        Returns:
            List[str]: List of jailbreak template file names.

        Raises:
            ValueError: If no jailbreak templates are found in the jailbreak directory.
            ValueError: If num_templates is negative.
            ValueError: If num_templates is larger than the number of templates that exist.
        """
        # Use the real file name so the result matches the keys used by
        # _resolve_template_by_name exactly.
        jailbreak_template_names = [f.name for f in JAILBREAK_TEMPLATES_PATH.glob("*.yaml")]
        if not jailbreak_template_names:
            raise ValueError("No jailbreak templates found in the jailbreak directory")

        if num_templates is not None and num_templates < 0:
            raise ValueError(f"num_templates must be non-negative, got {num_templates}")

        # 0 is treated like None (return everything) for backward compatibility.
        if not num_templates:
            return jailbreak_template_names

        if num_templates > len(jailbreak_template_names):
            raise ValueError(
                f"Attempted to pull {num_templates} jailbreaks from a dataset"
                f" with only {len(jailbreak_template_names)} jailbreaks!"
            )

        # Sample WITHOUT replacement so the same template is never returned twice.
        return random.sample(jailbreak_template_names, k=num_templates)

    def get_jailbreak_system_prompt(self) -> str:
        """
        Get the jailbreak template as a system prompt without a specific user prompt.

        Returns:
            str: The rendered jailbreak template with an empty prompt parameter.
        """
        return self.get_jailbreak(prompt="")

    def get_jailbreak(self, prompt: str) -> str:
        """
        Render the jailbreak template with the provided user prompt.

        Args:
            prompt (str): The user prompt to insert into the jailbreak template.

        Returns:
            str: The rendered jailbreak template with the prompt parameter filled in.

        Raises:
            ValueError: If the template fails to render.
        """
        return self.template.render_template_value(prompt=prompt)