Source code for pyrit.prompt_converter.pdf_converter

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import ast
from io import BytesIO
from pathlib import Path
from typing import Dict, List, Optional

from fpdf import FPDF
from pypdf import PageObject, PdfReader, PdfWriter

from pyrit.common.logger import logger
from pyrit.models import PromptDataType, SeedPrompt, data_serializer_factory
from pyrit.prompt_converter import ConverterResult, PromptConverter


class PDFConverter(PromptConverter):
    """
    Converts a text prompt into a PDF file.

    Supports various modes:

    1. Template-Based Generation: If a `SeedPrompt` is provided, dynamic data can be injected into
       the template using the `SeedPrompt.render_template_value` method, and the resulting content
       is converted to a PDF.
    2. Direct Text-Based Generation: If no template is provided, the raw string prompt is converted
       directly into a PDF.
    3. Modify Existing PDFs (Overlay approach): Enables injecting text into existing PDFs at
       specified coordinates, merging a new "overlay layer" onto the original PDF.

    Args:
        prompt_template (Optional[SeedPrompt], optional): A `SeedPrompt` object representing a template.
        font_type (str): Font type for the PDF. Defaults to "Helvetica".
        font_size (int): Font size for the PDF. Defaults to 12.
        font_color (tuple): Font color for the PDF in RGB format. Defaults to (255, 255, 255).
        page_width (int): Width of the PDF page in mm. Defaults to 210 (A4 width).
        page_height (int): Height of the PDF page in mm. Defaults to 297 (A4 height).
        column_width (int): Width of each column in the PDF. Defaults to 0 (full page width).
        row_height (int): Height of each row in the PDF. Defaults to 10.
        existing_pdf (Optional[Path], optional): Path to an existing PDF file. Defaults to None.
        injection_items (Optional[List[Dict]], optional): A list of injection items for modifying
            an existing PDF. Each item is a dictionary; the keys read by this class are "page",
            "x", "y", "text", "font", "font_size" and "font_color".

    Raises:
        ValueError: If `font_color` is not a tuple of three values in 0-255, or if any injection
            item is not a dictionary.
        FileNotFoundError: If `existing_pdf` is given but does not point to a file.
    """

    def __init__(
        self,
        prompt_template: Optional[SeedPrompt] = None,
        font_type: str = "Helvetica",
        font_size: int = 12,
        font_color: tuple = (255, 255, 255),
        page_width: int = 210,
        page_height: int = 297,
        column_width: int = 0,
        row_height: int = 10,
        existing_pdf: Optional[Path] = None,
        injection_items: Optional[List[Dict]] = None,
    ) -> None:
        # Be lenient with plain string paths; any other object is used as given.
        if isinstance(existing_pdf, str):
            existing_pdf = Path(existing_pdf)

        self._prompt_template = prompt_template
        self._font_type = font_type
        self._font_size = font_size
        self._font_color = font_color
        self._page_width = page_width
        self._page_height = page_height
        self._column_width = column_width
        self._row_height = row_height

        # Keeping the user's path here
        self._existing_pdf_path: Optional[Path] = existing_pdf

        # The file data lives in a separate BytesIO (kept apart from the path because of a mypy error)
        self._existing_pdf_bytes: Optional[BytesIO] = None

        self._injection_items = injection_items or []

        # Validate font color
        if not (isinstance(font_color, tuple) and len(font_color) == 3 and all(0 <= c <= 255 for c in font_color)):
            raise ValueError(f"Invalid font_color: {font_color}. Must be a tuple of three integers (0-255).")

        # If a valid path is provided, load it into memory as BytesIO
        if existing_pdf is not None:
            if not existing_pdf.is_file():
                raise FileNotFoundError(f"PDF file not found at: {existing_pdf}")
            # Read the file contents into a BytesIO stream
            with open(existing_pdf, "rb") as pdf_file:
                self._existing_pdf_bytes = BytesIO(pdf_file.read())

        # Validate injection items
        if not all(isinstance(item, dict) for item in self._injection_items):
            raise ValueError("Each injection item must be a dictionary.")

    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text") -> ConverterResult:
        """
        Converts the given prompt into a PDF.

        If a template is provided, it injects the prompt into the template, otherwise, it generates
        a simple PDF with the prompt as the content. If an existing PDF was loaded, the configured
        injection items are overlaid onto that PDF instead of generating a new one.

        Args:
            prompt (str): The prompt to be embedded in the PDF.
            input_type (PromptDataType): The type of the input data (default: "text").

        Returns:
            ConverterResult: The result whose `output_text` is the serializer's value and whose
            `output_type` is "url".

        Raises:
            ValueError: If the input type is not supported, or if content preparation or PDF
                modification fails.
        """
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")

        # Step 1: Prepare content
        content = self._prepare_content(prompt)

        # Step 2: Generate or modify the PDF (Overlay, if existing PDF)
        if self._existing_pdf_bytes:
            pdf_bytes = self._modify_existing_pdf()
        else:
            pdf_bytes = self._generate_pdf(content)

        # Step 3: Serialize PDF
        pdf_serializer = await self._serialize_pdf(pdf_bytes, content)

        # Return the result
        return ConverterResult(output_text=pdf_serializer.value, output_type="url")

    def input_supported(self, input_type: PromptDataType) -> bool:
        """Returns True only for the "text" input type."""
        return input_type == "text"

    def output_supported(self, output_type: PromptDataType) -> bool:
        """Returns True only for the "url" output type."""
        return output_type == "url"

    def _prepare_content(self, prompt: str) -> str:
        """
        Prepares the content for the PDF, either from a template or directly from the prompt.

        With a template, a string prompt is parsed with `ast.literal_eval` and must evaluate to a
        dictionary, which is passed as keyword arguments to `SeedPrompt.render_template_value`.

        Args:
            prompt (str): The input prompt.

        Returns:
            str: The prepared content.

        Raises:
            ValueError: If the prompt cannot be parsed or rendered with the template, or if no
                template is set and the prompt is not a string.
        """
        if self._prompt_template:
            logger.debug(f"Preparing content with template: {self._prompt_template.value}")
            try:
                # Parse string prompt to dictionary
                dynamic_data = ast.literal_eval(prompt) if isinstance(prompt, str) else prompt
                logger.debug(f"Parsed dynamic data: {dynamic_data}")

                if not isinstance(dynamic_data, dict):
                    raise ValueError("Prompt must be a dictionary-compatible object after parsing.")

                # Use SeedPrompt's render_template_value for rendering
                rendered_content = self._prompt_template.render_template_value(**dynamic_data)
                logger.debug(f"Rendered content: {rendered_content}")
                return rendered_content
            except (ValueError, SyntaxError, TypeError, KeyError) as e:
                # ast.literal_eval raises SyntaxError/TypeError (not only ValueError) on malformed
                # input; report every parsing/rendering failure uniformly as ValueError and keep
                # the original exception as the cause.
                logger.error(f"Error rendering prompt: {e}")
                raise ValueError(f"Failed to render the prompt: {e}") from e

        # If no template is provided, return the raw prompt as content
        if isinstance(prompt, str):
            logger.debug("No template provided. Using raw prompt.")
            return prompt

        logger.error("Prompt must be a string when no template is provided.")
        raise ValueError("Prompt must be a string when no template is provided.")

    def _generate_pdf(self, content: str) -> bytes:
        """
        Generates a PDF with the given content.

        Args:
            content (str): The text content to include in the PDF.

        Returns:
            bytes: The generated PDF content in bytes.
        """
        # NOTE(review): the configured font color is not applied here; it is only used for
        # overlays on an existing PDF. Confirm whether that is intentional.
        pdf = FPDF(format=(self._page_width, self._page_height))  # Use custom page size
        pdf.add_page()
        pdf.set_font(self._font_type, size=self._font_size)  # Use custom font settings
        pdf.multi_cell(self._column_width, self._row_height, content)  # Use configurable cell dimensions

        pdf_bytes = BytesIO()
        pdf.output(pdf_bytes)
        return pdf_bytes.getvalue()

    @staticmethod
    def _get_page_size(page: PageObject) -> tuple[float, float]:
        """Returns (width, height) of the page computed from its MediaBox corners."""
        page_width = float(page.mediabox[2] - page.mediabox[0])
        page_height = float(page.mediabox[3] - page.mediabox[1])
        return page_width, page_height

    def _modify_existing_pdf(self) -> bytes:
        """
        Overlays the configured injection items onto the loaded PDF.

        The method loops over each page, checks for matching injection items, and merges a small
        "overlay PDF" for each item. Items that do not specify a page default to page 0; items
        whose page does not exist in the document are skipped with a warning.

        Returns:
            bytes: The modified PDF content in bytes.

        Raises:
            ValueError: If the existing PDF or injection items are not provided, or if an item's
                coordinates lie outside its page.
        """
        if not self._existing_pdf_bytes or not self._injection_items:
            raise ValueError("Existing PDF and injection items are required for modification.")

        reader = PdfReader(self._existing_pdf_bytes)
        writer = PdfWriter()

        # Overlay buffers must stay open until the final write; they are closed afterwards,
        # including when an error interrupts processing.
        overlay_buffers: List[BytesIO] = []
        page_count = 0

        try:
            for page_number, page in enumerate(reader.pages):
                page_count = page_number + 1
                logger.info(f"Processing page {page_number} with {len(self._injection_items)} injection items.")

                # Extract page dimensions for early coordinate checks
                page_width, page_height = self._get_page_size(page)

                # For each item that belongs on this page, create and merge an overlay
                for item in self._injection_items:
                    if item.get("page", 0) != page_number:
                        continue

                    # Default to a small offset (10 points) if no coordinates are provided, so the
                    # injected text does not start at (0,0) and potentially run off the edges.
                    x = item.get("x", 10)
                    y = item.get("y", 10)
                    text = item.get("text", "")
                    font = item.get("font", self._font_type)
                    font_size = item.get("font_size", self._font_size)
                    font_color = item.get("font_color", self._font_color)

                    # Coordinate validation before calling _inject_text_into_page
                    if not (0 <= x <= page_width and 0 <= y <= page_height):
                        raise ValueError(f"Coordinates x={x}, y={y} out of bounds for page {page_number}.")

                    # (1) Build the overlay PageObject + buffer
                    overlay_page, overlay_buffer = self._inject_text_into_page(
                        page, x, y, text, font, font_size, font_color
                    )
                    # (2) Keep track of the buffer so it is closed later
                    overlay_buffers.append(overlay_buffer)
                    # (3) Merge onto the page
                    page.merge_page(overlay_page)

                # Add the modified page to the writer
                writer.add_page(page)

            # Surface items that could never match a page instead of dropping them silently
            for item in self._injection_items:
                target_page = item.get("page", 0)
                if target_page not in range(page_count):
                    logger.warning(
                        f"Injection item targets page {target_page}, but the PDF has "
                        f"{page_count} page(s); item skipped."
                    )

            # Finalize the PDF
            output_pdf = BytesIO()
            writer.write(output_pdf)
            return output_pdf.getvalue()
        finally:
            # Safe to close all overlays only AFTER writing is finished (or has failed)
            for buf in overlay_buffers:
                buf.close()

    def _inject_text_into_page(
        self, page: PageObject, x: float, y: float, text: str, font: str, font_size: int, font_color: tuple
    ) -> tuple[PageObject, BytesIO]:
        """
        Generates an overlay PDF with the given text injected at the specified coordinates.

        Args:
            page (PageObject): The original PDF page to overlay on.
            x (float): The x-coordinate for the text.
            y (float): The y-coordinate for the text, measured from the bottom of the page.
            text (str): The text to inject.
            font (str): The font type.
            font_size (int): The font size.
            font_color (tuple): The font color in RGB format.

        Returns:
            tuple[PageObject, BytesIO]: The overlay page object and its corresponding buffer.
            The caller is responsible for closing the buffer once the overlay has been written.

        Raises:
            ValueError: If `x` or `y` lies outside the page.
        """
        # Determine page size from the original page's MediaBox
        page_width, page_height = self._get_page_size(page)

        # Out-of-Bounds Checks
        if x < 0:
            logger.error(f"x_pos is less than 0 and therefore out of bounds: x={x}")
            raise ValueError(f"x_pos is less than 0 and therefore out of bounds: x={x}")
        if x > page_width:
            logger.error(f"x_pos exceeds page width and is out of bounds: x={x}, page_width={page_width}")
            raise ValueError(f"x_pos exceeds page width and is out of bounds: x={x}, page_width={page_width}")
        if y < 0:
            logger.error(f"y_pos is less than 0 and therefore out of bounds: y={y}")
            raise ValueError(f"y_pos is less than 0 and therefore out of bounds: y={y}")
        if y > page_height:
            logger.error(f"y_pos exceeds page height and is out of bounds: y={y}, page_height={page_height}")
            raise ValueError(f"y_pos exceeds page height and is out of bounds: y={y}, page_height={page_height}")

        # Create a small overlay PDF in memory, sized like the original page (in points)
        overlay_pdf = FPDF(unit="pt", format=(page_width, page_height))
        overlay_pdf.add_page()

        # Set font
        overlay_pdf.set_font(font, size=font_size)
        r, g, b = font_color
        overlay_pdf.set_text_color(r, g, b)

        # Position text: FPDF starts (0,0) at top-left, while (x, y) is given relative to the
        # bottom-left corner, so the vertical coordinate is flipped.
        overlay_pdf.set_xy(x, page_height - y)

        # Insert the text
        overlay_pdf.cell(0, 0, text)

        # Convert overlay FPDF to bytes
        overlay_buffer = BytesIO()
        overlay_pdf.output(overlay_buffer)
        overlay_buffer.seek(0)

        # Create a pypdf PageObject from the overlay
        overlay_reader = PdfReader(overlay_buffer)
        overlay_page = overlay_reader.pages[0]

        return overlay_page, overlay_buffer

    async def _serialize_pdf(self, pdf_bytes: bytes, content: str):
        """
        Serializes the generated PDF using a data serializer.

        Args:
            pdf_bytes (bytes): The generated PDF content in bytes.
            content (str): The original content of the PDF.

        Returns:
            DataTypeSerializer: The serializer object containing metadata about the saved file.
        """
        pdf_serializer = data_serializer_factory(
            category="prompt-memory-entries", data_type="url", value=content, extension="pdf"
        )
        await pdf_serializer.save_data(pdf_bytes)
        return pdf_serializer