# Source code for pyrit.prompt_target.gandalf_target
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import enum
import json
import logging
from typing import Optional
from pyrit.common import net_utility
from pyrit.models import PromptRequestResponse
from pyrit.models import construct_response_from_request
from pyrit.prompt_target import PromptTarget, limit_requests_per_minute
logger = logging.getLogger(__name__)
# Values must stay distinct: a duplicated value would silently turn the later
# member into an alias of the earlier one, so fail loudly at import time instead.
@enum.unique
class GandalfLevel(enum.Enum):
    """Levels of the Gandalf challenge.

    Each member's value is the identifier that ``GandalfTarget`` sends as the
    ``defender`` field of its request payload.
    """

    LEVEL_1 = "baseline"
    LEVEL_2 = "do-not-tell"
    LEVEL_3 = "do-not-tell-and-block"
    LEVEL_4 = "gpt-is-password-encoded"
    LEVEL_5 = "word-blacklist"
    LEVEL_6 = "gpt-blacklist"
    LEVEL_7 = "gandalf"
    LEVEL_8 = "gandalf-the-white"
    LEVEL_9 = "adventure-1"
    LEVEL_10 = "adventure-2"
class GandalfTarget(PromptTarget):
    """Prompt target that posts text prompts to the Gandalf service at gandalf.lakera.ai.

    The chosen ``GandalfLevel`` is sent with every request as the ``defender``
    field of the payload.
    """

    def __init__(
        self,
        *,
        level: GandalfLevel,
        max_requests_per_minute: Optional[int] = None,
    ) -> None:
        """Create a target bound to one Gandalf level.

        Args:
            level: The level whose value is sent as the ``defender`` field.
            max_requests_per_minute: Forwarded unchanged to ``PromptTarget.__init__``.
        """
        super().__init__(max_requests_per_minute=max_requests_per_minute)

        self._endpoint = "https://gandalf.lakera.ai/api/send-message"
        self._defender = level.value

    @limit_requests_per_minute
    async def send_prompt_async(self, *, prompt_request: PromptRequestResponse) -> PromptRequestResponse:
        """Send the single text piece of ``prompt_request`` and wrap the answer.

        Args:
            prompt_request: Request holding exactly one text request piece.

        Returns:
            The response built by ``construct_response_from_request`` from the
            original piece and the answer text.

        Raises:
            ValueError: If the request is not a single text piece, or the
                service returns an empty body.
        """
        self._validate_request(prompt_request=prompt_request)
        request = prompt_request.request_pieces[0]

        # Lazy %-style arguments: the request is only formatted if INFO is enabled.
        logger.info("Sending the following prompt to the prompt target: %s", request)

        response = await self._complete_text_async(request.converted_value)

        return construct_response_from_request(request=request, response_text_pieces=[response])

    def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
        """Reject requests that are not exactly one text piece.

        Raises:
            ValueError: If there is not exactly one request piece, or its
                converted value is not of data type ``"text"``.
        """
        if len(prompt_request.request_pieces) != 1:
            raise ValueError("This target only supports a single prompt request piece.")

        if prompt_request.request_pieces[0].converted_value_data_type != "text":
            raise ValueError("This target only supports text prompt input.")

    async def check_password(self, password: str) -> bool:
        """
        Checks if the password is correct

        True means the password is correct, False means it is not

        Args:
            password: The password guess to submit for this target's level.

        Returns:
            The ``success`` field of the JSON response.

        Raises:
            ValueError: If the service returns an empty body.
        """
        # NOTE(review): the guess is posted to the same send-message endpoint that
        # prompts use, only with a "password" field instead of "prompt". Confirm the
        # service accepts guesses there; a dedicated guess endpoint may be intended.
        payload: dict[str, object] = {
            "defender": self._defender,
            "password": password,
        }

        resp = await self._post_payload_async(payload)

        json_response = resp.json()
        return json_response["success"]

    async def _complete_text_async(self, text: str) -> str:
        """Post ``text`` as the prompt and return the ``answer`` field of the reply.

        Raises:
            ValueError: If the service returns an empty body.
        """
        payload: dict[str, object] = {
            "defender": self._defender,
            "prompt": text,
        }

        resp = await self._post_payload_async(payload)

        answer = json.loads(resp.text)["answer"]

        logger.info('Received the following response from the prompt target "%s"', answer)
        return answer

    async def _post_payload_async(self, payload: dict[str, object]):
        """POST ``payload`` to the endpoint and return the non-empty response.

        Shared by ``check_password`` and ``_complete_text_async`` so the request
        parameters and the empty-body check live in one place.

        Raises:
            ValueError: If the response text is empty.
        """
        resp = await net_utility.make_request_and_raise_if_error_async(
            endpoint_uri=self._endpoint, method="POST", request_body=payload, post_type="data"
        )

        if not resp.text:
            raise ValueError("The chat returned an empty response.")

        return resp