# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
from typing import Any, Optional
from httpx import HTTPStatusError
from pyrit.common import default_values, net_utility
from pyrit.exceptions import (
EmptyResponseException,
RateLimitException,
handle_bad_request_exception,
pyrit_target_retry,
)
from pyrit.message_normalizer import ChatMessageNormalizer, MessageListNormalizer
from pyrit.models import (
Message,
construct_response_from_request,
)
from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget
from pyrit.prompt_target.common.utils import limit_requests_per_minute, validate_temperature, validate_top_p
logger = logging.getLogger(__name__)
class AzureMLChatTarget(PromptChatTarget):
"""
A prompt target for Azure Machine Learning chat endpoints.
This class works with most chat completion Instruct models deployed on Azure AI Machine Learning
Studio endpoints (including but not limited to: mistralai-Mixtral-8x7B-Instruct-v01,
mistralai-Mistral-7B-Instruct-v01, Phi-3.5-MoE-instruct, Phi-3-mini-4k-instruct,
Llama-3.2-3B-Instruct, and Meta-Llama-3.1-8B-Instruct).
Please create or adjust environment variables (endpoint and key) as needed for the model you are using.
"""
endpoint_uri_environment_variable: str = "AZURE_ML_MANAGED_ENDPOINT"
api_key_environment_variable: str = "AZURE_ML_KEY"
def __init__(
self,
*,
endpoint: Optional[str] = None,
api_key: Optional[str] = None,
model_name: str = "",
message_normalizer: Optional[MessageListNormalizer[Any]] = None,
max_new_tokens: int = 400,
temperature: float = 1.0,
top_p: float = 1.0,
repetition_penalty: float = 1.0,
max_requests_per_minute: Optional[int] = None,
**param_kwargs: Any,
) -> None:
"""
Initialize an instance of the AzureMLChatTarget class.
Args:
endpoint (str, Optional): The endpoint URL for the deployed Azure ML model.
                Defaults to the value of the `AZURE_ML_MANAGED_ENDPOINT` environment variable.
api_key (str, Optional): The API key for accessing the Azure ML endpoint.
Defaults to the value of the `AZURE_ML_KEY` environment variable.
model_name (str, Optional): The name of the model being used (e.g., "Llama-3.2-3B-Instruct").
Used for identification purposes. Defaults to empty string.
message_normalizer (MessageListNormalizer, Optional): The message normalizer.
For models that do not allow system prompts such as mistralai-Mixtral-8x7B-Instruct-v01,
GenericSystemSquashNormalizer() can be passed in. Defaults to ChatMessageNormalizer().
max_new_tokens (int, Optional): The maximum number of tokens to generate in the response.
Defaults to 400.
temperature (float, Optional): The temperature for generating diverse responses. 1.0 is most random,
0.0 is least random. Defaults to 1.0.
top_p (float, Optional): The top-p value for generating diverse responses. It represents
the cumulative probability of the top tokens to keep. Defaults to 1.0.
            repetition_penalty (float, Optional): The repetition penalty for generating diverse responses.
                1.0 means no penalty, with greater values (up to 2.0) penalizing repeated tokens more heavily.
                Defaults to 1.0.
max_requests_per_minute (int, Optional): Number of requests the target can handle per
minute before hitting a rate limit. The number of requests sent to the target
will be capped at the value provided.
**param_kwargs: Additional parameters to pass to the model for generating responses. Example
parameters can be found here: https://huggingface.co/docs/api-inference/tasks/text-generation.
Note that the link above may not be comprehensive, and specific acceptable parameters may be
model-dependent. If a model does not accept a certain parameter that is passed in, it will be skipped
without throwing an error.
"""
endpoint_value = default_values.get_required_value(
env_var_name=self.endpoint_uri_environment_variable, passed_value=endpoint
)
PromptChatTarget.__init__(
self, max_requests_per_minute=max_requests_per_minute, endpoint=endpoint_value, model_name=model_name
)
self._initialize_vars(endpoint=endpoint, api_key=api_key)
validate_temperature(temperature)
validate_top_p(top_p)
self.message_normalizer = message_normalizer if message_normalizer is not None else ChatMessageNormalizer()
self._max_new_tokens = max_new_tokens
self._temperature = temperature
self._top_p = top_p
self._repetition_penalty = repetition_penalty
self._extra_parameters = param_kwargs
def _initialize_vars(self, endpoint: Optional[str] = None, api_key: Optional[str] = None) -> None:
"""
        Set the endpoint and API key for accessing the Azure ML model. Use this function to manually
        pass in your own endpoint URI and API key. Defaults to the values of the environment variables
        named by self.endpoint_uri_environment_variable and self.api_key_environment_variable (which default
        to "AZURE_ML_MANAGED_ENDPOINT" and "AZURE_ML_KEY" respectively). It is recommended to set these
        variables in the .env file rather than passing the URI and key directly to this function or the
        target constructor.
Args:
endpoint (str, optional): The endpoint uri for the deployed Azure ML model.
api_key (str, optional): The API key for accessing the Azure ML endpoint.
"""
self._endpoint = default_values.get_required_value(
env_var_name=self.endpoint_uri_environment_variable, passed_value=endpoint
)
self._api_key = default_values.get_required_value(
env_var_name=self.api_key_environment_variable, passed_value=api_key
)
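    # Typical .env configuration consumed by _initialize_vars (values are
    # illustrative placeholders, not real credentials):
    #
    #     AZURE_ML_MANAGED_ENDPOINT="https://<deployment-name>.<region>.inference.ml.azure.com/score"
    #     AZURE_ML_KEY="<api-key>"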
@limit_requests_per_minute
async def send_prompt_async(self, *, message: Message) -> list[Message]:
"""
Asynchronously send a message to the Azure ML chat target.
Args:
message (Message): The message object containing the prompt to send.
Returns:
list[Message]: A list containing the response from the prompt target.
Raises:
EmptyResponseException: If the response from the chat is empty.
RateLimitException: If the target rate limit is exceeded.
HTTPStatusError: For any other HTTP errors during the process.
"""
self._validate_request(message=message)
request = message.message_pieces[0]
# Get chat messages from memory and append the current message
messages = list(self._memory.get_conversation(conversation_id=request.conversation_id))
messages.append(message)
logger.info(f"Sending the following prompt to the prompt target: {request}")
try:
resp_text = await self._complete_chat_async(
messages=messages,
)
if not resp_text:
raise EmptyResponseException(message="The chat returned an empty response.")
response_entry = construct_response_from_request(request=request, response_text_pieces=[resp_text])
except HTTPStatusError as hse:
if hse.response.status_code == 400:
# Handle Bad Request
response_entry = handle_bad_request_exception(response_text=hse.response.text, request=request)
elif hse.response.status_code == 429:
raise RateLimitException()
else:
raise hse
logger.info("Received the following response from the prompt target" + f"{response_entry.get_value()}")
return [response_entry]
@pyrit_target_retry
async def _complete_chat_async(
self,
messages: list[Message],
) -> str:
"""
        Completes a chat interaction by generating a response to the given input prompt.
        Normalizes the conversation, posts it to the Azure ML endpoint, and extracts the generated text.
Args:
messages (list[Message]): The message objects containing the role and content.
Raises:
EmptyResponseException: If the response from the chat is empty.
Exception: For any other errors during the process.
Returns:
str: The generated response message.
"""
headers = self._get_headers()
payload = await self._construct_http_body_async(messages)
response = await net_utility.make_request_and_raise_if_error_async(
endpoint_uri=self._endpoint, method="POST", request_body=payload, headers=headers
)
try:
return str(response.json()["output"])
except Exception as e:
if response.json() == {}:
raise EmptyResponseException(message="The chat returned an empty response.")
            raise RuntimeError(
                f"Exception obtaining response from the target. Returned response: {response.json()}. "
                f"Exception: {str(e)}"
            ) from e
async def _construct_http_body_async(
self,
messages: list[Message],
) -> dict[str, Any]:
"""
Construct the HTTP request body for the AML online endpoint.
Args:
messages: List of chat messages to include in the request body.
Returns:
dict: The constructed HTTP request body.
"""
# Use the message normalizer to convert Messages to dict format
messages_dict = await self.message_normalizer.normalize_to_dicts_async(messages)
# Parameters include additional ones passed in through **kwargs. Those not accepted by the model will
# be ignored. We only include commonly supported parameters here - model-specific parameters like
# stop sequences should be passed via **param_kwargs since different models use different EOS tokens.
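        # The merge below uses dict union (|), so any user-supplied parameter in
        # self._extra_parameters overrides the default of the same name. For a
        # single-turn conversation the resulting body looks roughly like
        # (illustrative values):
        # {
        #     "input_data": {
        #         "input_string": [{"role": "user", "content": "Hello"}],
        #         "parameters": {"max_new_tokens": 400, "temperature": 1.0, ...},
        #     }
        # }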
data = {
"input_data": {
"input_string": messages_dict,
"parameters": {
"max_new_tokens": self._max_new_tokens,
"temperature": self._temperature,
"top_p": self._top_p,
"repetition_penalty": self._repetition_penalty,
}
| self._extra_parameters,
}
}
return data
def _get_headers(self) -> dict[str, str]:
"""
        Construct the headers for accessing the inference endpoint deployed in AML.
        Returns:
            dict[str, str]: Headers containing the bearer token (the AML key) and the JSON content type.
"""
headers: dict[str, str] = {
"Content-Type": "application/json",
"Authorization": ("Bearer " + self._api_key),
}
return headers
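    # The resulting headers look like (the key is a placeholder):
    #     {"Content-Type": "application/json", "Authorization": "Bearer <api-key>"}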
    def _validate_request(self, *, message: Message) -> None:
        """No additional validation is performed for this target."""
def is_json_response_supported(self) -> bool:
"""
Check if the target supports JSON as a response format.
Returns:
bool: True if JSON response is supported, False otherwise.
"""
return False