Source code for pyrit.chat_message_normalizer.chat_message_normalizer_chatml
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import re
from typing import cast
from pyrit.chat_message_normalizer import ChatMessageNormalizer
from pyrit.models import ALLOWED_CHAT_MESSAGE_ROLES, ChatMessage, ChatMessageRole
[docs]
class ChatMessageNormalizerChatML(ChatMessageNormalizer[str]):
    """Chat message normalizer that converts between chat messages and ChatML text."""

    def normalize(self, messages: list[ChatMessage]) -> str:
        """Convert a list of chat messages to a ChatML string.

        This is compliant with the ChatML specified in
        https://github.com/openai/openai-python/blob/release-v0.28.0/chatml.md

        Args:
            messages: The chat messages to serialize, in order.

        Returns:
            The concatenated ChatML representation of all messages. An empty
            list yields an empty string.
        """
        parts: list[str] = []
        for message in messages:
            # The optional name is emitted on the header line, after the role.
            name_suffix = f" name={message.name}" if message.name else ""
            parts.append(f"<|im_start|>{message.role}{name_suffix}\n{message.content}<|im_end|>\n")
        return "".join(parts)

    @staticmethod
    def from_chatml(content: str) -> list[ChatMessage]:
        """Convert a ChatML string to a list of chat messages.

        Each ``<|im_start|>...<|im_end|>`` section is parsed as a header line
        (``role`` optionally followed by ``name=<name>``), a newline, and the
        message body. The body may span several lines; surrounding whitespace
        is stripped.

        Args:
            content: The ChatML text to parse.

        Returns:
            The parsed chat messages, in the order they appear in ``content``.

        Raises:
            ValueError: If no message section is found, a section has no
                newline separating the header from the body, the header does
                not start with a role, or the role is not one of
                ``ALLOWED_CHAT_MESSAGE_ROLES``.
        """
        sections = list(re.finditer(r"<\|im_start\|>(.*?)<\|im_end\|>", content, re.DOTALL | re.MULTILINE))
        if not sections:
            raise ValueError("No chat messages found in the chatML string")

        header_pattern = re.compile(r"(?P<role>\w+)( name=(?P<name>\w+))?")
        messages: list[ChatMessage] = []
        for section in sections:
            # Split only on the first newline so that multi-line bodies are kept whole.
            header, separator, body = section.group(1).partition("\n")
            if not separator:
                raise ValueError(f"Missing message content after chatML header: {header!r}")

            header_match = header_pattern.match(header.strip())
            if header_match is None:
                raise ValueError(f"Could not parse a role from chatML header: {header!r}")

            role = header_match.group("role")
            if role not in ALLOWED_CHAT_MESSAGE_ROLES:
                raise ValueError(f"Role {role} is not allowed in chatML")

            messages.append(
                ChatMessage(
                    role=cast(ChatMessageRole, role),
                    content=body.strip(),
                    name=header_match.group("name"),
                )
            )
        return messages