fix(axon): deduplicate system messages before forwarding to vLLM

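vLLM requires the system message to be at the beginning of the message
list. When Axon merges conversation history with new messages, the result
can contain duplicate system messages that land later in the list, which
vLLM rejects with a 400 error. Keep only the first system message and drop
any later ones before forwarding the request.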

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
e3mrah 2026-04-26 08:35:28 +02:00
parent 4110161577
commit 40c4abe4f6

View File

@ -34,8 +34,18 @@ export class VllmProvider {
private static readonly SYSTEM_MSG_MAX_CHARS = 6000;
private trimSystemMessages(messages: ChatMessage[]): ChatMessage[] {
return messages.map((msg) => {
private sanitizeMessages(messages: ChatMessage[]): ChatMessage[] {
let seenSystem = false;
const deduped: ChatMessage[] = [];
for (const msg of messages) {
if (msg.role === "system") {
if (seenSystem) continue;
seenSystem = true;
}
deduped.push(msg);
}
return deduped.map((msg) => {
if (msg.role !== "system" || !msg.content || msg.content.length <= VllmProvider.SYSTEM_MSG_MAX_CHARS) {
return msg;
}
@ -52,7 +62,7 @@ export class VllmProvider {
const payload: Record<string, unknown> = {
...body,
model: this.resolveModel(body.model),
messages: this.trimSystemMessages(body.messages),
messages: this.sanitizeMessages(body.messages),
stream,
};
delete payload.conversation_id;