feat: add request tracing spans to chat completion path

Traces: convLookup, formatPrompt, acquire, send, firstMsg,
stream, release, convStore — logged per request for profiling.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
e3mrah 2026-03-04 11:20:54 +01:00
parent 023ee6d5e4
commit a2685dd158
2 changed files with 50 additions and 7 deletions

View File

@ -121,15 +121,33 @@ function extractText(msg: Record<string, unknown>): string {
.join("");
}
export async function chat(opts: ChatOptions): Promise<string> {
/**
 * Per-request timing breakdown for a single `chat()` call, in whole
 * milliseconds (values are rounded with `Math.round`).
 *
 * Spans are measured with `performance.now()` and are consecutive:
 * formatPrompt → pool acquire → send → first streamed message →
 * remainder of stream → pool release.
 */
export interface ChatTrace {
/** Time spent in `formatPrompt(opts)` building the prompt. */
formatPromptMs: number;
/** Time waiting on `pool.acquire()` for a session. */
acquireMs: number;
/** Time for `session.send(prompt)` to resolve. */
sendMs: number;
/** Time from send completion until the first streamed message arrives
 *  (falls back to the full stream duration if no message was seen). */
firstMsgMs: number;
/** Time from the first streamed message until the stream ends. */
streamMs: number;
/** Time spent in `pool.release(session)`. */
releaseMs: number;
/** End-to-end duration from entry into `chat()` through release. */
totalMs: number;
}
export async function chat(opts: ChatOptions): Promise<{ text: string; trace: ChatTrace }> {
const t0 = performance.now();
const prompt = formatPrompt(opts);
const tFormat = performance.now();
const session = await pool.acquire();
const tAcquire = performance.now();
try {
await session.send(prompt);
const tSend = performance.now();
let resultText = "";
let tFirstMsg = 0;
for await (const msg of session.stream()) {
if (!tFirstMsg) tFirstMsg = performance.now();
if (msg.type === "result") {
if (msg.subtype === "success") {
resultText = msg.result;
@ -142,9 +160,22 @@ export async function chat(opts: ChatOptions): Promise<string> {
break;
}
}
const tStream = performance.now();
pool.release(session);
return resultText;
const tRelease = performance.now();
const trace: ChatTrace = {
formatPromptMs: Math.round(tFormat - t0),
acquireMs: Math.round(tAcquire - tFormat),
sendMs: Math.round(tSend - tAcquire),
firstMsgMs: Math.round((tFirstMsg || tStream) - tSend),
streamMs: Math.round(tStream - (tFirstMsg || tSend)),
releaseMs: Math.round(tRelease - tStream),
totalMs: Math.round(tRelease - t0),
};
return { text: resultText, trace };
} catch (err) {
pool.discard(session);
throw err;

View File

@ -1,6 +1,6 @@
import type { FastifyInstance } from "fastify";
import { v4 as uuidv4 } from "uuid";
import { chat, chatStream, type ChatOptions } from "../providers/claude.js";
import { chat, chatStream, type ChatOptions, type ChatTrace } from "../providers/claude.js";
import { trackPoolMetric } from "../providers/valkey.js";
import type { ConversationStore } from "../providers/conversation.js";
import type { Config } from "../config.js";
@ -46,6 +46,7 @@ export async function chatCompletionsRoute(
const apiKey = authHeader.replace("Bearer ", "");
// Build full message list: history + new messages
const tConvLookup0 = performance.now();
let allMessages: ChatMessage[];
let convId: string;
@ -69,6 +70,8 @@ export async function chatCompletionsRoute(
allMessages = newMessages;
convId = await conversations.create(newMessages, model, apiKey);
}
const tConvLookup1 = performance.now();
const convLookupMs = Math.round(tConvLookup1 - tConvLookup0);
// Build ChatOptions — pass all OpenAI params through
const chatOpts: ChatOptions = {
@ -86,11 +89,20 @@ export async function chatCompletionsRoute(
if (!stream) {
// ── Non-streaming ────────────────────────────────────────
const t0 = Date.now();
const text = await chat(chatOpts);
trackPoolMetric("request", Date.now() - t0);
const tRoute0 = performance.now();
const { text, trace } = await chat(chatOpts);
const tConvStore0 = performance.now();
await conversations.append(convId, { role: "assistant", content: text });
const tConvStore1 = performance.now();
const routeTrace = {
convLookupMs,
...trace,
convStoreMs: Math.round(tConvStore1 - tConvStore0),
routeTotalMs: Math.round(tConvStore1 - tRoute0),
};
app.log.info({ trace: routeTrace }, "request trace");
trackPoolMetric("request", routeTrace.routeTotalMs);
const usage: ChatCompletionUsage = {
prompt_tokens: 0,