feat: add request tracing spans to chat completion path
Traces: convLookup, formatPrompt, acquire, send, firstMsg, stream, release, convStore — logged per request for profiling. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
023ee6d5e4
commit
a2685dd158
@ -121,15 +121,33 @@ function extractText(msg: Record<string, unknown>): string {
|
||||
.join("");
|
||||
}
|
||||
|
||||
export async function chat(opts: ChatOptions): Promise<string> {
|
||||
export interface ChatTrace {
|
||||
formatPromptMs: number;
|
||||
acquireMs: number;
|
||||
sendMs: number;
|
||||
firstMsgMs: number;
|
||||
streamMs: number;
|
||||
releaseMs: number;
|
||||
totalMs: number;
|
||||
}
|
||||
|
||||
export async function chat(opts: ChatOptions): Promise<{ text: string; trace: ChatTrace }> {
|
||||
const t0 = performance.now();
|
||||
|
||||
const prompt = formatPrompt(opts);
|
||||
const tFormat = performance.now();
|
||||
|
||||
const session = await pool.acquire();
|
||||
const tAcquire = performance.now();
|
||||
|
||||
try {
|
||||
await session.send(prompt);
|
||||
const tSend = performance.now();
|
||||
|
||||
let resultText = "";
|
||||
let tFirstMsg = 0;
|
||||
for await (const msg of session.stream()) {
|
||||
if (!tFirstMsg) tFirstMsg = performance.now();
|
||||
if (msg.type === "result") {
|
||||
if (msg.subtype === "success") {
|
||||
resultText = msg.result;
|
||||
@ -142,9 +160,22 @@ export async function chat(opts: ChatOptions): Promise<string> {
|
||||
break;
|
||||
}
|
||||
}
|
||||
const tStream = performance.now();
|
||||
|
||||
pool.release(session);
|
||||
return resultText;
|
||||
const tRelease = performance.now();
|
||||
|
||||
const trace: ChatTrace = {
|
||||
formatPromptMs: Math.round(tFormat - t0),
|
||||
acquireMs: Math.round(tAcquire - tFormat),
|
||||
sendMs: Math.round(tSend - tAcquire),
|
||||
firstMsgMs: Math.round((tFirstMsg || tStream) - tSend),
|
||||
streamMs: Math.round(tStream - (tFirstMsg || tSend)),
|
||||
releaseMs: Math.round(tRelease - tStream),
|
||||
totalMs: Math.round(tRelease - t0),
|
||||
};
|
||||
|
||||
return { text: resultText, trace };
|
||||
} catch (err) {
|
||||
pool.discard(session);
|
||||
throw err;
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { chat, chatStream, type ChatOptions } from "../providers/claude.js";
|
||||
import { chat, chatStream, type ChatOptions, type ChatTrace } from "../providers/claude.js";
|
||||
import { trackPoolMetric } from "../providers/valkey.js";
|
||||
import type { ConversationStore } from "../providers/conversation.js";
|
||||
import type { Config } from "../config.js";
|
||||
@ -46,6 +46,7 @@ export async function chatCompletionsRoute(
|
||||
const apiKey = authHeader.replace("Bearer ", "");
|
||||
|
||||
// Build full message list: history + new messages
|
||||
const tConvLookup0 = performance.now();
|
||||
let allMessages: ChatMessage[];
|
||||
let convId: string;
|
||||
|
||||
@ -69,6 +70,8 @@ export async function chatCompletionsRoute(
|
||||
allMessages = newMessages;
|
||||
convId = await conversations.create(newMessages, model, apiKey);
|
||||
}
|
||||
const tConvLookup1 = performance.now();
|
||||
const convLookupMs = Math.round(tConvLookup1 - tConvLookup0);
|
||||
|
||||
// Build ChatOptions — pass all OpenAI params through
|
||||
const chatOpts: ChatOptions = {
|
||||
@ -86,11 +89,20 @@ export async function chatCompletionsRoute(
|
||||
|
||||
if (!stream) {
|
||||
// ── Non-streaming ────────────────────────────────────────
|
||||
const t0 = Date.now();
|
||||
const text = await chat(chatOpts);
|
||||
trackPoolMetric("request", Date.now() - t0);
|
||||
|
||||
const tRoute0 = performance.now();
|
||||
const { text, trace } = await chat(chatOpts);
|
||||
const tConvStore0 = performance.now();
|
||||
await conversations.append(convId, { role: "assistant", content: text });
|
||||
const tConvStore1 = performance.now();
|
||||
|
||||
const routeTrace = {
|
||||
convLookupMs,
|
||||
...trace,
|
||||
convStoreMs: Math.round(tConvStore1 - tConvStore0),
|
||||
routeTotalMs: Math.round(tConvStore1 - tRoute0),
|
||||
};
|
||||
app.log.info({ trace: routeTrace }, "request trace");
|
||||
trackPoolMetric("request", routeTrace.routeTotalMs);
|
||||
|
||||
const usage: ChatCompletionUsage = {
|
||||
prompt_tokens: 0,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user