feat: add request tracing spans to chat completion path

Traces: convLookup, formatPrompt, acquire, send, firstMsg,
stream, release, convStore — logged per request for profiling.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
e3mrah 2026-03-04 11:20:54 +01:00
parent 023ee6d5e4
commit a2685dd158
2 changed files with 50 additions and 7 deletions

View File

@ -121,15 +121,33 @@ function extractText(msg: Record<string, unknown>): string {
.join("");
}
export async function chat(opts: ChatOptions): Promise<string> {
/**
 * Per-request timing breakdown for a single `chat()` call, in whole
 * milliseconds (values are rounded with `Math.round`).
 *
 * Spans are measured with `performance.now()` and are consecutive:
 * formatPrompt → pool acquire → send → first streamed message →
 * remainder of stream → pool release.
 */
export interface ChatTrace {
/** Time spent in `formatPrompt(opts)` building the prompt. */
formatPromptMs: number;
/** Time waiting on `pool.acquire()` for a session. */
acquireMs: number;
/** Time for `session.send(prompt)` to resolve. */
sendMs: number;
/** Time from send completion until the first streamed message arrives
 *  (falls back to the full stream duration if no message was seen). */
firstMsgMs: number;
/** Time from the first streamed message until the stream ends. */
streamMs: number;
/** Time spent in `pool.release(session)`. */
releaseMs: number;
/** End-to-end duration from entry into `chat()` through release. */
totalMs: number;
}
export async function chat(opts: ChatOptions): Promise<{ text: string; trace: ChatTrace }> {
const t0 = performance.now();
const prompt = formatPrompt(opts);
const tFormat = performance.now();
const session = await pool.acquire();
const tAcquire = performance.now();
try {
await session.send(prompt);
const tSend = performance.now();
let resultText = "";
let tFirstMsg = 0;
for await (const msg of session.stream()) {
if (!tFirstMsg) tFirstMsg = performance.now();
if (msg.type === "result") {
if (msg.subtype === "success") {
resultText = msg.result;
@ -142,9 +160,22 @@ export async function chat(opts: ChatOptions): Promise<string> {
break;
}
}
const tStream = performance.now();
pool.release(session);
return resultText;
const tRelease = performance.now();
const trace: ChatTrace = {
formatPromptMs: Math.round(tFormat - t0),
acquireMs: Math.round(tAcquire - tFormat),
sendMs: Math.round(tSend - tAcquire),
firstMsgMs: Math.round((tFirstMsg || tStream) - tSend),
streamMs: Math.round(tStream - (tFirstMsg || tSend)),
releaseMs: Math.round(tRelease - tStream),
totalMs: Math.round(tRelease - t0),
};
return { text: resultText, trace };
} catch (err) {
pool.discard(session);
throw err;

View File

@ -1,6 +1,6 @@
import type { FastifyInstance } from "fastify";
import { v4 as uuidv4 } from "uuid";
import { chat, chatStream, type ChatOptions } from "../providers/claude.js";
import { chat, chatStream, type ChatOptions, type ChatTrace } from "../providers/claude.js";
import { trackPoolMetric } from "../providers/valkey.js";
import type { ConversationStore } from "../providers/conversation.js";
import type { Config } from "../config.js";
@ -46,6 +46,7 @@ export async function chatCompletionsRoute(
const apiKey = authHeader.replace("Bearer ", "");
// Build full message list: history + new messages
const tConvLookup0 = performance.now();
let allMessages: ChatMessage[];
let convId: string;
@ -69,6 +70,8 @@ export async function chatCompletionsRoute(
allMessages = newMessages;
convId = await conversations.create(newMessages, model, apiKey);
}
const tConvLookup1 = performance.now();
const convLookupMs = Math.round(tConvLookup1 - tConvLookup0);
// Build ChatOptions — pass all OpenAI params through
const chatOpts: ChatOptions = {
@ -86,11 +89,20 @@ export async function chatCompletionsRoute(
if (!stream) {
// ── Non-streaming ────────────────────────────────────────
const t0 = Date.now();
const text = await chat(chatOpts);
trackPoolMetric("request", Date.now() - t0);
const tRoute0 = performance.now();
const { text, trace } = await chat(chatOpts);
const tConvStore0 = performance.now();
await conversations.append(convId, { role: "assistant", content: text });
const tConvStore1 = performance.now();
const routeTrace = {
convLookupMs,
...trace,
convStoreMs: Math.round(tConvStore1 - tConvStore0),
routeTotalMs: Math.round(tConvStore1 - tRoute0),
};
app.log.info({ trace: routeTrace }, "request trace");
trackPoolMetric("request", routeTrace.routeTotalMs);
const usage: ChatCompletionUsage = {
prompt_tokens: 0,