From a2c1379c1d3b75c513d7df8667a5df6079cb9e4b Mon Sep 17 00:00:00 2001
From: Jiayuan Zhang <forrestchang7@gmail.com>
Date: Sun, 15 Feb 2026 16:03:40 +0800
Subject: [PATCH] feat(cli): add --run-log flag and session dir output for
 agent-driven E2E testing

Add a --run-log CLI flag that enables structured run logging without
setting the MULTICA_RUN_LOG env var. When run logging is enabled, print
the session directory path to stderr so coding agents can easily locate
the log files for analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/cli/src/commands/run.ts       | 16 +++++++++
 packages/core/src/agent/run-log.ts | 57 ++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)
diff --git a/apps/cli/src/commands/run.ts b/apps/cli/src/commands/run.ts
index 1f5656be..1b915d0d 100644
--- a/apps/cli/src/commands/run.ts
+++ b/apps/cli/src/commands/run.ts
@@ -6,9 +6,11 @@
  *   echo "prompt" | multica run
  */
 
+import { join } from "node:path";
 import { Agent } from "@multica/core";
 import type { AgentOptions } from "@multica/core";
 import type { ToolsConfig } from "@multica/core";
+import { DATA_DIR } from "@multica/utils";
 import { cyan, yellow, dim } from "../colors.js";
 
 type RunOptions = {
@@ -23,6 +25,7 @@ type RunOptions = {
   cwd?: string | undefined;
   session?: string | undefined;
   debug?: boolean;
+  runLog?: boolean;
   toolsAllow?: string[];
   toolsDeny?: string[];
   help?: boolean;
@@ -45,6 +48,7 @@ ${cyan("Options:")}
   ${yellow("--cwd")} DIR           Working directory
   ${yellow("--session")} ID        Session ID for persistence
   ${yellow("--debug")}             Enable debug logging
+  ${yellow("--run-log")}           Enable structured run logging (run-log.jsonl)
   ${yellow("--help")}, -h          Show this help
 
 ${cyan("Tools Configuration:")}
@@ -123,6 +127,10 @@ function parseArgs(argv: string[]): { opts: RunOptions; prompt: string } {
       opts.debug = true;
       continue;
     }
+    if (arg === "--run-log") {
+      opts.runLog = true;
+      continue;
+    }
     if (arg === "--tools-allow") {
       const value = args.shift();
       opts.toolsAllow = value?.split(",").map((s) => s.trim()) ?? [];
@@ -182,6 +190,8 @@ export async function runCommand(args: string[]): Promise<void> {
     }
   }
 
+  const enableRunLog = opts.runLog || !!process.env.MULTICA_RUN_LOG;
+
   const agent = new Agent({
     profileId: opts.profile,
     provider: opts.provider,
@@ -194,13 +204,19 @@ export async function runCommand(args: string[]): Promise<void> {
     cwd: opts.cwd,
     sessionId: opts.session,
     debug: opts.debug,
+    enableRunLog,
     tools: toolsConfig,
   });
 
+  const sessionDir = join(DATA_DIR, "sessions", agent.sessionId);
+
   // If it's a newly created session, notify user of sessionId
   if (!opts.session) {
     console.error(`[session: ${agent.sessionId}]`);
   }
+  if (enableRunLog) {
+    console.error(`[session-dir: ${sessionDir}]`);
+  }
 
   const result = await agent.run(finalPrompt);
   if (result.error) {
diff --git a/packages/core/src/agent/run-log.ts b/packages/core/src/agent/run-log.ts
index e9b9e371..e8e3b446 100644
--- a/packages/core/src/agent/run-log.ts
+++ b/packages/core/src/agent/run-log.ts
@@ -1,3 +1,60 @@
+/**
+ * Structured Run Log
+ *
+ * Records agent execution events to `{sessionDir}/run-log.jsonl`.
+ * Each line is a JSON object with `ts` (epoch ms) and `event` (type string).
+ *
+ * Enable via the `MULTICA_RUN_LOG=1` env var or `enableRunLog: true` in AgentOptions.
+ * CLI: `pnpm multica run --run-log "prompt"` (also prints `[session-dir: <path>]` to stderr).
+ *
+ * ## Event Reference
+ *
+ * ### Lifecycle
+ * - `run_start`   — Agent run begins.
+ *     Fields: prompt (first 200 chars), internal, provider, model, messages (count)
+ * - `run_end`     — Agent run completes.
+ *     Fields: duration_ms, error (string|null), text (first 200 chars), aborted?
+ *
+ * ### LLM Interaction
+ * - `llm_call`    — LLM API request sent.
+ *     Fields: provider, model, profile, messages (count)
+ * - `llm_result`  — LLM API response received.
+ *     Fields: duration_ms
+ *
+ * ### Tool Execution
+ * - `tool_start`  — Tool execution begins.
+ *     Fields: tool (name), args (first 500 chars of JSON)
+ * - `tool_end`    — Tool execution completes.
+ *     Fields: tool (name), duration_ms, is_error
+ *
+ * ### Context Management — Preflight (before LLM call)
+ * - `preflight_compact_start` — Preflight compaction triggered.
+ *     Fields: utilization, trigger, messages (count), est_tokens
+ * - `preflight_compact_end`   — Preflight compaction completed.
+ *     Fields: messages_before, messages_after, pruned (count removed)
+ * - `tool_result_pruning`     — Tool result pruning applied (Phase 1); also emitted during compaction (see `phase`).
+ *     Fields: soft_trimmed, hard_cleared, chars_saved, phase ("preflight"|"compaction"),
+ *             tokens_before?, tokens_after? (present when phase="compaction")
+ *
+ * ### Context Management — Compaction (during session)
+ * - `compaction`        — Summary compaction completed (Phase 2).
+ *     Fields: removed, kept, tokens_removed, tokens_kept, reason, pruning_stats?
+ * - `compaction_detail` — Detailed compaction breakdown.
+ *     Fields: pre_pruning_tokens, post_compaction_tokens, messages_removed, reason, pruning_applied
+ *
+ * ### Error Recovery
+ * - `context_overflow`           — Context window overflow detected.
+ *     Fields: attempt, messages_before
+ * - `context_overflow_compacted` — Overflow recovered via compaction.
+ *     Fields: messages_after, tokens_removed
+ * - `context_overflow_forced`    — Overflow recovered via forced message drop.
+ *     Fields: messages_before, messages_after
+ * - `error_classify`             — Error classified for auth rotation.
+ *     Fields: error (first 200 chars), reason, rotatable
+ * - `auth_rotate`                — Auth profile rotated after error.
+ *     Fields: from, to, reason
+ */
+
 import { join } from "path";
 import { mkdirSync } from "fs";
 import { appendFile } from "fs/promises";