feat(cli): add --run-log flag and session dir output for agent-driven E2E testing

Add a --run-log CLI flag that enables structured run logging without setting the MULTICA_RUN_LOG env var. When run logging is enabled, print the session directory path to stderr so coding agents can easily locate the log files for analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 16:03:40 +08:00 · 2026-02-15 16:03:40 +08:00 · a2c1379c1d
commit a2c1379c1d
parent 239dc5a7c6
2 changed files with 73 additions and 0 deletions
--- a/apps/cli/src/commands/run.ts
+++ b/apps/cli/src/commands/run.ts
@ -6,9 +6,11 @@
 *   echo "prompt" | multica run
 */

+import { join } from "node:path";
 import { Agent } from "@multica/core";
 import type { AgentOptions } from "@multica/core";
 import type { ToolsConfig } from "@multica/core";
+import { DATA_DIR } from "@multica/utils";
 import { cyan, yellow, dim } from "../colors.js";

 type RunOptions = {
@ -23,6 +25,7 @@ type RunOptions = {
  cwd?: string | undefined;
  session?: string | undefined;
  debug?: boolean;
+  runLog?: boolean;
  toolsAllow?: string[];
  toolsDeny?: string[];
  help?: boolean;
@ -45,6 +48,7 @@ ${cyan("Options:")}
  ${yellow("--cwd")} DIR           Working directory
  ${yellow("--session")} ID        Session ID for persistence
  ${yellow("--debug")}             Enable debug logging
+  ${yellow("--run-log")}           Enable structured run logging (run-log.jsonl)
  ${yellow("--help")}, -h          Show this help

 ${cyan("Tools Configuration:")}
@ -123,6 +127,10 @@ function parseArgs(argv: string[]): { opts: RunOptions; prompt: string } {
      opts.debug = true;
      continue;
    }
+    if (arg === "--run-log") {
+      opts.runLog = true;
+      continue;
+    }
    if (arg === "--tools-allow") {
      const value = args.shift();
      opts.toolsAllow = value?.split(",").map((s) => s.trim()) ?? [];
@ -182,6 +190,8 @@ export async function runCommand(args: string[]): Promise<void> {
    }
  }

+  const enableRunLog = opts.runLog || !!process.env.MULTICA_RUN_LOG;
+
  const agent = new Agent({
    profileId: opts.profile,
    provider: opts.provider,
@ -194,13 +204,19 @@ export async function runCommand(args: string[]): Promise<void> {
    cwd: opts.cwd,
    sessionId: opts.session,
    debug: opts.debug,
+    enableRunLog,
    tools: toolsConfig,
  });

+  const sessionDir = join(DATA_DIR, "sessions", agent.sessionId);
+
  // If it's a newly created session, notify user of sessionId
  if (!opts.session) {
    console.error(`[session: ${agent.sessionId}]`);
  }
+  if (enableRunLog) {
+    console.error(`[session-dir: ${sessionDir}]`);
+  }

  const result = await agent.run(finalPrompt);
  if (result.error) {
--- a/packages/core/src/agent/run-log.ts
+++ b/packages/core/src/agent/run-log.ts
@ -1,3 +1,60 @@
+/**
+ * Structured Run Log
+ *
+ * Records agent execution events to `{sessionDir}/run-log.jsonl`.
+ * Each line is a JSON object with `ts` (epoch ms) and `event` (type string).
+ *
+ * Enable by setting the `MULTICA_RUN_LOG` env var (e.g. `MULTICA_RUN_LOG=1`) or by passing `enableRunLog: true` in AgentOptions.
+ * CLI: `pnpm multica run --run-log "prompt"`
+ *
+ * ## Event Reference
+ *
+ * ### Lifecycle
+ * - `run_start`   — Agent run begins.
+ *     Fields: prompt (first 200 chars), internal, provider, model, messages (count)
+ * - `run_end`     — Agent run completes.
+ *     Fields: duration_ms, error (string|null), text (first 200 chars), aborted?
+ *
+ * ### LLM Interaction
+ * - `llm_call`    — LLM API request sent.
+ *     Fields: provider, model, profile, messages (count)
+ * - `llm_result`  — LLM API response received.
+ *     Fields: duration_ms
+ *
+ * ### Tool Execution
+ * - `tool_start`  — Tool execution begins.
+ *     Fields: tool (name), args (first 500 chars of JSON)
+ * - `tool_end`    — Tool execution completes.
+ *     Fields: tool (name), duration_ms, is_error
+ *
+ * ### Context Management — Preflight (before LLM call)
+ * - `preflight_compact_start` — Preflight compaction triggered.
+ *     Fields: utilization, trigger, messages (count), est_tokens
+ * - `preflight_compact_end`   — Preflight compaction completed.
+ *     Fields: messages_before, messages_after, pruned (count removed)
+ * - `tool_result_pruning`     — Tool result pruning applied (Phase 1).
+ *     Fields: soft_trimmed, hard_cleared, chars_saved, phase ("preflight"|"compaction"),
+ *             tokens_before?, tokens_after? (present when phase="compaction")
+ *
+ * ### Context Management — Compaction (during session)
+ * - `compaction`        — Summary compaction completed (Phase 2).
+ *     Fields: removed, kept, tokens_removed, tokens_kept, reason, pruning_stats?
+ * - `compaction_detail` — Detailed compaction breakdown.
+ *     Fields: pre_pruning_tokens, post_compaction_tokens, messages_removed, reason, pruning_applied
+ *
+ * ### Error Recovery
+ * - `context_overflow`           — Context window overflow detected.
+ *     Fields: attempt, messages_before
+ * - `context_overflow_compacted` — Overflow recovered via compaction.
+ *     Fields: messages_after, tokens_removed
+ * - `context_overflow_forced`    — Overflow recovered via forced message drop.
+ *     Fields: messages_before, messages_after
+ * - `error_classify`             — Error classified for auth rotation.
+ *     Fields: error (first 200 chars), reason, rotatable
+ * - `auth_rotate`                — Auth profile rotated after error.
+ *     Fields: from, to, reason
+ */
+
 import { join } from "path";
 import { mkdirSync } from "fs";
 import { appendFile } from "fs/promises";