From a2c1379c1d3b75c513d7df8667a5df6079cb9e4b Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Sun, 15 Feb 2026 16:03:40 +0800 Subject: [PATCH] feat(cli): add --run-log flag and session dir output for agent-driven E2E testing Add --run-log CLI flag to enable structured run logging without env var. Print session directory path to stderr when run-log is enabled so Coding Agents can easily locate log files for analysis. Co-Authored-By: Claude Opus 4.6 --- apps/cli/src/commands/run.ts | 16 +++++++++ packages/core/src/agent/run-log.ts | 57 ++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/apps/cli/src/commands/run.ts b/apps/cli/src/commands/run.ts index 1f5656be..1b915d0d 100644 --- a/apps/cli/src/commands/run.ts +++ b/apps/cli/src/commands/run.ts @@ -6,9 +6,11 @@ * echo "prompt" | multica run */ +import { join } from "node:path"; import { Agent } from "@multica/core"; import type { AgentOptions } from "@multica/core"; import type { ToolsConfig } from "@multica/core"; +import { DATA_DIR } from "@multica/utils"; import { cyan, yellow, dim } from "../colors.js"; type RunOptions = { @@ -23,6 +25,7 @@ type RunOptions = { cwd?: string | undefined; session?: string | undefined; debug?: boolean; + runLog?: boolean; toolsAllow?: string[]; toolsDeny?: string[]; help?: boolean; @@ -45,6 +48,7 @@ ${cyan("Options:")} ${yellow("--cwd")} DIR Working directory ${yellow("--session")} ID Session ID for persistence ${yellow("--debug")} Enable debug logging + ${yellow("--run-log")} Enable structured run logging (run-log.jsonl) ${yellow("--help")}, -h Show this help ${cyan("Tools Configuration:")} @@ -123,6 +127,10 @@ function parseArgs(argv: string[]): { opts: RunOptions; prompt: string } { opts.debug = true; continue; } + if (arg === "--run-log") { + opts.runLog = true; + continue; + } if (arg === "--tools-allow") { const value = args.shift(); opts.toolsAllow = value?.split(",").map((s) => s.trim()) ?? 
[]; @@ -182,6 +190,8 @@ export async function runCommand(args: string[]): Promise<void> { } } + const enableRunLog = opts.runLog || !!process.env.MULTICA_RUN_LOG; + const agent = new Agent({ profileId: opts.profile, provider: opts.provider, @@ -194,13 +204,19 @@ export async function runCommand(args: string[]): Promise<void> { cwd: opts.cwd, sessionId: opts.session, debug: opts.debug, + enableRunLog, tools: toolsConfig, }); + const sessionDir = join(DATA_DIR, "sessions", agent.sessionId); + // If it's a newly created session, notify user of sessionId if (!opts.session) { console.error(`[session: ${agent.sessionId}]`); } + if (enableRunLog) { + console.error(`[session-dir: ${sessionDir}]`); + } const result = await agent.run(finalPrompt); if (result.error) { diff --git a/packages/core/src/agent/run-log.ts b/packages/core/src/agent/run-log.ts index e9b9e371..e8e3b446 100644 --- a/packages/core/src/agent/run-log.ts +++ b/packages/core/src/agent/run-log.ts @@ -1,3 +1,60 @@ +/** + * Structured Run Log + * + * Records agent execution events to `{sessionDir}/run-log.jsonl`. + * Each line is a JSON object with `ts` (epoch ms) and `event` (type string). + * + * Enable via `MULTICA_RUN_LOG=1` env var or `enableRunLog: true` in AgentOptions. + * CLI: `pnpm multica run --run-log "prompt"` + * + * ## Event Reference + * + * ### Lifecycle + * - `run_start` — Agent run begins. + * Fields: prompt (first 200 chars), internal, provider, model, messages (count) + * - `run_end` — Agent run completes. + * Fields: duration_ms, error (string|null), text (first 200 chars), aborted? + * + * ### LLM Interaction + * - `llm_call` — LLM API request sent. + * Fields: provider, model, profile, messages (count) + * - `llm_result` — LLM API response received. + * Fields: duration_ms + * + * ### Tool Execution + * - `tool_start` — Tool execution begins. + * Fields: tool (name), args (first 500 chars of JSON) + * - `tool_end` — Tool execution completes. 
+ * Fields: tool (name), duration_ms, is_error + * + * ### Context Management — Preflight (before LLM call) + * - `preflight_compact_start` — Preflight compaction triggered. + * Fields: utilization, trigger, messages (count), est_tokens + * - `preflight_compact_end` — Preflight compaction completed. + * Fields: messages_before, messages_after, pruned (count removed) + * - `tool_result_pruning` — Tool result pruning applied (Phase 1). + * Fields: soft_trimmed, hard_cleared, chars_saved, phase ("preflight"|"compaction"), + * tokens_before?, tokens_after? (present when phase="compaction") + * + * ### Context Management — Compaction (during session) + * - `compaction` — Summary compaction completed (Phase 2). + * Fields: removed, kept, tokens_removed, tokens_kept, reason, pruning_stats? + * - `compaction_detail` — Detailed compaction breakdown. + * Fields: pre_pruning_tokens, post_compaction_tokens, messages_removed, reason, pruning_applied + * + * ### Error Recovery + * - `context_overflow` — Context window overflow detected. + * Fields: attempt, messages_before + * - `context_overflow_compacted` — Overflow recovered via compaction. + * Fields: messages_after, tokens_removed + * - `context_overflow_forced` — Overflow recovered via forced message drop. + * Fields: messages_before, messages_after + * - `error_classify` — Error classified for auth rotation. + * Fields: error (first 200 chars), reason, rotatable + * - `auth_rotate` — Auth profile rotated after error. + * Fields: from, to, reason + */ + import { join } from "path"; import { mkdirSync } from "fs"; import { appendFile } from "fs/promises";