feat(cli): add --run-log flag and session dir output for agent-driven E2E testing

Add a --run-log CLI flag that enables structured run logging without setting
the MULTICA_RUN_LOG env var. When run logging is enabled, print the session
directory path to stderr so coding agents can easily locate the log files for analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jiayuan Zhang 2026-02-15 16:03:40 +08:00
parent 239dc5a7c6
commit a2c1379c1d
2 changed files with 73 additions and 0 deletions

View file

@ -6,9 +6,11 @@
* echo "prompt" | multica run
*/
import { join } from "node:path";
import { Agent } from "@multica/core";
import type { AgentOptions } from "@multica/core";
import type { ToolsConfig } from "@multica/core";
import { DATA_DIR } from "@multica/utils";
import { cyan, yellow, dim } from "../colors.js";
type RunOptions = {
@ -23,6 +25,7 @@ type RunOptions = {
cwd?: string | undefined;
session?: string | undefined;
debug?: boolean;
runLog?: boolean;
toolsAllow?: string[];
toolsDeny?: string[];
help?: boolean;
@ -45,6 +48,7 @@ ${cyan("Options:")}
${yellow("--cwd")} DIR Working directory
${yellow("--session")} ID Session ID for persistence
${yellow("--debug")} Enable debug logging
${yellow("--run-log")} Enable structured run logging (run-log.jsonl)
${yellow("--help")}, -h Show this help
${cyan("Tools Configuration:")}
@ -123,6 +127,10 @@ function parseArgs(argv: string[]): { opts: RunOptions; prompt: string } {
opts.debug = true;
continue;
}
if (arg === "--run-log") {
opts.runLog = true;
continue;
}
if (arg === "--tools-allow") {
const value = args.shift();
opts.toolsAllow = value?.split(",").map((s) => s.trim()) ?? [];
@ -182,6 +190,8 @@ export async function runCommand(args: string[]): Promise<void> {
}
}
const enableRunLog = opts.runLog || !!process.env.MULTICA_RUN_LOG;
const agent = new Agent({
profileId: opts.profile,
provider: opts.provider,
@ -194,13 +204,19 @@ export async function runCommand(args: string[]): Promise<void> {
cwd: opts.cwd,
sessionId: opts.session,
debug: opts.debug,
enableRunLog,
tools: toolsConfig,
});
const sessionDir = join(DATA_DIR, "sessions", agent.sessionId);
// If it's a newly created session, notify user of sessionId
if (!opts.session) {
console.error(`[session: ${agent.sessionId}]`);
}
if (enableRunLog) {
console.error(`[session-dir: ${sessionDir}]`);
}
const result = await agent.run(finalPrompt);
if (result.error) {

View file

@ -1,3 +1,60 @@
/**
* Structured Run Log
*
* Records agent execution events to `{sessionDir}/run-log.jsonl`.
* Each line is a JSON object with `ts` (epoch ms) and `event` (type string).
*
* Enable via the `MULTICA_RUN_LOG=1` env var, `enableRunLog: true` in AgentOptions,
* or the `--run-log` CLI flag (e.g. `pnpm multica run --run-log "prompt"`).
*
* ## Event Reference
*
* ### Lifecycle
* - `run_start` Agent run begins.
* Fields: prompt (first 200 chars), internal, provider, model, messages (count)
* - `run_end` Agent run completes.
* Fields: duration_ms, error (string|null), text (first 200 chars), aborted?
*
* ### LLM Interaction
* - `llm_call` LLM API request sent.
* Fields: provider, model, profile, messages (count)
* - `llm_result` LLM API response received.
* Fields: duration_ms
*
* ### Tool Execution
* - `tool_start` Tool execution begins.
* Fields: tool (name), args (first 500 chars of JSON)
* - `tool_end` Tool execution completes.
* Fields: tool (name), duration_ms, is_error
*
* ### Context Management: Preflight (before LLM call)
* - `preflight_compact_start` Preflight compaction triggered.
* Fields: utilization, trigger, messages (count), est_tokens
* - `preflight_compact_end` Preflight compaction completed.
* Fields: messages_before, messages_after, pruned (count removed)
* - `tool_result_pruning` Tool result pruning applied (Phase 1).
* Fields: soft_trimmed, hard_cleared, chars_saved, phase ("preflight"|"compaction"),
* tokens_before?, tokens_after? (present when phase="compaction")
*
* ### Context Management: Compaction (during session)
* - `compaction` Summary compaction completed (Phase 2).
* Fields: removed, kept, tokens_removed, tokens_kept, reason, pruning_stats?
* - `compaction_detail` Detailed compaction breakdown.
* Fields: pre_pruning_tokens, post_compaction_tokens, messages_removed, reason, pruning_applied
*
* ### Error Recovery
* - `context_overflow` Context window overflow detected.
* Fields: attempt, messages_before
* - `context_overflow_compacted` Overflow recovered via compaction.
* Fields: messages_after, tokens_removed
* - `context_overflow_forced` Overflow recovered via forced message drop.
* Fields: messages_before, messages_after
* - `error_classify` Error classified for auth rotation.
* Fields: error (first 200 chars), reason, rotatable
* - `auth_rotate` Auth profile rotated after error.
* Fields: from, to, reason
*/
import { join } from "path";
import { mkdirSync } from "fs";
import { appendFile } from "fs/promises";