feat(cli): add --run-log flag and session dir output for agent-driven E2E testing
Add a --run-log CLI flag that enables structured run logging without having to set the MULTICA_RUN_LOG environment variable. When run logging is enabled, print the session directory path to stderr so that coding agents can easily locate the log files for analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
239dc5a7c6
commit
a2c1379c1d
2 changed files with 73 additions and 0 deletions
|
|
@ -6,9 +6,11 @@
|
|||
* echo "prompt" | multica run
|
||||
*/
|
||||
|
||||
import { join } from "node:path";
|
||||
import { Agent } from "@multica/core";
|
||||
import type { AgentOptions } from "@multica/core";
|
||||
import type { ToolsConfig } from "@multica/core";
|
||||
import { DATA_DIR } from "@multica/utils";
|
||||
import { cyan, yellow, dim } from "../colors.js";
|
||||
|
||||
type RunOptions = {
|
||||
|
|
@ -23,6 +25,7 @@ type RunOptions = {
|
|||
cwd?: string | undefined;
|
||||
session?: string | undefined;
|
||||
debug?: boolean;
|
||||
runLog?: boolean;
|
||||
toolsAllow?: string[];
|
||||
toolsDeny?: string[];
|
||||
help?: boolean;
|
||||
|
|
@ -45,6 +48,7 @@ ${cyan("Options:")}
|
|||
${yellow("--cwd")} DIR Working directory
|
||||
${yellow("--session")} ID Session ID for persistence
|
||||
${yellow("--debug")} Enable debug logging
|
||||
${yellow("--run-log")} Enable structured run logging (run-log.jsonl)
|
||||
${yellow("--help")}, -h Show this help
|
||||
|
||||
${cyan("Tools Configuration:")}
|
||||
|
|
@ -123,6 +127,10 @@ function parseArgs(argv: string[]): { opts: RunOptions; prompt: string } {
|
|||
opts.debug = true;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--run-log") {
|
||||
opts.runLog = true;
|
||||
continue;
|
||||
}
|
||||
if (arg === "--tools-allow") {
|
||||
const value = args.shift();
|
||||
opts.toolsAllow = value?.split(",").map((s) => s.trim()) ?? [];
|
||||
|
|
@ -182,6 +190,8 @@ export async function runCommand(args: string[]): Promise<void> {
|
|||
}
|
||||
}
|
||||
|
||||
const enableRunLog = opts.runLog || !!process.env.MULTICA_RUN_LOG;
|
||||
|
||||
const agent = new Agent({
|
||||
profileId: opts.profile,
|
||||
provider: opts.provider,
|
||||
|
|
@ -194,13 +204,19 @@ export async function runCommand(args: string[]): Promise<void> {
|
|||
cwd: opts.cwd,
|
||||
sessionId: opts.session,
|
||||
debug: opts.debug,
|
||||
enableRunLog,
|
||||
tools: toolsConfig,
|
||||
});
|
||||
|
||||
const sessionDir = join(DATA_DIR, "sessions", agent.sessionId);
|
||||
|
||||
// If it's a newly created session, notify user of sessionId
|
||||
if (!opts.session) {
|
||||
console.error(`[session: ${agent.sessionId}]`);
|
||||
}
|
||||
if (enableRunLog) {
|
||||
console.error(`[session-dir: ${sessionDir}]`);
|
||||
}
|
||||
|
||||
const result = await agent.run(finalPrompt);
|
||||
if (result.error) {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,60 @@
|
|||
/**
|
||||
* Structured Run Log
|
||||
*
|
||||
* Records agent execution events to `{sessionDir}/run-log.jsonl`.
|
||||
* Each line is a JSON object with `ts` (epoch ms) and `event` (type string).
|
||||
*
|
||||
* Enable via `MULTICA_RUN_LOG=1` env var or `enableRunLog: true` in AgentOptions.
|
||||
* CLI: `pnpm multica run --run-log "prompt"`
|
||||
*
|
||||
* ## Event Reference
|
||||
*
|
||||
* ### Lifecycle
|
||||
* - `run_start` — Agent run begins.
|
||||
* Fields: prompt (first 200 chars), internal, provider, model, messages (count)
|
||||
* - `run_end` — Agent run completes.
|
||||
* Fields: duration_ms, error (string|null), text (first 200 chars), aborted?
|
||||
*
|
||||
* ### LLM Interaction
|
||||
* - `llm_call` — LLM API request sent.
|
||||
* Fields: provider, model, profile, messages (count)
|
||||
* - `llm_result` — LLM API response received.
|
||||
* Fields: duration_ms
|
||||
*
|
||||
* ### Tool Execution
|
||||
* - `tool_start` — Tool execution begins.
|
||||
* Fields: tool (name), args (first 500 chars of JSON)
|
||||
* - `tool_end` — Tool execution completes.
|
||||
* Fields: tool (name), duration_ms, is_error
|
||||
*
|
||||
* ### Context Management — Preflight (before LLM call)
|
||||
* - `preflight_compact_start` — Preflight compaction triggered.
|
||||
* Fields: utilization, trigger, messages (count), est_tokens
|
||||
* - `preflight_compact_end` — Preflight compaction completed.
|
||||
* Fields: messages_before, messages_after, pruned (count removed)
|
||||
* - `tool_result_pruning` — Tool result pruning applied (Phase 1).
|
||||
* Fields: soft_trimmed, hard_cleared, chars_saved, phase ("preflight"|"compaction"),
|
||||
* tokens_before?, tokens_after? (present when phase="compaction")
|
||||
*
|
||||
* ### Context Management — Compaction (during session)
|
||||
* - `compaction` — Summary compaction completed (Phase 2).
|
||||
* Fields: removed, kept, tokens_removed, tokens_kept, reason, pruning_stats?
|
||||
* - `compaction_detail` — Detailed compaction breakdown.
|
||||
* Fields: pre_pruning_tokens, post_compaction_tokens, messages_removed, reason, pruning_applied
|
||||
*
|
||||
* ### Error Recovery
|
||||
* - `context_overflow` — Context window overflow detected.
|
||||
* Fields: attempt, messages_before
|
||||
* - `context_overflow_compacted` — Overflow recovered via compaction.
|
||||
* Fields: messages_after, tokens_removed
|
||||
* - `context_overflow_forced` — Overflow recovered via forced message drop.
|
||||
* Fields: messages_before, messages_after
|
||||
* - `error_classify` — Error classified for auth rotation.
|
||||
* Fields: error (first 200 chars), reason, rotatable
|
||||
* - `auth_rotate` — Auth profile rotated after error.
|
||||
* Fields: from, to, reason
|
||||
*/
|
||||
|
||||
import { join } from "path";
|
||||
import { mkdirSync } from "fs";
|
||||
import { appendFile } from "fs/promises";
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue