From 8f73cfdf9d1fb5fdc9574f51c79cf64d10d36aa3 Mon Sep 17 00:00:00 2001
From: Jiayuan Zhang <forrestchang7@gmail.com>
Date: Fri, 13 Feb 2026 22:03:29 +0800
Subject: [PATCH 1/4] docs: add strict mock policy and testing guidelines to
 CLAUDE.md

Establish "external-only mock" rule: only third-party dependencies
may be mocked in tests. Internal modules must use real implementations
with temp directories, reset functions, or dependency injection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 CLAUDE.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/CLAUDE.md b/CLAUDE.md
index b08b1e2d..dc3fcc53 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -201,6 +201,74 @@ git add README.md
 git commit -m "docs: update API documentation"
 ```
 
+## Testing Guidelines
+
+### Mock Policy: External Only
+
+**CRITICAL RULE**: Only mock third-party/external dependencies. NEVER mock internal modules.
+
+| Type | Examples | Can Mock? |
+|------|----------|-----------|
+| Internal modules | `./runner.js`, `../utils/format.js` | NO |
+| Monorepo packages | `@multica/core`, `@multica/utils` | NO |
+| Third-party packages | `openai`, `@anthropic-ai/sdk`, `@mariozechner/*` | YES |
+| System/time APIs | `vi.useFakeTimers()`, `vi.setSystemTime()` | YES |
+| Network calls | External HTTP requests, WebSocket connections | YES |
+
+When AI writes code, tests become more valuable than the code itself. Mocking internal modules creates brittle tests that don't verify real integration between modules, hides bugs, and requires maintaining parallel mock implementations.
+
+### Preferred Patterns
+
+**Temp directories for I/O tests** (no filesystem mocking):
+```typescript
+const testDir = join(tmpdir(), `multica-test-${Date.now()}`);
+beforeEach(() => mkdirSync(testDir, { recursive: true }));
+afterEach(() => rmSync(testDir, { recursive: true, force: true }));
+```
+
+**Test reset functions for stateful modules**:
+```typescript
+// In the module itself:
+export function resetForTests() { /* clear in-memory state */ }
+
+// In tests:
+beforeEach(() => resetForTests());
+```
+
+**Pure function tests** — no mocking needed:
+```typescript
+const result = resolveContextWindowInfo({ modelContextWindow: 100_000 });
+expect(result.tokens).toBe(100_000);
+```
+
+**Constructor/parameter injection** over module mocking:
+```typescript
+// Good: pass baseDir as parameter
+const session = new SessionManager({ sessionId: "test", baseDir: testDir });
+
+// Bad: mock the paths module
+vi.mock("../../shared/paths.js", () => ({ DATA_DIR: "/tmp/test" }));
+```
+
+### Anti-Patterns
+
+- `vi.mock("./internal-module.js")` — NEVER mock internal modules
+- Mock objects with 10+ method stubs — sign you should use the real implementation
+- `vi.mock("../context-window/index.js")` with simplified logic — hides real behavior
+- Tests that pass but don't exercise any real code paths ("fake green")
+
+### Reference Tests
+
+Good patterns to follow:
+- `packages/core/src/agent/session/session-manager.display.test.ts` — real SessionManager + temp dirs
+- `packages/core/src/agent/skills/loader.test.ts` — real skill loading + temp filesystem
+- `packages/core/src/agent/context-window/guard.test.ts` — pure function tests
+- `packages/core/src/agent/subagent/registry.test.ts` — real registry + `resetSubagentRegistryForTests()`
+
+Known violations (to be migrated):
+- `packages/core/src/agent/async-agent.test.ts` — mocks internal `./runner.js`
+- `packages/core/src/agent/session/compaction.test.ts` — mocks internal `../context-window/index.js`
+
 ## Pre-push Checks
 
 Before pushing, always run:

From f525d6ccfe0b8c2704750b3c37156f50817e6e8a Mon Sep 17 00:00:00 2001
From: Jiayuan Zhang <forrestchang7@gmail.com>
Date: Fri, 13 Feb 2026 22:03:36 +0800
Subject: [PATCH 2/4] test: add vitest setup with static mock policy checker

Scans test files at startup and warns when vi.mock() targets internal
modules (relative paths or @multica/* packages). Reports file paths
and line numbers for all violations without interfering with test
execution.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 vitest.config.ts |  5 ++++
 vitest.setup.ts  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 vitest.setup.ts

diff --git a/vitest.config.ts b/vitest.config.ts
index aa633535..ba083f22 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -1,9 +1,14 @@
+import { fileURLToPath } from "node:url";
+import path from "node:path";
 import { defineConfig } from "vitest/config";
 
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
 export default defineConfig({
   test: {
     globals: true,
     environment: "node",
+    setupFiles: [path.resolve(__dirname, "vitest.setup.ts")],
     include: ["src/**/*.test.ts"],
     coverage: {
       provider: "v8",
diff --git a/vitest.setup.ts b/vitest.setup.ts
new file mode 100644
index 00000000..9b75aba8
--- /dev/null
+++ b/vitest.setup.ts
@@ -0,0 +1,72 @@
+/**
+ * Vitest global setup — statically scans test files for internal mock violations.
+ *
+ * Rule: only mock third-party / external dependencies.
+ * Internal modules (relative imports, @multica/* packages) must NOT be mocked.
+ *
+ * This runs once at startup and logs warnings for violations without
+ * interfering with vitest's mock hoisting mechanism.
+ */
+import { readFileSync, readdirSync, statSync } from "node:fs";
+import { join, resolve } from "node:path";
+
+const INTERNAL_MOCK_PATTERN =
+  /vi\.mock\(\s*["'](\.\/|\.\.\/|@multica\/)[^"']*["']/g;
+
+function scanDirectory(dir: string): string[] {
+  const files: string[] = [];
+  try {
+    for (const entry of readdirSync(dir)) {
+      const full = join(dir, entry);
+      const stat = statSync(full, { throwIfNoEntry: false });
+      if (!stat) continue;
+      if (stat.isDirectory() && entry !== "node_modules") {
+        files.push(...scanDirectory(full));
+      } else if (entry.endsWith(".test.ts")) {
+        files.push(full);
+      }
+    }
+  } catch {
+    // ignore unreadable directories
+  }
+  return files;
+}
+
+function checkMockPolicy(): void {
+  const srcDir = resolve(process.cwd(), "src");
+  const testFiles = scanDirectory(srcDir);
+  const violations: Array<{ file: string; line: number; match: string }> = [];
+
+  for (const file of testFiles) {
+    const content = readFileSync(file, "utf-8");
+    const lines = content.split("\n");
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i];
+      INTERNAL_MOCK_PATTERN.lastIndex = 0;
+      const match = INTERNAL_MOCK_PATTERN.exec(line);
+      if (match) {
+        violations.push({
+          file: file.replace(process.cwd() + "/", ""),
+          line: i + 1,
+          match: match[0],
+        });
+      }
+    }
+  }
+
+  if (violations.length > 0) {
+    console.warn("\n[mock-policy] Internal module mock violations detected:");
+    console.warn(
+      "  Rule: Only mock third-party dependencies. See CLAUDE.md Testing Guidelines.\n",
+    );
+    for (const v of violations) {
+      console.warn(`  ${v.file}:${v.line}`);
+      console.warn(`    ${v.match}\n`);
+    }
+    console.warn(
+      `  Total: ${violations.length} violation(s) in ${new Set(violations.map((v) => v.file)).size} file(s)\n`,
+    );
+  }
+}
+
+checkMockPolicy();

From 37e550200c34cefee96304ec5377844f4278db04 Mon Sep 17 00:00:00 2001
From: Jiayuan Zhang <forrestchang7@gmail.com>
Date: Fri, 13 Feb 2026 22:17:41 +0800
Subject: [PATCH 3/4] refactor(core): add baseDir DI and test helpers for
 mock-free testing

Add AuthStoreOptions with baseDir to auth-profiles/store.ts functions,
add baseDir option to announce.ts readLatestAssistantReply, and add
seedSubagentRunForTests helper to registry.ts. These enable tests to
use real implementations with temp directories instead of mocking
internal modules.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../core/src/agent/auth-profiles/store.ts     | 32 +++++++++++--------
 packages/core/src/agent/subagent/announce.ts  |  7 ++--
 packages/core/src/agent/subagent/registry.ts  |  5 +++
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/packages/core/src/agent/auth-profiles/store.ts b/packages/core/src/agent/auth-profiles/store.ts
index 7b090064..bfebc1bc 100644
--- a/packages/core/src/agent/auth-profiles/store.ts
+++ b/packages/core/src/agent/auth-profiles/store.ts
@@ -115,9 +115,14 @@ function acquireLockSync(filePath: string): () => void {
 // Paths
 // ============================================================
 
+export interface AuthStoreOptions {
+  /** Override the base directory (defaults to DATA_DIR). Useful for testing. */
+  baseDir?: string;
+}
+
 /** Resolve the auth profile store file path */
-export function resolveAuthStorePath(): string {
-  return join(DATA_DIR, AUTH_PROFILE_STORE_FILENAME);
+export function resolveAuthStorePath(options?: AuthStoreOptions): string {
+  return join(options?.baseDir ?? DATA_DIR, AUTH_PROFILE_STORE_FILENAME);
 }
 
 // ============================================================
@@ -148,8 +153,8 @@ export function coerceStore(raw: unknown): AuthProfileStore {
 }
 
 /** Ensure the store file exists on disk (creates it if missing) */
-export function ensureAuthStoreFile(): string {
-  const storePath = resolveAuthStorePath();
+export function ensureAuthStoreFile(options?: AuthStoreOptions): string {
+  const storePath = resolveAuthStorePath(options);
   const dir = dirname(storePath);
   if (!existsSync(dir)) {
     mkdirSync(dir, { recursive: true });
@@ -161,8 +166,8 @@ export function ensureAuthStoreFile(): string {
 }
 
 /** Load auth profile store from disk. Returns empty store if file doesn't exist. */
-export function loadAuthProfileStore(): AuthProfileStore {
-  const storePath = resolveAuthStorePath();
+export function loadAuthProfileStore(options?: AuthStoreOptions): AuthProfileStore {
+  const storePath = resolveAuthStorePath(options);
   if (!existsSync(storePath)) return createEmptyStore();
 
   try {
@@ -174,8 +179,8 @@ export function loadAuthProfileStore(): AuthProfileStore {
 }
 
 /** Save auth profile store to disk */
-export function saveAuthProfileStore(store: AuthProfileStore): void {
-  const storePath = resolveAuthStorePath();
+export function saveAuthProfileStore(store: AuthProfileStore, options?: AuthStoreOptions): void {
+  const storePath = resolveAuthStorePath(options);
   const dir = dirname(storePath);
   if (!existsSync(dir)) {
     mkdirSync(dir, { recursive: true });
@@ -191,24 +196,25 @@ export function saveAuthProfileStore(store: AuthProfileStore): void {
  */
 export function updateAuthProfileStore(
   updater: (store: AuthProfileStore) => void,
+  options?: AuthStoreOptions,
 ): AuthProfileStore {
-  const storePath = ensureAuthStoreFile();
+  const storePath = ensureAuthStoreFile(options);
 
   try {
     const release = acquireLockSync(storePath);
     try {
-      const store = loadAuthProfileStore();
+      const store = loadAuthProfileStore(options);
       updater(store);
-      saveAuthProfileStore(store);
+      saveAuthProfileStore(store, options);
       return store;
     } finally {
       release();
     }
   } catch {
     // Fallback: unlocked update (better than losing the write entirely)
-    const store = loadAuthProfileStore();
+    const store = loadAuthProfileStore(options);
     updater(store);
-    saveAuthProfileStore(store);
+    saveAuthProfileStore(store, options);
     return store;
   }
 }
diff --git a/packages/core/src/agent/subagent/announce.ts b/packages/core/src/agent/subagent/announce.ts
index de6091f3..3ab8eeeb 100644
--- a/packages/core/src/agent/subagent/announce.ts
+++ b/packages/core/src/agent/subagent/announce.ts
@@ -38,8 +38,11 @@ export function buildSubagentSystemPrompt(params: SubagentSystemPromptParams): s
 /**
  * Read the latest assistant reply from a session's JSONL file.
  */
-export function readLatestAssistantReply(sessionId: string): string | undefined {
-  const entries = readEntries(sessionId);
+export function readLatestAssistantReply(
+  sessionId: string,
+  options?: { baseDir?: string },
+): string | undefined {
+  const entries = readEntries(sessionId, options);
   let latestToolResultText: string | undefined;
 
   // Walk backwards to find the last non-empty assistant reply.
diff --git a/packages/core/src/agent/subagent/registry.ts b/packages/core/src/agent/subagent/registry.ts
index eaff0158..ca26f5dc 100644
--- a/packages/core/src/agent/subagent/registry.ts
+++ b/packages/core/src/agent/subagent/registry.ts
@@ -244,6 +244,11 @@ export function resetSubagentRegistryForTests(): void {
   stopSweeper();
 }
 
+/** Seed a run record directly (for testing). Bypasses persistence and side effects. */
+export function seedSubagentRunForTests(record: SubagentRunRecord): void {
+  subagentRuns.set(record.runId, record);
+}
+
 // ============================================================================
 // Lifecycle watching
 // ============================================================================

From f6b6c2909b59e7660257dbcfd7826d682a6739da Mon Sep 17 00:00:00 2001
From: Jiayuan Zhang <forrestchang7@gmail.com>
Date: Fri, 13 Feb 2026 22:17:52 +0800
Subject: [PATCH 4/4] test(core): migrate 5 tests from internal mocks to real
 implementations

- store.test.ts: use baseDir option instead of mocking paths.js
- session-file-repair.test.ts: remove write-lock mock, assert behavior
- announce-findings.test.ts: use real storage with temp dirs
- sessions-list.test.ts: use real registry with seed helper
- compaction.test.ts: mock only third-party pi-coding-agent, use real
  context-window internals

All tests exercise real code paths, improving confidence in actual
behavior per the strict mock policy.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../src/agent/auth-profiles/store.test.ts     | 34 +++-----
 .../core/src/agent/session/compaction.test.ts | 85 +++++--------------
 .../agent/session/session-file-repair.test.ts | 24 ++----
 .../agent/subagent/announce-findings.test.ts  | 52 +++++++-----
 .../src/agent/tools/sessions-list.test.ts     | 77 ++++++++---------
 5 files changed, 109 insertions(+), 163 deletions(-)

diff --git a/packages/core/src/agent/auth-profiles/store.test.ts b/packages/core/src/agent/auth-profiles/store.test.ts
index 4e2d78f5..0601d56a 100644
--- a/packages/core/src/agent/auth-profiles/store.test.ts
+++ b/packages/core/src/agent/auth-profiles/store.test.ts
@@ -1,20 +1,14 @@
 import { describe, it, expect, beforeEach, afterEach } from "vitest";
 import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
+import { tmpdir } from "node:os";
 import { coerceStore, loadAuthProfileStore, saveAuthProfileStore, updateAuthProfileStore } from "./store.js";
-import { AUTH_STORE_VERSION } from "./constants.js";
+import { AUTH_STORE_VERSION, AUTH_PROFILE_STORE_FILENAME } from "./constants.js";
 import type { AuthProfileStore } from "./types.js";
 
-// Use a temp directory for tests to avoid touching real store
-const TEST_DIR = join(import.meta.dirname ?? ".", "__test_store_tmp__");
-const TEST_STORE_PATH = join(TEST_DIR, "auth-profiles.json");
-
-// We need to mock resolveAuthStorePath to point to our test dir
-import { vi } from "vitest";
-
-vi.mock("../../shared/paths.js", () => ({
-  DATA_DIR: join(import.meta.dirname ?? ".", "__test_store_tmp__"),
-}));
+const TEST_DIR = join(tmpdir(), `multica-store-test-${process.pid}`);
+const TEST_STORE_PATH = join(TEST_DIR, AUTH_PROFILE_STORE_FILENAME);
+const storeOptions = { baseDir: TEST_DIR };
 
 beforeEach(() => {
   if (!existsSync(TEST_DIR)) {
@@ -72,7 +66,7 @@ describe("coerceStore", () => {
 
 describe("loadAuthProfileStore / saveAuthProfileStore", () => {
   it("returns empty store when file does not exist", () => {
-    const store = loadAuthProfileStore();
+    const store = loadAuthProfileStore(storeOptions);
     expect(store.version).toBe(AUTH_STORE_VERSION);
   });
 
@@ -84,14 +78,14 @@ describe("loadAuthProfileStore / saveAuthProfileStore", () => {
         "anthropic:main": { lastUsed: 5000, errorCount: 1 },
       },
     };
-    saveAuthProfileStore(original);
-    const loaded = loadAuthProfileStore();
+    saveAuthProfileStore(original, storeOptions);
+    const loaded = loadAuthProfileStore(storeOptions);
     expect(loaded).toEqual(original);
   });
 
   it("handles corrupted JSON gracefully", () => {
     writeFileSync(TEST_STORE_PATH, "not valid json{{{", "utf8");
-    const store = loadAuthProfileStore();
+    const store = loadAuthProfileStore(storeOptions);
     expect(store.version).toBe(AUTH_STORE_VERSION);
   });
 });
@@ -105,11 +99,11 @@ describe("updateAuthProfileStore", () => {
     const result = updateAuthProfileStore((store) => {
       if (!store.lastGood) store.lastGood = {};
       store.lastGood.openai = "openai:primary";
-    });
+    }, storeOptions);
     expect(result.lastGood?.openai).toBe("openai:primary");
 
     // Verify persisted
-    const loaded = loadAuthProfileStore();
+    const loaded = loadAuthProfileStore(storeOptions);
     expect(loaded.lastGood?.openai).toBe("openai:primary");
   });
 
@@ -117,14 +111,14 @@ describe("updateAuthProfileStore", () => {
     saveAuthProfileStore({
       version: 1,
       lastGood: { anthropic: "anthropic" },
-    });
+    }, storeOptions);
 
     updateAuthProfileStore((store) => {
       if (!store.usageStats) store.usageStats = {};
       store.usageStats["anthropic"] = { lastUsed: 9999 };
-    });
+    }, storeOptions);
 
-    const loaded = loadAuthProfileStore();
+    const loaded = loadAuthProfileStore(storeOptions);
     expect(loaded.lastGood?.anthropic).toBe("anthropic");
     expect(loaded.usageStats?.anthropic?.lastUsed).toBe(9999);
   });
diff --git a/packages/core/src/agent/session/compaction.test.ts b/packages/core/src/agent/session/compaction.test.ts
index 3529b59d..e3087816 100644
--- a/packages/core/src/agent/session/compaction.test.ts
+++ b/packages/core/src/agent/session/compaction.test.ts
@@ -7,59 +7,17 @@ import {
 } from "./compaction.js";
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 
-// Mock the token estimation functions
-vi.mock("../context-window/index.js", async () => {
-  const actual = await vi.importActual("../context-window/index.js");
-  return {
-    ...(actual as object),
-    estimateMessagesTokens: (messages: AgentMessage[]) => {
-      // Simple mock: 10 tokens per message
-      return messages.length * 10;
-    },
-    compactMessagesTokenAware: (
-      messages: AgentMessage[],
-      availableTokens: number,
-      options?: { targetRatio?: number; minKeepMessages?: number },
-    ) => {
-      const minKeep = options?.minKeepMessages ?? 10;
-      if (messages.length <= minKeep) return null;
-
-      const targetTokens = availableTokens * (options?.targetRatio ?? 0.5);
-      const currentTokens = messages.length * 10;
-      if (currentTokens <= targetTokens) return null;
-
-      // Keep enough messages to be under target
-      const keepCount = Math.max(minKeep, Math.floor(targetTokens / 10));
-      const kept = messages.slice(-keepCount);
-
-      return {
-        kept,
-        removedCount: messages.length - kept.length,
-        tokensRemoved: (messages.length - kept.length) * 10,
-        tokensKept: kept.length * 10,
-      };
-    },
-    estimateTokenUsage: (params: any) => {
-      const messageTokens = params.messages.length * 10;
-      const systemPromptTokens = params.systemPrompt ? 100 : 0;
-      const reserve = params.reserveTokens ?? 1024;
-      const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve);
-      const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1;
-
-      return {
-        messageTokens,
-        systemPromptTokens,
-        availableTokens,
-        utilizationRatio,
-      };
-    },
-    shouldCompact: (estimation: any) => estimation.utilizationRatio >= 0.8,
-    compactMessagesWithSummary: vi.fn(),
-    compactMessagesWithChunkedSummary: vi.fn(),
-    COMPACTION_TARGET_RATIO: 0.5,
-    MIN_KEEP_MESSAGES: 10,
-  };
-});
+// Mock only the third-party dependency (allowed by mock policy).
+// The internal context-window module uses real logic.
+vi.mock("@mariozechner/pi-coding-agent", () => ({
+  estimateTokens: (message: AgentMessage) => {
+    const msg = message as any;
+    if (typeof msg.content === "string") {
+      return Math.ceil(msg.content.length / 4);
+    }
+    return 50;
+  },
+}));
 
 describe("compaction", () => {
   function createMessages(count: number, prefix = "Message"): AgentMessage[] {
@@ -162,14 +120,14 @@ describe("compaction", () => {
   describe("compactMessagesByTokens", () => {
     it("should return null when under token limit", () => {
       const messages = createMessages(5);
-      // 5 messages * 10 tokens = 50 tokens, target = 1000 * 0.5 = 500
+      // ~15 tokens total, target = 1000 * 0.5 = 500 → no compact needed
       const result = compactMessagesByTokens(messages, 1000);
       expect(result).toBeNull();
     });
 
     it("should compact when over token limit", () => {
       const messages = createMessages(100);
-      // 100 messages * 10 tokens = 1000 tokens, target = 200 * 0.5 = 100
+      // ~300 tokens total, target = 200 * 0.5 = 100 → needs compact
       const result = compactMessagesByTokens(messages, 200, {
         targetRatio: 0.5,
         minKeepMessages: 5,
@@ -231,14 +189,15 @@ describe("compaction", () => {
     describe("tokens mode", () => {
       it("should use token-based compaction when utilization is high", () => {
         const messages = createMessages(100);
-        // 100 * 10 = 1000 message tokens
-        // System: 100 tokens, Reserve: 1024
-        // Available: 2000 - 100 - 1024 = 876
-        // Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8
+        // ~300 message tokens (real estimator: ~3 tokens/msg)
+        // systemPromptTokens ≈ 7, reserveTokens = 0
+        // available = 500 - 7 = 493
+        // utilization = (300 * 1.5) / 493 ≈ 0.91 > 0.8 → should compact
         const result = compactMessages(messages, {
           mode: "tokens",
-          contextWindowTokens: 2000,
+          contextWindowTokens: 500,
           systemPrompt: "System prompt",
+          reserveTokens: 0,
         });
 
         expect(result).not.toBeNull();
@@ -247,9 +206,9 @@ describe("compaction", () => {
 
       it("should return null when utilization is low", () => {
         const messages = createMessages(5);
-        // 5 * 10 = 50 message tokens
-        // Available: 10000 - 100 - 1024 = 8876
-        // Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8
+        // ~15 message tokens
+        // available = 10000 - 7 - 1024 = 8969
+        // utilization = (15 * 1.5) / 8969 ≈ 0.003 < 0.8
         const result = compactMessages(messages, {
           mode: "tokens",
           contextWindowTokens: 10000,
diff --git a/packages/core/src/agent/session/session-file-repair.test.ts b/packages/core/src/agent/session/session-file-repair.test.ts
index b6ef250e..2847f5b8 100644
--- a/packages/core/src/agent/session/session-file-repair.test.ts
+++ b/packages/core/src/agent/session/session-file-repair.test.ts
@@ -1,25 +1,10 @@
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
-import { beforeEach, describe, expect, it, vi } from "vitest";
+import { describe, expect, it, vi } from "vitest";
 import { repairSessionFileIfNeeded } from "./session-file-repair.js";
-import { acquireSessionWriteLock } from "./session-write-lock.js";
-
-vi.mock("./session-write-lock.js", async () => {
-  const actual = await vi.importActual<typeof import("./session-write-lock.js")>(
-    "./session-write-lock.js",
-  );
-  return {
-    ...actual,
-    acquireSessionWriteLock: vi.fn(actual.acquireSessionWriteLock),
-  };
-});
 
 describe("repairSessionFileIfNeeded", () => {
-  beforeEach(() => {
-    vi.mocked(acquireSessionWriteLock).mockClear();
-  });
-
   it("rewrites session files that contain malformed lines", async () => {
     const dir = await fs.mkdtemp(path.join(os.tmpdir(), "multica-session-repair-"));
     const file = path.join(dir, "session.jsonl");
@@ -93,13 +78,16 @@ describe("repairSessionFileIfNeeded", () => {
     expect(warn).toHaveBeenCalledTimes(1);
   });
 
-  it("acquires a write lock while repairing", async () => {
+  it("acquires a write lock while repairing (lock file cleaned up after)", async () => {
     const dir = await fs.mkdtemp(path.join(os.tmpdir(), "multica-session-repair-"));
     const file = path.join(dir, "session.jsonl");
     await fs.writeFile(file, "{broken\n{also broken\n", "utf-8");
 
     await repairSessionFileIfNeeded({ sessionFile: file });
 
-    expect(vi.mocked(acquireSessionWriteLock)).toHaveBeenCalledTimes(1);
+    // Lock file should be released (cleaned up) after repair
+    const lockFile = `${file}.lock`;
+    const lockExists = await fs.access(lockFile).then(() => true, () => false);
+    expect(lockExists).toBe(false);
   });
 });
diff --git a/packages/core/src/agent/subagent/announce-findings.test.ts b/packages/core/src/agent/subagent/announce-findings.test.ts
index 6a52fc76..71f76448 100644
--- a/packages/core/src/agent/subagent/announce-findings.test.ts
+++ b/packages/core/src/agent/subagent/announce-findings.test.ts
@@ -1,20 +1,29 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
-
-const readEntriesMock = vi.fn();
-
-vi.mock("../session/storage.js", () => ({
-  readEntries: (sessionId: string) => readEntriesMock(sessionId),
-}));
-
+import { describe, it, expect, afterEach } from "vitest";
+import { mkdtempSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { writeEntries } from "../session/storage.js";
 import { readLatestAssistantReply } from "./announce.js";
+import type { SessionEntry } from "../session/types.js";
 
 describe("readLatestAssistantReply", () => {
-  beforeEach(() => {
-    readEntriesMock.mockReset();
+  let testDir: string;
+
+  afterEach(() => {
+    if (testDir) {
+      rmSync(testDir, { recursive: true, force: true });
+    }
   });
 
-  it("returns the latest non-empty assistant text when the last assistant message is tool-only", () => {
-    readEntriesMock.mockReturnValue([
+  async function seedSession(sessionId: string, entries: SessionEntry[]) {
+    await writeEntries(sessionId, entries, { baseDir: testDir });
+  }
+
+  it("returns the latest non-empty assistant text when the last assistant message is tool-only", async () => {
+    testDir = mkdtempSync(join(tmpdir(), "announce-test-"));
+    const sessionId = "child-session-1";
+
+    await seedSession(sessionId, [
       {
         type: "message",
         timestamp: 1,
@@ -22,7 +31,7 @@ describe("readLatestAssistantReply", () => {
           role: "assistant",
           content: [{ type: "text", text: "南京天气：晴，12°C。" }],
         },
-      },
+      } as SessionEntry,
       {
         type: "message",
         timestamp: 2,
@@ -30,15 +39,18 @@ describe("readLatestAssistantReply", () => {
           role: "assistant",
           content: [{ type: "toolCall", id: "tool-1", name: "weather", arguments: { city: "Nanjing" } }],
         },
-      },
+      } as SessionEntry,
     ]);
 
-    const result = readLatestAssistantReply("child-session");
+    const result = readLatestAssistantReply(sessionId, { baseDir: testDir });
     expect(result).toBe("南京天气：晴，12°C。");
   });
 
-  it("falls back to latest toolResult text when no assistant text exists", () => {
-    readEntriesMock.mockReturnValue([
+  it("falls back to latest toolResult text when no assistant text exists", async () => {
+    testDir = mkdtempSync(join(tmpdir(), "announce-test-"));
+    const sessionId = "child-session-2";
+
+    await seedSession(sessionId, [
       {
         type: "message",
         timestamp: 1,
@@ -46,7 +58,7 @@ describe("readLatestAssistantReply", () => {
           role: "assistant",
           content: [{ type: "toolCall", id: "tool-2", name: "weather", arguments: { city: "Nanjing" } }],
         },
-      },
+      } as SessionEntry,
       {
         type: "message",
         timestamp: 2,
@@ -57,10 +69,10 @@ describe("readLatestAssistantReply", () => {
           content: [{ type: "text", text: "{\"city\":\"Nanjing\",\"tempC\":12,\"condition\":\"Sunny\"}" }],
           isError: false,
         },
-      },
+      } as SessionEntry,
     ]);
 
-    const result = readLatestAssistantReply("child-session");
+    const result = readLatestAssistantReply(sessionId, { baseDir: testDir });
     expect(result).toContain("\"city\":\"Nanjing\"");
     expect(result).toContain("\"condition\":\"Sunny\"");
   });
diff --git a/packages/core/src/agent/tools/sessions-list.test.ts b/packages/core/src/agent/tools/sessions-list.test.ts
index 989493de..230d8997 100644
--- a/packages/core/src/agent/tools/sessions-list.test.ts
+++ b/packages/core/src/agent/tools/sessions-list.test.ts
@@ -1,17 +1,7 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
+import { describe, it, expect, beforeEach } from "vitest";
 import type { SubagentRunRecord } from "../subagent/types.js";
-
-// Mock the registry module before importing the tool
-vi.mock("../subagent/registry.js", () => ({
-  listSubagentRuns: vi.fn(),
-  getSubagentRun: vi.fn(),
-}));
-
+import { resetSubagentRegistryForTests, seedSubagentRunForTests } from "../subagent/registry.js";
 import { createSessionsListTool } from "./sessions-list.js";
-import { listSubagentRuns, getSubagentRun } from "../subagent/registry.js";
-
-const mockListSubagentRuns = vi.mocked(listSubagentRuns);
-const mockGetSubagentRun = vi.mocked(getSubagentRun);
 
 function makeRecord(overrides: Partial<SubagentRunRecord> = {}): SubagentRunRecord {
   return {
@@ -27,11 +17,10 @@ function makeRecord(overrides: Partial<SubagentRunRecord> = {}): SubagentRunReco
 
 describe("sessions_list tool", () => {
   beforeEach(() => {
-    vi.clearAllMocks();
+    resetSubagentRegistryForTests();
   });
 
   it("returns empty message when no runs exist", async () => {
-    mockListSubagentRuns.mockReturnValue([]);
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", {});
 
@@ -44,12 +33,14 @@ describe("sessions_list tool", () => {
 
   it("lists multiple runs with correct status mapping", async () => {
     const now = Date.now();
-    const runs: SubagentRunRecord[] = [
+    seedSubagentRunForTests(
       makeRecord({
         runId: "run-aaa",
         label: "Code Review",
         startedAt: now - 45000,
       }),
+    );
+    seedSubagentRunForTests(
       makeRecord({
         runId: "run-bbb",
         label: "Test Analysis",
@@ -57,6 +48,8 @@ describe("sessions_list tool", () => {
         endedAt: now - 30000,
         outcome: { status: "ok" },
       }),
+    );
+    seedSubagentRunForTests(
       makeRecord({
         runId: "run-ccc",
         label: "Lint Check",
@@ -64,8 +57,7 @@ describe("sessions_list tool", () => {
         endedAt: now,
         outcome: { status: "error", error: "timeout" },
       }),
-    ];
-    mockListSubagentRuns.mockReturnValue(runs);
+    );
 
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", {});
@@ -88,17 +80,18 @@ describe("sessions_list tool", () => {
 
   it("returns detail for a specific runId", async () => {
     const now = Date.now();
-    const record = makeRecord({
-      runId: "run-detail",
-      label: "Deep Analysis",
-      task: "Analyze the authentication module thoroughly",
-      startedAt: now - 90000,
-      endedAt: now - 10000,
-      outcome: { status: "ok" },
-      findings: "Found 2 potential issues in token validation.",
-      findingsCaptured: true,
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+    seedSubagentRunForTests(
+      makeRecord({
+        runId: "run-detail",
+        label: "Deep Analysis",
+        task: "Analyze the authentication module thoroughly",
+        startedAt: now - 90000,
+        endedAt: now - 10000,
+        outcome: { status: "ok" },
+        findings: "Found 2 potential issues in token validation.",
+        findingsCaptured: true,
+      }),
+    );
 
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", { runId: "run-detail" });
@@ -115,8 +108,6 @@ describe("sessions_list tool", () => {
   });
 
   it("returns not found for unknown runId", async () => {
-    mockGetSubagentRun.mockReturnValue(undefined);
-
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", { runId: "nonexistent" });
 
@@ -126,11 +117,12 @@ describe("sessions_list tool", () => {
   });
 
   it("rejects runId belonging to a different requester", async () => {
-    const record = makeRecord({
-      runId: "run-other",
-      requesterSessionId: "other-parent",
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+    seedSubagentRunForTests(
+      makeRecord({
+        runId: "run-other",
+        requesterSessionId: "other-parent",
+      }),
+    );
 
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", { runId: "run-other" });
@@ -151,13 +143,14 @@ describe("sessions_list tool", () => {
 
   it("shows findings status for running task", async () => {
     const now = Date.now();
-    const record = makeRecord({
-      runId: "run-running",
-      label: "Still Running",
-      startedAt: now - 30000,
-      // no endedAt
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+    seedSubagentRunForTests(
+      makeRecord({
+        runId: "run-running",
+        label: "Still Running",
+        startedAt: now - 30000,
+        // no endedAt
+      }),
+    );
 
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", { runId: "run-running" });