Merge pull request #174 from multica-ai/forrestchang/arch-analysis

test(core): migrate tests to strict mock policy with real implementations
2026-02-14 00:13:24 +08:00 · 2026-02-14 00:13:24 +08:00 · c2a2c75b24
commit c2a2c75b24
parent 597f24a436 f6b6c2909b
11 changed files with 283 additions and 178 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -220,6 +220,74 @@ git add README.md
 git commit -m "docs: update API documentation"
 ```

+## Testing Guidelines
+
+### Mock Policy: External Only
+
+**CRITICAL RULE**: Only mock third-party/external dependencies. NEVER mock internal modules.
+
+| Type | Examples | Can Mock? |
+|------|----------|-----------|
+| Internal modules | `./runner.js`, `../utils/format.js` | NO |
+| Monorepo packages | `@multica/core`, `@multica/utils` | NO |
+| Third-party packages | `openai`, `@anthropic-ai/sdk`, `@mariozechner/*` | YES |
+| System/time APIs | `vi.useFakeTimers()`, `vi.setSystemTime()` | YES |
+| Network calls | External HTTP requests, WebSocket connections | YES |
+
+When AI writes code, tests become more valuable than the code itself. Mocking internal modules creates brittle tests that don't verify real integration between modules, hides bugs, and requires maintaining parallel mock implementations.
+
+### Preferred Patterns
+
+**Temp directories for I/O tests** (no filesystem mocking):
+```typescript
+const testDir = join(tmpdir(), `multica-test-${Date.now()}`);
+beforeEach(() => mkdirSync(testDir, { recursive: true }));
+afterEach(() => rmSync(testDir, { recursive: true, force: true }));
+```
+
+**Test reset functions for stateful modules**:
+```typescript
+// In the module itself:
+export function resetForTests() { /* clear in-memory state */ }
+
+// In tests:
+beforeEach(() => resetForTests());
+```
+
+**Pure function tests** — no mocking needed:
+```typescript
+const result = resolveContextWindowInfo({ modelContextWindow: 100_000 });
+expect(result.tokens).toBe(100_000);
+```
+
+**Constructor/parameter injection** over module mocking:
+```typescript
+// Good: pass baseDir as parameter
+const session = new SessionManager({ sessionId: "test", baseDir: testDir });
+
+// Bad: mock the paths module
+vi.mock("../../shared/paths.js", () => ({ DATA_DIR: "/tmp/test" }));
+```
+
+### Anti-Patterns
+
+- `vi.mock("./internal-module.js")` — NEVER mock internal modules
+- Mock objects with 10+ method stubs — sign you should use the real implementation
+- `vi.mock("../context-window/index.js")` with simplified logic — hides real behavior
+- Tests that pass but don't exercise any real code paths ("fake green")
+
+### Reference Tests
+
+Good patterns to follow:
+- `packages/core/src/agent/session/session-manager.display.test.ts` — real SessionManager + temp dirs
+- `packages/core/src/agent/skills/loader.test.ts` — real skill loading + temp filesystem
+- `packages/core/src/agent/context-window/guard.test.ts` — pure function tests
+- `packages/core/src/agent/subagent/registry.test.ts` — real registry + `resetSubagentRegistryForTests()`
+
+Known violations (to be migrated):
+- `packages/core/src/agent/async-agent.test.ts` — mocks internal `./runner.js`
+- `packages/core/src/agent/session/compaction.test.ts` — mocks internal `../context-window/index.js`
+
 ## Pre-push Checks

 Before pushing, always run:
--- a/packages/core/src/agent/auth-profiles/store.test.ts
+++ b/packages/core/src/agent/auth-profiles/store.test.ts
@ -1,20 +1,14 @@
 import { describe, it, expect, beforeEach, afterEach } from "vitest";
 import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
+import { tmpdir } from "node:os";
 import { coerceStore, loadAuthProfileStore, saveAuthProfileStore, updateAuthProfileStore } from "./store.js";
-import { AUTH_STORE_VERSION } from "./constants.js";
+import { AUTH_STORE_VERSION, AUTH_PROFILE_STORE_FILENAME } from "./constants.js";
 import type { AuthProfileStore } from "./types.js";

-// Use a temp directory for tests to avoid touching real store
-const TEST_DIR = join(import.meta.dirname ?? ".", "__test_store_tmp__");
-const TEST_STORE_PATH = join(TEST_DIR, "auth-profiles.json");
-
-// We need to mock resolveAuthStorePath to point to our test dir
-import { vi } from "vitest";
-
-vi.mock("../../shared/paths.js", () => ({
-  DATA_DIR: join(import.meta.dirname ?? ".", "__test_store_tmp__"),
-}));
+const TEST_DIR = join(tmpdir(), `multica-store-test-${process.pid}`);
+const TEST_STORE_PATH = join(TEST_DIR, AUTH_PROFILE_STORE_FILENAME);
+const storeOptions = { baseDir: TEST_DIR };

 beforeEach(() => {
  if (!existsSync(TEST_DIR)) {
@ -72,7 +66,7 @@ describe("coerceStore", () => {

 describe("loadAuthProfileStore / saveAuthProfileStore", () => {
  it("returns empty store when file does not exist", () => {
-    const store = loadAuthProfileStore();
+    const store = loadAuthProfileStore(storeOptions);
    expect(store.version).toBe(AUTH_STORE_VERSION);
  });

@ -84,14 +78,14 @@ describe("loadAuthProfileStore / saveAuthProfileStore", () => {
        "anthropic:main": { lastUsed: 5000, errorCount: 1 },
      },
    };
-    saveAuthProfileStore(original);
-    const loaded = loadAuthProfileStore();
+    saveAuthProfileStore(original, storeOptions);
+    const loaded = loadAuthProfileStore(storeOptions);
    expect(loaded).toEqual(original);
  });

  it("handles corrupted JSON gracefully", () => {
    writeFileSync(TEST_STORE_PATH, "not valid json{{{", "utf8");
-    const store = loadAuthProfileStore();
+    const store = loadAuthProfileStore(storeOptions);
    expect(store.version).toBe(AUTH_STORE_VERSION);
  });
 });
@ -105,11 +99,11 @@ describe("updateAuthProfileStore", () => {
    const result = updateAuthProfileStore((store) => {
      if (!store.lastGood) store.lastGood = {};
      store.lastGood.openai = "openai:primary";
-    });
+    }, storeOptions);
    expect(result.lastGood?.openai).toBe("openai:primary");

    // Verify persisted
-    const loaded = loadAuthProfileStore();
+    const loaded = loadAuthProfileStore(storeOptions);
    expect(loaded.lastGood?.openai).toBe("openai:primary");
  });

@ -117,14 +111,14 @@ describe("updateAuthProfileStore", () => {
    saveAuthProfileStore({
      version: 1,
      lastGood: { anthropic: "anthropic" },
-    });
+    }, storeOptions);

    updateAuthProfileStore((store) => {
      if (!store.usageStats) store.usageStats = {};
      store.usageStats["anthropic"] = { lastUsed: 9999 };
-    });
+    }, storeOptions);

-    const loaded = loadAuthProfileStore();
+    const loaded = loadAuthProfileStore(storeOptions);
    expect(loaded.lastGood?.anthropic).toBe("anthropic");
    expect(loaded.usageStats?.anthropic?.lastUsed).toBe(9999);
  });
--- a/packages/core/src/agent/auth-profiles/store.ts
+++ b/packages/core/src/agent/auth-profiles/store.ts
@ -115,9 +115,14 @@ function acquireLockSync(filePath: string): () => void {
 // Paths
 // ============================================================

+export interface AuthStoreOptions {
+  /** Override the base directory (defaults to DATA_DIR). Useful for testing. */
+  baseDir?: string;
+}
+
 /** Resolve the auth profile store file path */
-export function resolveAuthStorePath(): string {
-  return join(DATA_DIR, AUTH_PROFILE_STORE_FILENAME);
+export function resolveAuthStorePath(options?: AuthStoreOptions): string {
+  return join(options?.baseDir ?? DATA_DIR, AUTH_PROFILE_STORE_FILENAME);
 }

 // ============================================================
@ -148,8 +153,8 @@ export function coerceStore(raw: unknown): AuthProfileStore {
 }

 /** Ensure the store file exists on disk (creates it if missing) */
-export function ensureAuthStoreFile(): string {
-  const storePath = resolveAuthStorePath();
+export function ensureAuthStoreFile(options?: AuthStoreOptions): string {
+  const storePath = resolveAuthStorePath(options);
  const dir = dirname(storePath);
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
@ -161,8 +166,8 @@ export function ensureAuthStoreFile(): string {
 }

 /** Load auth profile store from disk. Returns empty store if file doesn't exist. */
-export function loadAuthProfileStore(): AuthProfileStore {
-  const storePath = resolveAuthStorePath();
+export function loadAuthProfileStore(options?: AuthStoreOptions): AuthProfileStore {
+  const storePath = resolveAuthStorePath(options);
  if (!existsSync(storePath)) return createEmptyStore();

  try {
@ -174,8 +179,8 @@ export function loadAuthProfileStore(): AuthProfileStore {
 }

 /** Save auth profile store to disk */
-export function saveAuthProfileStore(store: AuthProfileStore): void {
-  const storePath = resolveAuthStorePath();
+export function saveAuthProfileStore(store: AuthProfileStore, options?: AuthStoreOptions): void {
+  const storePath = resolveAuthStorePath(options);
  const dir = dirname(storePath);
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
@ -191,24 +196,25 @@ export function saveAuthProfileStore(store: AuthProfileStore): void {
 */
 export function updateAuthProfileStore(
  updater: (store: AuthProfileStore) => void,
+  options?: AuthStoreOptions,
 ): AuthProfileStore {
-  const storePath = ensureAuthStoreFile();
+  const storePath = ensureAuthStoreFile(options);

  try {
    const release = acquireLockSync(storePath);
    try {
-      const store = loadAuthProfileStore();
+      const store = loadAuthProfileStore(options);
      updater(store);
-      saveAuthProfileStore(store);
+      saveAuthProfileStore(store, options);
      return store;
    } finally {
      release();
    }
  } catch {
    // Fallback: unlocked update (better than losing the write entirely)
-    const store = loadAuthProfileStore();
+    const store = loadAuthProfileStore(options);
    updater(store);
-    saveAuthProfileStore(store);
+    saveAuthProfileStore(store, options);
    return store;
  }
 }
--- a/packages/core/src/agent/session/compaction.test.ts
+++ b/packages/core/src/agent/session/compaction.test.ts
@ -7,59 +7,17 @@ import {
 } from "./compaction.js";
 import type { AgentMessage } from "@mariozechner/pi-agent-core";

-// Mock the token estimation functions
-vi.mock("../context-window/index.js", async () => {
-  const actual = await vi.importActual("../context-window/index.js");
-  return {
-    ...(actual as object),
-    estimateMessagesTokens: (messages: AgentMessage[]) => {
-      // Simple mock: 10 tokens per message
-      return messages.length * 10;
+// Mock only the third-party dependency (allowed by mock policy).
+// The internal context-window module uses real logic.
+vi.mock("@mariozechner/pi-coding-agent", () => ({
+  estimateTokens: (message: AgentMessage) => {
+    const msg = message as any;
+    if (typeof msg.content === "string") {
+      return Math.ceil(msg.content.length / 4);
+    }
+    return 50;
  },
-    compactMessagesTokenAware: (
-      messages: AgentMessage[],
-      availableTokens: number,
-      options?: { targetRatio?: number; minKeepMessages?: number },
-    ) => {
-      const minKeep = options?.minKeepMessages ?? 10;
-      if (messages.length <= minKeep) return null;
-
-      const targetTokens = availableTokens * (options?.targetRatio ?? 0.5);
-      const currentTokens = messages.length * 10;
-      if (currentTokens <= targetTokens) return null;
-
-      // Keep enough messages to be under target
-      const keepCount = Math.max(minKeep, Math.floor(targetTokens / 10));
-      const kept = messages.slice(-keepCount);
-
-      return {
-        kept,
-        removedCount: messages.length - kept.length,
-        tokensRemoved: (messages.length - kept.length) * 10,
-        tokensKept: kept.length * 10,
-      };
-    },
-    estimateTokenUsage: (params: any) => {
-      const messageTokens = params.messages.length * 10;
-      const systemPromptTokens = params.systemPrompt ? 100 : 0;
-      const reserve = params.reserveTokens ?? 1024;
-      const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve);
-      const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1;
-
-      return {
-        messageTokens,
-        systemPromptTokens,
-        availableTokens,
-        utilizationRatio,
-      };
-    },
-    shouldCompact: (estimation: any) => estimation.utilizationRatio >= 0.8,
-    compactMessagesWithSummary: vi.fn(),
-    compactMessagesWithChunkedSummary: vi.fn(),
-    COMPACTION_TARGET_RATIO: 0.5,
-    MIN_KEEP_MESSAGES: 10,
-  };
-});
+}));

 describe("compaction", () => {
  function createMessages(count: number, prefix = "Message"): AgentMessage[] {
@ -162,14 +120,14 @@ describe("compaction", () => {
  describe("compactMessagesByTokens", () => {
    it("should return null when under token limit", () => {
      const messages = createMessages(5);
-      // 5 messages * 10 tokens = 50 tokens, target = 1000 * 0.5 = 500
+      // ~15 tokens total, target = 1000 * 0.5 = 500 → no compact needed
      const result = compactMessagesByTokens(messages, 1000);
      expect(result).toBeNull();
    });

    it("should compact when over token limit", () => {
      const messages = createMessages(100);
-      // 100 messages * 10 tokens = 1000 tokens, target = 200 * 0.5 = 100
+      // ~300 tokens total, target = 200 * 0.5 = 100 → needs compact
      const result = compactMessagesByTokens(messages, 200, {
        targetRatio: 0.5,
        minKeepMessages: 5,
@ -231,14 +189,15 @@ describe("compaction", () => {
    describe("tokens mode", () => {
      it("should use token-based compaction when utilization is high", () => {
        const messages = createMessages(100);
-        // 100 * 10 = 1000 message tokens
-        // System: 100 tokens, Reserve: 1024
-        // Available: 2000 - 100 - 1024 = 876
-        // Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8
+        // ~300 message tokens (real estimator: ~3 tokens/msg)
+        // systemPromptTokens ≈ 7, reserveTokens = 0
+        // available = 500 - 7 = 493
+        // utilization = (300 * 1.5) / 493 ≈ 0.91 > 0.8 → should compact
        const result = compactMessages(messages, {
          mode: "tokens",
-          contextWindowTokens: 2000,
+          contextWindowTokens: 500,
          systemPrompt: "System prompt",
+          reserveTokens: 0,
        });

        expect(result).not.toBeNull();
@ -247,9 +206,9 @@ describe("compaction", () => {

      it("should return null when utilization is low", () => {
        const messages = createMessages(5);
-        // 5 * 10 = 50 message tokens
-        // Available: 10000 - 100 - 1024 = 8876
-        // Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8
+        // ~15 message tokens
+        // available = 10000 - 7 - 1024 = 8969
+        // utilization = (15 * 1.5) / 8969 ≈ 0.003 < 0.8
        const result = compactMessages(messages, {
          mode: "tokens",
          contextWindowTokens: 10000,
--- a/packages/core/src/agent/session/session-file-repair.test.ts
+++ b/packages/core/src/agent/session/session-file-repair.test.ts
@ -1,25 +1,10 @@
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
-import { beforeEach, describe, expect, it, vi } from "vitest";
+import { describe, expect, it, vi } from "vitest";
 import { repairSessionFileIfNeeded } from "./session-file-repair.js";
-import { acquireSessionWriteLock } from "./session-write-lock.js";
-
-vi.mock("./session-write-lock.js", async () => {
-  const actual = await vi.importActual<typeof import("./session-write-lock.js")>(
-    "./session-write-lock.js",
-  );
-  return {
-    ...actual,
-    acquireSessionWriteLock: vi.fn(actual.acquireSessionWriteLock),
-  };
-});

 describe("repairSessionFileIfNeeded", () => {
-  beforeEach(() => {
-    vi.mocked(acquireSessionWriteLock).mockClear();
-  });
-
  it("rewrites session files that contain malformed lines", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "multica-session-repair-"));
    const file = path.join(dir, "session.jsonl");
@ -93,13 +78,16 @@ describe("repairSessionFileIfNeeded", () => {
    expect(warn).toHaveBeenCalledTimes(1);
  });

-  it("acquires a write lock while repairing", async () => {
+  it("acquires a write lock while repairing (lock file cleaned up after)", async () => {
    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "multica-session-repair-"));
    const file = path.join(dir, "session.jsonl");
    await fs.writeFile(file, "{broken\n{also broken\n", "utf-8");

    await repairSessionFileIfNeeded({ sessionFile: file });

-    expect(vi.mocked(acquireSessionWriteLock)).toHaveBeenCalledTimes(1);
+    // Lock file should be released (cleaned up) after repair
+    const lockFile = `${file}.lock`;
+    const lockExists = await fs.access(lockFile).then(() => true, () => false);
+    expect(lockExists).toBe(false);
  });
 });
--- a/packages/core/src/agent/subagent/announce-findings.test.ts
+++ b/packages/core/src/agent/subagent/announce-findings.test.ts
@ -1,20 +1,29 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
-
-const readEntriesMock = vi.fn();
-
-vi.mock("../session/storage.js", () => ({
-  readEntries: (sessionId: string) => readEntriesMock(sessionId),
-}));
-
+import { describe, it, expect, afterEach } from "vitest";
+import { mkdtempSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { writeEntries } from "../session/storage.js";
 import { readLatestAssistantReply } from "./announce.js";
+import type { SessionEntry } from "../session/types.js";

 describe("readLatestAssistantReply", () => {
-  beforeEach(() => {
-    readEntriesMock.mockReset();
+  let testDir: string;
+
+  afterEach(() => {
+    if (testDir) {
+      rmSync(testDir, { recursive: true, force: true });
+    }
  });

-  it("returns the latest non-empty assistant text when the last assistant message is tool-only", () => {
-    readEntriesMock.mockReturnValue([
+  async function seedSession(sessionId: string, entries: SessionEntry[]) {
+    await writeEntries(sessionId, entries, { baseDir: testDir });
+  }
+
+  it("returns the latest non-empty assistant text when the last assistant message is tool-only", async () => {
+    testDir = mkdtempSync(join(tmpdir(), "announce-test-"));
+    const sessionId = "child-session-1";
+
+    await seedSession(sessionId, [
      {
        type: "message",
        timestamp: 1,
@ -22,7 +31,7 @@ describe("readLatestAssistantReply", () => {
          role: "assistant",
          content: [{ type: "text", text: "南京天气：晴，12°C。" }],
        },
-      },
+      } as SessionEntry,
      {
        type: "message",
        timestamp: 2,
@ -30,15 +39,18 @@ describe("readLatestAssistantReply", () => {
          role: "assistant",
          content: [{ type: "toolCall", id: "tool-1", name: "weather", arguments: { city: "Nanjing" } }],
        },
-      },
+      } as SessionEntry,
    ]);

-    const result = readLatestAssistantReply("child-session");
+    const result = readLatestAssistantReply(sessionId, { baseDir: testDir });
    expect(result).toBe("南京天气：晴，12°C。");
  });

-  it("falls back to latest toolResult text when no assistant text exists", () => {
-    readEntriesMock.mockReturnValue([
+  it("falls back to latest toolResult text when no assistant text exists", async () => {
+    testDir = mkdtempSync(join(tmpdir(), "announce-test-"));
+    const sessionId = "child-session-2";
+
+    await seedSession(sessionId, [
      {
        type: "message",
        timestamp: 1,
@ -46,7 +58,7 @@ describe("readLatestAssistantReply", () => {
          role: "assistant",
          content: [{ type: "toolCall", id: "tool-2", name: "weather", arguments: { city: "Nanjing" } }],
        },
-      },
+      } as SessionEntry,
      {
        type: "message",
        timestamp: 2,
@ -57,10 +69,10 @@ describe("readLatestAssistantReply", () => {
          content: [{ type: "text", text: "{\"city\":\"Nanjing\",\"tempC\":12,\"condition\":\"Sunny\"}" }],
          isError: false,
        },
-      },
+      } as SessionEntry,
    ]);

-    const result = readLatestAssistantReply("child-session");
+    const result = readLatestAssistantReply(sessionId, { baseDir: testDir });
    expect(result).toContain("\"city\":\"Nanjing\"");
    expect(result).toContain("\"condition\":\"Sunny\"");
  });
--- a/packages/core/src/agent/subagent/announce.ts
+++ b/packages/core/src/agent/subagent/announce.ts
@ -38,8 +38,11 @@ export function buildSubagentSystemPrompt(params: SubagentSystemPromptParams): s
 /**
 * Read the latest assistant reply from a session's JSONL file.
 */
-export function readLatestAssistantReply(sessionId: string): string | undefined {
-  const entries = readEntries(sessionId);
+export function readLatestAssistantReply(
+  sessionId: string,
+  options?: { baseDir?: string },
+): string | undefined {
+  const entries = readEntries(sessionId, options);
  let latestToolResultText: string | undefined;

  // Walk backwards to find the last non-empty assistant reply.
--- a/packages/core/src/agent/subagent/registry.ts
+++ b/packages/core/src/agent/subagent/registry.ts
@ -244,6 +244,11 @@ export function resetSubagentRegistryForTests(): void {
  stopSweeper();
 }

+/** Seed a run record directly (for testing). Bypasses persistence and side effects. */
+export function seedSubagentRunForTests(record: SubagentRunRecord): void {
+  subagentRuns.set(record.runId, record);
+}
+
 // ============================================================================
 // Lifecycle watching
 // ============================================================================
--- a/packages/core/src/agent/tools/sessions-list.test.ts
+++ b/packages/core/src/agent/tools/sessions-list.test.ts
@ -1,17 +1,7 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
+import { describe, it, expect, beforeEach } from "vitest";
 import type { SubagentRunRecord } from "../subagent/types.js";
-
-// Mock the registry module before importing the tool
-vi.mock("../subagent/registry.js", () => ({
-  listSubagentRuns: vi.fn(),
-  getSubagentRun: vi.fn(),
-}));
-
+import { resetSubagentRegistryForTests, seedSubagentRunForTests } from "../subagent/registry.js";
 import { createSessionsListTool } from "./sessions-list.js";
-import { listSubagentRuns, getSubagentRun } from "../subagent/registry.js";
-
-const mockListSubagentRuns = vi.mocked(listSubagentRuns);
-const mockGetSubagentRun = vi.mocked(getSubagentRun);

 function makeRecord(overrides: Partial<SubagentRunRecord> = {}): SubagentRunRecord {
  return {
@ -27,11 +17,10 @@ function makeRecord(overrides: Partial<SubagentRunRecord> = {}): SubagentRunReco

 describe("sessions_list tool", () => {
  beforeEach(() => {
-    vi.clearAllMocks();
+    resetSubagentRegistryForTests();
  });

  it("returns empty message when no runs exist", async () => {
-    mockListSubagentRuns.mockReturnValue([]);
    const tool = createSessionsListTool({ sessionId: "parent-001" });
    const result = await tool.execute("call-1", {});

@ -44,12 +33,14 @@ describe("sessions_list tool", () => {

  it("lists multiple runs with correct status mapping", async () => {
    const now = Date.now();
-    const runs: SubagentRunRecord[] = [
+    seedSubagentRunForTests(
      makeRecord({
        runId: "run-aaa",
        label: "Code Review",
        startedAt: now - 45000,
      }),
+    );
+    seedSubagentRunForTests(
      makeRecord({
        runId: "run-bbb",
        label: "Test Analysis",
@ -57,6 +48,8 @@ describe("sessions_list tool", () => {
        endedAt: now - 30000,
        outcome: { status: "ok" },
      }),
+    );
+    seedSubagentRunForTests(
      makeRecord({
        runId: "run-ccc",
        label: "Lint Check",
@ -64,8 +57,7 @@ describe("sessions_list tool", () => {
        endedAt: now,
        outcome: { status: "error", error: "timeout" },
      }),
-    ];
-    mockListSubagentRuns.mockReturnValue(runs);
+    );

    const tool = createSessionsListTool({ sessionId: "parent-001" });
    const result = await tool.execute("call-1", {});
@ -88,7 +80,8 @@ describe("sessions_list tool", () => {

  it("returns detail for a specific runId", async () => {
    const now = Date.now();
-    const record = makeRecord({
+    seedSubagentRunForTests(
+      makeRecord({
        runId: "run-detail",
        label: "Deep Analysis",
        task: "Analyze the authentication module thoroughly",
@ -97,8 +90,8 @@ describe("sessions_list tool", () => {
        outcome: { status: "ok" },
        findings: "Found 2 potential issues in token validation.",
        findingsCaptured: true,
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+      }),
+    );

    const tool = createSessionsListTool({ sessionId: "parent-001" });
    const result = await tool.execute("call-1", { runId: "run-detail" });
@ -115,8 +108,6 @@ describe("sessions_list tool", () => {
  });

  it("returns not found for unknown runId", async () => {
-    mockGetSubagentRun.mockReturnValue(undefined);
-
    const tool = createSessionsListTool({ sessionId: "parent-001" });
    const result = await tool.execute("call-1", { runId: "nonexistent" });

@ -126,11 +117,12 @@ describe("sessions_list tool", () => {
  });

  it("rejects runId belonging to a different requester", async () => {
-    const record = makeRecord({
+    seedSubagentRunForTests(
+      makeRecord({
        runId: "run-other",
        requesterSessionId: "other-parent",
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+      }),
+    );

    const tool = createSessionsListTool({ sessionId: "parent-001" });
    const result = await tool.execute("call-1", { runId: "run-other" });
@ -151,13 +143,14 @@ describe("sessions_list tool", () => {

  it("shows findings status for running task", async () => {
    const now = Date.now();
-    const record = makeRecord({
+    seedSubagentRunForTests(
+      makeRecord({
        runId: "run-running",
        label: "Still Running",
        startedAt: now - 30000,
        // no endedAt
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+      }),
+    );

    const tool = createSessionsListTool({ sessionId: "parent-001" });
    const result = await tool.execute("call-1", { runId: "run-running" });
--- a/vitest.config.ts
+++ b/vitest.config.ts
@ -1,9 +1,14 @@
+import { fileURLToPath } from "node:url";
+import path from "node:path";
 import { defineConfig } from "vitest/config";

+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
 export default defineConfig({
  test: {
    globals: true,
    environment: "node",
+    setupFiles: [path.resolve(__dirname, "vitest.setup.ts")],
    include: ["src/**/*.test.ts"],
    coverage: {
      provider: "v8",
--- a/vitest.setup.ts
+++ b/vitest.setup.ts
@ -0,0 +1,72 @@
+/**
+ * Vitest global setup — statically scans test files for internal mock violations.
+ *
+ * Rule: only mock third-party / external dependencies.
+ * Internal modules (relative imports, @multica/* packages) must NOT be mocked.
+ *
+ * This runs once at startup and logs warnings for violations without
+ * interfering with vitest's mock hoisting mechanism.
+ */
+import { readFileSync, readdirSync, statSync } from "node:fs";
+import { join, resolve } from "node:path";
+
+const INTERNAL_MOCK_PATTERN =
+  /vi\.mock\(\s*["'](\.\/|\.\.\/|@multica\/)[^"']*["']/g;
+
+function scanDirectory(dir: string): string[] {
+  const files: string[] = [];
+  try {
+    for (const entry of readdirSync(dir)) {
+      const full = join(dir, entry);
+      const stat = statSync(full, { throwIfNoEntry: false });
+      if (!stat) continue;
+      if (stat.isDirectory() && entry !== "node_modules") {
+        files.push(...scanDirectory(full));
+      } else if (entry.endsWith(".test.ts")) {
+        files.push(full);
+      }
+    }
+  } catch {
+    // ignore unreadable directories
+  }
+  return files;
+}
+
+function checkMockPolicy(): void {
+  const srcDir = resolve(process.cwd(), "src");
+  const testFiles = scanDirectory(srcDir);
+  const violations: Array<{ file: string; line: number; match: string }> = [];
+
+  for (const file of testFiles) {
+    const content = readFileSync(file, "utf-8");
+    const lines = content.split("\n");
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i];
+      INTERNAL_MOCK_PATTERN.lastIndex = 0;
+      const match = INTERNAL_MOCK_PATTERN.exec(line);
+      if (match) {
+        violations.push({
+          file: file.replace(process.cwd() + "/", ""),
+          line: i + 1,
+          match: match[0],
+        });
+      }
+    }
+  }
+
+  if (violations.length > 0) {
+    console.warn("\n[mock-policy] Internal module mock violations detected:");
+    console.warn(
+      "  Rule: Only mock third-party dependencies. See CLAUDE.md Testing Guidelines.\n",
+    );
+    for (const v of violations) {
+      console.warn(`  ${v.file}:${v.line}`);
+      console.warn(`    ${v.match}\n`);
+    }
+    console.warn(
+      `  Total: ${violations.length} violation(s) in ${new Set(violations.map((v) => v.file)).size} file(s)\n`,
+    );
+  }
+}
+
+checkMockPolicy();