diff --git a/packages/core/src/agent/auth-profiles/store.test.ts b/packages/core/src/agent/auth-profiles/store.test.ts
index 4e2d78f5..0601d56a 100644
--- a/packages/core/src/agent/auth-profiles/store.test.ts
+++ b/packages/core/src/agent/auth-profiles/store.test.ts
@@ -1,20 +1,14 @@
 import { describe, it, expect, beforeEach, afterEach } from "vitest";
 import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
+import { tmpdir } from "node:os";
 import { coerceStore, loadAuthProfileStore, saveAuthProfileStore, updateAuthProfileStore } from "./store.js";
-import { AUTH_STORE_VERSION } from "./constants.js";
+import { AUTH_STORE_VERSION, AUTH_PROFILE_STORE_FILENAME } from "./constants.js";
 import type { AuthProfileStore } from "./types.js";
 
-// Use a temp directory for tests to avoid touching real store
-const TEST_DIR = join(import.meta.dirname ?? ".", "__test_store_tmp__");
-const TEST_STORE_PATH = join(TEST_DIR, "auth-profiles.json");
-
-// We need to mock resolveAuthStorePath to point to our test dir
-import { vi } from "vitest";
-
-vi.mock("../../shared/paths.js", () => ({
-  DATA_DIR: join(import.meta.dirname ?? ".", "__test_store_tmp__"),
-}));
+const TEST_DIR = join(tmpdir(), `multica-store-test-${process.pid}`);
+const TEST_STORE_PATH = join(TEST_DIR, AUTH_PROFILE_STORE_FILENAME);
+const storeOptions = { baseDir: TEST_DIR };
 
 beforeEach(() => {
   if (!existsSync(TEST_DIR)) {
@@ -72,7 +66,7 @@ describe("coerceStore", () => {
 
 describe("loadAuthProfileStore / saveAuthProfileStore", () => {
   it("returns empty store when file does not exist", () => {
-    const store = loadAuthProfileStore();
+    const store = loadAuthProfileStore(storeOptions);
     expect(store.version).toBe(AUTH_STORE_VERSION);
   });
 
@@ -84,14 +78,14 @@ describe("loadAuthProfileStore / saveAuthProfileStore", () => {
         "anthropic:main": { lastUsed: 5000, errorCount: 1 },
       },
     };
-    saveAuthProfileStore(original);
-    const loaded = loadAuthProfileStore();
+    saveAuthProfileStore(original, storeOptions);
+    const loaded = loadAuthProfileStore(storeOptions);
     expect(loaded).toEqual(original);
   });
 
   it("handles corrupted JSON gracefully", () => {
     writeFileSync(TEST_STORE_PATH, "not valid json{{{", "utf8");
-    const store = loadAuthProfileStore();
+    const store = loadAuthProfileStore(storeOptions);
     expect(store.version).toBe(AUTH_STORE_VERSION);
   });
 });
@@ -105,11 +99,11 @@ describe("updateAuthProfileStore", () => {
     const result = updateAuthProfileStore((store) => {
       if (!store.lastGood) store.lastGood = {};
       store.lastGood.openai = "openai:primary";
-    });
+    }, storeOptions);
     expect(result.lastGood?.openai).toBe("openai:primary");
 
     // Verify persisted
-    const loaded = loadAuthProfileStore();
+    const loaded = loadAuthProfileStore(storeOptions);
     expect(loaded.lastGood?.openai).toBe("openai:primary");
   });
 
@@ -117,14 +111,14 @@ describe("updateAuthProfileStore", () => {
     saveAuthProfileStore({
       version: 1,
       lastGood: { anthropic: "anthropic" },
-    });
+    }, storeOptions);
 
     updateAuthProfileStore((store) => {
       if (!store.usageStats) store.usageStats = {};
       store.usageStats["anthropic"] = { lastUsed: 9999 };
-    });
+    }, storeOptions);
 
-    const loaded = loadAuthProfileStore();
+    const loaded = loadAuthProfileStore(storeOptions);
     expect(loaded.lastGood?.anthropic).toBe("anthropic");
     expect(loaded.usageStats?.anthropic?.lastUsed).toBe(9999);
   });
diff --git a/packages/core/src/agent/session/compaction.test.ts b/packages/core/src/agent/session/compaction.test.ts
index 3529b59d..e3087816 100644
--- a/packages/core/src/agent/session/compaction.test.ts
+++ b/packages/core/src/agent/session/compaction.test.ts
@@ -7,59 +7,17 @@ import {
 } from "./compaction.js";
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 
-// Mock the token estimation functions
-vi.mock("../context-window/index.js", async () => {
-  const actual = await vi.importActual("../context-window/index.js");
-  return {
-    ...(actual as object),
-    estimateMessagesTokens: (messages: AgentMessage[]) => {
-      // Simple mock: 10 tokens per message
-      return messages.length * 10;
-    },
-    compactMessagesTokenAware: (
-      messages: AgentMessage[],
-      availableTokens: number,
-      options?: { targetRatio?: number; minKeepMessages?: number },
-    ) => {
-      const minKeep = options?.minKeepMessages ?? 10;
-      if (messages.length <= minKeep) return null;
-
-      const targetTokens = availableTokens * (options?.targetRatio ?? 0.5);
-      const currentTokens = messages.length * 10;
-      if (currentTokens <= targetTokens) return null;
-
-      // Keep enough messages to be under target
-      const keepCount = Math.max(minKeep, Math.floor(targetTokens / 10));
-      const kept = messages.slice(-keepCount);
-
-      return {
-        kept,
-        removedCount: messages.length - kept.length,
-        tokensRemoved: (messages.length - kept.length) * 10,
-        tokensKept: kept.length * 10,
-      };
-    },
-    estimateTokenUsage: (params: any) => {
-      const messageTokens = params.messages.length * 10;
-      const systemPromptTokens = params.systemPrompt ? 100 : 0;
-      const reserve = params.reserveTokens ?? 1024;
-      const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve);
-      const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1;
-
-      return {
-        messageTokens,
-        systemPromptTokens,
-        availableTokens,
-        utilizationRatio,
-      };
-    },
-    shouldCompact: (estimation: any) => estimation.utilizationRatio >= 0.8,
-    compactMessagesWithSummary: vi.fn(),
-    compactMessagesWithChunkedSummary: vi.fn(),
-    COMPACTION_TARGET_RATIO: 0.5,
-    MIN_KEEP_MESSAGES: 10,
-  };
-});
+// Mock only the third-party dependency (allowed by mock policy).
+// The internal context-window module uses real logic.
+vi.mock("@mariozechner/pi-coding-agent", () => ({
+  estimateTokens: (message: AgentMessage) => {
+    const msg = message as any;
+    if (typeof msg.content === "string") {
+      return Math.ceil(msg.content.length / 4);
+    }
+    return 50;
+  },
+}));
 
 describe("compaction", () => {
   function createMessages(count: number, prefix = "Message"): AgentMessage[] {
@@ -162,14 +120,14 @@ describe("compaction", () => {
   describe("compactMessagesByTokens", () => {
     it("should return null when under token limit", () => {
       const messages = createMessages(5);
-      // 5 messages * 10 tokens = 50 tokens, target = 1000 * 0.5 = 500
+      // ~15 tokens total, target = 1000 * 0.5 = 500 → no compact needed
       const result = compactMessagesByTokens(messages, 1000);
       expect(result).toBeNull();
     });
 
     it("should compact when over token limit", () => {
       const messages = createMessages(100);
-      // 100 messages * 10 tokens = 1000 tokens, target = 200 * 0.5 = 100
+      // ~300 tokens total, target = 200 * 0.5 = 100 → needs compact
       const result = compactMessagesByTokens(messages, 200, {
         targetRatio: 0.5,
         minKeepMessages: 5,
@@ -231,14 +189,15 @@ describe("compaction", () => {
     describe("tokens mode", () => {
       it("should use token-based compaction when utilization is high", () => {
         const messages = createMessages(100);
-        // 100 * 10 = 1000 message tokens
-        // System: 100 tokens, Reserve: 1024
-        // Available: 2000 - 100 - 1024 = 876
-        // Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8
+        // ~300 message tokens (real estimator: ~3 tokens/msg)
+        // systemPromptTokens ≈ 7, reserveTokens = 0
+        // available = 500 - 7 = 493
+        // utilization = (300 * 1.5) / 493 ≈ 0.91 > 0.8 → should compact
         const result = compactMessages(messages, {
           mode: "tokens",
-          contextWindowTokens: 2000,
+          contextWindowTokens: 500,
           systemPrompt: "System prompt",
+          reserveTokens: 0,
         });
 
         expect(result).not.toBeNull();
@@ -247,9 +206,9 @@ describe("compaction", () => {
 
       it("should return null when utilization is low", () => {
         const messages = createMessages(5);
-        // 5 * 10 = 50 message tokens
-        // Available: 10000 - 100 - 1024 = 8876
-        // Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8
+        // ~15 message tokens
+        // available = 10000 - 7 - 1024 = 8969
+        // utilization = (15 * 1.5) / 8969 ≈ 0.003 < 0.8
         const result = compactMessages(messages, {
           mode: "tokens",
           contextWindowTokens: 10000,
diff --git a/packages/core/src/agent/session/session-file-repair.test.ts b/packages/core/src/agent/session/session-file-repair.test.ts
index b6ef250e..2847f5b8 100644
--- a/packages/core/src/agent/session/session-file-repair.test.ts
+++ b/packages/core/src/agent/session/session-file-repair.test.ts
@@ -1,25 +1,10 @@
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
-import { beforeEach, describe, expect, it, vi } from "vitest";
+import { describe, expect, it, vi } from "vitest";
 import { repairSessionFileIfNeeded } from "./session-file-repair.js";
-import { acquireSessionWriteLock } from "./session-write-lock.js";
-
-vi.mock("./session-write-lock.js", async () => {
-  const actual = await vi.importActual<typeof import("./session-write-lock.js")>(
-    "./session-write-lock.js",
-  );
-  return {
-    ...actual,
-    acquireSessionWriteLock: vi.fn(actual.acquireSessionWriteLock),
-  };
-});
 
 describe("repairSessionFileIfNeeded", () => {
-  beforeEach(() => {
-    vi.mocked(acquireSessionWriteLock).mockClear();
-  });
-
   it("rewrites session files that contain malformed lines", async () => {
     const dir = await fs.mkdtemp(path.join(os.tmpdir(), "multica-session-repair-"));
     const file = path.join(dir, "session.jsonl");
@@ -93,13 +78,16 @@ describe("repairSessionFileIfNeeded", () => {
     expect(warn).toHaveBeenCalledTimes(1);
   });
 
-  it("acquires a write lock while repairing", async () => {
+  it("acquires a write lock while repairing (lock file cleaned up after)", async () => {
     const dir = await fs.mkdtemp(path.join(os.tmpdir(), "multica-session-repair-"));
     const file = path.join(dir, "session.jsonl");
     await fs.writeFile(file, "{broken\n{also broken\n", "utf-8");
 
     await repairSessionFileIfNeeded({ sessionFile: file });
 
-    expect(vi.mocked(acquireSessionWriteLock)).toHaveBeenCalledTimes(1);
+    // Lock file should be released (cleaned up) after repair
+    const lockFile = `${file}.lock`;
+    const lockExists = await fs.access(lockFile).then(() => true, () => false);
+    expect(lockExists).toBe(false);
   });
 });
diff --git a/packages/core/src/agent/subagent/announce-findings.test.ts b/packages/core/src/agent/subagent/announce-findings.test.ts
index 6a52fc76..71f76448 100644
--- a/packages/core/src/agent/subagent/announce-findings.test.ts
+++ b/packages/core/src/agent/subagent/announce-findings.test.ts
@@ -1,20 +1,29 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
-
-const readEntriesMock = vi.fn();
-
-vi.mock("../session/storage.js", () => ({
-  readEntries: (sessionId: string) => readEntriesMock(sessionId),
-}));
-
+import { describe, it, expect, afterEach } from "vitest";
+import { mkdtempSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { writeEntries } from "../session/storage.js";
 import { readLatestAssistantReply } from "./announce.js";
+import type { SessionEntry } from "../session/types.js";
 
 describe("readLatestAssistantReply", () => {
-  beforeEach(() => {
-    readEntriesMock.mockReset();
+  let testDir: string;
+
+  afterEach(() => {
+    if (testDir) {
+      rmSync(testDir, { recursive: true, force: true });
+    }
   });
 
-  it("returns the latest non-empty assistant text when the last assistant message is tool-only", () => {
-    readEntriesMock.mockReturnValue([
+  async function seedSession(sessionId: string, entries: SessionEntry[]) {
+    await writeEntries(sessionId, entries, { baseDir: testDir });
+  }
+
+  it("returns the latest non-empty assistant text when the last assistant message is tool-only", async () => {
+    testDir = mkdtempSync(join(tmpdir(), "announce-test-"));
+    const sessionId = "child-session-1";
+
+    await seedSession(sessionId, [
       {
         type: "message",
         timestamp: 1,
@@ -22,7 +31,7 @@ describe("readLatestAssistantReply", () => {
           role: "assistant",
           content: [{ type: "text", text: "南京天气：晴，12°C。" }],
         },
-      },
+      } as SessionEntry,
       {
         type: "message",
         timestamp: 2,
@@ -30,15 +39,18 @@ describe("readLatestAssistantReply", () => {
           role: "assistant",
           content: [{ type: "toolCall", id: "tool-1", name: "weather", arguments: { city: "Nanjing" } }],
         },
-      },
+      } as SessionEntry,
     ]);
 
-    const result = readLatestAssistantReply("child-session");
+    const result = readLatestAssistantReply(sessionId, { baseDir: testDir });
     expect(result).toBe("南京天气：晴，12°C。");
   });
 
-  it("falls back to latest toolResult text when no assistant text exists", () => {
-    readEntriesMock.mockReturnValue([
+  it("falls back to latest toolResult text when no assistant text exists", async () => {
+    testDir = mkdtempSync(join(tmpdir(), "announce-test-"));
+    const sessionId = "child-session-2";
+
+    await seedSession(sessionId, [
       {
         type: "message",
         timestamp: 1,
@@ -46,7 +58,7 @@ describe("readLatestAssistantReply", () => {
           role: "assistant",
           content: [{ type: "toolCall", id: "tool-2", name: "weather", arguments: { city: "Nanjing" } }],
         },
-      },
+      } as SessionEntry,
       {
         type: "message",
         timestamp: 2,
@@ -57,10 +69,10 @@ describe("readLatestAssistantReply", () => {
           content: [{ type: "text", text: "{\"city\":\"Nanjing\",\"tempC\":12,\"condition\":\"Sunny\"}" }],
           isError: false,
         },
-      },
+      } as SessionEntry,
     ]);
 
-    const result = readLatestAssistantReply("child-session");
+    const result = readLatestAssistantReply(sessionId, { baseDir: testDir });
     expect(result).toContain("\"city\":\"Nanjing\"");
     expect(result).toContain("\"condition\":\"Sunny\"");
   });
diff --git a/packages/core/src/agent/tools/sessions-list.test.ts b/packages/core/src/agent/tools/sessions-list.test.ts
index 989493de..230d8997 100644
--- a/packages/core/src/agent/tools/sessions-list.test.ts
+++ b/packages/core/src/agent/tools/sessions-list.test.ts
@@ -1,17 +1,7 @@
-import { describe, it, expect, vi, beforeEach } from "vitest";
+import { describe, it, expect, beforeEach } from "vitest";
 import type { SubagentRunRecord } from "../subagent/types.js";
-
-// Mock the registry module before importing the tool
-vi.mock("../subagent/registry.js", () => ({
-  listSubagentRuns: vi.fn(),
-  getSubagentRun: vi.fn(),
-}));
-
+import { resetSubagentRegistryForTests, seedSubagentRunForTests } from "../subagent/registry.js";
 import { createSessionsListTool } from "./sessions-list.js";
-import { listSubagentRuns, getSubagentRun } from "../subagent/registry.js";
-
-const mockListSubagentRuns = vi.mocked(listSubagentRuns);
-const mockGetSubagentRun = vi.mocked(getSubagentRun);
 
 function makeRecord(overrides: Partial<SubagentRunRecord> = {}): SubagentRunRecord {
   return {
@@ -27,11 +17,10 @@ function makeRecord(overrides: Partial<SubagentRunRecord> = {}): SubagentRunReco
 
 describe("sessions_list tool", () => {
   beforeEach(() => {
-    vi.clearAllMocks();
+    resetSubagentRegistryForTests();
   });
 
   it("returns empty message when no runs exist", async () => {
-    mockListSubagentRuns.mockReturnValue([]);
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", {});
 
@@ -44,12 +33,14 @@ describe("sessions_list tool", () => {
 
   it("lists multiple runs with correct status mapping", async () => {
     const now = Date.now();
-    const runs: SubagentRunRecord[] = [
+    seedSubagentRunForTests(
       makeRecord({
         runId: "run-aaa",
         label: "Code Review",
         startedAt: now - 45000,
       }),
+    );
+    seedSubagentRunForTests(
       makeRecord({
         runId: "run-bbb",
         label: "Test Analysis",
@@ -57,6 +48,8 @@ describe("sessions_list tool", () => {
         endedAt: now - 30000,
         outcome: { status: "ok" },
       }),
+    );
+    seedSubagentRunForTests(
       makeRecord({
         runId: "run-ccc",
         label: "Lint Check",
@@ -64,8 +57,7 @@ describe("sessions_list tool", () => {
         endedAt: now,
         outcome: { status: "error", error: "timeout" },
       }),
-    ];
-    mockListSubagentRuns.mockReturnValue(runs);
+    );
 
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", {});
@@ -88,17 +80,18 @@ describe("sessions_list tool", () => {
 
   it("returns detail for a specific runId", async () => {
     const now = Date.now();
-    const record = makeRecord({
-      runId: "run-detail",
-      label: "Deep Analysis",
-      task: "Analyze the authentication module thoroughly",
-      startedAt: now - 90000,
-      endedAt: now - 10000,
-      outcome: { status: "ok" },
-      findings: "Found 2 potential issues in token validation.",
-      findingsCaptured: true,
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+    seedSubagentRunForTests(
+      makeRecord({
+        runId: "run-detail",
+        label: "Deep Analysis",
+        task: "Analyze the authentication module thoroughly",
+        startedAt: now - 90000,
+        endedAt: now - 10000,
+        outcome: { status: "ok" },
+        findings: "Found 2 potential issues in token validation.",
+        findingsCaptured: true,
+      }),
+    );
 
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", { runId: "run-detail" });
@@ -115,8 +108,6 @@ describe("sessions_list tool", () => {
   });
 
   it("returns not found for unknown runId", async () => {
-    mockGetSubagentRun.mockReturnValue(undefined);
-
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", { runId: "nonexistent" });
 
@@ -126,11 +117,12 @@ describe("sessions_list tool", () => {
   });
 
   it("rejects runId belonging to a different requester", async () => {
-    const record = makeRecord({
-      runId: "run-other",
-      requesterSessionId: "other-parent",
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+    seedSubagentRunForTests(
+      makeRecord({
+        runId: "run-other",
+        requesterSessionId: "other-parent",
+      }),
+    );
 
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", { runId: "run-other" });
@@ -151,13 +143,14 @@ describe("sessions_list tool", () => {
 
   it("shows findings status for running task", async () => {
     const now = Date.now();
-    const record = makeRecord({
-      runId: "run-running",
-      label: "Still Running",
-      startedAt: now - 30000,
-      // no endedAt
-    });
-    mockGetSubagentRun.mockReturnValue(record);
+    seedSubagentRunForTests(
+      makeRecord({
+        runId: "run-running",
+        label: "Still Running",
+        startedAt: now - 30000,
+        // no endedAt
+      }),
+    );
 
     const tool = createSessionsListTool({ sessionId: "parent-001" });
     const result = await tool.execute("call-1", { runId: "run-running" });