diff --git a/apps/desktop/src/main/electron-env.d.ts b/apps/desktop/src/main/electron-env.d.ts index 70720780..432437e9 100644 --- a/apps/desktop/src/main/electron-env.d.ts +++ b/apps/desktop/src/main/electron-env.d.ts @@ -247,18 +247,18 @@ interface ElectronAPI { setEnabled: (enabled: boolean) => Promise<{ ok: boolean; enabled?: boolean; error?: string }> wake: (reason?: string) => Promise<{ ok: boolean; result?: unknown; error?: string }> } - localChat: { - subscribe: (agentId: string) => Promise<{ ok?: boolean; error?: string; alreadySubscribed?: boolean }> - unsubscribe: (agentId: string) => Promise<{ ok: boolean }> - getHistory: (agentId: string, options?: { offset?: number; limit?: number }) => Promise<{ messages: unknown[]; total: number; offset: number; limit: number }> - send: (agentId: string, content: string) => Promise<{ ok?: boolean; error?: string }> - abort: (agentId: string) => Promise<{ ok?: boolean; error?: string }> - resolveExecApproval: (approvalId: string, decision: string) => Promise<{ ok: boolean }> - onEvent: (callback: (event: LocalChatEvent) => void) => void - offEvent: () => void - onApproval: (callback: (approval: LocalChatApproval) => void) => void - offApproval: () => void - } + localChat: { + subscribe: (agentId: string) => Promise<{ ok?: boolean; error?: string; alreadySubscribed?: boolean }> + unsubscribe: (agentId: string) => Promise<{ ok: boolean }> + getHistory: (agentId: string, options?: { offset?: number; limit?: number }) => Promise<{ messages: unknown[]; total: number; offset: number; limit: number; contextWindowTokens?: number }> + send: (agentId: string, content: string) => Promise<{ ok?: boolean; error?: string }> + abort: (agentId: string) => Promise<{ ok?: boolean; error?: string }> + resolveExecApproval: (approvalId: string, decision: string) => Promise<{ ok: boolean }> + onEvent: (callback: (event: LocalChatEvent) => void) => void + offEvent: () => void + onApproval: (callback: (approval: LocalChatApproval) => void) => void + offApproval: () => void + } } // Used in Renderer process, expose in `preload.ts` diff --git a/apps/desktop/src/main/ipc/hub.ts b/apps/desktop/src/main/ipc/hub.ts index 882ea55b..35441a1b 100644 --- a/apps/desktop/src/main/ipc/hub.ts +++ b/apps/desktop/src/main/ipc/hub.ts @@ -354,20 +354,21 @@ export function registerHubIpcHandlers(): void { const h = getHub() const agent = h.getAgent(agentId) if (!agent) { - return { messages: [], total: 0, offset: 0, limit: 0 } + return { messages: [], total: 0, offset: 0, limit: 0, contextWindowTokens: undefined } } try { await agent.ensureInitialized() const allMessages = agent.loadSessionMessagesForDisplay() + const contextWindowTokens = agent.getContextWindowTokens() const total = allMessages.length // Must match DEFAULT_MESSAGES_LIMIT from @multica/sdk/actions/rpc const limit = options?.limit ?? 200 const offset = options?.offset ?? Math.max(0, total - limit) const sliced = allMessages.slice(offset, offset + limit) - return { messages: sliced, total, offset, limit } + return { messages: sliced, total, offset, limit, contextWindowTokens } } catch { - return { messages: [], total: 0, offset: 0, limit: 0 } + return { messages: [], total: 0, offset: 0, limit: 0, contextWindowTokens: undefined } } }) diff --git a/apps/desktop/src/renderer/src/components/local-chat.tsx b/apps/desktop/src/renderer/src/components/local-chat.tsx index 7cbded04..2e083111 100644 --- a/apps/desktop/src/renderer/src/components/local-chat.tsx +++ b/apps/desktop/src/renderer/src/components/local-chat.tsx @@ -22,6 +22,7 @@ export function LocalChat({ initialPrompt }: LocalChatProps) { isLoadingHistory, isLoadingMore, hasMore, + contextWindowTokens, error, pendingApprovals, sendMessage, @@ -110,6 +111,7 @@ export function LocalChat({ initialPrompt }: LocalChatProps) { isLoadingHistory={isLoadingHistory} isLoadingMore={isLoadingMore} hasMore={hasMore} + contextWindowTokens={contextWindowTokens} error={error} pendingApprovals={pendingApprovals} sendMessage={sendMessage} diff --git a/apps/desktop/src/renderer/src/hooks/use-local-chat.ts b/apps/desktop/src/renderer/src/hooks/use-local-chat.ts index 1f5fce59..6dd512d3 100644 --- a/apps/desktop/src/renderer/src/hooks/use-local-chat.ts +++ b/apps/desktop/src/renderer/src/hooks/use-local-chat.ts @@ -93,6 +93,7 @@ export function useLocalChat() { chatRef.current.setHistory(result.messages as AgentMessageItem[], agentId, { total: result.total, offset: result.offset, + contextWindowTokens: result.contextWindowTokens, }) offsetRef.current = result.offset } @@ -140,6 +141,7 @@ export function useLocalChat() { chatRef.current.prependHistory(result.messages as AgentMessageItem[], agentId, { total: result.total, offset: result.offset, + contextWindowTokens: result.contextWindowTokens, }) offsetRef.current = result.offset } @@ -172,6 +174,7 @@ export function useLocalChat() { isLoadingHistory, isLoadingMore, hasMore: chat.hasMore, + contextWindowTokens: chat.contextWindowTokens, error: chat.error, pendingApprovals: chat.pendingApprovals, sendMessage, diff --git a/packages/core/src/agent/async-agent.ts b/packages/core/src/agent/async-agent.ts index 90763ff1..0244512d 100644 --- a/packages/core/src/agent/async-agent.ts +++ b/packages/core/src/agent/async-agent.ts @@ -553,6 +553,20 @@ export class AsyncAgent { return this.agent.getProviderInfo(); } + /** + * Get persisted session metadata. + */ + getSessionMeta(): import("./session/types.js").SessionMeta | undefined { + return this.agent.getSessionMeta(); + } + + /** + * Get effective context window token limit for this session. + */ + getContextWindowTokens(): number { + return this.agent.getContextWindowTokens(); + } + /** * Switch to a different provider and/or model. * This updates the agent's model without recreating the session. diff --git a/packages/core/src/agent/runner.ts b/packages/core/src/agent/runner.ts index c812b4a1..f36fe596 100644 --- a/packages/core/src/agent/runner.ts +++ b/packages/core/src/agent/runner.ts @@ -15,6 +15,7 @@ import { getDefaultModel, } from "./providers/index.js"; import { SessionManager } from "./session/session-manager.js"; +import type { SessionMeta } from "./session/types.js"; import { ProfileManager } from "./profile/index.js"; import { SkillManager } from "./skills/index.js"; import { credentialManager, getCredentialsPath } from "./credentials.js"; @@ -1199,6 +1200,20 @@ export class Agent { }; } + /** + * Get persisted session metadata. + */ + getSessionMeta(): SessionMeta | undefined { + return this.session.getMeta(); + } + + /** + * Get effective context window token limit for this session. + */ + getContextWindowTokens(): number { + return this.session.getMeta()?.contextWindowTokens ?? this.session.getContextWindowTokens(); + } + /** * Switch to a different provider and/or model. * This updates the agent's model without recreating the session. diff --git a/packages/core/src/hub/rpc/handlers/get-agent-messages.ts b/packages/core/src/hub/rpc/handlers/get-agent-messages.ts index 534ce025..3a008c70 100644 --- a/packages/core/src/hub/rpc/handlers/get-agent-messages.ts +++ b/packages/core/src/hub/rpc/handlers/get-agent-messages.ts @@ -31,6 +31,7 @@ export function createGetAgentMessagesHandler(): RpcHandler { const session = new SessionManager({ sessionId: agentId }); const allMessages = session.loadMessagesForDisplay(); const total = allMessages.length; + const contextWindowTokens = session.getMeta()?.contextWindowTokens ?? session.getContextWindowTokens(); // When offset is not provided, return the latest messages if (offset == null) { @@ -39,6 +40,6 @@ export function createGetAgentMessagesHandler(): RpcHandler { const sliced = allMessages.slice(offset, offset + limit); - return { messages: sliced, total, offset, limit }; + return { messages: sliced, total, offset, limit, contextWindowTokens }; }; } diff --git a/packages/hooks/src/use-chat.ts b/packages/hooks/src/use-chat.ts index 32d722fa..9c414a8e 100644 --- a/packages/hooks/src/use-chat.ts +++ b/packages/hooks/src/use-chat.ts @@ -84,6 +84,7 @@ export function useChat() { const [pendingApprovals, setPendingApprovals] = useState([]); const [error, setError] = useState(null); const [hasMore, setHasMore] = useState(false); + const [contextWindowTokens, setContextWindowTokens] = useState(undefined); const isStreaming = streamingIds.size > 0; @@ -125,19 +126,33 @@ export function useChat() { }, []); /** Load initial history (replaces all messages) */ - const setHistory = useCallback((raw: AgentMessageItem[], agentId: string, meta?: { total: number; offset: number }) => { + const setHistory = useCallback(( + raw: AgentMessageItem[], + agentId: string, + meta?: { total: number; offset: number; contextWindowTokens?: number }, + ) => { const loaded = convertMessages(raw, agentId); setMessages(loaded); if (meta) { setHasMore(meta.offset > 0); + if (meta.contextWindowTokens !== undefined) { + setContextWindowTokens(meta.contextWindowTokens); + } } }, [convertMessages]); /** Prepend older messages (for "load more" pagination) */ - const prependHistory = useCallback((raw: AgentMessageItem[], agentId: string, meta: { total: number; offset: number }) => { + const prependHistory = useCallback(( + raw: AgentMessageItem[], + agentId: string, + meta: { total: number; offset: number; contextWindowTokens?: number }, + ) => { const older = convertMessages(raw, agentId); setMessages((prev) => [...older, ...prev]); setHasMore(meta.offset > 0); + if (meta.contextWindowTokens !== undefined) { + setContextWindowTokens(meta.contextWindowTokens); + } }, [convertMessages]); /** Add a user message */ @@ -274,6 +289,7 @@ export function useChat() { streamingIds, isStreaming, hasMore, + contextWindowTokens, pendingApprovals, error, // State control (for transport layer to call) diff --git a/packages/hooks/src/use-gateway-chat.ts b/packages/hooks/src/use-gateway-chat.ts index 0986c7bb..5b98efa8 100644 --- a/packages/hooks/src/use-gateway-chat.ts +++ b/packages/hooks/src/use-gateway-chat.ts @@ -38,6 +38,7 @@ export function useGatewayChat({ client, hubId, agentId }: UseGatewayChatOptions chat.setHistory(result.messages, agentId, { total: result.total, offset: result.offset, + contextWindowTokens: result.contextWindowTokens, }); offsetRef.current = result.offset; }) @@ -100,6 +101,7 @@ export function useGatewayChat({ client, hubId, agentId }: UseGatewayChatOptions chat.prependHistory(result.messages, agentId, { total: result.total, offset: result.offset, + contextWindowTokens: result.contextWindowTokens, }); offsetRef.current = result.offset; } catch { @@ -125,6 +127,7 @@ export function useGatewayChat({ client, hubId, agentId }: UseGatewayChatOptions isLoadingHistory, isLoadingMore, hasMore: chat.hasMore, + contextWindowTokens: chat.contextWindowTokens, error: chat.error, pendingApprovals: chat.pendingApprovals, sendMessage, diff --git a/packages/sdk/src/actions/rpc.ts b/packages/sdk/src/actions/rpc.ts index 26921944..a49f1579 100644 --- a/packages/sdk/src/actions/rpc.ts +++ b/packages/sdk/src/actions/rpc.ts @@ -91,6 +91,8 @@ export interface GetAgentMessagesResult { total: number; offset: number; limit: number; + /** Context window size (tokens) used by this session */ + contextWindowTokens?: number; } /** getHubInfo - no params needed */ diff --git a/packages/ui/src/components/chat-input.tsx b/packages/ui/src/components/chat-input.tsx index 7098112a..de1a231f 100644 --- a/packages/ui/src/components/chat-input.tsx +++ b/packages/ui/src/components/chat-input.tsx @@ -4,7 +4,8 @@ import { useEditor, EditorContent, type Editor } from "@tiptap/react"; import StarterKit from "@tiptap/starter-kit"; import Placeholder from "@tiptap/extension-placeholder"; import { Button } from "@multica/ui/components/ui/button"; -import { ArrowUp, Square } from "lucide-react"; +import { HoverCard, HoverCardContent, HoverCardTrigger } from "@multica/ui/components/ui/hover-card"; +import { ArrowUp, Gauge, Square, TriangleAlert } from "lucide-react"; import { cn } from "@multica/ui/lib/utils"; import "./chat-input.css"; @@ -15,6 +16,22 @@ export interface ChatInputRef { clear: () => void; } +export interface ContextWindowUsage { + usedTokens: number; + totalTokens: number; + availableTokens: number; + usageRatio: number; + usagePercent: number; + isEstimated?: boolean; + lastCompaction?: { + removed: number; + kept: number; + tokensRemoved?: number; + tokensKept?: number; + reason: string; + }; +} + interface ChatInputProps { onSubmit?: (value: string) => void; onAbort?: () => void; @@ -23,10 +40,95 @@ interface ChatInputProps { placeholder?: string; /** Initial value to pre-fill the input */ defaultValue?: string; + /** Context usage stats shown in the input footer */ + contextWindowUsage?: ContextWindowUsage; +} + +function formatTokenCount(tokens: number): string { + if (tokens >= 1_000_000) return `${(tokens / 1_000_000).toFixed(1)}M`; + if (tokens >= 10_000) return `${Math.round(tokens / 1000)}k`; + if (tokens >= 1000) return `${(tokens / 1000).toFixed(1)}k`; + return `${tokens}`; +} + +function resolveUsageTone(ratio: number): { + dotClass: string; + textClass: string; +} { + if (ratio >= 0.9) { + return { dotClass: "bg-destructive", textClass: "text-destructive" }; + } + if (ratio >= 0.75) { + return { dotClass: "bg-foreground/80", textClass: "text-foreground" }; + } + return { dotClass: "bg-muted-foreground/60", textClass: "text-muted-foreground" }; +} + +function ContextWindowIndicator({ usage }: { usage: ContextWindowUsage }) { + const ratio = Math.max(0, usage.usageRatio); + const usagePercent = Math.max(0, usage.usagePercent); + const clampedPercent = Math.min(100, usagePercent); + const tone = resolveUsageTone(ratio); + const usedTokens = formatTokenCount(usage.usedTokens); + const totalTokens = formatTokenCount(usage.totalTokens); + const availableTokens = formatTokenCount(Math.max(0, usage.availableTokens)); + const compactionFreed = usage.lastCompaction?.tokensRemoved; + + return ( + + + + + {clampedPercent}% + + +
+

Context window

+

= 0.9 && "text-destructive")}> + {clampedPercent}% full +

+

+ {usedTokens} / {totalTokens} tokens used{usage.isEstimated ? " (est.)" : ""} +

+
+ +
+
+
+ +
+ {availableTokens} tokens left + + {compactionFreed != null + ? `Last compaction: -${formatTokenCount(compactionFreed)}` + : "Auto-compaction enabled"} + +
+ + {ratio > 1 && ( +
+ + Context is over capacity. The next run will compact history. +
+ )} + + + ); } export const ChatInput = forwardRef( - function ChatInput({ onSubmit, onAbort, isLoading, disabled, placeholder = "Type a message...", defaultValue }, ref) { + function ChatInput( + { onSubmit, onAbort, isLoading, disabled, placeholder = "Type a message...", defaultValue, contextWindowUsage }, + ref, + ) { // Use refs to avoid stale closures in Tiptap keydown handler const onSubmitRef = useRef(onSubmit); onSubmitRef.current = onSubmit; @@ -137,7 +239,12 @@ export const ChatInput = forwardRef( disabled && "is-disabled cursor-not-allowed opacity-60", )}> -
+
+ {contextWindowUsage ? ( + + ) : ( +
+ )} diff --git a/packages/ui/src/components/chat-view.tsx b/packages/ui/src/components/chat-view.tsx index 7bd6cffb..ba25aad4 100644 --- a/packages/ui/src/components/chat-view.tsx +++ b/packages/ui/src/components/chat-view.tsx @@ -1,9 +1,9 @@ "use client"; -import { useRef, useEffect, useCallback } from "react"; +import { useRef, useEffect, useCallback, useMemo } from "react"; import { Button } from "@multica/ui/components/ui/button"; import { Skeleton } from "@multica/ui/components/ui/skeleton"; -import { ChatInput } from "@multica/ui/components/chat-input"; +import { ChatInput, type ContextWindowUsage } from "@multica/ui/components/chat-input"; import { MessageList } from "@multica/ui/components/message-list"; import { MemoizedMarkdown } from "@multica/ui/components/markdown"; import { MulticaIcon } from "@multica/ui/components/multica-icon"; @@ -33,6 +33,7 @@ export interface ChatViewProps { isLoadingHistory: boolean; isLoadingMore?: boolean; hasMore?: boolean; + contextWindowTokens?: number; error: ChatViewError | null; pendingApprovals: ChatViewApproval[]; sendMessage: (text: string) => void; @@ -48,6 +49,96 @@ export interface ChatViewProps { bottomSlot?: React.ReactNode; } +const DEFAULT_CONTEXT_WINDOW_TOKENS = 200_000; +const CHARS_PER_TOKEN = 4; +const ESTIMATION_SAFETY_MARGIN = 1.2; +const MESSAGE_OVERHEAD_TOKENS = 12; +const RESPONSE_RESERVE_TOKENS = 1024; + +function safeJsonLength(value: unknown): number { + try { + return JSON.stringify(value)?.length ?? 0; + } catch { + return 0; + } +} + +function estimateMessageChars(message: Message): number { + let chars = 0; + + for (const block of message.content) { + if (block.type === "text") { + chars += block.text?.length ?? 0; + continue; + } + if (block.type === "thinking") { + chars += block.thinking?.length ?? 0; + continue; + } + if (block.type === "toolCall") { + chars += (block.name?.length ?? 0) + safeJsonLength(block.arguments) + 32; + continue; + } + if (block.type === "image") { + // Image blocks add prompt/metadata overhead even without inline text. + chars += 512; + continue; + } + chars += safeJsonLength(block); + } + + if (message.toolArgs) { + chars += safeJsonLength(message.toolArgs); + } + if (message.toolName) { + chars += message.toolName.length; + } + + return chars; +} + +function deriveContextWindowUsage( + messages: Message[], + contextWindowTokens?: number, +): ContextWindowUsage { + const totalTokens = Math.max(1, contextWindowTokens ?? DEFAULT_CONTEXT_WINDOW_TOKENS); + const contextMessages = messages.filter((message) => message.role !== "system"); + const baseReserve = contextMessages.length > 0 ? RESPONSE_RESERVE_TOKENS : 0; + + let estimatedUsedTokens = baseReserve; + for (const message of contextMessages) { + const chars = estimateMessageChars(message); + const tokenEstimate = Math.ceil((chars / CHARS_PER_TOKEN) * ESTIMATION_SAFETY_MARGIN); + estimatedUsedTokens += tokenEstimate + MESSAGE_OVERHEAD_TOKENS; + } + + let lastCompaction: ContextWindowUsage["lastCompaction"]; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message?.systemType === "compaction" && message.compaction) { + lastCompaction = { + removed: message.compaction.removed, + kept: message.compaction.kept, + tokensRemoved: message.compaction.tokensRemoved, + tokensKept: message.compaction.tokensKept, + reason: message.compaction.reason, + }; + break; + } + } + + const usageRatio = estimatedUsedTokens / totalTokens; + return { + usedTokens: estimatedUsedTokens, + totalTokens, + availableTokens: Math.max(0, totalTokens - estimatedUsedTokens), + usageRatio, + usagePercent: Math.round(usageRatio * 100), + isEstimated: true, + lastCompaction, + }; +} + export function ChatView({ messages, streamingIds, @@ -55,6 +146,7 @@ export function ChatView({ isLoadingHistory, isLoadingMore = false, hasMore = false, + contextWindowTokens, error, pendingApprovals, sendMessage, @@ -70,6 +162,10 @@ export function ChatView({ const sentinelRef = useRef(null); const fadeStyle = useScrollFade(mainRef); const { suppressAutoScroll } = useAutoScroll(mainRef); + const contextWindowUsage = useMemo( + () => deriveContextWindowUsage(messages, contextWindowTokens), + [messages, contextWindowTokens], + ); // scrollHeight compensation for prepended messages const prevScrollHeightRef = useRef(0); @@ -277,6 +373,7 @@ export function ChatView({ disabled={!!error && error.code !== 'AGENT_ERROR'} placeholder={error && error.code !== 'AGENT_ERROR' ? "Connection error" : "Ask your Agent..."} defaultValue={initialPrompt} + contextWindowUsage={contextWindowUsage} />