diff --git a/CLAUDE.md b/CLAUDE.md index 32d8a9d7..267369b7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,13 +4,13 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -Super Multica is a distributed AI agent framework with a monorepo architecture. It includes an agent engine with multi-provider LLM support, a WebSocket gateway, a console hub for multi-agent coordination, and frontend apps (Next.js web, Electron desktop). +Super Multica is a distributed AI agent framework with a monorepo architecture. It includes an agent engine with multi-provider LLM support, an Electron desktop app with embedded Hub, a WebSocket gateway for remote access, and a Next.js web app. ## Monorepo Structure -- **`src/`** — Core modules (agent engine, gateway, console, client, shared types) +- **`src/`** — Core modules (agent engine, gateway, hub, shared types) +- **`apps/desktop`** — Electron + Vite + React desktop app (`@multica/desktop`) — **primary development target** - **`apps/web`** — Next.js 16 web app (`@multica/web`, port 3001) -- **`apps/desktop`** — Electron + Vite + React desktop app (`@multica/desktop`) - **`packages/ui`** — Shared UI component library (`@multica/ui`, Shadcn/Tailwind CSS v4) - **`packages/sdk`** — Gateway client SDK (`@multica/sdk`, Socket.io) - **`packages/store`** — Zustand state management (`@multica/store`) @@ -31,15 +31,14 @@ multica profile list # List profiles multica skills list # List skills multica tools list # List tools multica credentials init # Initialize credentials -multica dev # Start all dev services +multica dev # Start desktop app (default) multica help # Show help # Development servers -multica dev # All services (gateway:3000, console:4000, web:3001) -multica dev gateway # WebSocket gateway only -multica dev console # NestJS console with agent +multica dev # Desktop app (default, recommended) +multica dev gateway # WebSocket gateway only (for remote clients) multica dev web # Next.js 
web app -multica dev desktop # Electron desktop app +multica dev all # Gateway + web app # Build (turbo-orchestrated) pnpm build @@ -56,18 +55,22 @@ pnpm test:coverage # With v8 coverage ## Architecture ``` -Frontend (web:3001 / desktop) +Desktop App (standalone, recommended) + └─ Hub (embedded) + └─ Agent Engine (LLM runner, sessions, skills, tools) + └─ (Optional) Gateway connection for remote access + +Web App (requires Gateway) → @multica/sdk (GatewayClient, Socket.io) → Gateway (NestJS, WebSocket, port 3000) - → Console Hub (multi-agent coordination) - → Agent Engine (LLM runner, sessions, skills, tools) + → Hub + Agent Engine ``` **Agent Engine** (`src/agent/`): Orchestrates LLM interactions with multi-provider support (OpenAI, Anthropic, DeepSeek, Kimi, Groq, Mistral, Google, Together). Features session management (JSONL-based, UUIDv7 IDs), profile system (`~/.super-multica/agent-profiles/`), modular skills with hot-reload, and token-aware context window guards (compaction modes: tokens, count, summary). Unified CLI in `src/agent/cli/index.ts` with subcommands in `src/agent/cli/commands/`. -**Gateway** (`src/gateway/`): NestJS WebSocket server with Socket.io for real-time message passing, RPC request/response, and streaming. +**Hub** (`src/hub/`): Manages agents and communication channels. Embedded in desktop app, or runs standalone for web clients. -**Console** (`src/console/`): NestJS hub for multi-agent coordination with a web dashboard. +**Gateway** (`src/gateway/`): NestJS WebSocket server with Socket.io for remote client access, message routing, and device verification. 
## Tech Stack & Config diff --git a/README.md b/README.md index 5aee5c5e..c6fd6d28 100644 --- a/README.md +++ b/README.md @@ -13,18 +13,18 @@ src/ │ ├── skills/ # Modular skill system │ └── tools/ # Agent tools │ └── web/ # Web fetch and search tools -├── gateway/ # WebSocket gateway for distributed communication -├── hub/ # Multi-agent coordination hub -├── client/ # Client library -├── console/ # NestJS console application -└── shared/ # Shared types and gateway SDK - └── gateway-sdk/ # Gateway client SDK +├── gateway/ # WebSocket gateway for remote access +├── hub/ # Agent coordination hub +└── shared/ # Shared types apps/ +├── desktop/ # Electron desktop app (recommended) └── web/ # Next.js web application packages/ -└── sdk/ # SDK package for external use +├── sdk/ # Gateway client SDK +├── store/ # Zustand state management +└── ui/ # Shared UI components skills/ # Bundled skills (commit, code-review) ``` @@ -85,9 +85,8 @@ Example `skills.env.json5` (dynamic keys): Start services directly (no `source .env`): ```bash -multica dev console -multica run "hello" -multica dev gateway +multica dev # Start desktop app +multica run "hello" # Run CLI mode ``` Optional overrides: @@ -194,10 +193,10 @@ multica chat --profile my-agent multica run --thinking high "solve this complex problem" # Development servers -multica dev # Start all services -multica dev gateway # Gateway only (:3000) -multica dev console # Console only (:4000) +multica dev # Start desktop app (default) +multica dev gateway # Gateway only (:3000) - for remote clients multica dev web # Web app only (:3001) +multica dev all # Start gateway + web # Help multica help @@ -383,23 +382,33 @@ web_search({ }) ``` -## Distributed Architecture +## Architecture + +### Desktop App (Recommended) + +The Electron desktop app runs a standalone Hub with embedded Agent Engine: + +- **No Gateway required** for local development +- Direct IPC communication for optimal performance +- QR code pairing for mobile remote access 
+- Optional Gateway connection for web/remote clients ### Gateway -The WebSocket gateway enables distributed multi-agent communication: +The WebSocket gateway enables remote client access: -- Real-time message passing between agents +- Real-time message routing between clients and Hub - Streaming support for long-running operations - RPC-style request/response patterns +- Device verification and authentication ### Hub -The Hub manages multiple agents and gateway connections: +The Hub manages agents and communication: - Agent lifecycle management -- Communication channel coordination -- Device identification and tracking +- Multi-subscriber event distribution +- Device whitelist and token-based verification ## Scripts @@ -418,11 +427,11 @@ The Hub manages multiple agents and gateway connections: ### Development (shortcuts) -- `pnpm dev` - Run full stack (gateway + console + web) -- `pnpm dev:gateway` - Run gateway only -- `pnpm dev:console` - Run console only -- `pnpm dev:web` - Run web app only +- `pnpm dev` - Run desktop app (default, recommended) - `pnpm dev:desktop` - Run desktop app +- `pnpm dev:gateway` - Run gateway only (for remote clients) +- `pnpm dev:web` - Run web app only +- `pnpm dev:all` - Run gateway + web ### Build & Test diff --git a/apps/desktop/README.md b/apps/desktop/README.md index 44fc7955..0e202352 100644 --- a/apps/desktop/README.md +++ b/apps/desktop/README.md @@ -190,28 +190,6 @@ ipcMain.handle('skills:add', async (_, source: string) => { --- -## 三、实现优先级 - -### Phase 1: 基础框架 (MVP) - -1. **Layout 组件** - Header + Tabs 导航 -2. **Home 页面** - 二维码显示 + 连接状态 -3. **Gateway 连接** - 复用 @multica/store - -### Phase 2: 管理功能 - -4. **Tools 页面** - 列表展示 + 开关切换 -5. **Skills 页面** - 列表展示 + 基础操作 -6. **Settings** - Gateway URL + Theme - -### Phase 3: 完善体验 - -7. **Agent 页面** - 状态监控 + Provider 切换 -8. **二维码刷新机制** -9. 
**错误处理 + Toast 提示** - ---- - ## 四、Hub 集成技术方案 ### 架构概述 @@ -496,9 +474,17 @@ ChatInput → useMessagesStore.sendMessage() ### 复用层级 -| 层级 | 组件/模块 | 复用情况 | -| ---------- | ---------------------------------------- | -------- | -| UI 层 | `MessageList`, `ChatInput` | ✅ 完全复用 | -| Store 层 | `useMessagesStore` | ✅ 完全复用 | -| Agent 层 | `AsyncAgent.write()`, `subscribe()` | ✅ 完全复用 | -| 传输层 | IPC vs WebSocket | ❌ 各自实现 | +| 层级 | 组件/模块 | 复用情况 | +| -------- | ----------------------------------- | ----------- | +| UI 层 | `MessageList`, `ChatInput` | ✅ 完全复用 | +| Store 层 | `useMessagesStore` | ✅ 完全复用 | +| Agent 层 | `AsyncAgent.write()`, `subscribe()` | ✅ 完全复用 | +| 传输层 | IPC vs WebSocket | ❌ 各自实现 | + +--- + +## 九、TODO + +- [ ] **优化 Memory Tool 逻辑**: 当前 memory tool 和 memory.md 没有统一,需要整合 +- [ ] **优化 Agent Profile 加载逻辑**: 改进 Profile 的加载机制 +- [ ] **Agent 自我迭代 Profile**: 添加让 Agent 在对话过程中自己修改 Profile 内文件的能力 diff --git a/apps/desktop/electron/electron-env.d.ts b/apps/desktop/electron/electron-env.d.ts index 880abc6a..7777fdf2 100644 --- a/apps/desktop/electron/electron-env.d.ts +++ b/apps/desktop/electron/electron-env.d.ts @@ -101,6 +101,27 @@ interface LocalChatEvent { } } +interface ProviderStatus { + id: string + name: string + authMethod: 'api-key' | 'oauth' + available: boolean + configured: boolean + current: boolean + defaultModel: string + models: string[] + loginUrl?: string + loginCommand?: string + loginInstructions?: string +} + +interface CurrentProviderInfo { + provider: string + model: string | undefined + providerName: string | undefined + available: boolean +} + interface ElectronAPI { hub: { init: () => Promise @@ -145,6 +166,16 @@ interface ElectronAPI { updateStyle: (style: string) => Promise updateUser: (content: string) => Promise } + provider: { + list: () => Promise + listAvailable: () => Promise + current: () => Promise + set: (providerId: string, modelId?: string) => Promise<{ ok: boolean; provider?: string; model?: string; error?: string }> + getMeta: (providerId: 
string) => Promise + isAvailable: (providerId: string) => Promise + saveApiKey: (providerId: string, apiKey: string) => Promise<{ ok: boolean; error?: string }> + importOAuth: (providerId: string) => Promise<{ ok: boolean; expiresAt?: number; error?: string }> + } localChat: { subscribe: (agentId: string) => Promise<{ ok?: boolean; error?: string; alreadySubscribed?: boolean }> unsubscribe: (agentId: string) => Promise<{ ok: boolean }> diff --git a/apps/desktop/electron/ipc/agent.ts b/apps/desktop/electron/ipc/agent.ts index a358b88a..61622286 100644 --- a/apps/desktop/electron/ipc/agent.ts +++ b/apps/desktop/electron/ipc/agent.ts @@ -13,6 +13,7 @@ const TOOL_GROUPS: Record = { 'group:runtime': ['exec', 'process'], 'group:web': ['web_search', 'web_fetch'], 'group:memory': ['memory_get', 'memory_set', 'memory_delete', 'memory_list'], + 'group:subagent': ['sessions_spawn'], } // All known tool names (for display when agent not available) @@ -21,6 +22,7 @@ const ALL_KNOWN_TOOLS = [ ...TOOL_GROUPS['group:runtime'], ...TOOL_GROUPS['group:web'], ...TOOL_GROUPS['group:memory'], + ...TOOL_GROUPS['group:subagent'], ] /** diff --git a/apps/desktop/electron/ipc/index.ts b/apps/desktop/electron/ipc/index.ts index d0971eb0..fc11179c 100644 --- a/apps/desktop/electron/ipc/index.ts +++ b/apps/desktop/electron/ipc/index.ts @@ -5,11 +5,13 @@ export { registerAgentIpcHandlers, cleanupAgent } from './agent.js' export { registerSkillsIpcHandlers } from './skills.js' export { registerHubIpcHandlers, cleanupHub, initializeHub, setupDeviceConfirmation } from './hub.js' export { registerProfileIpcHandlers } from './profile.js' +export { registerProviderIpcHandlers } from './provider.js' import { registerAgentIpcHandlers, cleanupAgent } from './agent.js' import { registerSkillsIpcHandlers } from './skills.js' import { registerHubIpcHandlers, cleanupHub, initializeHub } from './hub.js' import { registerProfileIpcHandlers } from './profile.js' +import { registerProviderIpcHandlers } from 
'./provider.js' /** * Register all IPC handlers. @@ -20,6 +22,7 @@ export function registerAllIpcHandlers(): void { registerAgentIpcHandlers() registerSkillsIpcHandlers() registerProfileIpcHandlers() + registerProviderIpcHandlers() } /** diff --git a/apps/desktop/electron/ipc/provider.ts b/apps/desktop/electron/ipc/provider.ts new file mode 100644 index 00000000..3f31dc9c --- /dev/null +++ b/apps/desktop/electron/ipc/provider.ts @@ -0,0 +1,312 @@ +/** + * Provider IPC handlers for Electron main process. + * + * Manages LLM provider listing, status checking, and switching. + * Mirrors the CLI `/provider` command functionality. + */ +import { ipcMain } from 'electron' +import { getCurrentHub } from './hub.js' +import { + getProviderList, + getAvailableProviders, + getCurrentProvider, + getProviderMeta, + isProviderAvailable, + getLoginInstructions, + type ProviderInfo, +} from '../../../../src/agent/providers/index.js' +import { + readClaudeCliCredentials, + readCodexCliCredentials, +} from '../../../../src/agent/providers/oauth/cli-credentials.js' +import { credentialManager } from '../../../../src/agent/credentials.js' + +/** + * Provider info returned to renderer (matches ProviderInfo from registry). + */ +export interface ProviderStatus { + id: string + name: string + authMethod: 'api-key' | 'oauth' + available: boolean + configured: boolean + current: boolean + defaultModel: string + models: string[] + loginUrl?: string + loginCommand?: string + loginInstructions?: string +} + +/** + * Current provider/model info returned to renderer. + */ +export interface CurrentProviderInfo { + provider: string + model: string | undefined + providerName: string | undefined + available: boolean +} + +/** + * Get the default agent from Hub. + */ +function getDefaultAgent() { + const hub = getCurrentHub() + if (!hub) return null + + const agentIds = hub.listAgents() + if (agentIds.length === 0) return null + + return hub.getAgent(agentIds[0]) ?? 
null +} + +/** + * Register all Provider-related IPC handlers. + */ +export function registerProviderIpcHandlers(): void { + /** + * List all providers with their status. + * This is the main listing function, similar to CLI `/provider` command. + */ + ipcMain.handle('provider:list', async (): Promise => { + const providers = getProviderList() + + return providers.map((p: ProviderInfo) => ({ + id: p.id, + name: p.name, + authMethod: p.authMethod, + available: p.available, + configured: p.configured, + current: p.current, + defaultModel: p.defaultModel, + models: p.models, + loginUrl: p.loginUrl, + loginCommand: p.loginCommand, + loginInstructions: getLoginInstructions(p.id), + })) + }) + + /** + * List only available (configured) providers. + */ + ipcMain.handle('provider:listAvailable', async (): Promise => { + const providers = getAvailableProviders() + + return providers.map((p: ProviderInfo) => ({ + id: p.id, + name: p.name, + authMethod: p.authMethod, + available: p.available, + configured: p.configured, + current: p.current, + defaultModel: p.defaultModel, + models: p.models, + loginUrl: p.loginUrl, + loginCommand: p.loginCommand, + loginInstructions: getLoginInstructions(p.id), + })) + }) + + /** + * Get current provider and model from the active agent. 
+ */ + ipcMain.handle('provider:current', async (): Promise => { + const agent = getDefaultAgent() + + if (agent) { + // Get from actual agent instance + const info = agent.getProviderInfo() + const meta = getProviderMeta(info.provider) + + return { + provider: info.provider, + model: info.model, + providerName: meta?.name, + available: isProviderAvailable(info.provider), + } + } + + // Fallback to credentials default + const defaultProvider = getCurrentProvider() + const meta = getProviderMeta(defaultProvider) + + return { + provider: defaultProvider, + model: meta?.defaultModel, + providerName: meta?.name, + available: isProviderAvailable(defaultProvider), + } + }) + + /** + * Switch the agent to a different provider and/or model. + */ + ipcMain.handle( + 'provider:set', + async (_event, providerId: string, modelId?: string): Promise<{ ok: boolean; provider?: string; model?: string; error?: string }> => { + const agent = getDefaultAgent() + + if (!agent) { + return { ok: false, error: 'No agent available' } + } + + // Validate provider exists + const meta = getProviderMeta(providerId) + if (!meta) { + return { ok: false, error: `Unknown provider: ${providerId}` } + } + + // Check if provider is available + if (!isProviderAvailable(providerId)) { + const instructions = getLoginInstructions(providerId) + return { + ok: false, + error: `Provider "${providerId}" is not configured.\n${instructions}`, + } + } + + // Validate model if specified + if (modelId && !meta.models.includes(modelId)) { + return { + ok: false, + error: `Model "${modelId}" is not available for provider "${providerId}". Available: ${meta.models.join(', ')}`, + } + } + + try { + const result = agent.setProvider(providerId, modelId) + console.log(`[IPC] Provider switched to: ${result.provider}, model: ${result.model}`) + + return { + ok: true, + provider: result.provider, + model: result.model, + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + console.error(`[IPC] Failed to switch provider: ${message}`) + return { ok: false, error: message } + } + } + ) + + /** + * Get metadata for a specific provider. + */ + ipcMain.handle('provider:getMeta', async (_event, providerId: string) => { + const meta = getProviderMeta(providerId) + if (!meta) { + return { error: `Unknown provider: ${providerId}` } + } + + return { + id: meta.id, + name: meta.name, + authMethod: meta.authMethod, + defaultModel: meta.defaultModel, + models: meta.models, + loginUrl: meta.loginUrl, + loginCommand: meta.loginCommand, + available: isProviderAvailable(providerId), + loginInstructions: getLoginInstructions(providerId), + } + }) + + /** + * Check if a specific provider is available (has valid credentials). + */ + ipcMain.handle('provider:isAvailable', async (_event, providerId: string): Promise => { + return isProviderAvailable(providerId) + }) + + /** + * Save API key for a provider to credentials.json5. + * After saving, the provider should become available. + */ + ipcMain.handle( + 'provider:saveApiKey', + async (_event, providerId: string, apiKey: string): Promise<{ ok: boolean; error?: string }> => { + try { + // Validate provider exists and uses API key auth + const meta = getProviderMeta(providerId) + if (!meta) { + return { ok: false, error: `Unknown provider: ${providerId}` } + } + if (meta.authMethod !== 'api-key') { + return { ok: false, error: `Provider "${providerId}" uses ${meta.authMethod} authentication, not API key` } + } + + // Save the API key + credentialManager.setLlmProviderApiKey(providerId, apiKey) + console.log(`[IPC] API key saved for provider: ${providerId}`) + + return { ok: true } + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + console.error(`[IPC] Failed to save API key: ${message}`) + return { ok: false, error: message } + } + } + ) + + /** + * Import OAuth credentials from CLI tools (claude-code, codex). 
+ * Reads from CLI credential storage and saves to credentials.json5. + */ + ipcMain.handle( + 'provider:importOAuth', + async (_event, providerId: string): Promise<{ ok: boolean; expiresAt?: number; error?: string }> => { + try { + const meta = getProviderMeta(providerId) + if (!meta) { + return { ok: false, error: `Unknown provider: ${providerId}` } + } + if (meta.authMethod !== 'oauth') { + return { ok: false, error: `Provider "${providerId}" does not use OAuth authentication` } + } + + // Read credentials from CLI tool + if (providerId === 'claude-code') { + const creds = readClaudeCliCredentials() + if (!creds) { + return { ok: false, error: 'No Claude Code credentials found. Run "claude login" first.' } + } + if (creds.expires <= Date.now()) { + return { ok: false, error: 'Claude Code credentials have expired. Run "claude login" again.' } + } + + // Save to credentials.json5 + const token = creds.type === 'oauth' ? creds.access : creds.token + const refreshToken = creds.type === 'oauth' ? creds.refresh : undefined + credentialManager.setLlmProviderOAuthToken(providerId, token, refreshToken, creds.expires) + console.log(`[IPC] OAuth credentials imported for: ${providerId}`) + + return { ok: true, expiresAt: creds.expires } + } + + if (providerId === 'openai-codex') { + const creds = readCodexCliCredentials() + if (!creds) { + return { ok: false, error: 'No Codex credentials found. Run "codex login" first.' } + } + if (creds.expires <= Date.now()) { + return { ok: false, error: 'Codex credentials have expired. Run "codex login" again.' } + } + + // Save to credentials.json5 + credentialManager.setLlmProviderOAuthToken(providerId, creds.access, creds.refresh, creds.expires) + console.log(`[IPC] OAuth credentials imported for: ${providerId}`) + + return { ok: true, expiresAt: creds.expires } + } + + return { ok: false, error: `OAuth import not supported for provider: ${providerId}` } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + console.error(`[IPC] Failed to import OAuth credentials: ${message}`) + return { ok: false, error: message } + } + } + ) +} diff --git a/apps/desktop/electron/preload.ts b/apps/desktop/electron/preload.ts index f554ef4b..16093ac0 100644 --- a/apps/desktop/electron/preload.ts +++ b/apps/desktop/electron/preload.ts @@ -44,6 +44,27 @@ export interface ProfileData { userContent: string | undefined } +export interface ProviderStatus { + id: string + name: string + authMethod: 'api-key' | 'oauth' + available: boolean + configured: boolean + current: boolean + defaultModel: string + models: string[] + loginUrl?: string + loginCommand?: string + loginInstructions?: string +} + +export interface CurrentProviderInfo { + provider: string + model: string | undefined + providerName: string | undefined + available: boolean +} + // Local chat event types (for direct IPC communication without Gateway) export interface LocalChatEvent { agentId: string @@ -134,6 +155,29 @@ const electronAPI = { updateUser: (content: string) => ipcRenderer.invoke('profile:updateUser', content), }, + // Provider management + provider: { + /** List all providers with their status */ + list: (): Promise => ipcRenderer.invoke('provider:list'), + /** List only available (configured) providers */ + listAvailable: (): Promise => ipcRenderer.invoke('provider:listAvailable'), + /** Get current provider and model from the active agent */ + current: (): Promise => ipcRenderer.invoke('provider:current'), + /** Switch the agent to a different provider and/or model */ + set: (providerId: string, modelId?: string): Promise<{ ok: boolean; provider?: string; model?: string; error?: string }> => + ipcRenderer.invoke('provider:set', providerId, modelId), + /** Get metadata for a specific provider */ + getMeta: (providerId: string) => ipcRenderer.invoke('provider:getMeta', providerId), + /** Check if a specific provider is available */ + isAvailable: (providerId: string): Promise => 
ipcRenderer.invoke('provider:isAvailable', providerId), + /** Save API key for a provider */ + saveApiKey: (providerId: string, apiKey: string): Promise<{ ok: boolean; error?: string }> => + ipcRenderer.invoke('provider:saveApiKey', providerId, apiKey), + /** Import OAuth credentials from CLI tools (claude-code, codex) */ + importOAuth: (providerId: string): Promise<{ ok: boolean; expiresAt?: number; error?: string }> => + ipcRenderer.invoke('provider:importOAuth', providerId), + }, + // Local chat (direct IPC, no Gateway required) localChat: { /** Subscribe to agent events for local direct chat */ diff --git a/apps/desktop/src/components/api-key-dialog.tsx b/apps/desktop/src/components/api-key-dialog.tsx new file mode 100644 index 00000000..672cc9a8 --- /dev/null +++ b/apps/desktop/src/components/api-key-dialog.tsx @@ -0,0 +1,121 @@ +import { useState } from 'react' +import { + Dialog, + DialogContent, + DialogHeader, + DialogFooter, + DialogTitle, + DialogDescription, +} from '@multica/ui/components/ui/dialog' +import { Button } from '@multica/ui/components/ui/button' +import { Input } from '@multica/ui/components/ui/input' +import { Label } from '@multica/ui/components/ui/label' +import { HugeiconsIcon } from '@hugeicons/react' +import { Loading03Icon, Key01Icon } from '@hugeicons/core-free-icons' + +interface ApiKeyDialogProps { + open: boolean + onOpenChange: (open: boolean) => void + providerId: string + providerName: string + onSuccess?: () => void +} + +export function ApiKeyDialog({ + open, + onOpenChange, + providerId, + providerName, + onSuccess, +}: ApiKeyDialogProps) { + const [apiKey, setApiKey] = useState('') + const [saving, setSaving] = useState(false) + const [error, setError] = useState(null) + + const handleSave = async () => { + if (!apiKey.trim()) { + setError('API key is required') + return + } + + setSaving(true) + setError(null) + + try { + const result = await window.electronAPI.provider.saveApiKey(providerId, apiKey.trim()) + if 
(result.ok) { + setApiKey('') + onOpenChange(false) + onSuccess?.() + } else { + setError(result.error ?? 'Failed to save API key') + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + setError(message) + } finally { + setSaving(false) + } + } + + const handleClose = (isOpen: boolean) => { + if (!isOpen) { + setApiKey('') + setError(null) + } + onOpenChange(isOpen) + } + + return ( + + + + + + Configure {providerName} + + + Enter your API key to enable {providerName}. The key will be saved securely in your credentials file. + + + +
+
+ + setApiKey(e.target.value)} + placeholder="sk-..." + onKeyDown={(e) => { + if (e.key === 'Enter' && !saving) { + handleSave() + } + }} + /> + {error && ( +

{error}

+ )} +
+ +

+ Your API key is stored locally in ~/.super-multica/credentials.json5 +

+
+ + + + + +
+
+ ) +} + +export default ApiKeyDialog diff --git a/apps/desktop/src/components/oauth-dialog.tsx b/apps/desktop/src/components/oauth-dialog.tsx new file mode 100644 index 00000000..e6573cf0 --- /dev/null +++ b/apps/desktop/src/components/oauth-dialog.tsx @@ -0,0 +1,146 @@ +import { useState } from 'react' +import { + Dialog, + DialogContent, + DialogHeader, + DialogFooter, + DialogTitle, + DialogDescription, +} from '@multica/ui/components/ui/dialog' +import { Button } from '@multica/ui/components/ui/button' +import { HugeiconsIcon } from '@hugeicons/react' +import { Loading03Icon, CommandLineIcon, RefreshIcon, Tick02Icon } from '@hugeicons/core-free-icons' + +interface OAuthDialogProps { + open: boolean + onOpenChange: (open: boolean) => void + providerId: string + providerName: string + loginCommand?: string + onSuccess?: () => void +} + +export function OAuthDialog({ + open, + onOpenChange, + providerId, + providerName, + loginCommand, + onSuccess, +}: OAuthDialogProps) { + const [importing, setImporting] = useState(false) + const [error, setError] = useState(null) + const [success, setSuccess] = useState(false) + const [expiresAt, setExpiresAt] = useState(null) + + const handleImport = async () => { + setImporting(true) + setError(null) + setSuccess(false) + + try { + const result = await window.electronAPI.provider.importOAuth(providerId) + if (result.ok) { + setSuccess(true) + setExpiresAt(result.expiresAt ?? null) + // Auto-close after a short delay + setTimeout(() => { + onOpenChange(false) + onSuccess?.() + }, 1500) + } else { + setError(result.error ?? 'Failed to import credentials') + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + setError(message) + } finally { + setImporting(false) + } + } + + const handleClose = (isOpen: boolean) => { + if (!isOpen) { + setError(null) + setSuccess(false) + setExpiresAt(null) + } + onOpenChange(isOpen) + } + + const formatExpiry = (timestamp: number) => { + const remaining = timestamp - Date.now() + if (remaining <= 0) return 'expired' + const hours = Math.floor(remaining / (60 * 60 * 1000)) + const minutes = Math.floor((remaining % (60 * 60 * 1000)) / (60 * 1000)) + if (hours > 0) return `${hours}h ${minutes}m` + return `${minutes}m` + } + + return ( + + + + + + Configure {providerName} + + + {providerName} uses OAuth authentication. Please log in via the command line first. + + + +
+ {/* Login instructions */} +
+

+ 1. Open your terminal and run: +

+
+ {loginCommand ?? `${providerId} login`} +
+

+ 2. Complete the login process in your browser +

+

+ 3. Click "Refresh" below to import your credentials +

+
+ + {/* Status messages */} + {error && ( +
+ {error} +
+ )} + + {success && ( +
+ + + Credentials imported successfully! + {expiresAt && ` (expires in ${formatExpiry(expiresAt)})`} + +
+ )} +
+ + + + + +
+
+ ) +} + +export default OAuthDialog diff --git a/apps/desktop/src/hooks/use-local-chat.ts b/apps/desktop/src/hooks/use-local-chat.ts index caacf681..ebe19d0c 100644 --- a/apps/desktop/src/hooks/use-local-chat.ts +++ b/apps/desktop/src/hooks/use-local-chat.ts @@ -6,6 +6,7 @@ */ import { useState, useEffect, useCallback, useRef } from 'react' import { useMessagesStore } from '@multica/store' +import type { ContentBlock } from '@multica/sdk' interface UseLocalChatOptions { agentId: string @@ -45,7 +46,15 @@ export function useLocalChat({ agentId }: UseLocalChatOptions): UseLocalChatRetu try { const result = await window.electronAPI.localChat.getHistory(agentId) if (result.messages && result.messages.length > 0) { - useMessagesStore.getState().loadMessages(result.messages) + // Normalize: IPC may return content as string, store expects ContentBlock[] + useMessagesStore.getState().loadMessages( + result.messages.map((m: Record) => ({ + ...m, + content: typeof m.content === 'string' + ? (m.content ? [{ type: 'text' as const, text: m.content }] : []) + : (m.content ?? 
[]), + })) as import('@multica/store').Message[] + ) } } catch { // History load is best-effort @@ -74,18 +83,17 @@ export function useLocalChat({ agentId }: UseLocalChatOptions): UseLocalChatRetu if (agentEvent.type === 'message_start') { currentStreamRef.current = streamId store.startStream(streamId, agentId) - // Extract initial text if any - const text = extractTextFromAgentEvent(agentEvent) - if (text) store.appendStream(streamId, text) + const content = extractContentFromAgentEvent(agentEvent) + if (content.length) store.appendStream(streamId, content) } else if (agentEvent.type === 'message_update') { - const text = extractTextFromAgentEvent(agentEvent) - if (text && currentStreamRef.current) { - store.appendStream(currentStreamRef.current, text) + const content = extractContentFromAgentEvent(agentEvent) + if (content.length && currentStreamRef.current) { + store.appendStream(currentStreamRef.current, content) } } else if (agentEvent.type === 'message_end') { - const text = extractTextFromAgentEvent(agentEvent) + const content = extractContentFromAgentEvent(agentEvent) if (currentStreamRef.current) { - store.endStream(currentStreamRef.current, text) + store.endStream(currentStreamRef.current, content) currentStreamRef.current = null } setIsLoading(false) @@ -131,14 +139,9 @@ export function useLocalChat({ agentId }: UseLocalChatOptions): UseLocalChatRetu } } -/** - * Extract text content from AgentEvent message. - * Same logic as @multica/sdk extractTextFromEvent. 
- */ -function extractTextFromAgentEvent(event: { message?: { content?: Array<{ type: string; text?: string }> } }): string { - if (!event.message?.content) return '' - return event.message.content - .filter((c): c is { type: 'text'; text: string } => c.type === 'text' && !!c.text) - .map((c) => c.text) - .join('') +/** Extract content blocks from AgentEvent message */ +function extractContentFromAgentEvent(event: { message?: { content?: unknown } }): ContentBlock[] { + if (!event.message?.content) return [] + const content = event.message.content + return Array.isArray(content) ? content as ContentBlock[] : [] } diff --git a/apps/desktop/src/hooks/use-provider.ts b/apps/desktop/src/hooks/use-provider.ts new file mode 100644 index 00000000..99cae540 --- /dev/null +++ b/apps/desktop/src/hooks/use-provider.ts @@ -0,0 +1,101 @@ +/** + * Hook for managing LLM providers in the Desktop App. + * + * Provides functionality similar to CLI `/provider` command: + * - List all providers with status + * - Get current provider/model + * - Switch provider/model + */ +import { useState, useEffect, useCallback } from 'react' + +// Types are defined in electron-env.d.ts and available globally + +interface UseProviderReturn { + /** All providers with their status */ + providers: ProviderStatus[] + /** Only available (configured) providers */ + availableProviders: ProviderStatus[] + /** Current provider and model info */ + current: CurrentProviderInfo | null + /** Loading state */ + loading: boolean + /** Error message if any */ + error: string | null + /** Refresh provider list and current status */ + refresh: () => Promise + /** Switch to a different provider (and optionally model) */ + setProvider: (providerId: string, modelId?: string) => Promise<{ ok: boolean; error?: string }> + /** Get metadata for a specific provider */ + getProviderMeta: (providerId: string) => ProviderStatus | undefined +} + +export function useProvider(): UseProviderReturn { + const [providers, 
setProviders] = useState([]) + const [current, setCurrent] = useState(null) + const [loading, setLoading] = useState(true) + const [error, setError] = useState(null) + + const refresh = useCallback(async () => { + setLoading(true) + setError(null) + + try { + const [providerList, currentInfo] = await Promise.all([ + window.electronAPI.provider.list(), + window.electronAPI.provider.current(), + ]) + + setProviders(providerList) + setCurrent(currentInfo) + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + setError(message) + console.error('[useProvider] Failed to load providers:', message) + } finally { + setLoading(false) + } + }, []) + + // Load providers on mount + useEffect(() => { + refresh() + }, [refresh]) + + const setProvider = useCallback(async (providerId: string, modelId?: string) => { + setError(null) + + try { + const result = await window.electronAPI.provider.set(providerId, modelId) + + if (result.ok) { + // Refresh to update current status + await refresh() + return { ok: true } + } else { + setError(result.error ?? 'Unknown error') + return { ok: false, error: result.error } + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + setError(message) + return { ok: false, error: message } + } + }, [refresh]) + + const getProviderMeta = useCallback((providerId: string) => { + return providers.find((p) => p.id === providerId) + }, [providers]) + + const availableProviders = providers.filter((p) => p.available) + + return { + providers, + availableProviders, + current, + loading, + error, + refresh, + setProvider, + getProviderMeta, + } +} diff --git a/apps/desktop/src/pages/home.tsx b/apps/desktop/src/pages/home.tsx index ba2767f0..9019a3ae 100644 --- a/apps/desktop/src/pages/home.tsx +++ b/apps/desktop/src/pages/home.tsx @@ -1,4 +1,4 @@ -import { useState, useEffect } from 'react' +import { useState, useEffect, useRef } from 'react' import { useNavigate } from 'react-router-dom' import { Button } from '@multica/ui/components/ui/button' import { HugeiconsIcon } from '@hugeicons/react' @@ -8,17 +8,52 @@ import { Loading03Icon, AlertCircleIcon, Edit02Icon, + ArrowDown01Icon, + Tick02Icon, + Alert02Icon, } from '@hugeicons/core-free-icons' import { ConnectionQRCode } from '../components/qr-code' import { DeviceList } from '../components/device-list' import { AgentSettingsDialog } from '../components/agent-settings-dialog' +import { ApiKeyDialog } from '../components/api-key-dialog' +import { OAuthDialog } from '../components/oauth-dialog' import { useHub } from '../hooks/use-hub' +import { useProvider } from '../hooks/use-provider' export default function HomePage() { const navigate = useNavigate() const { hubInfo, agents, loading, error } = useHub() + const { providers, current, setProvider, refresh, loading: providerLoading } = useProvider() const [settingsOpen, setSettingsOpen] = useState(false) const [agentName, setAgentName] = useState() + const [providerDropdownOpen, setProviderDropdownOpen] = useState(false) + const [switching, setSwitching] = useState(false) + const [apiKeyDialogOpen, setApiKeyDialogOpen] = useState(false) + const [oauthDialogOpen, 
setOauthDialogOpen] = useState(false) + const [selectedProvider, setSelectedProvider] = useState<{ + id: string + name: string + authMethod: 'api-key' | 'oauth' + loginCommand?: string + } | null>(null) + const dropdownRef = useRef(null) + + // Close dropdown when clicking outside + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if (dropdownRef.current && !dropdownRef.current.contains(event.target as Node)) { + setProviderDropdownOpen(false) + } + } + + if (providerDropdownOpen) { + document.addEventListener('mousedown', handleClickOutside) + } + + return () => { + document.removeEventListener('mousedown', handleClickOutside) + } + }, [providerDropdownOpen]) // Load agent profile info useEffect(() => { @@ -151,6 +186,92 @@ export default function HomePage() {

{agentName || 'Unnamed Agent'}

+ {/* Provider Selector */} +
+

+ LLM Provider +

+ + + {/* Provider Dropdown - Compact Grid */} + {providerDropdownOpen && ( +
+
+ {providers.map((p) => ( + + ))} +
+
+ )} +
+ {/* Stats Grid */}
@@ -167,20 +288,6 @@ export default function HomePage() {

{connectionState}

-
-

- Active Agents -

-

{hubInfo?.agentCount ?? 0}

-
-
-

- Primary Agent -

-

- {primaryAgent?.id ?? 'None'} -

-
@@ -194,6 +301,43 @@ export default function HomePage() { {/* Agent Settings Dialog */} + {/* API Key Dialog */} + {selectedProvider && selectedProvider.authMethod === 'api-key' && ( + { + // Refresh provider list and switch to the newly configured provider + await refresh() + const result = await setProvider(selectedProvider.id) + if (!result.ok) { + console.error('Failed to switch provider:', result.error) + } + }} + /> + )} + + {/* OAuth Dialog */} + {selectedProvider && selectedProvider.authMethod === 'oauth' && ( + { + // Refresh provider list and switch to the newly configured provider + await refresh() + const result = await setProvider(selectedProvider.id) + if (!result.ok) { + console.error('Failed to switch provider:', result.error) + } + }} + /> + )} + {/* Bottom: Actions */}
@@ -213,11 +357,9 @@ export default function HomePage() { variant="ghost" size="sm" className="text-muted-foreground gap-1.5" - disabled > Connect to Remote Agent - (Coming soon)
diff --git a/apps/web/package.json b/apps/web/package.json index 95b1b67c..0badea42 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -3,7 +3,7 @@ "version": "0.1.0", "private": true, "scripts": { - "dev": "next dev --port 3001", + "dev": "next dev --port 3001 --experimental-https", "build": "next build", "start": "next start", "lint": "eslint" diff --git a/apps/web/public/logo-192x192.png b/apps/web/public/logo-192x192.png index 0ebf6abf..ba718e6b 100644 Binary files a/apps/web/public/logo-192x192.png and b/apps/web/public/logo-192x192.png differ diff --git a/apps/web/public/logo-512x512.png b/apps/web/public/logo-512x512.png index 13c5558e..d21be718 100644 Binary files a/apps/web/public/logo-512x512.png and b/apps/web/public/logo-512x512.png differ diff --git a/docs/product-capabilities.md b/docs/product-capabilities.md new file mode 100644 index 00000000..2dd6dd46 --- /dev/null +++ b/docs/product-capabilities.md @@ -0,0 +1,847 @@ +# Super Multica Product Capabilities + +> This document is the single source of truth for all product capabilities. It describes **what exists**, not how to design or how to use it. All subsequent documents (user journeys, UI design, copywriting, design systems) should reference this document. + +--- + +## 1. Product Definition + +**Super Multica** is a distributed AI Agent framework. Users can create, customize, and deploy AI Agents with persistent memory, fine-grained capability control, and multi-provider LLM support. Agents run locally on the user's machine; remote access is optional. + +**Core architecture**: + +``` +Desktop App (standalone, recommended) + └─ Hub (embedded, manages agents) + └─ Agent Engine (LLM execution, sessions, skills, tools) + └─ (Optional) Gateway connection → remote clients (web/mobile) +``` + +--- + +## 2. 
User Roles + +| Role | Definition | Platform | Authority | +|------|-----------|----------|-----------| +| **Owner** | Runs the Desktop app, owns Hub and Agents | Desktop (Electron) | Full: create/delete agents, approve devices, configure providers, manage profiles/skills | +| **Collaborator** | Connects to Owner's Agent via Gateway | Web / Mobile | Limited: chat with agent, view message history. No agent management. | + +There is no formal role/permission system. The Owner is implicit admin by virtue of running the Hub. + +--- + +## 3. Functional Modules + +### 3.1 Agent Engine + +The core execution unit. An Agent receives user messages, calls an LLM, executes tools, and returns responses. + +#### 3.1.1 Agent Lifecycle + +| State | Description | +|-------|-------------| +| Created | AsyncAgent instantiated, assigned UUIDv7 session ID | +| Idle | Awaiting `write()` call (user message) | +| Running | Processing message: LLM call → tool execution → response | +| Closed | Agent terminated, no further messages accepted | + +Each `write()` call is queued. Messages are processed sequentially (one at a time). + +#### 3.1.2 Agent Execution Loop + +1. Receive user message via `write(content)` +2. Resolve API credentials (with auth profile rotation) +3. Build/update system prompt from profile +4. Call LLM provider with message history +5. If LLM requests tool calls → execute tools → feed results back to LLM → repeat +6. Save all messages to session storage +7. Check context window utilization → compact if needed +8. Emit events to subscribers (streaming to UI) + +#### 3.1.3 Auth Profile Rotation + +When an API call fails, the system classifies the error and may rotate to a different API key: + +| Error Type | Examples | Rotates? 
| +|-----------|----------|----------| +| `auth` | 401, 403, invalid key | Yes | +| `rate_limit` | 429, rate limit exceeded | Yes | +| `billing` | Out of credits, quota exceeded | Yes | +| `timeout` | Connection timeout | Yes | +| `format` | 400, malformed request | No | +| `unknown` | Other errors | No | + +Failed profiles enter cooldown. Rotation continues until success or all profiles exhausted. + +Tracking file: `~/.super-multica/.auth-profiles/usage-stats.json` + +#### 3.1.4 Subagent Spawning + +Agents can spawn child agents via the `sessions_spawn` tool: + +- Subagents get isolated sessions +- Tool restrictions: `sessions_spawn` denied (no nested spawning) +- System prompt mode: `minimal` or `none` +- Parameters: task (required), label, model override, cleanup policy (`delete` or `keep`), timeout +- Results announced back to parent automatically + +#### 3.1.5 Agent Configuration Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `profileId` | string | none | Agent profile to load | +| `provider` | string | `kimi-coding` | LLM provider | +| `model` | string | provider default | Model within provider | +| `reasoningMode` | `off` / `on` / `stream` | `off` | Display thinking/reasoning | +| `compactionMode` | `count` / `tokens` / `summary` | `tokens` | Context compaction strategy | +| `contextWindowTokens` | number | 200,000 | Override model's context window | +| `enableSkills` | boolean | `true` | Enable skills system | + +--- + +### 3.2 LLM Providers + +Ten providers supported. Two auth methods: OAuth (CLI login) and API Key. 
+ +| ID | Display Name | Auth | Default Model | Available Models | +|----|-------------|------|---------------|------------------| +| `claude-code` | Claude Code | OAuth | claude-opus-4-5 | claude-opus-4-5, claude-sonnet-4-5, claude-haiku-4-5 | +| `openai-codex` | Codex | OAuth | gpt-5.2 | gpt-5.2, gpt-5.2-codex, gpt-5.1-codex, gpt-5.1-codex-mini, gpt-5.1-codex-max | +| `anthropic` | Anthropic | API Key | claude-sonnet-4-5 | claude-opus-4-5, claude-sonnet-4-5, claude-haiku-4-5 | +| `openai` | OpenAI | API Key | gpt-4o | gpt-4o, gpt-4o-mini, o1, o1-mini | +| `kimi-coding` | Kimi Code | API Key | kimi-k2-thinking | kimi-k2-thinking, k2p5 | +| `google` | Google AI | API Key | gemini-2.0-flash | gemini-2.0-flash, gemini-1.5-pro | +| `groq` | Groq | API Key | llama-3.3-70b-versatile | llama-3.3-70b-versatile, mixtral-8x7b-32768 | +| `mistral` | Mistral | API Key | mistral-large-latest | mistral-large-latest, codestral-latest | +| `xai` | xAI (Grok) | API Key | grok-beta | grok-beta, grok-vision-beta | +| `openrouter` | OpenRouter | API Key | anthropic/claude-3.5-sonnet | anthropic/claude-3.5-sonnet, openai/gpt-4o | + +**Default provider fallback**: config > credentials.json5 > `kimi-coding` + +**OAuth providers** require external CLI login (`claude login` / `codex login`). + +**API Key providers** are configured in `~/.super-multica/credentials.json5`. + +**Multiple API keys per provider** are supported via auth profiles (e.g., `openai`, `openai:backup`). The system rotates between them on failure. + +--- + +### 3.3 Tools + +Tools are capabilities the Agent can invoke during execution. 
+ +#### 3.3.1 Built-in Tools + +| Tool | Category | Description | +|------|----------|-------------| +| `read` | File | Read file contents (with optional offset/limit) | +| `write` | File | Create or overwrite files | +| `edit` | File | Make precise edits to existing files | +| `glob` | File | Find files by pattern (default limit: 100, max: 1000) | +| `exec` | Runtime | Run shell commands (auto-backgrounds after 10s) | +| `process` | Runtime | Manage background processes (start, stop, list, output) | +| `web_search` | Web | Search the web (Brave or Perplexity provider) | +| `web_fetch` | Web | Fetch and extract URL content (markdown/text, max 50k chars, 15min cache) | +| `memory_get` | Memory | Read from agent's persistent memory | +| `memory_set` | Memory | Write to agent's persistent memory (max 1MB per value) | +| `memory_list` | Memory | List memory entries (default limit: 100, max: 1000) | +| `memory_delete` | Memory | Delete memory entries | +| `sessions_spawn` | Subagent | Spawn a child agent for a specific task | + +#### 3.3.2 Tool Groups (shortcuts for policy) + +| Group | Tools Included | +|-------|---------------| +| `group:fs` | read, write, edit, glob | +| `group:runtime` | exec, process | +| `group:web` | web_search, web_fetch | +| `group:memory` | memory_get, memory_set, memory_delete, memory_list | +| `group:subagent` | sessions_spawn | +| `group:core` | read, write, edit, glob, exec, process, web_search, web_fetch | + +#### 3.3.3 Tool Policy System (3 layers) + +| Layer | Scope | Description | +|-------|-------|-------------| +| 1. Global | All agents | `allow` / `deny` lists (wildcard supported: `mem*`, `*`) | +| 2. Provider | Per LLM provider | Narrower restrictions per provider (e.g., deny `exec` for Google) | +| 3. Subagent | Child agents only | `sessions_spawn` denied by default | + +**Priority**: Deny always overrides Allow. Empty allow list = deny all. 
+ +#### 3.3.4 Exec Tool Details + +- Default yield timeout: 10,000ms (auto-backgrounds if not complete) +- Supports `timeoutMs` for hard kill (SIGTERM) +- Output includes: stdout+stderr, exitCode, truncation flag, process ID if backgrounded + +#### 3.3.5 Web Search Details + +- Brave provider: up to 10 results, country filtering, freshness filters (`pd`/`pw`/`pm`/`py`) +- Perplexity provider: AI-synthesized answers +- Default count: 5 results, 1 hour cache + +--- + +### 3.4 Profile System + +A Profile defines an Agent's identity, personality, knowledge, and configuration. + +#### 3.4.1 Profile File Structure + +``` +~/.super-multica/agent-profiles/{profileId}/ +├── soul.md # Identity: name, role, personality, behavior boundaries +├── user.md # User information: name, preferences, context +├── workspace.md # Workspace conventions, coding standards, project rules +├── memory.md # Long-term knowledge base (read by agent at startup) +├── config.json # Optional: provider, model, thinking level, tool policy +├── memory/ # Key-value persistent memory storage +│ ├── key1.json +│ └── key2.json +└── skills/ # Profile-specific skills (override global) + └── {skill-name}/ + └── SKILL.md +``` + +#### 3.4.2 Profile Config (config.json) + +```json +{ + "name": "Jarvis", + "style": "concise and direct", + "provider": "anthropic", + "model": "claude-sonnet-4-5", + "thinkingLevel": "medium", + "tools": { + "allow": ["group:fs", "web_fetch"], + "deny": ["exec"] + } +} +``` + +#### 3.4.3 Profile Operations + +| Operation | CLI | Desktop | +|-----------|-----|---------| +| List profiles | `multica profile list` | Via Hub info | +| Create profile | `multica profile new <id>` | - | +| Interactive setup | `multica profile setup <id>` | - | +| View profile | `multica profile show <id>` | - | +| Edit in file manager | `multica profile edit <id>` | - | +| Delete profile | `multica profile delete <id>` | - | + +**Profile ID rules**: alphanumeric, hyphens, underscores only.
+ +#### 3.4.4 System Prompt Composition + +The system prompt is built dynamically from profile files: + +| Section | Source | Mode: full | Mode: minimal | Mode: none | +|---------|--------|-----------|--------------|-----------| +| Identity | soul.md + config | Yes | Partial | Single line | +| User | user.md | On-demand | No | No | +| Workspace | workspace.md | Yes | No | No | +| Memory | memory.md | On-demand | No | No | +| Safety | Built-in constitution | Yes | Yes | Yes | +| Tools | Active tool list | Yes | Core only | No | +| Skills | Skill instructions | Yes | No | No | +| Runtime | OS, model, hostname | Yes | Essential | No | +| Subagent | Task context | If applicable | Yes | Yes | + +**Progressive disclosure**: soul.md, user.md, memory.md are loaded on-demand (not injected in full at startup) to save tokens. + +--- + +### 3.5 Memory System + +Agents can persistently store and recall information across sessions. + +#### 3.5.1 Storage + +- Location: `~/.super-multica/agent-profiles/{profileId}/memory/` +- Format: One JSON file per key +- Key rules: alphanumeric, underscore, dot, hyphen. Max 128 chars. +- Dots in keys are escaped as `__DOT__` in filenames +- Max value size: 1MB + +#### 3.5.2 Entry Format + +```json +{ + "value": "any JSON value", + "description": "optional human-readable description", + "createdAt": 1717689600000, + "updatedAt": 1717689600000 +} +``` + +#### 3.5.3 Memory Tools + +| Tool | Input | Output | +|------|-------|--------| +| `memory_get` | `{ key }` | `{ found, value?, description?, updatedAt? }` | +| `memory_set` | `{ key, value, description? }` | `{ success, error? }` | +| `memory_delete` | `{ key }` | `{ success, existed, error? }` | +| `memory_list` | `{ prefix?, limit? }` | `{ keys[], total, truncated }` | + +**Design principle**: Agents cannot "remember" mentally. All persistence must be file-based ("TEXT > BRAIN"). + +--- + +### 3.6 Skills System + +Skills are modular, self-contained capabilities defined via `SKILL.md` files. 
They extend what an Agent can do. + +#### 3.6.1 Skill File Format (SKILL.md) + +```yaml +--- +name: Skill Name +description: What this skill does +version: 1.0.0 +metadata: + emoji: "📝" + os: [darwin, linux] # Platform restriction (optional) + always: false # Skip eligibility checks (optional) + tags: [productivity, coding] + requires: + bins: [node, npm] # ALL must exist in PATH + anyBins: [python3, python] # At least ONE must exist + env: [OPENAI_API_KEY] # ALL must be set + config: [custom.setting] # Config paths must be truthy +--- +# Full markdown instructions follow... +``` + +#### 3.6.2 Skill Sources & Precedence + +| Source | Location | Precedence | +|--------|----------|-----------| +| Bundled | `skills/` in project | Lowest | +| Global (user-installed) | `~/.super-multica/skills/` | Medium | +| Profile-specific | `~/.super-multica/agent-profiles/{id}/skills/` | Highest (overrides) | + +Profile skills with the same ID completely replace global/bundled versions. + +#### 3.6.3 Bundled Skills + +| Skill | ID | Description | Requirements | +|-------|----|-------------|-------------| +| Git Commit Helper | `commit` | Create well-formatted conventional commits | `git` binary | +| Code Review | `code-review` | Structured code review with security focus | None | +| Profile Setup | `profile-setup` | Interactive wizard to personalize agent profile | None | +| Skill Creator | `skill-creator` | Create, edit, manage custom skills | None (always eligible) | + +#### 3.6.4 Eligibility Check Sequence + +1. Explicit disable in config → ineligible +2. Bundled + not in allowlist → ineligible +3. Platform mismatch (OS) → ineligible +4. `always: true` flag → eligible (skip remaining) +5. Missing required binary → ineligible +6. No alternative binary found → ineligible +7. Missing env var → ineligible +8. Missing config path → ineligible +9. All checks pass → eligible + +Returns human-readable failure reasons (e.g., "Required binary not found: git"). 
+ +#### 3.6.5 Skill Invocation + +- **User invocation**: `/skillname args` in interactive CLI +- **Model invocation**: Agent reads skill instructions from system prompt and follows them +- **Hot reload**: File watcher detects SKILL.md changes, reloads automatically (250ms debounce) + +#### 3.6.6 Skill Installation + +```bash +multica skills add owner/repo # Clone entire repository +multica skills add owner/repo/skill-name # Clone single skill +multica skills add owner/repo@branch # Specific branch/tag +multica skills add owner/repo -p my-agent # Install to profile +``` + +--- + +### 3.7 Session Management + +Sessions persist conversation history across interactions. + +#### 3.7.1 Session Storage + +- Location: `~/.super-multica/sessions/{sessionId}/session.jsonl` +- Format: JSON Lines (one JSON object per line) +- Session IDs: UUIDv7 (time-ordered) +- Each line is either a message entry, meta entry, or compaction entry + +#### 3.7.2 Message Format + +Messages follow the LLM API format: + +```json +{"type": "message", "role": "user", "content": [{"type": "text", "text": "Hello"}]} +{"type": "message", "role": "assistant", "content": [{"type": "text", "text": "Hi!"}, {"type": "tool_use", "id": "...", "name": "read", "input": {"path": "/foo"}}]} +{"type": "message", "role": "user", "content": [{"type": "tool_result", "tool_use_id": "...", "content": "file contents"}]} +``` + +#### 3.7.3 Session Metadata + +```json +{"type": "meta", "provider": "anthropic", "model": "claude-sonnet-4-5", "reasoningMode": "off", "contextWindowTokens": 200000} +``` + +#### 3.7.4 Context Window Management + +| Parameter | Value | Description | +|-----------|-------|-------------| +| Hard minimum | 16,000 tokens | Block execution below this | +| Warning threshold | 32,000 tokens | Warn if context window smaller | +| Default context | 200,000 tokens | Fallback if model unknown | +| Safety margin | 20% | Buffer for estimation inaccuracy | +| Compaction trigger | 80% utilization | Start 
compacting | +| Compaction target | 50% utilization | Target after compaction | +| Min keep messages | 10 | Never remove below this | +| Reserve tokens | 1,024 | Reserved for response generation | + +#### 3.7.5 Compaction Modes + +| Mode | Strategy | Speed | Quality | +|------|----------|-------|---------| +| `tokens` (default) | Remove oldest messages until reaching 50% target | Fast | Good (preserves recent context) | +| `count` | Remove oldest when count > 80, keep last 60 | Fastest | Adequate | +| `summary` | LLM generates incremental summary of removed messages | Slow (API call) | Best (preserves meaning) | + +#### 3.7.6 Session Operations + +| Operation | CLI Command | +|-----------|-------------| +| List sessions | `multica session list` | +| View session | `multica session show <id>` (supports partial ID) | +| Delete session | `multica session delete <id>` | +| Resume session | `multica --session <id> "continue..."` | + +--- + +### 3.8 Hub + +The Hub is the central coordinator. It manages agent lifecycle, routes messages, and handles device verification. + +#### 3.8.1 Responsibilities + +- Create, list, restore, close agents +- Persist agent metadata to disk (`~/.super-multica/agents/agents.json`) +- Route messages between local IPC and remote Gateway +- Handle device verification and whitelisting +- Process RPC requests from connected clients + +#### 3.8.2 Hub RPC Methods + +| Method | Description | Error Codes | +|--------|-------------|-------------| +| `verify` | Verify device with token | UNAUTHORIZED, REJECTED | +| `getAgentMessages` | Fetch message history (default: 50, offset: 0) | INVALID_PARAMS, AGENT_NOT_FOUND | +| `getHubInfo` | Get Hub ID and status | - | +| `listAgents` | List all agents | - | +| `createAgent` | Create new agent | - | +| `deleteAgent` | Delete agent | - | +| `updateGateway` | Update Gateway connection | - | + +#### 3.8.3 Hub Singleton + +One Hub per ecosystem. In Desktop mode, it's embedded in the Electron main process.
It generates a persistent Hub ID stored at `~/.super-multica/hub-id`. + +--- + +### 3.9 Gateway + +NestJS WebSocket server that enables remote client access to the Hub. + +#### 3.9.1 Purpose + +Bridges remote clients (web/mobile) to the Hub. Not needed for local Desktop use. + +#### 3.9.2 Connection Protocol + +- Transport: Socket.io +- Path: `/ws` +- Port: 3000 (default) + +#### 3.9.3 Timeouts + +| Parameter | Value | +|-----------|-------| +| Ping interval | 25 seconds | +| Ping timeout | 20 seconds | +| RPC default timeout | 10 seconds | +| Verify timeout | 30 seconds | +| Reconnect delay | 1 second | + +#### 3.9.4 Message Routing + +- Each message has `from` (sender device ID) and `to` (target device ID) +- Gateway validates: sender is registered, `from` matches socket, target exists +- Supports streaming via `StreamAction` (message_start, message_update, message_end, tool events) + +#### 3.9.5 Error Codes + +| Code | Meaning | +|------|---------| +| NOT_REGISTERED | Sender not registered | +| INVALID_MESSAGE | `from` field mismatch | +| DEVICE_NOT_FOUND | Target device not online | + +--- + +### 3.10 Device Pairing & Verification + +How remote devices (web/mobile) connect to the Owner's Hub. 
+ +#### 3.10.1 QR Code Generation (Desktop) + +The Desktop app generates a QR code containing: + +```json +{ + "type": "multica-connect", + "gateway": "http://localhost:3000", + "hubId": "uuid", + "agentId": "uuid", + "token": "random-uuid", + "expires": 1694000000000 +} +``` + +- Token: one-time use, random UUID +- Expiry: 30 seconds from generation +- Auto-refresh: new token generated when expired +- Also available as URL: `multica://connect?gateway=...&hub=...&agent=...&token=...&exp=...` + +#### 3.10.2 Connection Code Formats (accepted by client) + +| Format | Example | +|--------|---------| +| JSON | `{"type":"multica-connect","gateway":"..."}` | +| Base64 JSON | Base64-encoded JSON string | +| URL | `multica://connect?gateway=...&hub=...&agent=...&token=...&exp=...` | + +#### 3.10.3 Verification Flow + +``` +1. Mobile scans QR / pastes code +2. Client parses code, validates expiry +3. Client connects to Gateway via Socket.io +4. Gateway sends "registered" event +5. Client auto-sends "verify" RPC with token + device metadata +6. Hub validates token (one-time, checks expiry) +7. Hub triggers confirmation dialog on Desktop + - Shows: device name (parsed from User-Agent), device ID + - Options: "Allow" or "Reject" + - Timeout: 60 seconds (auto-reject) +8. If allowed: device added to whitelist, persisted to disk +9. If rejected: connection closed +``` + +#### 3.10.4 Device Whitelist + +- Location: `~/.super-multica/client-devices/whitelist.json` +- Format: + +```json +{ + "version": 1, + "devices": [{ + "deviceId": "uuid", + "agentId": "uuid", + "addedAt": 1694000000000, + "meta": { + "userAgent": "Mozilla/5.0...", + "platform": "Linux", + "language": "en-US" + } + }] +} +``` + +#### 3.10.5 Reconnection (whitelisted device) + +Whitelisted devices reconnect without needing a new token or user confirmation. Hub checks `isAllowed(deviceId)` and returns immediately. 
+ +#### 3.10.6 Device Management (Desktop) + +- View verified devices list with metadata +- Revoke individual devices (remove from whitelist) +- No fine-grained permissions (all-or-nothing access) + +#### 3.10.7 Security Model + +| Aspect | Detail | +|--------|--------| +| Token lifetime | 30 seconds | +| Token usage | One-time (deleted after consumption) | +| Token storage | In-memory only (lost on Hub restart) | +| Device ID | Browser: UUID in localStorage. Persistent until cleared. | +| Whitelist | Persisted to disk. Survives restarts. | +| Authorization | All verified devices have equal access | +| Message auth | Hub checks whitelist on every non-verify message | + +--- + +### 3.11 Credentials System + +#### 3.11.1 Files + +| File | Purpose | Permissions | +|------|---------|-------------| +| `~/.super-multica/credentials.json5` | LLM providers + tool API keys | 0o600 | +| `~/.super-multica/skills.env.json5` | Skill/plugin environment variables | 0o600 | + +Format: JSON5 (supports comments, trailing commas, unquoted keys). + +#### 3.11.2 credentials.json5 Structure + +```json5 +{ + version: 1, + llm: { + provider: "openai", // Default provider + providers: { + openai: { apiKey: "sk-...", model: "gpt-4o" }, + anthropic: { apiKey: "sk-ant-...", model: "claude-sonnet-4-5" }, + "openai:backup": { apiKey: "sk-..." }, // Auth profile for rotation + }, + order: { + openai: ["openai", "openai:backup"], // Rotation order + }, + }, + tools: { + brave: { apiKey: "brv-..." 
}, + perplexity: { apiKey: "pplx-...", model: "perplexity/sonar-pro" }, + }, +} +``` + +#### 3.11.3 skills.env.json5 Structure + +```json5 +{ + env: { + LINEAR_API_KEY: "lin-...", + GITHUB_TOKEN: "ghp_...", + }, +} +``` + +#### 3.11.4 Environment Variable Overrides + +| Variable | Purpose | +|----------|---------| +| `SMC_CREDENTIALS_PATH` | Override credentials.json5 path | +| `SMC_SKILLS_ENV_PATH` | Override skills.env.json5 path | +| `SMC_CREDENTIALS_DISABLE=1` | Disable credentials loading | + +--- + +## 4. Platform Details + +### 4.1 Desktop App (Primary) + +**Technology**: Electron + Vite + React 19 + +**Window**: 1200x800, context isolation enabled, node integration disabled + +#### 4.1.1 Pages + +| Route | Page | Purpose | +|-------|------|---------| +| `/` | Home | Hub status, QR code, provider selector, agent settings, device list | +| `/chat` | Chat | Message history, chat input, mode switcher (local/remote) | +| `/tools` | Tools | Tool listing and inspection | +| `/skills` | Skills | Skill listing and management | + +**Navigation**: Tab bar at top (Home, Chat, Tools, Skills) + +#### 4.1.2 Home Page Components + +| Component | Description | +|-----------|-------------| +| QR Code | Left side. Shows connection code with 30s countdown. Refresh/copy link buttons. | +| Hub Status | Right side. Hub ID, connection state indicator (green/yellow/red). | +| Agent Settings | Agent name (editable). | +| Provider Selector | Dropdown showing all providers with availability status. API Key dialog or OAuth dialog based on provider type. | +| Device List | Verified devices with name, platform, revoke button. | +| Open Chat | Button. Disabled if Hub not connected. | +| Connect to Remote Agent | Button. Navigate to remote agent connection. 
| + +#### 4.1.3 Chat Page Modes + +| Mode | Transport | When Used | +|------|-----------|-----------| +| Local Agent | IPC (Electron) | Desktop user talks directly to embedded agent | +| Remote Agent | WebSocket via Gateway | Desktop user connects to another Hub's agent | + +Mode switcher available at top of chat page. + +#### 4.1.4 Desktop IPC Channels + +| Channel | Direction | Purpose | +|---------|-----------|---------| +| `localChat:send` | Renderer → Main | Send message to agent | +| `localChat:subscribe` | Renderer → Main | Subscribe to agent events | +| `hub:device-confirm-request` | Main → Renderer | Show device confirmation dialog | +| `hub:device-confirm-response` | Renderer → Main | User's allow/reject decision | + +--- + +### 4.2 Web App + +**Technology**: Next.js 16 + App Router + +**Port**: 3001 + +**Features**: +- Always requires Gateway connection (no local agent) +- Uses shared `@multica/ui` Chat component +- PWA-capable (service worker, offline page) +- Responsive layout (mobile-first) +- Light/dark theme toggle + +**Page**: Single page rendering `<Chat />` component with `ConnectPrompt` for initial connection.
+ +--- + +### 4.3 Mobile App + +**Technology**: Expo + React Native + +**Status**: Demo/prototype (hardcoded mock messages) + +**Features**: +- QR code scanner for device pairing +- Keyboard-avoiding input bar +- Auto-expanding text input (max 120px) +- Auto-scroll to bottom on new messages + +--- + +### 4.4 CLI + +**Entry point**: `multica` (alias: `mu`) + +#### 4.4.1 Commands + +| Command | Description | +|---------|-------------| +| `multica` | Interactive chat mode (default) | +| `multica run "<prompt>"` | Non-interactive single prompt | +| `multica chat` | Explicit interactive mode | +| `multica session list/show/delete` | Session management | +| `multica profile list/new/setup/show/edit/delete` | Profile management | +| `multica skills list/status/install/add/remove` | Skill management | +| `multica tools list/groups/profiles` | Tool inspection | +| `multica credentials init/show/edit` | Credentials management | +| `multica dev [service]` | Development servers | + +#### 4.4.2 Interactive Mode Commands + +| Command | Description | +|---------|-------------| +| `/help` | Show help | +| `/exit` `/quit` `/q` | Exit | +| `/clear` | Clear session | +| `/session` | Show current session ID | +| `/new` | Start new session | +| `/multiline` | Toggle multi-line input mode | +| `/provider` | Show provider status | +| `/model [name]` | Switch model | +| `/{skillName} [args]` | Execute skill | + +**Features**: Autocomplete (Shift+Tab), status bar (session/provider/model), multi-line mode (end with `.`). + +#### 4.4.3 Development Servers + +| Service | Command | Port | +|---------|---------|------| +| Desktop (default) | `multica dev` | Electron window | +| Gateway | `multica dev gateway` | 3000 | +| Web | `multica dev web` | 3001 | +| All | `multica dev all` | 3000 + 3001 | + +--- + +## 5. UI Component Library + +Shared package: `@multica/ui`. Used by Desktop, Web, and Mobile.
+ +### 5.1 Chat Components + +| Component | Props | Description | +|-----------|-------|-------------| +| `Chat` | (none, uses stores) | Full chat view: connect prompt + message list + input | +| `ChatInput` | `onSubmit`, `disabled`, `placeholder` | Tiptap editor. Enter=send, Shift+Enter=newline, IME-safe | +| `ChatInputRef` | (imperative) | `getText()`, `setText()`, `focus()`, `clear()` | +| `MessageList` | `messages`, `streamingIds` | Renders messages with markdown, tool calls, streaming | +| `ConnectPrompt` | (none, uses stores) | QR scan + paste code UI for remote connection | +| `ChatSkeleton` | (none) | Loading skeleton | +| `ToolCallItem` | `message` | Tool execution display: status dot, label, subtitle, expandable results | + +### 5.2 Markdown Components + +| Component | Props | Description | +|-----------|-------|-------------| +| `Markdown` | `children`, `mode` (`minimal`/`full`) | Rendered markdown with syntax highlighting | +| `StreamingMarkdown` | `content`, `isStreaming`, `mode` | Incremental markdown with animated cursor | +| `CodeBlock` | (internal) | Syntax-highlighted code block with copy button | + +### 5.3 Base UI Components (Shadcn/UI) + +button, input, textarea, card, dialog, alert-dialog, dropdown-menu, select, combobox, badge, label, field, input-group, switch, skeleton, separator, sheet, sidebar, tooltip, sonner (toasts) + +### 5.4 Utility Components + +| Component | Description | +|-----------|-------------| +| `QRScannerView` | Camera-based QR scanner | +| `QRScannerSheet` | Sheet variant of QR scanner | +| `Spinner` | Animated loading spinner | +| `ThemeProvider` | Light/dark theme context | +| `ThemeToggle` | Theme switch button | + +--- + +## 6. 
Data Persistence Locations + +| Data | Location | Format | Lifetime | +|------|----------|--------|----------| +| Credentials | `~/.super-multica/credentials.json5` | JSON5 | User-managed | +| Skills env | `~/.super-multica/skills.env.json5` | JSON5 | User-managed | +| Agent profiles | `~/.super-multica/agent-profiles/{id}/` | MD + JSON | User-managed | +| Agent memory | `~/.super-multica/agent-profiles/{id}/memory/` | JSON per key | Agent-managed | +| Sessions | `~/.super-multica/sessions/{id}/session.jsonl` | JSONL | Until deleted | +| Agent records | `~/.super-multica/agents/agents.json` | JSON | Persistent | +| Hub ID | `~/.super-multica/hub-id` | Plain text UUID | Generated once | +| Device whitelist | `~/.super-multica/client-devices/whitelist.json` | JSON | Until revoked | +| Auth profile stats | `~/.super-multica/.auth-profiles/usage-stats.json` | JSON | Runtime tracking | +| Verification tokens | In-memory | Map | Lost on restart | +| Browser device ID | localStorage: `multica-device` | UUID string | Until cleared | +| Saved connection | localStorage: `multica-connection` | JSON | Until disconnected | + +--- + +## 7. 
Current Limitations + +| Area | Limitation | Notes | +|------|-----------|-------| +| Agent count | Desktop creates 1 primary agent on startup | Hub API supports multi-agent (`createAgent`/`listAgents`), but UI only shows one | +| Device permissions | All-or-nothing access | No per-device capability restrictions | +| Role system | No formal RBAC | Owner is implicit admin | +| Mobile app | Demo/prototype | Hardcoded mock data, no real agent connection | +| Offline web | PWA shell only | Cannot function without Gateway | +| Skill marketplace | No registry | Install via GitHub URL only | +| Real-time collaboration | Single agent, sequential messages | No concurrent message processing | +| File upload | Not supported | Agent can only read files on Owner's filesystem | + +--- + +*Document generated: 2026-02-05* +*Source: codebase analysis at commit fc6c3e3 on branch feat/mobile-pwa-optimization* diff --git a/package.json b/package.json index 1396639c..38c7ec1a 100644 --- a/package.json +++ b/package.json @@ -12,10 +12,10 @@ "multica": "tsx src/agent/cli/index.ts", "mu": "tsx src/agent/cli/index.ts", "dev": "tsx src/agent/cli/index.ts dev", - "dev:gateway": "tsx src/agent/cli/index.ts dev gateway", - "dev:console": "tsx src/agent/cli/index.ts dev console", - "dev:web": "tsx src/agent/cli/index.ts dev web", "dev:desktop": "tsx src/agent/cli/index.ts dev desktop", + "dev:gateway": "tsx src/agent/cli/index.ts dev gateway", + "dev:web": "tsx src/agent/cli/index.ts dev web", + "dev:all": "tsx src/agent/cli/index.ts dev all", "build": "turbo build", "build:sdk": "pnpm --filter @multica/sdk build", "build:cli": "node scripts/build-cli.js", diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 7586e1d5..3eef6b24 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -17,6 +17,8 @@ "uuid": "^13.0.0" }, "devDependencies": { + "@mariozechner/pi-agent-core": "^0.50.3", + "@mariozechner/pi-ai": "^0.50.3", "@types/uuid": "^11.0.0", "typescript": 
"^5.9.3" } diff --git a/packages/sdk/src/actions/index.ts b/packages/sdk/src/actions/index.ts index 2265d893..1ceffb73 100644 --- a/packages/sdk/src/actions/index.ts +++ b/packages/sdk/src/actions/index.ts @@ -33,10 +33,12 @@ export { export { StreamAction, type StreamPayload, - type StreamEvent, - type StreamMessageEvent, - type StreamToolEvent, - extractTextFromEvent, + type AgentEvent, + type ContentBlock, + type TextContent, + type ThinkingContent, + type ToolCall, + type ImageContent, extractThinkingFromEvent, } from "./stream"; diff --git a/packages/sdk/src/actions/rpc.ts b/packages/sdk/src/actions/rpc.ts index 1f49f1d1..32c605de 100644 --- a/packages/sdk/src/actions/rpc.ts +++ b/packages/sdk/src/actions/rpc.ts @@ -1,5 +1,7 @@ /** RPC Actions - 请求/响应模式 */ +import type { Message } from "@mariozechner/pi-ai"; + export const RequestAction = "request" as const; export const ResponseAction = "response" as const; @@ -65,34 +67,11 @@ export interface GetAgentMessagesParams { limit?: number; } -/** Content block types from the agent engine */ -export interface TextContentBlock { - type: "text"; - text: string; -} - -export interface ThinkingContentBlock { - type: "thinking"; - thinking: string; -} - -export interface ToolCallBlock { - type: "tool_use"; - id: string; - name: string; - input: unknown; -} - -export interface ImageContentBlock { - type: "image"; - url: string; -} - -/** Agent message returned by getAgentMessages (mirrors pi-ai Message) */ -export type AgentMessageItem = - | { role: "user"; content: string | (TextContentBlock | ImageContentBlock)[]; timestamp: number } - | { role: "assistant"; content: (TextContentBlock | ThinkingContentBlock | ToolCallBlock)[]; timestamp: number } - | { role: "tool_result"; toolCallId: string; content: (TextContentBlock | ImageContentBlock)[]; isError: boolean; timestamp: number } +/** + * Agent message returned by getAgentMessages. 
+ * This is pi-ai's Message type — the backend returns it as-is from SessionManager.loadMessages(). + */ +export type AgentMessageItem = Message; /** getAgentMessages - response payload */ export interface GetAgentMessagesResult { diff --git a/packages/sdk/src/actions/stream.ts b/packages/sdk/src/actions/stream.ts index 032bb962..810f7355 100644 --- a/packages/sdk/src/actions/stream.ts +++ b/packages/sdk/src/actions/stream.ts @@ -1,59 +1,51 @@ -/** Stream Action - 流式消息传输 */ +/** Stream Action */ export const StreamAction = "stream" as const; +// --- Content block types (re-exported from pi-ai, the single source of truth) --- + +import type { + TextContent, + ThinkingContent, + ToolCall, + ImageContent, +} from "@mariozechner/pi-ai"; +import type { AgentEvent } from "@mariozechner/pi-agent-core"; + +export type { TextContent, ThinkingContent, ToolCall, ImageContent }; +export type { AgentEvent }; + /** - * AgentEvent types forwarded by the Hub to frontend clients. - * These mirror the subset of AgentEvent from @mariozechner/pi-agent-core - * that the Hub forwards (filtered at the Hub layer). + * Convenience union of all content block types across message roles. + * + * NOTE: This is a deliberate simplification. The backend uses narrower unions + * per role (e.g. AssistantMessage.content excludes ImageContent, UserMessage + * excludes ThinkingContent/ToolCall). We accept the wider union on the frontend + * for simpler handling — the backend already guarantees correctness. 
*/ -export interface StreamMessageEvent { - type: "message_start" | "message_update" | "message_end"; - message: { - id?: string; - role: string; - content?: Array<{ type: string; text?: string; thinking?: string }>; - }; - assistantMessageEvent?: unknown; -} +export type ContentBlock = TextContent | ThinkingContent | ToolCall | ImageContent; -export interface StreamToolEvent { - type: "tool_execution_start" | "tool_execution_end"; - toolCallId: string; - toolName: string; - args?: unknown; - result?: unknown; - isError?: boolean; -} +// --- Stream event types --- -export type StreamEvent = StreamMessageEvent | StreamToolEvent; - -/** 流消息 payload — wraps a raw AgentEvent with stream/agent identifiers */ +/** + * Hub forwards AgentEvent from pi-agent-core as-is. + * StreamPayload wraps it with routing metadata. + */ export interface StreamPayload { - /** 流 ID,关联同一个流的所有消息 */ streamId: string; - /** 所属 agent ID */ agentId: string; - /** Raw agent event from the engine */ - event: StreamEvent; + event: AgentEvent; } -/** Extract plain text from an AgentMessage content array */ -export function extractTextFromEvent(event: StreamMessageEvent): string { - const content = event.message?.content; +/** Extract thinking/reasoning content from an AgentEvent that carries a message */ +export function extractThinkingFromEvent(event: AgentEvent): string { + if (!("message" in event)) return ""; + const msg = event.message; + if (!msg || !("content" in msg)) return ""; + const content = msg.content; if (!Array.isArray(content)) return ""; return content - .filter((c) => c.type === "text") - .map((c) => c.text ?? 
"") - .join(""); -} - -/** Extract thinking/reasoning content from an AgentMessage content array */ -export function extractThinkingFromEvent(event: StreamMessageEvent): string { - const content = event.message?.content; - if (!Array.isArray(content)) return ""; - return content - .filter((c) => c.type === "thinking") + .filter((c): c is ThinkingContent => c.type === "thinking") .map((c) => c.thinking ?? "") .join(""); } diff --git a/packages/sdk/src/client.ts b/packages/sdk/src/client.ts index fb362281..cf7bf932 100644 --- a/packages/sdk/src/client.ts +++ b/packages/sdk/src/client.ts @@ -234,7 +234,7 @@ export class GatewayClient { } /** Hub 验证成功回调 */ - onVerified(callback: (result: { hubId: string; agentId: string }) => void): this { + onVerified(callback: (result: { hubId: string; agentId: string; isNewDevice?: boolean }) => void): this { this.callbacks.onVerified = callback; return this; } @@ -312,12 +312,13 @@ export class GatewayClient { if (this.options.hubId) { // Set internal state to allow send/request during verify this._state = "registered"; + this.callbacks.onStateChange?.("verifying"); const meta = typeof navigator !== "undefined" ? 
{ userAgent: navigator.userAgent, platform: navigator.platform, language: navigator.language, } : undefined; - this.request<{ hubId: string; agentId: string }>( + this.request<{ hubId: string; agentId: string; isNewDevice?: boolean }>( this.options.hubId, "verify", { token: this.options.token, meta }, diff --git a/packages/sdk/src/types.ts b/packages/sdk/src/types.ts index 88118021..f8a3f693 100644 --- a/packages/sdk/src/types.ts +++ b/packages/sdk/src/types.ts @@ -102,6 +102,7 @@ export type ConnectionState = | "disconnected" | "connecting" | "connected" + | "verifying" | "registered"; /** Event callback types */ @@ -109,7 +110,7 @@ export interface GatewayClientCallbacks { onConnect?: (socketId: string) => void; onDisconnect?: (reason: string) => void; onRegistered?: (deviceId: string) => void; - onVerified?: (result: { hubId: string; agentId: string }) => void; + onVerified?: (result: { hubId: string; agentId: string; isNewDevice?: boolean }) => void; onMessage?: (message: RoutedMessage) => void; onSendError?: (error: SendErrorResponse) => void; onPong?: (data: string) => void; diff --git a/packages/store/src/connection-store.ts b/packages/store/src/connection-store.ts index 19dde590..17b41a51 100644 --- a/packages/store/src/connection-store.ts +++ b/packages/store/src/connection-store.ts @@ -19,13 +19,13 @@ import { v7 as uuidv7 } from "uuid" import { GatewayClient, StreamAction, - extractTextFromEvent, type ConnectionState, - type SendErrorResponse, type StreamPayload, - type StreamMessageEvent, + type AgentEvent, + type GetAgentMessagesResult, + type ContentBlock, } from "@multica/sdk" -import { useMessagesStore } from "./messages" +import { useMessagesStore, type Message } from "./messages" import { clearConnection, type ConnectionInfo } from "./connection" interface ConnectionStoreState { @@ -34,7 +34,9 @@ interface ConnectionStoreState { hubId: string | null agentId: string | null connectionState: ConnectionState - lastError: SendErrorResponse | null + 
lastError: { code: string; message: string } | null + /** Whether the current connection required Owner approval (new device) */ + isNewDevice: boolean | null } interface ConnectionStoreActions { @@ -104,34 +106,63 @@ function createClient( switch (event.type) { case "message_start": { store.startStream(payload.streamId, payload.agentId) - const text = extractTextFromEvent(event as StreamMessageEvent) - if (text) store.appendStream(payload.streamId, text) + const content = extractContent(event) + if (content.length) store.appendStream(payload.streamId, content) break } case "message_update": { - const text = extractTextFromEvent(event as StreamMessageEvent) - store.appendStream(payload.streamId, text) + const content = extractContent(event) + store.appendStream(payload.streamId, content) break } case "message_end": { - const text = extractTextFromEvent(event as StreamMessageEvent) - store.endStream(payload.streamId, text) + const content = extractContent(event) + const stopReason = "message" in event + ? (event.message as { stopReason?: string })?.stopReason + : undefined + store.endStream(payload.streamId, content, stopReason) break } - case "tool_execution_start": - case "tool_execution_end": + case "tool_execution_start": { + store.startToolExecution( + payload.agentId, + event.toolCallId, + event.toolName, + event.args, + ) + break + } + case "tool_execution_end": { + store.endToolExecution( + event.toolCallId, + event.result, + event.isError, + ) + break + } + case "tool_execution_update": + // Partial results — not rendered yet, ignored for now break } return } + // Handle error messages from Hub (e.g. 
UNAUTHORIZED) + if (msg.action === "error") { + const payload = msg.payload as { code: string; message: string } + set({ lastError: { code: payload.code, message: payload.message } }) + return + } + // Handle direct (non-streaming) messages const payload = msg.payload as { agentId?: string; content?: string } if (payload?.agentId && payload?.content) { useMessagesStore.getState().addAssistantMessage(payload.content, payload.agentId) } }) - .onSendError((error) => set({ lastError: error })) + .onVerified((result) => set({ isNewDevice: result.isNewDevice ?? false })) + .onError((error) => set({ lastError: { code: "VERIFY_ERROR", message: error.message } })) + .onSendError((error) => set({ lastError: { code: error.code, message: error.error } })) } /** Fetch message history from Hub via RPC after connection is established */ @@ -140,20 +171,55 @@ async function fetchHistory(state: ConnectionStoreState): Promise { if (!client || !hubId || !agentId) return try { - const result = await client.request<{ - messages: Array<{ role: string; content: unknown }> - total: number - }>(hubId, "getAgentMessages", { agentId, limit: 200 }) + const result = await client.request( + hubId, "getAgentMessages", { agentId, limit: 200 }, + ) - const messages = result.messages - .filter((m) => m.role === "user" || m.role === "assistant") - .map((m) => ({ - id: uuidv7(), - role: m.role as "user" | "assistant", - content: extractText(m.content), - agentId: agentId, - })) - .filter((m) => m.content.length > 0) + // Build a lookup map: toolCallId → { name, arguments } from assistant ToolCall blocks + const toolCallArgsMap = new Map }>() + for (const m of result.messages) { + if (m.role === "assistant") { + for (const block of m.content) { + if (block.type === "toolCall") { + toolCallArgsMap.set(block.id, { name: block.name, args: block.arguments }) + } + } + } + } + + // Mirror the backend message array directly + const messages: Message[] = [] + for (const m of result.messages) { + if (m.role 
=== "user") { + messages.push({ + id: uuidv7(), + role: "user", + content: toContentBlocks(m.content), + agentId, + }) + } else if (m.role === "assistant") { + messages.push({ + id: uuidv7(), + role: "assistant", + content: toContentBlocks(m.content), + agentId, + stopReason: m.stopReason, + }) + } else if (m.role === "toolResult") { + const callInfo = toolCallArgsMap.get(m.toolCallId) + messages.push({ + id: uuidv7(), + role: "toolResult", + content: toContentBlocks(m.content), + agentId, + toolCallId: m.toolCallId, + toolName: m.toolName, + toolArgs: callInfo?.args, + toolStatus: m.isError ? "error" : "success", + isError: m.isError, + }) + } + } if (messages.length > 0) { useMessagesStore.getState().loadMessages(messages) @@ -163,14 +229,22 @@ async function fetchHistory(state: ConnectionStoreState): Promise { } } -/** Extract plain text from AgentMessage content (string or content block array) */ -function extractText(content: unknown): string { - if (typeof content === "string") return content - if (!Array.isArray(content)) return "" - return content - .filter((c: { type?: string }) => c.type === "text") - .map((c: { text?: string }) => c.text ?? "") - .join("") +/** Convert raw backend content (string or block array) to ContentBlock[] */ +function toContentBlocks(content: string | ContentBlock[]): ContentBlock[] { + if (typeof content === "string") { + return content ? [{ type: "text", text: content }] : [] + } + if (Array.isArray(content)) return content + return [] +} + +/** Extract content blocks from an AgentEvent that carries a message */ +function extractContent(event: AgentEvent): ContentBlock[] { + if (!("message" in event)) return [] + const msg = event.message + if (!msg || !("content" in msg)) return [] + const content = msg.content + return Array.isArray(content) ? 
content as ContentBlock[] : [] } export const useConnectionStore = create()( @@ -182,6 +256,7 @@ export const useConnectionStore = create()( agentId: null, connectionState: "disconnected", lastError: null, + isNewDevice: null, // Connect using a connection code (disconnect existing connection first) connect: (code) => { @@ -214,6 +289,7 @@ export const useConnectionStore = create()( hubId: null, agentId: null, lastError: null, + isNewDevice: null, }) }, diff --git a/packages/store/src/index.ts b/packages/store/src/index.ts index b4e5a097..f2ee2670 100644 --- a/packages/store/src/index.ts +++ b/packages/store/src/index.ts @@ -2,6 +2,6 @@ export { useConnectionStore } from "./connection-store" export type { ConnectionStore } from "./connection-store" export { useAutoConnect } from "./use-auto-connect" export { useMessagesStore } from "./messages" -export type { Message, MessagesStore, SendContext } from "./messages" +export type { Message, MessagesStore, SendContext, ToolStatus } from "./messages" export { parseConnectionCode, saveConnection, loadConnection, clearConnection } from "./connection" export type { ConnectionInfo } from "./connection" diff --git a/packages/store/src/messages.ts b/packages/store/src/messages.ts index 0ced06aa..45555dd6 100644 --- a/packages/store/src/messages.ts +++ b/packages/store/src/messages.ts @@ -1,28 +1,33 @@ /** - * Messages Store - manages chat messages and streaming state for the current Agent + * Messages Store - manages chat messages and streaming state * - * Responsibilities: - * 1. Store current Agent's chat messages (replaced on Agent switch, not accumulated) - * 2. Manage streaming state (intermediate state while AI replies arrive in chunks) - * 3. 
Provide sendMessage() as the single entry point for sending messages + * Data model mirrors the backend (pi-ai / pi-agent-core) exactly: + * - UserMessage: { role: "user", content: ContentBlock[] } + * - AssistantMessage: { role: "assistant", content: ContentBlock[] } + * - ToolResultMessage: { role: "toolResult", toolCallId, toolName, content, isError } * - * Send flow: - * user input → sendMessage(text) - * → addUserMessage() immediately adds to local state (optimistic update) - * → ConnectionStore.send() sends to Gateway → Hub → Agent - * - * Receive flow (driven by ConnectionStore's onMessage callback): - * Streaming: startStream → appendStream (repeated) → endStream - * Non-streaming: addAssistantMessage (one-shot) + * Streaming simply updates the content of the current assistant message in-place. + * Tool execution events (start/end) create / update toolResult messages. */ import { create } from "zustand" import { v7 as uuidv7 } from "uuid" +import type { ContentBlock } from "@multica/sdk" + +export type ToolStatus = "running" | "success" | "error" | "interrupted" export interface Message { id: string - role: "user" | "assistant" - content: string + role: "user" | "assistant" | "toolResult" + content: ContentBlock[] agentId: string + // AssistantMessage metadata + stopReason?: string + // ToolResult fields (only when role === "toolResult") + toolCallId?: string + toolName?: string + toolArgs?: Record + toolStatus?: ToolStatus + isError?: boolean } /** Parameters needed to route a message through the gateway */ @@ -41,13 +46,16 @@ interface MessagesActions { sendMessage: (text: string, ctx: SendContext) => void addUserMessage: (content: string, agentId: string) => void addAssistantMessage: (content: string, agentId: string) => void - updateMessage: (id: string, content: string) => void - // Replace all messages (for Agent switch or loading history) + updateMessage: (id: string, content: ContentBlock[]) => void loadMessages: (msgs: Message[]) => void 
clearMessages: () => void + // Streaming startStream: (streamId: string, agentId: string) => void - appendStream: (streamId: string, content: string) => void - endStream: (streamId: string, content: string) => void + appendStream: (streamId: string, content: ContentBlock[]) => void + endStream: (streamId: string, content: ContentBlock[], stopReason?: string) => void + // Tool execution lifecycle + startToolExecution: (agentId: string, toolCallId: string, toolName: string, args?: unknown) => void + endToolExecution: (toolCallId: string, result?: unknown, isError?: boolean) => void } export type MessagesStore = MessagesState & MessagesActions @@ -56,7 +64,6 @@ export const useMessagesStore = create()((set, get) => ({ messages: [], streamingIds: new Set(), - // Single entry point for sending: optimistic local add, then send via WebSocket sendMessage: (text, ctx) => { get().addUserMessage(text, ctx.agentId) ctx.send(ctx.hubId, "message", { agentId: ctx.agentId, content: text }) @@ -64,13 +71,23 @@ export const useMessagesStore = create()((set, get) => ({ addUserMessage: (content, agentId) => { set((s) => ({ - messages: [...s.messages, { id: uuidv7(), role: "user", content, agentId }], + messages: [...s.messages, { + id: uuidv7(), + role: "user", + content: [{ type: "text" as const, text: content }], + agentId, + }], })) }, addAssistantMessage: (content, agentId) => { set((s) => ({ - messages: [...s.messages, { id: uuidv7(), role: "assistant", content, agentId }], + messages: [...s.messages, { + id: uuidv7(), + role: "assistant", + content: [{ type: "text" as const, text: content }], + agentId, + }], })) }, @@ -80,7 +97,6 @@ export const useMessagesStore = create()((set, get) => ({ })) }, - // Replace all messages (for Agent switch or loading history) loadMessages: (msgs) => { set({ messages: msgs }) }, @@ -89,35 +105,79 @@ export const useMessagesStore = create()((set, get) => ({ set({ messages: [], streamingIds: new Set() }) }, - // === The following three methods are 
called by ConnectionStore's onMessage callback === - // Stream start: create an empty placeholder message and mark as streaming + // --- Streaming: build assistant message incrementally --- + startStream: (streamId, agentId) => { set((s) => { const ids = new Set(s.streamingIds) ids.add(streamId) return { - messages: [...s.messages, { id: streamId, role: "assistant" as const, content: "", agentId }], + messages: [...s.messages, { id: streamId, role: "assistant" as const, content: [], agentId }], streamingIds: ids, } }) }, - // Stream update: replace message content (each update carries the full accumulated text) + // Replace the entire content array with the latest partial snapshot appendStream: (streamId, content) => { set((s) => ({ messages: s.messages.map((m) => (m.id === streamId ? { ...m, content } : m)), })) }, - // Stream end: write final content, remove streaming marker - endStream: (streamId, content) => { + endStream: (streamId, content, stopReason) => { set((s) => { const ids = new Set(s.streamingIds) ids.delete(streamId) + // Find the agentId of the stream being ended to scope tool interruption + const streamMsg = s.messages.find((m) => m.id === streamId) + const streamAgentId = streamMsg?.agentId return { - messages: s.messages.map((m) => (m.id === streamId ? 
{ ...m, content } : m)), + messages: s.messages.map((m) => { + if (m.id === streamId) return { ...m, content, stopReason } + // Interrupt running tool executions belonging to the same agent + if (m.role === "toolResult" && m.toolStatus === "running" && m.agentId === streamAgentId) { + return { ...m, toolStatus: "interrupted" as ToolStatus } + } + return m + }), streamingIds: ids, } }) }, + + // --- Tool execution: create / update toolResult messages --- + + startToolExecution: (agentId, toolCallId, toolName, args) => { + set((s) => ({ + messages: [...s.messages, { + id: uuidv7(), + role: "toolResult" as const, + content: [], + agentId, + toolCallId, + toolName, + toolArgs: args as Record | undefined, + toolStatus: "running" as ToolStatus, + isError: false, + }], + })) + }, + + endToolExecution: (toolCallId, result, isError) => { + set((s) => ({ + messages: s.messages.map((m) => + m.role === "toolResult" && m.toolCallId === toolCallId + ? { + ...m, + toolStatus: (isError ? "error" : "success") as ToolStatus, + isError: isError ?? false, + content: result != null + ? [{ type: "text" as const, text: typeof result === "string" ? 
result : JSON.stringify(result) }] + : [], + } + : m + ), + })) + }, })) diff --git a/packages/ui/package.json b/packages/ui/package.json index a1299e4d..3bad8caa 100644 --- a/packages/ui/package.json +++ b/packages/ui/package.json @@ -3,6 +3,7 @@ "version": "0.1.0", "private": true, "type": "module", + "sideEffects": ["**/*.css"], "exports": { "./globals.css": "./src/styles/globals.css", "./postcss.config": "./postcss.config.mjs", @@ -18,6 +19,10 @@ "@hugeicons/core-free-icons": "^3.1.1", "@hugeicons/react": "^1.1.4", "@multica/store": "workspace:*", + "@tiptap/extension-placeholder": "^3.19.0", + "@tiptap/pm": "^3.19.0", + "@tiptap/react": "^3.19.0", + "@tiptap/starter-kit": "^3.19.0", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "linkify-it": "^5.0.0", diff --git a/packages/ui/src/components/chat-input.css b/packages/ui/src/components/chat-input.css new file mode 100644 index 00000000..f81b734e --- /dev/null +++ b/packages/ui/src/components/chat-input.css @@ -0,0 +1,18 @@ +.chat-input-editor .ProseMirror { + outline: none; + min-height: 2.5rem; + max-height: 200px; + overflow-y: auto; +} + +.chat-input-editor .ProseMirror p.is-editor-empty:first-child::before { + content: attr(data-placeholder); + float: left; + pointer-events: none; + height: 0; + color: var(--muted-foreground); +} + +.chat-input-editor.is-disabled .ProseMirror { + cursor: not-allowed; +} diff --git a/packages/ui/src/components/chat-input.tsx b/packages/ui/src/components/chat-input.tsx index ce5a3e94..5d9d8f58 100644 --- a/packages/ui/src/components/chat-input.tsx +++ b/packages/ui/src/components/chat-input.tsx @@ -1,9 +1,20 @@ "use client"; -import { useRef } from "react"; +import { useRef, useEffect, useImperativeHandle, forwardRef } from "react"; +import { useEditor, EditorContent } from "@tiptap/react"; +import StarterKit from "@tiptap/starter-kit"; +import Placeholder from "@tiptap/extension-placeholder"; import { Button } from "@multica/ui/components/ui/button"; import { 
ArrowUpIcon } from "@hugeicons/core-free-icons"; import { HugeiconsIcon } from "@hugeicons/react"; import { cn } from "@multica/ui/lib/utils"; +import "./chat-input.css"; + +export interface ChatInputRef { + getText: () => string; + setText: (text: string) => void; + focus: () => void; + clear: () => void; +} interface ChatInputProps { onSubmit?: (value: string) => void; @@ -11,45 +22,105 @@ interface ChatInputProps { placeholder?: string; } -export function ChatInput({ onSubmit, disabled, placeholder = "Type a message..." }: ChatInputProps) { - const textareaRef = useRef(null); +export const ChatInput = forwardRef( + function ChatInput({ onSubmit, disabled, placeholder = "Type a message..." }, ref) { + // Use ref to avoid stale closure in Tiptap keydown handler + const onSubmitRef = useRef(onSubmit); + onSubmitRef.current = onSubmit; - const handleSubmit = () => { - const value = textareaRef.current?.value ?? ""; - if (!value.trim()) return; - onSubmit?.(value); - textareaRef.current!.value = ""; - // reset height - textareaRef.current!.style.height = "auto"; - }; + const editor = useEditor({ + extensions: [ + StarterKit.configure({ + // Disable all rich-text features — plain text only + heading: false, + bold: false, + italic: false, + strike: false, + code: false, + codeBlock: false, + blockquote: false, + bulletList: false, + orderedList: false, + listItem: false, + horizontalRule: false, + }), + Placeholder.configure({ placeholder }), + ], + immediatelyRender: false, + editorProps: { + attributes: { + class: + "w-full resize-none bg-transparent px-1 py-1 text-base text-foreground outline-none placeholder:text-muted-foreground disabled:cursor-not-allowed", + }, + handleKeyDown(_view, event) { + // Guard for IME composition (Chinese/Japanese input) + if (event.isComposing) return false; - return ( -
-