diff --git a/apps/desktop/electron/electron-env.d.ts b/apps/desktop/electron/electron-env.d.ts index 874eaa47..09ea4db7 100644 --- a/apps/desktop/electron/electron-env.d.ts +++ b/apps/desktop/electron/electron-env.d.ts @@ -131,6 +131,13 @@ interface CurrentProviderInfo { available: boolean } +interface ChannelAccountStateInfo { + channelId: string + accountId: string + status: 'stopped' | 'starting' | 'running' | 'error' + error?: string +} + interface ElectronAPI { hub: { init: () => Promise @@ -188,6 +195,14 @@ interface ElectronAPI { saveApiKey: (providerId: string, apiKey: string) => Promise<{ ok: boolean; error?: string }> importOAuth: (providerId: string) => Promise<{ ok: boolean; expiresAt?: number; error?: string }> } + channels: { + listStates: () => Promise + getConfig: () => Promise> | undefined>> + saveToken: (channelId: string, accountId: string, token: string) => Promise<{ ok: boolean; error?: string }> + removeToken: (channelId: string, accountId: string) => Promise<{ ok: boolean; error?: string }> + stop: (channelId: string, accountId: string) => Promise<{ ok: boolean; error?: string }> + start: (channelId: string, accountId: string) => Promise<{ ok: boolean; error?: string }> + } cron: { list: () => Promise toggle: (jobId: string) => Promise<{ ok: boolean }> diff --git a/apps/desktop/electron/ipc/channels.ts b/apps/desktop/electron/ipc/channels.ts new file mode 100644 index 00000000..49662e94 --- /dev/null +++ b/apps/desktop/electron/ipc/channels.ts @@ -0,0 +1,178 @@ +/** + * Channel IPC handlers for Electron main process. + * + * Manages channel account configuration, start/stop lifecycle. + * The Channels page in the renderer uses these to configure + * Telegram (and future channels) with immediate effect. 
+ */ +import { ipcMain } from 'electron' +import { getCurrentHub } from './hub.js' +import { credentialManager } from '../../../../src/agent/credentials.js' +import { listChannels } from '../../../../src/channels/registry.js' + +/** Validate that a value is a safe identifier: a string of 1-64 alphanumeric, dash, or underscore characters */ +function isValidId(value: unknown): value is string { + return typeof value === 'string' && /^[a-zA-Z0-9_-]+$/.test(value) && value.length <= 64 +} + +/** + * Mask a token string for safe display: tokens longer than 12 chars keep their first 5 and last 5 chars around 10 asterisks; shorter tokens are replaced entirely by asterisks. + * Returns undefined if the input is not a non-empty string. + */ +function maskToken(token: unknown): string | undefined { + if (typeof token !== 'string' || token.length === 0) return undefined + if (token.length <= 12) return '*'.repeat(token.length) + return `${token.slice(0, 5)}${'*'.repeat(10)}${token.slice(-5)}` +} + +/** + * Register all Channel-related IPC handlers. + */ +export function registerChannelsIpcHandlers(): void { + /** + * List all channel account states (running / stopped / error). Returns an empty array when no Hub is available. + */ + ipcMain.handle('channels:listStates', async () => { + const hub = getCurrentHub() + if (!hub) return [] + return hub.channelManager.listAccountStates() + }) + + /** + * Get the channels config from credentials.json5. + * Returns a sanitized version with tokens masked (not the raw secret values). 
+ */ + ipcMain.handle('channels:getConfig', async () => { + const raw = credentialManager.getChannelsConfig() + // Mask secret values before sending to renderer + const masked: Record> | undefined> = {} + for (const [channelId, accounts] of Object.entries(raw)) { + if (!accounts) continue + const maskedAccounts: Record> = {} + for (const [accountId, accountConfig] of Object.entries(accounts)) { + const maskedConfig = { ...accountConfig } + if ('botToken' in maskedConfig) { + maskedConfig.botToken = maskToken(maskedConfig.botToken) + } + maskedAccounts[accountId] = maskedConfig + } + masked[channelId] = maskedAccounts + } + return masked + }) + + /** + * Save a channel account token and start the bot immediately. + * Flow: validate → write to credentials.json5 → start the channel account. + */ + ipcMain.handle( + 'channels:saveToken', + async (_event, channelId: string, accountId: string, token: string): Promise<{ ok: boolean; error?: string }> => { + try { + // Validate inputs + if (!isValidId(channelId)) return { ok: false, error: 'Invalid channel ID' } + if (!isValidId(accountId)) return { ok: false, error: 'Invalid account ID' } + if (typeof token !== 'string' || token.trim().length === 0 || token.length > 256) { + return { ok: false, error: 'Invalid token' } + } + + const hub = getCurrentHub() + if (!hub) return { ok: false, error: 'Hub not initialized' } + + // Find the plugin to validate channelId + const plugin = listChannels().find((p) => p.id === channelId) + if (!plugin) return { ok: false, error: `Unknown channel: ${channelId}` } + + // Persist config to credentials.json5 + credentialManager.setChannelAccountConfig(channelId, accountId, { botToken: token }) + console.log(`[IPC] Channel config saved: ${channelId}:${accountId}`) + + // Stop existing account if running (e.g. 
token update) + hub.channelManager.stopAccount(channelId, accountId) + + // Start the account with the new config + await hub.channelManager.startAccount(channelId, accountId, { botToken: token }) + console.log(`[IPC] Channel started: ${channelId}:${accountId}`) + + return { ok: true } + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + console.error(`[IPC] Failed to save channel token: ${message}`) + return { ok: false, error: message } + } + } + ) + + /** + * Remove a channel account token and stop the bot. + */ + ipcMain.handle( + 'channels:removeToken', + async (_event, channelId: string, accountId: string): Promise<{ ok: boolean; error?: string }> => { + try { + if (!isValidId(channelId)) return { ok: false, error: 'Invalid channel ID' } + if (!isValidId(accountId)) return { ok: false, error: 'Invalid account ID' } + + const hub = getCurrentHub() + if (!hub) return { ok: false, error: 'Hub not initialized' } + + // Stop the account + hub.channelManager.stopAccount(channelId, accountId) + + // Remove from credentials.json5 + credentialManager.removeChannelAccountConfig(channelId, accountId) + console.log(`[IPC] Channel config removed: ${channelId}:${accountId}`) + + return { ok: true } + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + console.error(`[IPC] Failed to remove channel token: ${message}`) + return { ok: false, error: message } + } + } + ) + + /** + * Stop a channel account without removing its config. 
+ */ + ipcMain.handle( + 'channels:stop', + async (_event, channelId: string, accountId: string): Promise<{ ok: boolean; error?: string }> => { + if (!isValidId(channelId)) return { ok: false, error: 'Invalid channel ID' } + if (!isValidId(accountId)) return { ok: false, error: 'Invalid account ID' } + const hub = getCurrentHub() + if (!hub) return { ok: false, error: 'Hub not initialized' } + hub.channelManager.stopAccount(channelId, accountId) + return { ok: true } + } + ) + + /** + * Start a channel account using its saved config. + */ + ipcMain.handle( + 'channels:start', + async (_event, channelId: string, accountId: string): Promise<{ ok: boolean; error?: string }> => { + try { + if (!isValidId(channelId)) return { ok: false, error: 'Invalid channel ID' } + if (!isValidId(accountId)) return { ok: false, error: 'Invalid account ID' } + + const hub = getCurrentHub() + if (!hub) return { ok: false, error: 'Hub not initialized' } + + // Read config from credentials + const config = credentialManager.getChannelsConfig() + const accountConfig = config[channelId]?.[accountId] + if (!accountConfig) { + return { ok: false, error: `No config found for ${channelId}:${accountId}` } + } + + await hub.channelManager.startAccount(channelId, accountId, accountConfig) + return { ok: true } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + return { ok: false, error: message } + } + } + ) +} diff --git a/apps/desktop/electron/ipc/hub.ts b/apps/desktop/electron/ipc/hub.ts index f71fe7f7..1e5446ce 100644 --- a/apps/desktop/electron/ipc/hub.ts +++ b/apps/desktop/electron/ipc/hub.ts @@ -236,6 +236,7 @@ export function registerHubIpcHandlers(): void { if (agent.closed) { return { error: `Agent is closed: ${agentId}` } } + h.channelManager.clearLastRoute() agent.write(content) return { ok: true } }) @@ -268,11 +269,11 @@ export function registerHubIpcHandlers(): void { return } - // Compaction events: forward with no stream tracking - const isCompactionEvent = - event.type === 'compaction_start' || event.type === 'compaction_end' - if (isCompactionEvent) { - safeLog(`[IPC] Sending compaction event to renderer: ${event.type}`) + // Compaction and error events: forward with no stream tracking + const isPassthroughEvent = + event.type === 'compaction_start' || event.type === 'compaction_end' || event.type === 'agent_error' + if (isPassthroughEvent) { + safeLog(`[IPC] Sending ${event.type} event to renderer`) mainWindowRef.webContents.send('localChat:event', { agentId, streamId: null, @@ -281,16 +282,6 @@ export function registerHubIpcHandlers(): void { return } - // Agent error events: forward so the UI can display them - if (event.type === 'agent_error') { - safeLog(`[IPC] Sending agent_error event to renderer: ${(event as { message: string }).message}`) - mainWindowRef.webContents.send('localChat:event', { - agentId, - streamId: null, - event, - }) - return - } // Filter events same as Hub.consumeAgent() const maybeMessage = (event as { message?: { role?: string } }).message @@ -398,6 +389,7 @@ export function registerHubIpcHandlers(): void { return { error: 'Not subscribed to agent events. Call subscribe first.' 
} } + h.channelManager.clearLastRoute() agent.write(content) safeLog(`[IPC] Local chat message sent to agent: ${agentId}`) return { ok: true } diff --git a/apps/desktop/electron/ipc/index.ts b/apps/desktop/electron/ipc/index.ts index a528e74a..b733a320 100644 --- a/apps/desktop/electron/ipc/index.ts +++ b/apps/desktop/electron/ipc/index.ts @@ -6,6 +6,7 @@ export { registerSkillsIpcHandlers } from './skills.js' export { registerHubIpcHandlers, cleanupHub, initializeHub, setupDeviceConfirmation } from './hub.js' export { registerProfileIpcHandlers } from './profile.js' export { registerProviderIpcHandlers } from './provider.js' +export { registerChannelsIpcHandlers } from './channels.js' export { registerCronIpcHandlers } from './cron.js' export { registerHeartbeatIpcHandlers } from './heartbeat.js' @@ -14,6 +15,7 @@ import { registerSkillsIpcHandlers } from './skills.js' import { registerHubIpcHandlers, cleanupHub, initializeHub } from './hub.js' import { registerProfileIpcHandlers } from './profile.js' import { registerProviderIpcHandlers } from './provider.js' +import { registerChannelsIpcHandlers } from './channels.js' import { registerCronIpcHandlers } from './cron.js' import { registerHeartbeatIpcHandlers } from './heartbeat.js' @@ -27,6 +29,7 @@ export function registerAllIpcHandlers(): void { registerSkillsIpcHandlers() registerProfileIpcHandlers() registerProviderIpcHandlers() + registerChannelsIpcHandlers() registerCronIpcHandlers() registerHeartbeatIpcHandlers() } diff --git a/apps/desktop/electron/main.ts b/apps/desktop/electron/main.ts index bdf7168c..ea0acb6d 100644 --- a/apps/desktop/electron/main.ts +++ b/apps/desktop/electron/main.ts @@ -44,7 +44,7 @@ process.stderr?.on?.('error', (err: NodeJS.ErrnoException) => { throw err }) -import { app, BrowserWindow } from 'electron' +import { app, BrowserWindow, shell } from 'electron' import { fileURLToPath } from 'node:url' import path from 'node:path' import { registerAllIpcHandlers, initializeApp, 
cleanupAll, setupDeviceConfirmation } from './ipc/index.js' @@ -73,6 +73,12 @@ function createWindow() { }, }) + // Open http(s)/mailto links in the system browser instead of inside Electron; any other scheme (file:, custom protocol handlers) is dropped because the URL comes from renderer content + win.webContents.setWindowOpenHandler(({ url }) => { + if (/^(https?|mailto):/i.test(url)) void shell.openExternal(url) + return { action: 'deny' } + }) + if (VITE_DEV_SERVER_URL) { win.loadURL(VITE_DEV_SERVER_URL) } else { diff --git a/apps/desktop/electron/preload.ts b/apps/desktop/electron/preload.ts index e0295898..60d6a2fc 100644 --- a/apps/desktop/electron/preload.ts +++ b/apps/desktop/electron/preload.ts @@ -195,6 +195,26 @@ const electronAPI = { ipcRenderer.invoke('provider:importOAuth', providerId), }, + // Channel management (Telegram, Discord, etc.) + channels: { + /** List all channel account states */ + listStates: () => ipcRenderer.invoke('channels:listStates'), + /** Get channels config from credentials.json5 */ + getConfig: () => ipcRenderer.invoke('channels:getConfig'), + /** Save a channel token and start the bot immediately */ + saveToken: (channelId: string, accountId: string, token: string) => + ipcRenderer.invoke('channels:saveToken', channelId, accountId, token), + /** Remove a channel token and stop the bot */ + removeToken: (channelId: string, accountId: string) => + ipcRenderer.invoke('channels:removeToken', channelId, accountId), + /** Stop a channel account */ + stop: (channelId: string, accountId: string) => + ipcRenderer.invoke('channels:stop', channelId, accountId), + /** Start a channel account from saved config */ + start: (channelId: string, accountId: string) => + ipcRenderer.invoke('channels:start', channelId, accountId), + }, + // Cron jobs management cron: { list: () => ipcRenderer.invoke('cron:list'), diff --git a/apps/desktop/src/App.tsx b/apps/desktop/src/App.tsx index 48cc4883..bdc50f7a 100644 --- a/apps/desktop/src/App.tsx +++ b/apps/desktop/src/App.tsx @@ -1,8 +1,10 @@ import { createHashRouter, RouterProvider } from 'react-router-dom' import Layout from './pages/layout' +import 
HomePage from './pages/home' +import ChatPage from './pages/chat' import ToolsPage from './pages/tools' import SkillsPage from './pages/skills' +import ChannelsPage from './pages/channels' import CronsPage from './pages/crons' const router = createHashRouter([ @@ -11,9 +13,10 @@ const router = createHashRouter([ element: , children: [ { index: true, element: }, - { path: 'chat' }, + { path: 'chat', element: }, { path: 'tools', element: }, { path: 'skills', element: }, + { path: 'channels', element: }, { path: 'crons', element: }, ], }, diff --git a/apps/desktop/src/hooks/use-channels.ts b/apps/desktop/src/hooks/use-channels.ts new file mode 100644 index 00000000..ba8ae386 --- /dev/null +++ b/apps/desktop/src/hooks/use-channels.ts @@ -0,0 +1,135 @@ +/** + * Hook for managing channel accounts (Telegram, Discord, etc.) in the Desktop App. + * + * Provides state and actions for the Channels settings page: + * - List channel account states (running / stopped / error) + * - Read channel config (bot tokens arrive already masked by the main process) + * - Save / remove tokens with immediate start/stop + */ +import { useState, useEffect, useCallback } from 'react' + +export interface UseChannelsReturn { + /** Runtime states of all channel accounts */ + states: ChannelAccountStateInfo[] + /** Channel config from credentials.json5 as returned by channels:getConfig; botToken values are masked, not the raw secrets */ + config: Record> | undefined> + /** Loading state */ + loading: boolean + /** Error message if any */ + error: string | null + /** Refresh states and config */ + refresh: () => Promise + /** Save a bot token — persists to file and starts the bot immediately */ + saveToken: (channelId: string, accountId: string, token: string) => Promise<{ ok: boolean; error?: string }> + /** Remove a bot token — stops the bot and removes from file */ + removeToken: (channelId: string, accountId: string) => Promise<{ ok: boolean; error?: string }> + /** Stop a channel account without removing config */ + stopChannel: (channelId: string, accountId: string) => Promise + /** Start a channel account from 
saved config */ + startChannel: (channelId: string, accountId: string) => Promise +} + +export function useChannels(): UseChannelsReturn { + const [states, setStates] = useState([]) + const [config, setConfig] = useState> | undefined>>({}) + const [loading, setLoading] = useState(true) + const [error, setError] = useState(null) + + const refresh = useCallback(async () => { + // `loading` starts out true and is only cleared below; raising it again on every refresh made the page swap its content for the loading placeholder, which unmounted the channel card and discarded its local state after each action + setError(null) + + try { + const [stateList, channelConfig] = await Promise.all([ + window.electronAPI.channels.listStates(), + window.electronAPI.channels.getConfig(), + ]) + + setStates(stateList) + setConfig(channelConfig) + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + setError(message) + console.error('[useChannels] Failed to load:', message) + } finally { + setLoading(false) + } + }, []) + + useEffect(() => { + void refresh() + }, [refresh]) + + const saveToken = useCallback(async (channelId: string, accountId: string, token: string) => { + setError(null) + try { + const result = await window.electronAPI.channels.saveToken(channelId, accountId, token) + if (!result.ok) { + setError(result.error ?? 'Failed to save token') + } + // Refresh to pick up new state + await refresh() + return result + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + setError(message) + return { ok: false, error: message } + } + }, [refresh]) + + const removeToken = useCallback(async (channelId: string, accountId: string) => { + setError(null) + try { + const result = await window.electronAPI.channels.removeToken(channelId, accountId) + if (!result.ok) { + setError(result.error ?? 'Failed to remove token') + } + await refresh() + return result + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + setError(message) + return { ok: false, error: message } + } + }, [refresh]) + + const stopChannel = useCallback(async (channelId: string, accountId: string) => { + setError(null) + try { + const result = await window.electronAPI.channels.stop(channelId, accountId) + if (!result.ok) { + setError(result.error ?? 'Failed to stop channel') + } + await refresh() + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + setError(message) + } + }, [refresh]) + + const startChannel = useCallback(async (channelId: string, accountId: string) => { + setError(null) + try { + const result = await window.electronAPI.channels.start(channelId, accountId) + if (!result.ok) { + setError(result.error ?? 'Failed to start channel') + } + await refresh() + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + setError(message) + } + }, [refresh]) + + return { + states, + config, + loading, + error, + refresh, + saveToken, + removeToken, + stopChannel, + startChannel, + } +} diff --git a/apps/desktop/src/pages/channels.tsx b/apps/desktop/src/pages/channels.tsx new file mode 100644 index 00000000..81bc8926 --- /dev/null +++ b/apps/desktop/src/pages/channels.tsx @@ -0,0 +1,177 @@ +import { useState } from 'react' +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from '@multica/ui/components/ui/card' +import { Button } from '@multica/ui/components/ui/button' +import { Input } from '@multica/ui/components/ui/input' +import { Badge } from '@multica/ui/components/ui/badge' +import { useChannels, type UseChannelsReturn } from '../hooks/use-channels' + +/** Status badge color mapping */ +function statusVariant(status: string): 'default' | 'secondary' | 'destructive' | 'outline' { + switch (status) { + case 'running': return 'default' + case 'starting': return 'secondary' + case 'error': return 'destructive' + default: return 'outline' + } +} + +function TelegramCard({ channels 
}: { channels: UseChannelsReturn }) { + const { states, config, saveToken, removeToken, startChannel, stopChannel } = channels + const [token, setToken] = useState('') + const [saving, setSaving] = useState(false) + const [localError, setLocalError] = useState(null) + + // Current state and config for telegram:default + const state = states.find((s) => s.channelId === 'telegram' && s.accountId === 'default') + const savedConfig = config['telegram']?.['default'] as { botToken?: string } | undefined + const hasToken = Boolean(savedConfig?.botToken) + const isRunning = state?.status === 'running' + const isStarting = state?.status === 'starting' + + const handleSave = async () => { + if (!token.trim()) return + setSaving(true) + setLocalError(null) + const result = await saveToken('telegram', 'default', token.trim()) + if (!result.ok) { + setLocalError(result.error ?? 'Failed to save') + } else { + setToken('') // Clear input on success + } + setSaving(false) + } + + const handleRemove = async () => { + setSaving(true) + setLocalError(null) + const result = await removeToken('telegram', 'default') + if (!result.ok) { + setLocalError(result.error ?? 'Failed to remove') + } + setSaving(false) + } + + const handleToggle = async () => { + setSaving(true) + setLocalError(null) + if (isRunning || isStarting) { + await stopChannel('telegram', 'default') + } else { + await startChannel('telegram', 'default') + } + setSaving(false) + } + + // Mask the token for display: show first 5 and last 5 chars + const maskedToken = savedConfig?.botToken + ? `${savedConfig.botToken.slice(0, 5)}${'*'.repeat(10)}${savedConfig.botToken.slice(-5)}` + : null + + return ( + + +
+
+ Telegram + + Connect a Telegram bot via Bot API long polling. + +
+ {state && ( + + {state.status} + + )} +
+
+ + {hasToken ? ( + // Token is configured — show masked token and actions +
+
+ + {maskedToken} + +
+ + {state?.error && ( +

{state.error}

+ )} + +
+ + +
+
+ ) : ( + // No token — show input form +
+ setToken(e.target.value)} + onKeyDown={(e) => e.key === 'Enter' && handleSave()} + /> + +
+ )} + + {localError && ( +

{localError}

+ )} +
+
+ ) +} + +export default function ChannelsPage() { + const channels = useChannels() + const { loading, error } = channels + + return ( +
+
+

Channels

+

+ Connect messaging platforms to your Agent. +

+
+ + {loading ? ( +

Loading...

+ ) : error ? ( +

{error}

+ ) : ( + + )} +
+ ) +} diff --git a/apps/desktop/src/pages/layout.tsx b/apps/desktop/src/pages/layout.tsx index 7cb021a0..4f3e3867 100644 --- a/apps/desktop/src/pages/layout.tsx +++ b/apps/desktop/src/pages/layout.tsx @@ -8,6 +8,7 @@ import { CodeIcon, PlugIcon, Comment01Icon, + Share08Icon, Time04Icon, } from '@hugeicons/core-free-icons' import { cn } from '@multica/ui/lib/utils' @@ -19,6 +20,7 @@ const tabs = [ { path: '/chat', label: 'Chat', icon: Comment01Icon }, { path: '/tools', label: 'Tools', icon: CodeIcon }, { path: '/skills', label: 'Skills', icon: PlugIcon }, + { path: '/channels', label: 'Channels', icon: Share08Icon }, { path: '/crons', label: 'Cron', icon: Time04Icon }, ] diff --git a/docs/channel/openclaw-research.md b/docs/channel/openclaw-research.md new file mode 100644 index 00000000..c804d5ef --- /dev/null +++ b/docs/channel/openclaw-research.md @@ -0,0 +1,1186 @@ +# OpenClaw Channel 系统源码调研 + +> 源码位置: `~/Desktop/参考项目/openclaw` +> +> 调研目的: 深入理解 OpenClaw 的 Channel 架构、消息流转机制、第三方渠道集成模式,为 Super Multica 的 Channel 系统设计提供参考。 + +--- + +## 目录 + +1. [项目整体结构](#1-项目整体结构) +2. [Channel 插件体系架构](#2-channel-插件体系架构) +3. [核心类型定义](#3-核心类型定义) +4. [插件发现与加载机制](#4-插件发现与加载机制) +5. [路由与会话管理](#5-路由与会话管理) +6. [消息流转: 正常发消息 → AI 回复 全链路](#6-消息流转-正常发消息--ai-回复-全链路) +7. [第三方渠道集成: Telegram 完整流程](#7-第三方渠道集成-telegram-完整流程) +8. [Outbound 投递管线](#8-outbound-投递管线) +9. [安全与访问控制](#9-安全与访问控制) +10. [Channel Manager 生命周期管理](#10-channel-manager-生命周期管理) +11. [设计亮点与可借鉴之处](#11-设计亮点与可借鉴之处) +12. [关键文件索引](#12-关键文件索引) + +--- + +## 1. 项目整体结构 + +``` +openclaw/ +├── src/ # 核心模块 +│ ├── channels/ # Channel 插件系统 (类型、注册表、工具函数) +│ │ ├── registry.ts # 内置 Channel 元信息注册表 +│ │ ├── plugins/ # 插件类型定义与加载 +│ │ │ ├── types.core.ts # 基础类型 (ChannelId, ChannelMeta, ChannelCapabilities) +│ │ │ ├── types.adapters.ts # Adapter 接口 (Config, Outbound, Gateway, Security...) 
+│ │ │ ├── types.plugin.ts # ChannelPlugin 顶层接口 +│ │ │ ├── catalog.ts # 插件发现与目录管理 +│ │ │ └── load.ts # 插件加载 (带缓存) +│ │ ├── mention-gating.ts # 群组 @提及 门控逻辑 +│ │ ├── sender-identity.ts # 发送者身份验证 +│ │ ├── chat-type.ts # 聊天类型标准化 (direct/group/channel/thread) +│ │ └── ack-reactions.ts # ACK 表情反应 +│ ├── telegram/ # Telegram 内置实现 +│ │ ├── monitor.ts # 长轮询/Webhook 启动入口 +│ │ ├── webhook.ts # HTTP Webhook 服务器 +│ │ ├── bot.ts # Grammy Bot 创建与中间件编排 +│ │ ├── bot-handlers.ts # 消息/回调/反应处理器注册 +│ │ ├── bot-message.ts # 消息处理器工厂 +│ │ ├── bot-message-context.ts # Inbound 上下文构建 (路由、安全、信封) +│ │ ├── bot-message-dispatch.ts # 调度到 Agent 并处理流式回复 +│ │ ├── bot/delivery.ts # 回复投递 (文本分块、媒体、线程) +│ │ └── send.ts # 独立 Outbound 发送函数 +│ ├── routing/ # 消息路由与会话管理 +│ │ ├── resolve-route.ts # Agent 路由解析 (binding 匹配) +│ │ ├── bindings.ts # 路由绑定配置读取 +│ │ └── session-key.ts # 会话 Key 构建 (DM/Group/Thread) +│ ├── plugins/ # 通用插件系统 +│ │ ├── registry.ts # 插件注册表 (工具/钩子/Channel/Provider) +│ │ ├── runtime.ts # 全局插件注册表单例 (Symbol-based) +│ │ ├── loader.ts # 插件加载器 (jiti + discovery) +│ │ └── discovery.ts # 插件发现 +│ ├── infra/outbound/ # Outbound 投递基础设施 +│ │ ├── deliver.ts # 主投递编排 +│ │ ├── payloads.ts # Payload 标准化 +│ │ ├── channel-selection.ts # 多 Channel 选择 +│ │ └── target-resolver.ts # 目标解析 (带缓存) +│ ├── auto-reply/ # Agent 回复管线 +│ │ ├── dispatch.ts # 入站消息调度 +│ │ ├── reply/ # 回复生成 +│ │ │ ├── dispatch-from-config.ts # 核心回复流程 +│ │ │ └── get-reply.ts # LLM 调用 +│ │ ├── types.ts # ReplyPayload, GetReplyOptions +│ │ └── envelope.ts # 消息信封格式化 +│ ├── gateway/ # WebSocket 网关 +│ │ └── server-channels.ts # ChannelManager 生命周期管理 +│ └── config/ # 配置类型 +│ ├── types.channels.ts # Channel 配置汇总 +│ └── types.telegram.ts # Telegram 专属配置 +├── extensions/ # 33+ 外部插件 +│ ├── telegram/ # Telegram Channel 插件 +│ │ ├── index.ts # 插件入口 (register) +│ │ └── src/ +│ │ ├── channel.ts # ChannelPlugin 完整实现 +│ │ └── runtime.ts # 全局 Runtime +│ ├── discord/ # Discord Channel 插件 +│ ├── slack/ # Slack Channel 插件 +│ ├── signal/ # Signal Channel 插件 +│ └── 
... # 更多渠道 +└── apps/ # Web/Desktop 应用 +``` + +--- + +## 2. Channel 插件体系架构 + +OpenClaw 采用**插件化 Adapter 模式**来统一所有 Channel 的接入。每个 Channel 实现一个 `ChannelPlugin` 合约,包含多个可选的 Adapter: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ ChannelPlugin │ +├─────────┬──────────┬──────────┬──────────┬─────────────────┤ +│ config │ outbound │ gateway │ security │ 其他 Adapter │ +│ │ │ │ │ │ +│ 账号管理 │ 消息发送 │ 生命周期 │ 访问控制 │ groups, mentions │ +│ 启用检查 │ 媒体发送 │ start │ DM策略 │ directory, status │ +│ 配置描述 │ 目标解析 │ stop │ 告警收集 │ actions, threading│ +│ │ 文本分块 │ QR登录 │ │ heartbeat, setup │ +└─────────┴──────────┴──────────┴──────────┴─────────────────┘ +``` + +### ChannelPlugin 接口 + +```typescript +// src/channels/plugins/types.plugin.ts +type ChannelPlugin = { + id: ChannelId; // "telegram" | "discord" | ... + meta: ChannelMeta; // 元信息 (标签、图标、文档路径) + capabilities: ChannelCapabilities; // 能力声明 (chatTypes, reactions, threads...) + + // --- 必选 Adapter --- + config: ChannelConfigAdapter; // 账号配置管理 + + // --- 可选 Adapter --- + outbound?: ChannelOutboundAdapter; // 消息发送 + gateway?: ChannelGatewayAdapter; // 生命周期 (启动/停止/QR登录) + security?: ChannelSecurityAdapter; // DM安全策略 + setup?: ChannelSetupAdapter; // 初始化配置 + groups?: ChannelGroupAdapter; // 群组行为 + mentions?: ChannelMentionAdapter; // @提及处理 + status?: ChannelStatusAdapter; // 状态监控 + directory?: ChannelDirectoryAdapter; // 联系人/群组目录 + actions?: ChannelMessageActionAdapter; // 消息动作 (反应、按钮、卡片) + threading?: ChannelThreadingAdapter; // 线程处理 + streaming?: ChannelStreamingAdapter; // 流式输出 + messaging?: ChannelMessagingAdapter; // 目标格式化 + auth?: ChannelAuthAdapter; // 认证 + heartbeat?: ChannelHeartbeatAdapter; // 心跳检测 + pairing?: ChannelPairingAdapter; // 配对/白名单 + elevated?: ChannelElevatedAdapter; // 提权 + commands?: ChannelCommandAdapter; // 命令控制 + agentPrompt?: ChannelAgentPromptAdapter; // Agent 提示词 + resolver?: ChannelResolverAdapter; // 目标解析 + agentTools?: ChannelAgentToolFactory; // Channel 自带的 Agent 工具 +}; +``` + +### 
Adapter 职责一览 + +| Adapter | 职责 | 关键方法 | +|---------|------|---------| +| **config** | 账号管理 | `listAccountIds`, `resolveAccount`, `isConfigured`, `isEnabled` | +| **outbound** | 消息发送 | `sendText`, `sendMedia`, `sendPayload`, `resolveTarget` | +| **gateway** | 生命周期 | `startAccount`, `stopAccount`, `loginWithQrStart`, `loginWithQrWait` | +| **security** | 访问控制 | `resolveDmPolicy`, `collectWarnings` | +| **groups** | 群组行为 | `resolveRequireMention`, `resolveToolPolicy` | +| **status** | 状态监控 | `probeAccount`, `auditAccount`, `buildAccountSnapshot` | +| **directory** | 目录查询 | `listPeers`, `listGroups`, `listGroupMembers` | +| **actions** | 消息交互 | `handleAction` (reactions, buttons, cards, polls) | +| **mentions** | @提及 | `stripMentions`, `stripPatterns` | +| **setup** | 初始化 | `applyAccountConfig`, `validateInput` | +| **pairing** | 配对 | `normalizeAllowEntry`, `notifyApproval` | + +--- + +## 3. 核心类型定义 + +### ChannelCapabilities — 渠道能力声明 + +```typescript +// src/channels/plugins/types.core.ts +type ChannelCapabilities = { + chatTypes: Array<"direct" | "group" | "channel" | "thread">; + polls?: boolean; // 原生投票 + reactions?: boolean; // 表情反应 + edit?: boolean; // 编辑消息 + unsend?: boolean; // 撤回消息 + reply?: boolean; // 引用回复 + threads?: boolean; // 线程支持 + media?: boolean; // 媒体支持 + nativeCommands?: boolean; // 原生命令 (如 Telegram /start) + blockStreaming?: boolean; // 流式输出聚合 +}; +``` + +### ChannelMeta — 渠道元信息 + +```typescript +type ChannelMeta = { + id: ChannelId; + label: string; // "Telegram" + selectionLabel: string; // "Telegram (Bot API)" + detailLabel?: string; // "Telegram Bot" + docsPath: string; // "/channels/telegram" + blurb: string; // 简介 + systemImage?: string; // SF Symbol 图标名 + aliases?: string[]; // 别名 + order?: number; // 排序权重 + // ... 
+}; +``` + +### ChannelAccountSnapshot — 账号运行时快照 + +```typescript +type ChannelAccountSnapshot = { + accountId: string; + name?: string; + enabled?: boolean; + configured?: boolean; + running?: boolean; + connected?: boolean; + lastConnectedAt?: number | null; + lastMessageAt?: number | null; + lastError?: string | null; + lastStartAt?: number | null; + lastStopAt?: number | null; + dmPolicy?: string; + allowFrom?: string[]; + // ... +}; +``` + +### ReplyPayload — Agent 回复载荷 + +```typescript +// src/auto-reply/types.ts +type ReplyPayload = { + text?: string; + mediaUrl?: string; + mediaUrls?: string[]; + replyToId?: string; + audioAsVoice?: boolean; + isError?: boolean; + channelData?: Record; +}; +``` + +--- + +## 4. 插件发现与加载机制 + +### 4.1 插件发现 + +``` +发现源 (优先级从高到低): + 1. config — 配置文件指定的路径 (plugins.load.paths) + 2. workspace — 项目本地 extensions/ + 3. global — ~/.super-multica/extensions/ + 4. bundled — 内置 extensions/ +``` + +每个插件目录需包含 `openclaw.plugin.json` 清单文件,声明插件 ID、名称、类型、配置 Schema 等。 + +### 4.2 插件加载流程 + +``` +loadOpenClawPlugins(options) + │ + ├─ normalizePluginsConfig() // 处理 allow/deny 列表 + ├─ 检查缓存 (cacheKey = workspace + plugins config) + │ + ├─ discoverOpenClawPlugins() // 扫描插件候选 + ├─ loadPluginManifestRegistry() // 加载清单文件 + │ + ├─ for each candidate: + │ ├─ 检查启用/禁用状态 + │ ├─ 验证配置 Schema (JSON Schema) + │ ├─ jiti(candidate.source) // 使用 jiti 动态加载 TypeScript + │ ├─ 解析 module export (default export or register/activate) + │ ├─ createApi(record, config) // 创建插件 API + │ └─ register(api) // 调用插件注册函数 + │ ├─ api.registerChannel({ plugin: channelPlugin }) + │ ├─ api.registerTool(tool) + │ ├─ api.registerHook(events, handler) + │ └─ api.registerProvider(provider) + │ + ├─ setActivePluginRegistry(registry) // 设置为全局活跃注册表 + └─ return registry +``` + +### 4.3 插件注册表 (全局单例) + +```typescript +// src/plugins/runtime.ts +const REGISTRY_STATE = Symbol.for("openclaw.pluginRegistryState"); + +// 使用 Symbol 确保跨模块共享同一个注册表实例 +type RegistryState = { + registry: PluginRegistry | null; 
+ key: string | null; +}; + +export function setActivePluginRegistry(registry: PluginRegistry, cacheKey?: string); +export function getActivePluginRegistry(): PluginRegistry | null; +export function requireActivePluginRegistry(): PluginRegistry; +``` + +### 4.4 Telegram 插件注册示例 + +```typescript +// extensions/telegram/index.ts +const plugin = { + id: "telegram", + name: "Telegram", + register(api: OpenClawPluginApi) { + setTelegramRuntime(api.runtime); // 保存全局 Runtime + api.registerChannel({ plugin: telegramPlugin }); // 注册 Channel 插件 + }, +}; +export default plugin; +``` + +--- + +## 5. 路由与会话管理 + +### 5.1 路由解析 (Binding 匹配) + +当一条消息进入系统时,需要确定由哪个 Agent 处理。OpenClaw 使用 **Binding 优先级匹配**: + +```typescript +// src/routing/resolve-route.ts +function resolveAgentRoute(input: ResolveAgentRouteInput): ResolvedAgentRoute { + // input: { cfg, channel, accountId, peer, parentPeer, guildId, teamId } + + // 1. 过滤出匹配 channel + accountId 的 bindings + const bindings = listBindings(cfg).filter(b => + matchesChannel(b.match, channel) && matchesAccountId(b.match?.accountId, accountId) + ); + + // 2. 按优先级匹配 + // peer (DM/群组精确匹配) → parentPeer (线程父级继承) + // → guild (Discord服务器) → team (MS Teams团队) + // → account (账号级别) → channel (渠道级别) → default (默认Agent) + + // 3. 
返回结果 + return { + agentId: "assistant", + channel: "telegram", + accountId: "default", + sessionKey: "agent:assistant:peer:telegram:default:dm:123456", + mainSessionKey: "agent:assistant:main", + matchedBy: "binding.peer", // 调试信息 + }; +} +``` + +### 5.2 Binding 配置 + +```typescript +// 配置文件中的 bindings 数组 +type AgentBinding = { + agentId: string; // 目标 Agent ID + match?: { + channel?: string; // "telegram" + accountId?: string; // "default" 或 "*" (匹配所有) + peer?: { kind: string; id: string }; // 精确匹配特定聊天 + guildId?: string; // Discord 服务器 + teamId?: string; // MS Teams 团队 + }; +}; +``` + +### 5.3 Session Key 构建 + +Session Key 是会话持久化的核心标识,格式根据 DM Scope 不同而变化: + +``` +DM Scope 模式: + "main" → agent:{agentId}:main + "per-peer" → agent:{agentId}:dm:{peerId} + "per-channel-peer" → agent:{agentId}:{channel}:dm:{peerId} + "per-account-channel-peer"→ agent:{agentId}:{channel}:{accountId}:dm:{peerId} + +Group/Channel: + → agent:{agentId}:{channel}:{peerKind}:{peerId} + +Thread (线程): + → {baseSessionKey}:thread:{threadId} +``` + +**Identity Linking**: 支持跨渠道身份关联,例如 Telegram 用户 `123` 和 WhatsApp 用户 `456` 映射到同一个 canonical ID,共享同一个 session。 + +```typescript +// 配置 +session: { + dmScope: "per-peer", + identityLinks: { + "alice": ["telegram:123", "whatsapp:456"], + } +} +``` + +--- + +## 6. 消息流转: 正常发消息 → AI 回复 全链路 + +以下是一条用户消息从进入系统到 AI 回复的**完整流转路径**: + +``` + ┌─────────────────────────────────┐ + │ 用户发送消息 │ + └──────────────┬──────────────────┘ + │ + ┌──────────────▼──────────────────┐ + │ Channel 接收 (Inbound) │ + │ Polling / Webhook / WebSocket │ + └──────────────┬──────────────────┘ + │ + ┌─────────────────────▼─────────────────────┐ + │ 消息预处理 (bot-handlers) │ + │ │ + │ • 去重 (update offset + dedup) │ + │ • 媒体组缓冲 (multi-image → single event) │ + │ • 文本片段组装 (>4000 字符分片重组) │ + │ • Inbound 防抖 (快速连续消息合并) │ + │ • 媒体文件下载 (图片/视频/语音/贴纸) │ + └─────────────────────┬─────────────────────┘ + │ + ┌─────────────────────▼─────────────────────┐ + │ 上下文构建 (bot-message-context) │ + │ │ + │ 1. 
解析 chatType (DM / Group / Thread) │ + │ 2. 解析 Agent 路由 (resolveAgentRoute) │ + │ → agentId, sessionKey │ + │ 3. 安全检查: │ + │ - DM 策略 (pairing/allowlist/open) │ + │ - Group 策略 (open/allowlist/disabled) │ + │ 4. @提及检测与门控 │ + │ - 显式 @bot 提及 │ + │ - 正则模式匹配 │ + │ - 回复链隐式提及 │ + │ 5. 发送 ACK 表情 (👀 处理中) │ + │ 6. 构建消息信封 [Channel From Time] body │ + │ 7. 提取上下文 (引用/转发/位置/群历史) │ + └─────────────────────┬─────────────────────┘ + │ + ┌─────────────────────▼─────────────────────┐ + │ 调度到 Agent (bot-message-dispatch) │ + │ │ + │ 1. 设置流式输出模式: │ + │ - "off": 等完整回复再发送 │ + │ - "partial": 逐 token 实时编辑消息 │ + │ - "block": 语义块级流式 │ + │ 2. 调用 dispatchReplyFromConfig() │ + └─────────────────────┬─────────────────────┘ + │ + ┌───────────────────────────▼───────────────────────────┐ + │ Auto-Reply 管线 (dispatch-from-config) │ + │ │ + │ 1. 检查重复入站消息 │ + │ 2. 触发 message_received 钩子 (插件) │ + │ 3. 检查 /stop 命令 (快速中断) │ + │ 4. 调用 getReplyFromConfig() → LLM 推理 │ + │ ├─ 加载 session transcript │ + │ ├─ 构建 Agent 上下文 (system prompt + tools + skills)│ + │ ├─ 调用 LLM (OpenAI/Anthropic/DeepSeek/...) │ + │ ├─ 执行 tools (如需要) │ + │ └─ 生成 ReplyPayload │ + │ 5. 应用 TTS (如配置) │ + │ 6. 处理跨 Channel 回复路由 │ + └───────────────────────────┬───────────────────────────┘ + │ + ┌─────────────────────▼─────────────────────┐ + │ 回复投递 (delivery / deliver.ts) │ + │ │ + │ 1. 加载 Channel Outbound Adapter │ + │ 2. 标准化 Payload (解析指令, 合并媒体) │ + │ 3. 文本分块: │ + │ - 按字符限制 (Telegram: 4096) │ + │ - 按段落/Markdown 块 │ + │ - Signal: Markdown → 富文本样式 │ + │ 4. 发送: │ + │ - sendText(text) │ + │ - sendMedia(caption, mediaUrl) │ + │ - sendPayload(payload) (channelData) │ + │ 5. 线程引用 (replyToId / threadId) │ + │ 6. 移除 ACK 表情 │ + │ 7. 
记录 session transcript │ + └─────────────────────┬─────────────────────┘ + │ + ┌──────────────▼──────────────────┐ + │ Channel 发送 (Outbound) │ + │ Channel API → 用户收到回复 │ + └──────────────────────────────────┘ +``` + +### 关键步骤详解 + +#### Step 1: 消息接收 + +Channel 通过三种方式接收消息: +- **Long Polling**: 主动轮询 API 获取新消息 (Telegram, WhatsApp) +- **Webhook**: 被动接收 HTTP POST 推送 (Telegram 可选, Google Chat) +- **WebSocket**: 实时双向连接 (Discord via discord.js, Slack Socket Mode) + +#### Step 2: 消息预处理 + +```typescript +// Telegram 特有的预处理: + +// 1. 媒体组缓冲 — Telegram 将多图消息拆成多个 update,需要合并 +const MEDIA_GROUP_TIMEOUT_MS = 1500; // 等待 1.5s 收集同组媒体 + +// 2. 文本片段重组 — 超长消息被 Telegram 分片 +const TEXT_FRAGMENT_START_THRESHOLD = 4000; // >4000字符触发分片检测 + +// 3. Inbound 防抖 — 用户快速连发消息时合并处理 +createInboundDebouncer({ delayMs, maxWaitMs }); +``` + +#### Step 3: 路由解析 + +```typescript +const route = resolveAgentRoute({ + cfg, + channel: "telegram", + accountId: "default", + peer: { kind: "dm", id: "123456" }, +}); +// → { agentId: "assistant", sessionKey: "agent:assistant:main", matchedBy: "default" } +``` + +#### Step 4: Agent 调用 + +核心函数 `getReplyFromConfig()` 负责: +1. 从 sessionKey 加载历史 transcript +2. 根据 agentId 加载 Agent 配置 (system prompt, tools, skills) +3. 调用 LLM Provider (支持 OpenAI, Anthropic, DeepSeek, Kimi, Groq, Mistral, Google, Together) +4. 处理 tool calls (循环执行) +5. 返回 `ReplyPayload[]` + +#### Step 5: 回复投递 + +```typescript +await deliverOutboundPayloads({ + cfg, + channel: "telegram", + to: "123456", + accountId: "default", + payloads: [{ text: "Hello! I'm your AI assistant.", mediaUrl: "..." }], + replyToId: originalMessageId, +}); +``` + +--- + +## 7. 
第三方渠道集成: Telegram 完整流程 + +以 Telegram 为例,详细说明第三方渠道的集成方式和消息流转。 + +### 7.1 插件注册 + +```typescript +// extensions/telegram/index.ts +export default { + id: "telegram", + register(api: OpenClawPluginApi) { + setTelegramRuntime(api.runtime); + api.registerChannel({ plugin: telegramPlugin }); + }, +}; +``` + +### 7.2 Channel Plugin 实现 + +```typescript +// extensions/telegram/src/channel.ts +export const telegramPlugin: ChannelPlugin = { + id: "telegram", + meta: getChatChannelMeta("telegram"), + capabilities: { + chatTypes: ["direct", "group", "channel", "thread"], + reactions: true, + threads: true, + media: true, + nativeCommands: true, + blockStreaming: true, + }, + + config: { + listAccountIds(cfg) { + // 返回配置中的所有 Telegram 账号 ID + return Object.keys(cfg.channels?.telegram?.accounts ?? {}); + }, + resolveAccount(cfg, accountId) { + // 解析账号配置 (botToken, dmPolicy, allowFrom 等) + }, + isConfigured(account) { + // 检查 botToken 是否存在 + }, + isEnabled(account) { + return account.enabled !== false; + }, + }, + + outbound: { + deliveryMode: "direct", // 直接调用 Bot API + textChunkLimit: 4000, // Telegram 限制 + chunker: markdownToTelegramChunks, // Markdown → Telegram HTML 分块 + + async sendText(ctx) { + return sendMessageTelegram(ctx.to, ctx.text, { + accountId: ctx.accountId, + replyToId: ctx.replyToId, + threadId: ctx.threadId, + }); + }, + + async sendMedia(ctx) { + return sendMessageTelegram(ctx.to, ctx.text, { + mediaUrl: ctx.mediaUrl, + accountId: ctx.accountId, + }); + }, + + resolveTarget({ to, allowFrom, accountId }) { + // 验证并标准化 Telegram chat ID + // 支持: 纯数字 ID, @username, t.me/ 链接 + }, + }, + + gateway: { + async startAccount(ctx) { + // 启动 Telegram 监听 + return monitorTelegramProvider({ + cfg: ctx.cfg, + accountId: ctx.accountId, + abortSignal: ctx.abortSignal, + setStatus: ctx.setStatus, + }); + }, + + async stopAccount(ctx) { + // 通过 AbortController 停止 + ctx.abortSignal.abort(); + }, + }, + + security: { + resolveDmPolicy(ctx) { + return { + policy: ctx.account.dmPolicy 
?? "pairing", + allowFrom: ctx.account.allowFrom, + approveHint: "approve via /allow command", + }; + }, + }, +}; +``` + +### 7.3 Telegram 消息接收 (Inbound) 详细流程 + +``` +Telegram 用户发送消息 + │ + ▼ +┌───────────────────────────────────────┐ +│ Telegram API Server │ +│ (api.telegram.org) │ +└───────────────┬───────────────────────┘ + │ + ┌───────────┴───────────┐ + │ │ + ▼ ▼ +┌─────────┐ ┌──────────┐ +│ Polling │ │ Webhook │ +│ (默认) │ │ (可选) │ +│ │ │ │ +│ Grammy │ │ HTTP POST│ +│ Runner │ │ /webhook │ +│ getUpdates│ │ grammy │ +│ + backoff│ │ callback │ +└────┬─────┘ └────┬─────┘ + │ │ + └──────────┬──────────┘ + │ + ▼ +┌──────────────────────────────────────┐ +│ Grammy Middleware Pipeline │ +│ │ +│ 1. apiThrottler() — 速率限制 │ +│ 2. sequentialize() — 按 chat/topic │ +│ 序列化更新, 保证处理顺序 │ +│ 3. 原始更新日志 (debug) │ +│ 4. Update offset 追踪 + 去重 │ +└───────────────┬──────────────────────┘ + │ + ▼ +┌──────────────────────────────────────┐ +│ bot.on("message") Handler │ +│ (bot-handlers.ts — 928 行) │ +│ │ +│ 1. 验证 chatType, 群组策略 │ +│ 2. 文本片段缓冲 (>4000字符) │ +│ 3. 媒体组缓冲 (多图合并) │ +│ 4. 单媒体解析 (resolveMedia) │ +│ 5. Inbound 防抖 (快速连发合并) │ +│ 6. 
调用 processMessage() │ +└───────────────┬──────────────────────┘ + │ + ▼ +┌──────────────────────────────────────┐ +│ processMessage() │ +│ (bot-message.ts) │ +│ │ +│ ┌─ buildTelegramMessageContext() │ +│ │ (bot-message-context.ts 700行) │ +│ │ │ +│ │ • 记录 channel activity │ +│ │ • 解析 chatType + threadId │ +│ │ • resolveAgentRoute() → agent │ +│ │ • DM 安全检查: │ +│ │ - "pairing": 发送配对码 │ +│ │ - "allowlist": 检查白名单 │ +│ │ - "open": 放行 │ +│ │ • Group 安全检查 │ +│ │ • @提及检测: │ +│ │ - @bot 显式提及 │ +│ │ - 正则模式匹配 │ +│ │ - 回复链隐式提及 │ +│ │ • Mention Gating (群组中未提及 │ +│ │ 则跳过) │ +│ │ • 发送 ACK 反应 (👀) │ +│ │ • formatInboundEnvelope() │ +│ │ • 提取引用/转发/位置/贴纸 │ +│ │ • 群组历史上下文 │ +│ └────────────────────────┐ │ +│ │ │ +│ ┌─ dispatchTelegramMessage() │ +│ │ (bot-message-dispatch.ts 357行) │ +│ │ │ +│ │ • 配置流式模式: │ +│ │ - "off": 完整回复后发送 │ +│ │ - "partial": token级实时编辑 │ +│ │ - "block": 语义块级流式 │ +│ │ • dispatchReplyFromConfig() │ +│ │ → getReplyFromConfig() │ +│ │ → LLM 推理 + Tool 执行 │ +│ │ • 流式回调: │ +│ │ - onBlockReply → 编辑草稿消息 │ +│ │ - onToolResult → 中间结果 │ +│ │ • deliverReplies() → 发送回复 │ +│ │ • 移除 ACK 反应 │ +│ └───────────────────────────────────┘ +└──────────────────────────────────────┘ +``` + +### 7.4 Telegram 消息发送 (Outbound) 详细流程 + +```typescript +// src/telegram/send.ts — 754 行 + +async function sendMessageTelegram( + to: string, + text: string, + opts?: { + mediaUrl?: string; + accountId?: string; + replyToId?: string; + threadId?: string | number; + retry?: OutboundRetryConfig; + } +): Promise { + + // 1. 解析账号配置、Bot Token、代理 + const account = resolveAccount(cfg, opts.accountId); + const token = account.botToken; + const api = new Api(token, { proxy }); + + // 2. 标准化 chatId + // 支持: "123456", "@channel_name", "t.me/+xxxxx" + const chatId = normalizeTelegramChatId(to); + + // 3. 文本转换: Markdown → Telegram HTML + const html = markdownToTelegramHtml(text); + + // 4. 
发送消息 + if (opts.mediaUrl) { + // 带媒体: sendPhoto / sendVideo / sendAudio / sendVoice / sendDocument / sendAnimation + const mediaType = detectMediaType(opts.mediaUrl); + const result = await api[`send${mediaType}`](chatId, { + caption: html, + parse_mode: "HTML", + reply_parameters: opts.replyToId ? { message_id: opts.replyToId } : undefined, + message_thread_id: opts.threadId, + }); + return { channel: "telegram", messageId: result.message_id, chatId }; + } else { + // 纯文本 + const result = await api.sendMessage(chatId, html, { + parse_mode: "HTML", + reply_parameters: opts.replyToId ? { message_id: opts.replyToId } : undefined, + message_thread_id: opts.threadId, + link_preview_options: { is_disabled: !account.linkPreview }, + }); + return { channel: "telegram", messageId: result.message_id, chatId }; + } + + // 5. 错误处理 + // - HTML 解析失败 → 降级为纯文本重试 + // - 网络错误 → 指数退避重试 + // - 语音消息被禁止 → 降级为文档发送 + + // 6. 记录已发送消息 (用于反应追踪) +} +``` + +### 7.5 Telegram 配置示例 + +```json5 +// ~/.super-multica/credentials.json5 +{ + channels: { + telegram: { + accounts: { + default: { + botToken: "123456:ABC-DEF...", // BotFather 获取 + // 或 tokenFile: "/path/to/token", // 密钥管理器 + dmPolicy: "pairing", // DM 安全策略 + allowFrom: [123456789], // 白名单 (Telegram user ID) + groupPolicy: "open", // 群组策略 + streamMode: "partial", // 流式输出模式 + textChunkLimit: 4000, // 文本分块大小 + replyToMode: "first", // 引用回复模式 + reactionLevel: "ack", // ACK 反应级别 + actions: { + reactions: true, + sendMessage: true, + }, + groups: { + "-1001234567890": { // 群组 ID + requireMention: true, // 需要 @提及 + tools: { allow: ["search", "calculator"] }, + topics: { + "42": { enabled: true }, // 论坛 topic + }, + }, + }, + }, + }, + }, + }, +} +``` + +--- + +## 8. 
Outbound 投递管线 + +### 8.1 投递编排 + +```typescript +// src/infra/outbound/deliver.ts + +async function deliverOutboundPayloads(params: { + cfg: OpenClawConfig; + channel: "telegram" | "discord" | "slack" | ...; + to: string; // 目标 ID + accountId?: string; + payloads: ReplyPayload[]; // 回复载荷数组 + replyToId?: string; // 引用的消息 ID + threadId?: string | number;// 线程 ID + abortSignal?: AbortSignal; // 中止信号 + mirror?: { // Session transcript 镜像 + sessionKey: string; + text?: string; + }; +}): Promise { + + // 1. 加载 Channel Outbound Adapter + const handler = await createChannelHandler({ + cfg, channel, to, accountId, ... + }); + // → loadChannelOutboundAdapter(channel) + // → plugin.outbound.sendText / sendMedia + + // 2. 标准化 Payload + const normalized = normalizeReplyPayloadsForDelivery(payloads); + // → 解析文本指令 (mediaUrl, replyToId) + // → 合并多个媒体 URL + // → 过滤空/静默 payload + + // 3. 逐个 Payload 发送 + for (const payload of normalized) { + if (payload.mediaUrls.length === 0) { + // 纯文本 → 分块发送 + await sendTextChunks(payload.text); + } else { + // 带媒体 → 逐媒体发送 (首个附带 caption) + for (const url of payload.mediaUrls) { + await handler.sendMedia(first ? payload.text : "", url); + } + } + } + + // 4. 镜像到 session transcript + if (params.mirror) { + await appendAssistantMessageToSessionTranscript(mirror); + } +} +``` + +### 8.2 文本分块策略 + +``` +分块模式: + "length" (默认) — 按字符限制硬切 (chunker 函数) + "newline" — 先按段落/换行拆分, 再按字符限制 + +特殊处理: + Signal — Markdown → 富文本样式 (SignalTextStyleRange) + Telegram — Markdown → HTML (Telegram flavor) + Discord — 原生 Markdown + Embed +``` + +### 8.3 Channel 选择 + +当系统需要主动发消息(非回复),需要确定使用哪个 Channel: + +```typescript +// src/infra/outbound/channel-selection.ts + +async function resolveMessageChannelSelection(params: { + cfg: OpenClawConfig; + channel?: string; +}) { + // 1. 如果指定了 channel, 直接使用 + // 2. 列出所有已配置的 channel + // 3. 只有一个 → 自动选择 + // 4. 
多个 → 抛错要求明确指定 +} + +async function listConfiguredMessageChannels(cfg) { + // 遍历所有已注册的 channel 插件 + // 检查每个插件是否有启用且已配置的账号 + for (const plugin of listChannelPlugins()) { + if (await isPluginConfigured(plugin, cfg)) { + channels.push(plugin.id); + } + } +} +``` + +--- + +## 9. 安全与访问控制 + +### 9.1 DM 安全策略 + +``` +策略类型 (dmPolicy): + "pairing" (默认) — 未知发送者收到配对码, 需管理员批准 + "allowlist" — 仅允许 allowFrom 列表中的用户 + "open" — 允许所有 DM (需 allowFrom 包含 "*") + "disabled" — 忽略所有 DM +``` + +**配对流程 (Pairing)**: +1. 未知用户发送 DM +2. 系统生成配对码,回复给用户 +3. 管理员通过 `/allow` 命令批准 +4. 用户 ID 被加入持久化白名单 + +### 9.2 群组安全策略 + +``` +策略类型 (groupPolicy): + "open" — 绕过 allowFrom, 仅受 mention-gating 控制 + "allowlist" — 仅允许 groupAllowFrom/allowFrom 中的发送者 + "disabled" — 完全阻止群消息 +``` + +### 9.3 Mention Gating (提及门控) + +```typescript +// src/channels/mention-gating.ts + +function resolveMentionGating(params: { + requireMention: boolean; // 是否需要 @提及 + canDetectMention: boolean; // 渠道是否能检测提及 + wasMentioned: boolean; // 是否被提及 + implicitMention?: boolean; // 隐式提及 (回复链) + shouldBypassMention?: boolean; // 命令绕过 +}): { + effectiveWasMentioned: boolean; + shouldSkip: boolean; // true = 跳过处理 +}; +``` + +在群组中,当 `requireMention = true` 时: +- 显式 `@bot` 提及 → 处理 +- 回复 bot 消息 (隐式提及) → 处理 +- 授权用户发送控制命令 → 绕过门控 +- 其他消息 → 跳过 + +--- + +## 10. Channel Manager 生命周期管理 + +```typescript +// src/gateway/server-channels.ts + +type ChannelManager = { + getRuntimeSnapshot: () => ChannelRuntimeSnapshot; // 获取所有 channel 运行状态 + startChannels: () => Promise; // 启动所有已配置 channel + startChannel: (channel, accountId?) => Promise; // 启动单个 channel + stopChannel: (channel, accountId?) => Promise; // 停止单个 channel + markChannelLoggedOut: (channelId, cleared, accountId?) => void; // 标记登出 +}; +``` + +### 启动流程 + +``` +createChannelManager(opts) + │ + ├─ startChannels() + │ └─ for each plugin in listChannelPlugins(): + │ └─ startChannel(plugin.id) + │ + └─ startChannel(channelId, accountId?) 
+ │ + ├─ 获取 plugin = getChannelPlugin(channelId) + ├─ 获取 startAccount = plugin.gateway.startAccount + │ + ├─ for each accountId in plugin.config.listAccountIds(cfg): + │ ├─ 检查是否已启动 (store.tasks.has(id)) + │ ├─ 解析账号配置: plugin.config.resolveAccount(cfg, id) + │ ├─ 检查启用状态: plugin.config.isEnabled(account, cfg) + │ ├─ 检查配置完整: plugin.config.isConfigured(account, cfg) + │ │ + │ ├─ 创建 AbortController + │ ├─ 更新运行状态: setRuntime(running: true, lastStartAt: now) + │ │ + │ └─ startAccount({ + │ cfg, accountId, account, + │ runtime, + │ abortSignal: abort.signal, + │ log: channelLogs[channelId], + │ getStatus, setStatus, + │ }) + │ │ + │ └─ (Telegram) → monitorTelegramProvider() + │ → Grammy Runner / Webhook Server + │ + └─ 错误处理: + ├─ catch → setRuntime(lastError: message) + └─ finally → setRuntime(running: false, lastStopAt: now) +``` + +### 运行时状态追踪 + +```typescript +type ChannelRuntimeStore = { + aborts: Map<string, AbortController>; // 每个账号的中止控制器 + tasks: Map<string, Promise<unknown>>; // 每个账号的运行任务 + runtimes: Map<string, ChannelAccountSnapshot>; // 每个账号的状态快照 +}; +``` + +`getRuntimeSnapshot()` 聚合所有 channel 的账号状态,用于 UI 展示和健康监控。 + +--- + +## 11. 设计亮点与可借鉴之处 + +### 11.1 Adapter 模式 + +每个 Channel 只需实现必要的 Adapter,无需实现全部。这种**可选 Adapter 组合**模式比传统的继承/全量接口更灵活: + +```typescript +// 最简 Channel 实现只需要: +{ + id: "my-channel", + meta: { ... 
}, + capabilities: { chatTypes: ["direct"] }, + config: { listAccountIds, resolveAccount }, // 必选 + outbound: { sendText, sendMedia }, // 发消息 + gateway: { startAccount }, // 生命周期 +} +``` + +### 11.2 插件发现的层级优先级 + +``` +config > workspace > global > bundled +``` + +允许用户在项目级、全局级、以及内置级别分别管理插件,高优先级覆盖低优先级。 + +### 11.3 Session Key 的灵活设计 + +通过 `dmScope` 控制 DM 会话的隔离粒度: +- `"main"` — 所有 DM 共享一个 session (跨渠道统一上下文) +- `"per-peer"` — 每个联系人独立 session +- `"per-channel-peer"` — 每个渠道+联系人独立 +- `"per-account-channel-peer"` — 最细粒度 + +配合 `identityLinks` 实现跨渠道身份关联。 + +### 11.4 流式输出的三级模式 + +``` +"off" — 完整回复后一次性发送 +"partial" — Token 级实时编辑消息 (Telegram editMessageText) +"block" — 语义块级流式 (一段完成后发送) +``` + +### 11.5 安全模型分层 + +``` +DM 层: dmPolicy (pairing/allowlist/open/disabled) +Group 层: groupPolicy (open/allowlist/disabled) +提及层: mention-gating (requireMention + 检测) +命令层: command-gating (权限控制) +``` + +### 11.6 统一的 Outbound 投递管线 + +所有 Channel 共享同一个 `deliverOutboundPayloads()` 入口,通过 `loadChannelOutboundAdapter()` 动态加载具体 Channel 的发送逻辑。文本分块、Payload 标准化、错误处理、transcript 镜像等逻辑全部复用。 + +### 11.7 值得注意的工程实践 + +- **Update Offset 持久化** — Telegram 轮询重启后从上次 offset 恢复,避免重复处理 +- **媒体组缓冲** — 解决 Telegram 多图消息拆分为多个 update 的问题 +- **文本片段重组** — 解决超长消息被 Telegram 拆分的问题 +- **Grammy sequentialize** — 保证同一聊天的消息按顺序处理 +- **AbortController** — 优雅的生命周期控制 +- **Symbol.for 全局单例** — 跨模块共享插件注册表 + +--- + +## 12. 
关键文件索引 + +### Inbound 链路 +| 文件 | 行数 | 职责 | +|------|------|------| +| `src/telegram/monitor.ts` | 215 | 长轮询/Webhook 启动入口 | +| `src/telegram/webhook.ts` | 127 | HTTP Webhook 服务器 | +| `src/telegram/bot.ts` | 494 | Grammy Bot 创建与中间件编排 | +| `src/telegram/bot-handlers.ts` | 928 | 消息/回调/反应处理器注册 | +| `src/telegram/bot-message.ts` | 92 | 消息处理器工厂 | +| `src/telegram/bot-message-context.ts` | 700 | Inbound 上下文构建 | +| `src/telegram/bot-message-dispatch.ts` | 357 | 调度到 Agent 并处理流式回复 | + +### Outbound 链路 +| 文件 | 行数 | 职责 | +|------|------|------| +| `src/infra/outbound/deliver.ts` | 376 | 主投递编排 | +| `src/infra/outbound/payloads.ts` | ~150 | Payload 标准化 | +| `src/infra/outbound/channel-selection.ts` | ~100 | 多 Channel 选择 | +| `src/telegram/send.ts` | 754 | Telegram 发送函数 | +| `src/telegram/bot/delivery.ts` | 562 | Telegram 回复投递 | + +### 插件系统 +| 文件 | 行数 | 职责 | +|------|------|------| +| `src/plugins/registry.ts` | ~350 | 插件注册表 | +| `src/plugins/runtime.ts` | ~50 | 全局单例管理 | +| `src/plugins/loader.ts` | ~400 | 插件加载器 | +| `src/channels/plugins/types.plugin.ts` | 85 | ChannelPlugin 接口 | +| `src/channels/plugins/types.adapters.ts` | 313 | Adapter 接口 | +| `src/channels/plugins/types.core.ts` | 332 | 基础类型 | +| `src/channels/plugins/catalog.ts` | ~300 | 插件发现与目录 | + +### 路由与会话 +| 文件 | 行数 | 职责 | +|------|------|------| +| `src/routing/resolve-route.ts` | 261 | Agent 路由解析 | +| `src/routing/bindings.ts` | 121 | 路由绑定 | +| `src/routing/session-key.ts` | 250 | Session Key 构建 | + +### 生命周期 +| 文件 | 行数 | 职责 | +|------|------|------| +| `src/gateway/server-channels.ts` | 309 | ChannelManager | +| `src/channels/registry.ts` | 180 | 内置 Channel 注册表 | +| `extensions/telegram/index.ts` | ~15 | 插件入口 | +| `extensions/telegram/src/channel.ts` | 482 | Telegram ChannelPlugin 实现 | + +### 配置 +| 文件 | 职责 | +|------|------| +| `src/config/types.channels.ts` | Channel 配置汇总 | +| `src/config/types.telegram.ts` | Telegram 专属配置 (~200 行) | diff --git a/docs/channels/README.md b/docs/channels/README.md new file mode 100644 
index 00000000..426ebdb3 --- /dev/null +++ b/docs/channels/README.md @@ -0,0 +1,175 @@ +# Channel System + +The Channel system connects external messaging platforms (Telegram, Discord, etc.) to the Hub's agent. Each platform is a **plugin** that translates platform-specific APIs into a unified interface. + +> For media handling details (audio transcription, image/video description), see [media-handling.md](./media-handling.md). +> For message flow across all three I/O paths (Desktop / Web / Channel), see [message-paths.md](../message-paths.md). + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ credentials.json5 │ +│ { channels: { telegram: { default: { botToken } } } } │ +└──────────────────────┬──────────────────────────────────────┘ + │ loadChannelsConfig() + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Channel Manager (manager.ts) │ +│ │ +│ startAll() → iterate plugins → startAccount() per account │ +│ subscribeToAgent() → listen for AI replies │ +│ │ +│ Incoming: routeIncoming() → routeMedia() → agent.write() │ +│ Outgoing: lastRoute → aggregator → plugin.outbound.*() │ +└──────────┬──────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Plugin Registry (registry.ts) │ +│ registerChannel(plugin) / listChannels() / getChannel(id) │ +└──────────┬──────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Channel Plugins (e.g. 
telegram.ts) │ +│ │ +│ config — resolve account credentials │ +│ gateway — receive messages (polling / webhook) │ +│ outbound — send replies back to platform │ +│ downloadMedia() — download media files to local disk │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Plugin Interface + +Each channel plugin implements `ChannelPlugin` (defined in `types.ts`): + +```typescript +interface ChannelPlugin { + readonly id: string; // "telegram", "discord", etc. + readonly meta: { name: string; description: string }; + readonly chunkerConfig?: BlockChunkerConfig; // override text chunking per platform + readonly config: ChannelConfigAdapter; // credential resolution + readonly gateway: ChannelGatewayAdapter; // receive messages + readonly outbound: ChannelOutboundAdapter; // send replies + downloadMedia?(fileId: string, accountId: string): Promise; // optional +} +``` + +### Three Adapters + +| Adapter | Role | Key Methods | +|---------|------|-------------| +| **config** | Resolve credentials from `credentials.json5` | `listAccountIds()`, `resolveAccount()`, `isConfigured()` | +| **gateway** | Receive inbound messages from the platform | `start(accountId, config, onMessage, signal)` | +| **outbound** | Send replies back to the platform | `sendText()`, `replyText()`, `sendTyping?()` | + +### downloadMedia (optional) + +Platforms that support media (voice, image, video, document) implement `downloadMedia()` to download files to `~/.super-multica/cache/media/` with UUID filenames. The Manager calls this before processing media. + +## Message Flow + +### Inbound (Platform → Agent) + +``` +User sends message in Telegram + → grammy long-polling → onMessage callback + → ChannelManager.routeIncoming() + 1. Update lastRoute (reply target) + 2. Start typing indicator + 3. If media: routeMedia() → download → transcribe/describe → text + 4. agent.write(text) +``` + +All media is converted to text before the agent sees it. 
See [media-handling.md](./media-handling.md) for details. + +### Outbound (Agent → Platform) + +``` +Agent produces reply + → agent.subscribe() in ChannelManager + → Check: if (!lastRoute) return // not from a channel, skip + → message_start → create MessageAggregator + → message_update → feed text to aggregator + → message_end → aggregator flushes final block + → Aggregator emits BlockReply chunks + → Block 0: plugin.outbound.replyText() // Telegram reply format + → Block N: plugin.outbound.sendText() // follow-up messages +``` + +The **MessageAggregator** buffers streaming LLM output and splits it into blocks at natural text boundaries (paragraphs, code blocks). This is necessary because messaging platforms cannot consume raw streaming deltas. + +## lastRoute Pattern + +The `lastRoute` tracks which channel last sent a message: + +- **Channel message arrives** → `lastRoute` is set to that plugin + conversation +- **Desktop/Web message arrives** → `clearLastRoute()` is called +- **Agent replies** → if `lastRoute` is set, reply goes to that channel; otherwise skipped + +This ensures replies go back to the originating channel. Desktop and Web always receive agent events independently via their own mechanisms (IPC / Gateway). + +## Configuration + +Channel credentials are stored in `~/.super-multica/credentials.json5` under the `channels` key: + +```json5 +{ + channels: { + telegram: { + default: { + botToken: "123456:ABC-DEF..." + } + }, + // discord: { default: { botToken: "..." } }, + } +} +``` + +Each channel ID maps to accounts (keyed by account ID, typically `"default"`). The config adapter for each plugin knows how to extract and validate its credentials. + +## Adding a New Plugin + +1. Create `src/channels/plugins/<name>.ts` implementing `ChannelPlugin` +2. Register it in `src/channels/index.ts`: + ```typescript + import { Channel } from "./plugins/<name>.js"; + registerChannel(Channel); + ``` +3. 
Add the config shape to the `channels` section of `credentials.json5` + +### Implementation Checklist + +- [ ] `config` adapter: parse credentials from `credentials.json5` +- [ ] `gateway` adapter: connect to platform, normalize messages to `ChannelMessage` +- [ ] `outbound` adapter: `sendText`, `replyText`, optional `sendTyping` +- [ ] `downloadMedia` (if platform supports media): download to `MEDIA_CACHE_DIR` +- [ ] Group filtering: only respond to messages directed at the bot +- [ ] Graceful shutdown: respect the `AbortSignal` passed to `gateway.start()` + +## File Map + +| File | Role | +|------|------| +| `src/channels/types.ts` | All type definitions (`ChannelPlugin`, `ChannelMessage`, `DeliveryContext`, etc.) | +| `src/channels/manager.ts` | `ChannelManager` — bridges plugins to the Hub's agent | +| `src/channels/registry.ts` | Plugin registry (`registerChannel`, `listChannels`, `getChannel`) | +| `src/channels/config.ts` | Load channel config from `credentials.json5` | +| `src/channels/index.ts` | Bootstrap: register built-in plugins, re-export public API | +| `src/channels/plugins/telegram.ts` | Telegram plugin (grammy, long polling) | +| `src/channels/plugins/telegram-format.ts` | Markdown → Telegram HTML converter | +| `src/media/transcribe.ts` | Audio transcription (local whisper → OpenAI API) | +| `src/media/describe-image.ts` | Image description (OpenAI Vision API) | +| `src/media/describe-video.ts` | Video description (ffmpeg frame + Vision API) | +| `src/shared/paths.ts` | `MEDIA_CACHE_DIR` path constant | +| `src/hub/message-aggregator.ts` | Streaming text → block chunking for channel delivery | + +## Current Plugins + +| Plugin | Platform | Transport | Library | +|--------|----------|-----------|---------| +| `telegram` | Telegram | Long polling | grammy | + +Planned: Discord, Feishu, LINE, etc. 
diff --git a/docs/channels/media-handling.md b/docs/channels/media-handling.md new file mode 100644 index 00000000..bfed1ff7 --- /dev/null +++ b/docs/channels/media-handling.md @@ -0,0 +1,161 @@ +# Channel Media Handling + +How multimedia messages (voice, image, video, document) from messaging platforms are processed before reaching the Agent. + +## Core Principle + +All media is converted to text before the Agent sees it. The Agent only ever receives plain text via `agent.write()`. + +``` +Platform message (voice/image/video/doc) + → Plugin: detect type + download file + → Manager: convert to text (API transcription / vision description) + → Agent receives text via agent.write() +``` + +## Reference Architecture (OpenClaw) + +OpenClaw supports 6 platforms (Telegram, Discord, LINE, Signal, iMessage, Slack). All share the same media processing pipeline. + +### Per-Platform Layer (different for each platform) + +Each platform detects media type using its own API: + +| Platform | Detection Method | +|----------|-----------------| +| Telegram | `msg.voice`, `msg.audio`, `msg.photo`, `msg.video`, `msg.document` | +| Discord | `attachment.content_type` MIME prefix (`audio/`, `image/`, `video/`) | +| LINE | `message.type` field (`"audio"`, `"image"`, `"video"`, `"file"`) | +| Signal | `attachment.contentType` MIME prefix | +| iMessage | `attachment.mime_type` MIME prefix | +| Slack | Any file attachment (MIME-based detection happens later) | + +Each platform downloads the file using its own API, saves to local disk, and tags it: +- `<media:audio>` for voice/audio +- `<media:image>` for images +- `<media:video>` for video +- `<media:document>` for files + +### Shared Layer (`applyMediaUnderstanding()`) + +One function handles all conversions, called automatically before the Agent sees the message: + +1. Reads local file path + MIME type +2. 
Selects conversion method based on type: + - **audio** → transcription (whisper local / OpenAI API / Groq / Deepgram / Google) + - **image** → vision model description (Gemini / OpenAI / Anthropic) + - **video** → vision model description +3. Replaces placeholder with formatted text: + - Audio: `[Audio]\nTranscript:\n` + - Image: `[Image]\nDescription:\n` +4. If conversion fails (no provider configured), the raw placeholder stays in the message + +### Transcription Provider Priority + +Auto-detection order: +1. sherpa-onnx-offline (local) +2. whisper-cli / whisper.cpp (local) +3. whisper Python CLI (local) +4. gemini CLI (local) +5. API providers: OpenAI → Groq → Deepgram → Google + +### Skill Integration + +Whisper skills declare requirements in `SKILL.md` metadata: +```yaml +requires: + bins: ["whisper"] # must exist in PATH +``` + +If the binary is missing, the skill is filtered out — the Agent never sees it. If present, the Agent can use it for transcription. + +--- + +## Our Implementation + +All media is converted to text in the Manager layer (`routeMedia()`) before reaching the Agent, matching OpenClaw's `applyMediaUnderstanding()` pattern. + +### Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ Platform Plugin (e.g. 
telegram.ts) │ +│ │ +│ bot.on("message:voice") → detect type │ +│ bot.api.getFile() → download to local disk │ +│ Emit ChannelMessage with media attachment │ +└──────────────────┬──────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Channel Manager (manager.ts → routeMedia()) │ +│ │ +│ Download file via plugin.downloadMedia() │ +│ audio → transcribeAudio() → text │ +│ image → describeImage() → text │ +│ video → describeVideo() (ffmpeg frame + vision) → text │ +│ document → file path info │ +│ All results → agent.write(text) │ +└──────────────────┬──────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────┐ +│ Agent receives plain text only │ +│ e.g. "[Voice Message]\nTranscript: ..." │ +│ e.g. "[Image]\nDescription: ..." │ +│ e.g. "[Video]\nDescription: ..." │ +└─────────────────────────────────────────────────────┘ +``` + +### Media Processing Modules + +| Type | Module | Method | API | +|------|--------|--------|-----| +| audio | `src/media/transcribe.ts` | `transcribeAudio()` | Local whisper/whisper-cli → OpenAI Whisper API (`whisper-1`) | +| image | `src/media/describe-image.ts` | `describeImage()` | OpenAI Vision API (`gpt-4o-mini`) | +| video | `src/media/describe-video.ts` | `describeVideo()` | ffmpeg frame extraction + Vision API | +| document | (inline in manager) | — | File path info only | + +### Agent Output Format + +| Type | Success | No API Key | +|------|---------|------------| +| audio | `[Voice Message]\nTranscript: ` | `[audio message received]\nFile: ` | +| image | `[Image]\nDescription: ` | `[image message received]\nFile: ` | +| video | `[Video]\nDescription: ` | `[video message received]\nFile: ` | +| document | `[document message received]\nFile: ` | same | + +### Audio Transcription Priority + +`transcribeAudio()` tries providers in order, matching OpenClaw's local-first approach: + +1. **Local whisper/whisper-cli** — Free, no latency, works offline. 
Detected via `which` and cached. +2. **OpenAI Whisper API** (`whisper-1`) — Requires API key in `credentials.json5`. +3. **null** — No provider available. Placeholder stays in message, agent naturally responds (e.g. suggests installing whisper). + +### Whisper Skill (Agent Fallback) + +The `skills/whisper/SKILL.md` skill is a secondary safety net. When `transcribeAudio()` returns null (no local binary detected, no API key), the agent receives a placeholder containing the file path. If a whisper binary is available to the agent at that point, the skill tells the agent how to transcribe the file via the exec tool. + +### File Map + +| File | Role | +|------|------| +| `src/channels/types.ts` | `ChannelMediaAttachment`, `ChannelMessage.media`, `ChannelPlugin.downloadMedia` | +| `src/channels/plugins/telegram.ts` | Detect voice/audio/photo/video/document + download via Grammy API | +| `src/channels/manager.ts` | `routeMedia()` — download, convert, `agent.write(text)` | +| `src/media/transcribe.ts` | Audio → text (local whisper → OpenAI Whisper API) | +| `src/media/describe-image.ts` | Image → text via OpenAI Vision API (gpt-4o-mini) | +| `src/media/describe-video.ts` | Video → extract frame (ffmpeg) → text via Vision API | +| `src/shared/paths.ts` | `MEDIA_CACHE_DIR` (`~/.super-multica/cache/media/`) | +| `skills/whisper/SKILL.md` | Local whisper CLI fallback skill | + +### Future Work + +| Task | Scope | +|------|-------| +| Groq / Deepgram fallback for audio | `src/media/transcribe.ts` | +| Multi-provider vision support (Gemini, Anthropic) | `src/media/describe-image.ts` | +| Document text extraction (PDF, DOCX) | `src/media/` | +| Media cache cleanup (delete old files) | `src/shared/` | +| Outbound media (send images/audio back to channels) | `types.ts`, plugins | diff --git a/docs/message-paths.md b/docs/message-paths.md new file mode 100644 index 00000000..2b8d24c7 --- /dev/null +++ b/docs/message-paths.md @@ -0,0 +1,232 @@ +# Message Paths — Desktop / Web / Channel + +Three independent paths deliver messages to and from the 
Hub's agent. +All three share the same `AsyncAgent` instance — they are just different I/O surfaces. + +--- + +## Overview + +``` +Desktop (Electron IPC) Web (WebSocket via Gateway) Channel (Bot API, e.g. Telegram) + │ │ │ + ▼ ▼ ▼ + localChat:send IPC client.send → Gateway WS plugin.gateway (polling/webhook) + │ │ │ + ▼ ▼ ▼ + hub.ts / ipc/hub.ts hub.ts / onMessage manager.ts / routeIncoming + clearLastRoute() clearLastRoute() set lastRoute + │ │ │ + └────────────────► agent.write(text) ◄──────────────────────────────┘ + │ + ▼ + AsyncAgent.run() + │ + ┌────────────┴────────────────┐ + ▼ ▼ + agent.subscribe() agent.read() + (multi-consumer) (single-consumer iterable) + │ │ + ┌────────┴────────┐ ▼ + ▼ ▼ hub.ts / consumeAgent() + Desktop IPC Channel Manager │ + (ipc/hub.ts) (manager.ts) ▼ + │ │ Gateway WS → Web client + ▼ ▼ + localChat:event Bot API reply + → renderer (via lastRoute) +``` + +--- + +## Path 1: Desktop (Electron IPC) + +### Send (User → Agent) + +``` +Renderer: sendMessage(text) + → IPC: localChat:send + → ipc/hub.ts handler + → hub.channelManager.clearLastRoute() // reply stays in desktop + → agent.write(text) +``` + +**File**: `apps/desktop/electron/ipc/hub.ts` — `localChat:send` handler (line ~373) + +### Receive (Agent → User) + +``` +Agent runs LLM + → pi-agent-core fires AgentEvent + → Agent.subscribeAll() → AsyncAgent channel + subscribers + → agent.subscribe() callback in ipc/hub.ts + → Filter: assistant messages + tool_execution + passthrough (compaction, agent_error) + → IPC: mainWindow.webContents.send('localChat:event', { agentId, streamId, event }) + → Renderer: use-local-chat.ts onEvent callback + → chat.handleStream(payload) +``` + +**Files**: +- `apps/desktop/electron/ipc/hub.ts` — `localChat:subscribe` handler (line ~248) +- `apps/desktop/src/hooks/use-local-chat.ts` — `onEvent` listener (line ~54) +- `packages/hooks/src/use-chat.ts` — `handleStream()` (line ~133) + +### Error Handling + +``` +Agent.run() throws / returns error + → 
AsyncAgent.write() catch block + → channel.send(legacy Message) // for read() consumers (Web) + → agent.emitMulticaEvent({ type: "agent_error", error }) // for subscribe() consumers + → ipc/hub.ts subscriber → passthrough event → localChat:event + → use-local-chat.ts → chat.setError() + setIsLoading(false) +``` + +--- + +## Path 2: Web (WebSocket via Gateway) + +### Send (User → Agent) + +``` +Web app: sendMessage(text) + → GatewayClient.send(hubId, "message", { agentId, content }) + → Socket.io → Gateway server → routes to Hub device + → hub.ts / onMessage handler + → channelManager.clearLastRoute() // reply stays in gateway + → agentSenders.set(agentId, deviceId) + → agent.write(content) +``` + +**File**: `src/hub/hub.ts` — `onMessage` handler (line ~154) + +### Receive (Agent → User) + +``` +Agent runs LLM + → pi-agent-core fires AgentEvent + → Agent.subscribeAll() → AsyncAgent channel + subscribers + → agent.read() consumed by hub.ts / consumeAgent() + → Filter: assistant messages + tool_execution + passthrough (compaction, agent_error) + → client.send(targetDeviceId, StreamAction, { streamId, agentId, event }) + → Socket.io → Gateway → routes to Web client device + → GatewayClient.onMessage callback + → use-gateway-chat.ts → chat.handleStream(payload) +``` + +**Files**: +- `src/hub/hub.ts` — `consumeAgent()` (line ~314) +- `packages/hooks/src/use-gateway-chat.ts` — `onMessage` listener (line ~50) +- `packages/hooks/src/use-chat.ts` — `handleStream()` (line ~133) + +### Error Handling + +``` +Agent.run() throws / returns error + → AsyncAgent.write() catch block + → channel.send(legacy Message) // consumed by consumeAgent() → sent as "message" action + → agent.emitMulticaEvent({ type: "agent_error", error }) + → read() → consumeAgent() → passthrough event → StreamAction + → GatewayClient → use-gateway-chat.ts → chat.setError() + setIsLoading(false) +``` + +**Note**: Legacy error Messages also reach the Web client as `"message"` action (a plain text fallback). 
The `agent_error` event provides structured error info for proper UI rendering. + +--- + +## Path 3: Channel (Bot API, e.g. Telegram) + +### Send (User → Agent) + +``` +User sends message in Telegram + → grammy long-polling receives Update + → plugin.gateway.start() callback: onMessage(channelMessage) + → ChannelManager.routeIncoming() + → Set lastRoute = { plugin, deliveryCtx } // reply goes back to Telegram + → agent.write(text) // same as desktop/web +``` + +**File**: `src/channels/manager.ts` — `routeIncoming()` (line ~233) + +### Receive (Agent → User) + +``` +Agent runs LLM + → pi-agent-core fires AgentEvent + → Agent.subscribeAll() → AsyncAgent channel + subscribers + → agent.subscribe() callback in ChannelManager.subscribeToAgent() + → Check: if (!lastRoute) return // no active channel route, skip + → Filter: only assistant messages + → message_start → createAggregator() // MessageAggregator buffers/chunks text + → message_update → aggregator.handleEvent() + → message_end → aggregator.handleEvent() → null aggregator + → Aggregator emits text blocks + → Block 0: plugin.outbound.replyText(deliveryCtx, text) // Telegram reply + → Block N: plugin.outbound.sendText(deliveryCtx, text) // follow-up messages +``` + +**Files**: +- `src/channels/manager.ts` — `subscribeToAgent()` (line ~151), `createAggregator()` (line ~205) +- `src/hub/message-aggregator.ts` — text chunking/buffering logic + +### Error Handling + +``` +Agent.run() throws / returns error + → AsyncAgent.write() catch block + → agent.emitMulticaEvent({ type: "agent_error", error }) + → subscribe() → ChannelManager subscriber + → if lastRoute exists: + → plugin.outbound.sendText(deliveryCtx, "[Error] ${errorMsg}") +``` + +--- + +## Comparison Table + +| Aspect | Desktop (IPC) | Web (WebSocket) | Channel (Bot API) | +|---------------------|------------------------|---------------------------|--------------------------| +| **Transport** | Electron IPC | Socket.io via Gateway | Bot API (HTTP) | +| **Send 
entry** | `localChat:send` | `client.send` → Gateway | `routeIncoming` | +| **Receive method** | `agent.subscribe()` | `agent.read()` (iterable) | `agent.subscribe()` | +| **Consumer** | ipc/hub.ts subscriber | hub.ts `consumeAgent()` | manager.ts subscriber | +| **Frontend hook** | `use-local-chat.ts` | `use-gateway-chat.ts` | N/A (Bot API) | +| **State hook** | `use-chat.ts` | `use-chat.ts` | N/A | +| **Reply routing** | Always (IPC channel) | `agentSenders` Map | `lastRoute` pattern | +| **clearLastRoute** | Yes (on send) | Yes (on send) | No (sets lastRoute) | +| **Error display** | `agent_error` → UI | `agent_error` → UI | `agent_error` → Bot text | +| **Tool results** | Rendered in UI | Rendered in UI | Skipped (text only) | +| **Text chunking** | No (full stream) | No (full stream) | Yes (MessageAggregator) | + +--- + +## lastRoute Pattern + +The `lastRoute` tracks which channel last sent a message. When the agent replies: +- If `lastRoute` is set → reply goes to that channel (e.g. Telegram) +- If `lastRoute` is null → reply goes to Desktop/Web only (via their own mechanisms) + +**Clearing**: Desktop and Web both call `channelManager.clearLastRoute()` before `agent.write()`, so channel replies stop when the user switches to desktop/web. + +**Setting**: `routeIncoming()` sets `lastRoute` when a channel message arrives. + +Desktop and Web always receive agent events regardless of `lastRoute` — they use their own independent delivery mechanisms (IPC subscribe / Gateway read). + +--- + +## Event Filtering + +All three paths filter raw agent events. 
Only these are forwarded to consumers: + +| Event Type | Desktop | Web | Channel | +|-------------------------|---------|-----|---------| +| `message_start` | assistant only | assistant only | assistant only | +| `message_update` | assistant only | assistant only | assistant only | +| `message_end` | assistant only | assistant only | assistant only | +| `tool_execution_start` | Yes | Yes | No | +| `tool_execution_end` | Yes | Yes | No | +| `compaction_start` | Yes (passthrough) | Yes (passthrough) | No | +| `compaction_end` | Yes (passthrough) | Yes (passthrough) | No | +| `agent_error` | Yes (passthrough) | Yes (passthrough) | Yes (→ text) | +| User message events | Filtered out | Filtered out | Filtered out | diff --git a/docs/mobile/app-store-submission-guide.md b/docs/mobile/app-store-submission-guide.md deleted file mode 100644 index c5da7cdc..00000000 --- a/docs/mobile/app-store-submission-guide.md +++ /dev/null @@ -1,234 +0,0 @@ -# App Store Submission Guide - -Complete guide for publishing the Expo React Native app to Apple App Store and Google Play Store. - -## 1. Prerequisites - -### Accounts & Fees - -| Platform | Cost | Notes | -|----------|------|-------| -| Apple Developer Program | $99/year | Required for App Store distribution | -| Google Play Console | $25 one-time | Developer registration | -| Expo Account | Free (paid plans available) | Required for EAS Build/Submit | - -- Apple Developer account review: 1-2 days -- Google Play developer account review: days to weeks - -### Tools - -```bash -npm install -g eas-cli -eas login -eas whoami # verify login -``` - -## 2. Project Configuration - -### Initialize EAS - -```bash -eas build:configure -``` - -Generates `eas.json` with three build profiles: `development`, `preview`, `production`. 
- -### Key `app.json` / `app.config.ts` Fields - -```jsonc -{ - "name": "Multica", - "slug": "multica", - "version": "1.0.0", - "ios": { - "bundleIdentifier": "com.multica.app", - "buildNumber": "1" // increment on each submission - }, - "android": { - "package": "com.multica.app", - "versionCode": 1 // increment on each submission - }, - "icon": "./assets/icon.png", // 1024x1024 PNG - "splash": { - "image": "./assets/splash.png" - } -} -``` - -## 3. App Signing & Credentials - -### iOS - -- EAS auto-manages credentials (recommended): Distribution Certificate + Provisioning Profile -- Or create manually in Apple Developer Portal - -### Android - -- EAS auto-generates Keystore (recommended), stored securely on EAS servers -- **Back up Keystore** — losing it means you cannot update the published app -- Play Store requires AAB (Android App Bundle) format - -## 4. Production Build - -```bash -# iOS -eas build --platform ios --profile production - -# Android -eas build --platform android --profile production - -# Both -eas build --platform all --profile production -``` - -Builds run in Expo cloud — no local Xcode or Android Studio needed. - -## 5. 
Store Listing Preparation - -### Required for Both Platforms - -#### Privacy Policy - -- **Mandatory** — must be a publicly accessible URL -- Must clearly state: - - What data the app collects and how - - Whether data is shared with third parties - - Data retention and deletion policies - - How users can request data deletion -- **2025 rule**: If data is sent to third-party AI, must disclose explicitly and obtain user consent -- Tools: Termly, PrivacyPolicies.com, or custom page - -#### App Screenshots - -- **iOS**: Multiple sizes required (6.7", 6.5", 5.5" iPhone + iPad) -- **Android**: 2-8 screenshots -- Must accurately reflect current app interface - -#### App Icon - -- 1024x1024 high-resolution PNG -- No alpha/transparency for iOS - -#### App Description - -- Short description (≤80 chars for Google Play) -- Full description - -#### Support URL - -- A link where users can get help - -#### Account Deletion - -- If the app supports registration, users **must** be able to delete their account and data in-app -- Both Apple and Google require this - -### Apple App Store Connect — Additional Requirements - -| Item | Details | -|------|---------| -| Privacy Nutrition Labels | Fill out data collection practices per category in App Store Connect | -| App Review Information | Reviewer contact info, demo/test account credentials | -| Content Rating | Age classification | -| Export Compliance | Encryption usage declaration | -| Info.plist Permission Strings | Clear purpose description for each permission (camera, location, etc.) | - -### Google Play Console — Additional Requirements - -| Item | Details | -|------|---------| -| Data Safety Form | Detail data collection and sharing (required even if no data is collected) | -| Content Rating Questionnaire | IARC rating questionnaire | -| Target Audience | Declare if the app targets children | -| First Upload | Must be done manually via Play Console (Google Play API limitation) | - -## 6. 
Submit to Stores - -### Apple App Store - -```bash -eas submit --platform ios -``` - -This uploads the build to **App Store Connect / TestFlight**. Then you must: - -1. Log into App Store Connect -2. Select the uploaded build -3. Associate it with a version -4. Fill in all metadata, screenshots, privacy labels -5. Submit for App Review - -### Google Play Store - -```bash -eas submit --platform android -``` - -**First time**: Must upload AAB manually in Play Console. - -After initial upload: -1. Navigate to Production → Create new release -2. Upload AAB or use the EAS-submitted build -3. Fill in description, screenshots, data safety form -4. Submit for review - -### Auto-Submit (Optional) - -```bash -eas build --platform all --profile production --auto-submit -``` - -## 7. App Review - -| | Apple | Google | -|---|---|---| -| Review time | Typically 24-48 hours | Hours to 7 days | -| Common rejections | Incomplete features, misleading screenshots, missing privacy policy, unclear permission strings | Data safety form mismatch, policy violations | -| After rejection | Fix issues, resubmit | Fix issues, resubmit | - -## 8. Post-Launch - -### OTA Updates (No Re-Review Needed) - -```bash -eas update --branch production -``` - -- Only for JS/asset-level changes -- Native code changes still require a new build + review - -### CI/CD Automation - -Create `.eas/workflows/build-and-submit.yml` to auto-build and submit on push to main. - -### Google Service Account Key (for Automated Android Submissions) - -1. Go to EAS dashboard → Credentials → Android -2. Click Application identifier → Service Credentials -3. Add Google Service Account Key - -## 9. 
Checklist - -- [ ] Register Apple Developer + Google Play Console accounts -- [ ] Configure `app.json` and `eas.json` -- [ ] Prepare app icon, splash screen, screenshots -- [ ] Write and host privacy policy URL -- [ ] Implement in-app account deletion (if registration exists) -- [ ] Add Info.plist permission descriptions (iOS) -- [ ] Run `eas build --platform all --profile production` -- [ ] Create app in App Store Connect, fill metadata + privacy labels -- [ ] Create app in Google Play Console, fill data safety form, manual first AAB upload -- [ ] `eas submit` or submit manually for review -- [ ] Wait for review approval → live -- [ ] Set up `eas update` for OTA updates - -## References - -- [Expo: Submit to App Stores](https://docs.expo.dev/deploy/submit-to-app-stores/) -- [Expo: EAS Submit](https://docs.expo.dev/submit/introduction/) -- [Expo: Build Your Project](https://docs.expo.dev/deploy/build-project/) -- [Expo: App Stores Best Practices](https://docs.expo.dev/distribution/app-stores/) -- [Apple App Review Guidelines](https://developer.apple.com/app-store/review/guidelines/) -- [Apple App Privacy Details](https://developer.apple.com/app-store/app-privacy-details/) -- [Google Play Data Safety](https://support.google.com/googleplay/android-developer/answer/10787469) -- [Google Play Developer Policy Center](https://play.google/developer-content-policy/) diff --git a/docs/mobile/guide.md b/docs/mobile/guide.md new file mode 100644 index 00000000..5940c108 --- /dev/null +++ b/docs/mobile/guide.md @@ -0,0 +1,497 @@ +# Mobile Development Guide + +Complete lifecycle guide for developing, testing, and publishing the Expo React Native app — from first line of code to App Store / Google Play. 
+ +## Overview + +``` +Phase 1: Environment Setup You are here if starting fresh + ↓ +Phase 2: Development & Testing Daily work loop + ↓ +Phase 3: Pre-Release Preparation Before your first submission + ↓ +Phase 4: Build & Submit Ship to stores + ↓ +Phase 5: Post-Launch Maintain and update +``` + +--- + +## Phase 1: Environment Setup + +### 1.1 Required Software + +| Tool | Purpose | Install | +|------|---------|---------| +| **Node.js** (LTS) | JS runtime | `brew install node` or [nodejs.org](https://nodejs.org) | +| **pnpm** | Package manager | `corepack enable && corepack prepare pnpm@latest --activate` | +| **Xcode** | iOS build toolchain | Mac App Store (free) | +| **Xcode Command Line Tools** | Compilers, simulators | `xcode-select --install` | +| **CocoaPods** | iOS dependency manager | `sudo gem install cocoapods` | +| **Android Studio** | Android emulator + SDK (optional, iOS-first) | [developer.android.com](https://developer.android.com/studio) | +| **EAS CLI** | Expo build & submit | `npm install -g eas-cli` | +| **Expo CLI** | Dev server | Bundled with `npx expo` | + +### 1.2 Xcode First-Time Setup + +1. Open Xcode at least once to accept the license and install components +2. **Add your Apple ID** (free account is enough for development): + - Xcode → Settings → Accounts → `+` → Apple ID + - This creates a "Personal Team" for free code signing +3. Verify simulators are installed: + - Xcode → Settings → Components → download an iOS Simulator runtime + +### 1.3 iPhone First-Time Setup (for Real Device Testing) + +1. **Enable Developer Mode** (required on iOS 16+): + - Settings → Privacy & Security → Developer Mode → ON + - Device will restart +2. Connect iPhone to Mac via USB/USB-C cable +3. When prompted "Trust This Computer?" 
→ tap Trust + +### 1.4 Project Setup + +```bash +# Install dependencies +pnpm install + +# Generate native project files (creates ios/ and android/ directories) +npx expo prebuild + +# Initialize EAS configuration (creates eas.json) +eas build:configure +``` + +### 1.5 Expo Account + +```bash +# Create account at expo.dev, then: +eas login +eas whoami # verify +``` + +**No paid accounts needed at this stage.** Free Apple ID + free Expo account is enough for development. + +--- + +## Phase 2: Development & Testing + +### 2.1 Running on iOS Simulator + +```bash +# Start the app in iOS simulator (no real device needed) +npx expo run:ios +``` + +- Fastest iteration loop — code changes hot-reload instantly +- Good for: UI layout, navigation, business logic, API calls +- **Cannot test**: camera, barcode scanner, real push notifications, biometrics + +### 2.2 Running on Real iPhone + +```bash +# Connect iPhone via USB, then: +npx expo run:ios --device +``` + +Expo CLI will: +1. Detect your connected device +2. Sign the app with your Personal Team (free Apple ID) +3. 
Build, install, and launch the app + +**First time only**: After installation, go to: +- Settings → General → VPN & Device Management → Trust your developer certificate + +#### Free Signing Limitations + +| Limitation | Detail | +|-----------|--------| +| 7-day expiry | App stops launching after 7 days — just re-run `npx expo run:ios --device` | +| 3 devices max | Can register up to 3 test devices per Apple ID | +| Some entitlements unavailable | Push notifications, Apple Pay, iCloud require paid account | +| Cannot distribute to others | Only works on your own registered devices | + +**Camera, barcode scanner, GPS, sensors all work fine with free signing.** + +### 2.3 Daily Development Workflow + +``` +First time (or after native config changes): + npx expo prebuild Generate/update native projects + npx expo run:ios --device Build and install on device + +Every day after that: + npx expo start --dev-client Start dev server only (no rebuild) + → Open the app on device It connects automatically + → Edit code, save Hot-reload updates instantly +``` + +**When do you need to rebuild?** + +| Change | Rebuild needed? 
| +|--------|----------------| +| JS/TS code, React components | No — hot-reload | +| Styles, images, assets | No — hot-reload | +| Added new Expo SDK module | **Yes** — `npx expo prebuild && npx expo run:ios --device` | +| Changed `app.json` permissions | **Yes** — rebuild | +| Updated native dependency | **Yes** — rebuild | +| Upgraded Expo SDK version | **Yes** — rebuild | + +### 2.4 Testing Native Features (Camera, Scanner) + +| Feature | Simulator | Real Device | +|---------|-----------|-------------| +| Camera preview | Not available | Works | +| Barcode / QR scan | Not available | Works | +| GPS location | Simulated location via Xcode menu | Real GPS | +| Push notifications | Not available | Requires paid Apple Developer account | +| Haptic feedback | Not available | Works | +| Device sensors (accelerometer, gyroscope) | Not available | Works | + +For camera/scanner features, **always test on a real device**. + +### 2.5 Debugging Tools + +#### Developer Menu + +Press `m` in the terminal (or shake the device) to open: +- Toggle Performance Monitor +- Toggle Element Inspector +- Open React Native DevTools + +#### React Native DevTools + +The primary debugging tool (replaced Chrome DevTools since RN 0.76): + +| Tab | Use | +|-----|-----| +| Console | View logs, execute JS in app context | +| Sources | Set breakpoints, step through code | +| Network | Inspect API requests (Expo only) | +| Components | Inspect React component tree and props | +| Profiler | Measure render performance | + +#### VS Code Integration + +Install the **Expo Tools** extension for: +- Breakpoint debugging directly in VS Code +- `app.json` / `app.config.ts` IntelliSense + +#### Native Crash Debugging + +For crashes in native modules (not JS): +- **iOS**: Open Xcode → Window → Devices and Simulators → View Device Logs +- **Android**: `adb logcat` in terminal + +--- + +## Phase 3: Pre-Release Preparation + +**This is when you need to start spending money.** + +### 3.1 Accounts & Fees + +| 
Platform | Cost | Registration Time | Required For | +|----------|------|-------------------|--------------| +| **Apple Developer Program** | $99/year | 1-2 days review | App Store distribution | +| **Google Play Console** | $25 one-time | Days to weeks review | Play Store distribution | +| **Expo Account** | Free tier sufficient | Instant | EAS Build & Submit | + +Register early — account review takes time, especially Google. + +### 3.2 App Configuration + +Update `app.json` or `app.config.ts`: + +```jsonc +{ + "name": "Multica", + "slug": "multica", + "version": "1.0.0", + "ios": { + "bundleIdentifier": "com.multica.app", + "buildNumber": "1", // increment each submission + "infoPlist": { + "NSCameraUsageDescription": "Used to scan QR codes and take photos", + "NSPhotoLibraryUsageDescription": "Used to save scanned images" + } + }, + "android": { + "package": "com.multica.app", + "versionCode": 1, // increment each submission + "permissions": ["CAMERA"] + }, + "icon": "./assets/icon.png", // 1024x1024 PNG, no transparency + "splash": { + "image": "./assets/splash.png" + } +} +``` + +### 3.3 EAS Build Profiles + +`eas.json`: + +```json +{ + "cli": { "version": ">= 10.0.0" }, + "build": { + "development": { + "developmentClient": true, + "distribution": "internal" + }, + "preview": { + "distribution": "internal" + }, + "production": {} + }, + "submit": { + "production": {} + } +} +``` + +### 3.4 App Signing & Credentials + +#### iOS + +EAS auto-manages credentials (recommended): +- Distribution Certificate +- Provisioning Profile +- Or create manually in [Apple Developer Portal](https://developer.apple.com) + +#### Android + +- EAS auto-generates Keystore, stored securely on EAS servers +- **Back up your Keystore** — losing it means you can never update the published app +- Play Store requires AAB (Android App Bundle) format + +### 3.5 Required Assets + +| Asset | Spec | +|-------|------| +| **App Icon** | 1024x1024 PNG, no alpha/transparency (iOS) | +| **Splash 
Screen** | Platform-appropriate sizes | +| **iOS Screenshots** | 6.7", 6.5", 5.5" iPhone sizes + iPad (if universal) | +| **Android Screenshots** | 2-8 screenshots | + +### 3.6 Required Metadata + +#### Both Platforms + +| Item | Notes | +|------|-------| +| **Privacy Policy URL** | Publicly accessible. Must disclose data collection, third-party sharing, AI usage, deletion rights | +| **App Description** | Short (≤80 chars for Google) + full description | +| **Support URL** | Where users can get help | +| **Account Deletion** | If app has registration, must support in-app account + data deletion | + +#### Apple App Store Connect + +| Item | Details | +|------|---------| +| Privacy Nutrition Labels | Data collection practices per category | +| App Review Information | Reviewer contact info, demo/test account | +| Content Rating | Age classification | +| Export Compliance | Encryption usage declaration | +| Info.plist Permission Strings | Clear purpose description for each permission | + +#### Google Play Console + +| Item | Details | +|------|---------| +| Data Safety Form | Required even if no data is collected | +| Content Rating Questionnaire | IARC rating | +| Target Audience | Must declare if targeting children | +| First Upload | Must upload AAB manually (Google API limitation) | + +--- + +## Phase 4: Build & Submit + +### 4.1 Production Build + +```bash +# iOS +eas build --platform ios --profile production + +# Android +eas build --platform android --profile production + +# Both platforms +eas build --platform all --profile production +``` + +Builds run in Expo cloud — no local Xcode or Android Studio needed for production builds. + +### 4.2 Submit to Apple App Store + +```bash +eas submit --platform ios +``` + +This uploads the build to **App Store Connect / TestFlight**. Then: + +1. Log into [App Store Connect](https://appstoreconnect.apple.com) +2. Select the uploaded build +3. Associate it with a version +4. 
Fill in all metadata, screenshots, privacy nutrition labels +5. Submit for App Review + +### 4.3 Submit to Google Play Store + +```bash +eas submit --platform android +``` + +**First time**: Must upload AAB manually in [Play Console](https://play.google.com/console). + +After initial upload: +1. Navigate to Production → Create new release +2. Upload AAB or use the EAS-submitted build +3. Fill in description, screenshots, data safety form +4. Submit for review + +### 4.4 Auto-Submit (Optional) + +Build and submit in one step: + +```bash +eas build --platform all --profile production --auto-submit +``` + +### 4.5 App Review + +| | Apple | Google | +|---|---|---| +| Review time | Typically 24-48 hours | Hours to 7 days | +| Common rejections | Incomplete features, misleading screenshots, missing privacy policy, unclear permission strings | Data safety form mismatch, policy violations | +| After rejection | Fix issues, resubmit | Fix issues, resubmit | + +--- + +## Phase 5: Post-Launch + +### 5.1 OTA Updates (No Re-Review) + +For JS/asset-only changes, push updates without going through App Review: + +```bash +eas update --branch production +``` + +- Instant delivery to users — no store review +- Only works for JavaScript and asset changes +- **Native code changes still require a new build + review** + +### 5.2 Version Bumping + +For each new store submission: +- iOS: increment `buildNumber` in `app.json` +- Android: increment `versionCode` in `app.json` +- Bump `version` for user-visible version changes + +### 5.3 CI/CD Automation + +Create `.eas/workflows/build-and-submit.yml` to auto-build and submit on push to main. + +#### Google Service Account Key (Automated Android Submissions) + +1. EAS dashboard → Credentials → Android +2. Click Application identifier → Service Credentials +3. 
Add Google Service Account Key + +--- + +## Quick Reference + +### Common Commands + +```bash +# Development +npx expo prebuild # Generate native projects +npx expo run:ios # Run on iOS simulator +npx expo run:ios --device # Run on connected iPhone +npx expo start --dev-client # Start dev server (after initial install) + +# Building +eas build --platform ios --profile development # Dev build (for device testing) +eas build --platform ios --profile production # Production build +eas build --platform all --profile production # Both platforms + +# Submitting +eas submit --platform ios # Submit to App Store +eas submit --platform android # Submit to Play Store + +# OTA Updates +eas update --branch production # Push JS update to users +``` + +### Cost Summary + +| Phase | Cost | +|-------|------| +| Development + local testing | **Free** (free Apple ID + Xcode) | +| EAS cloud builds | Free tier: 30 iOS + 30 Android builds/month | +| App Store submission | **$99/year** (Apple Developer Program) | +| Play Store submission | **$25 one-time** (Google Play Console) | + +--- + +## Master Checklist + +### Development Phase +- [ ] Install Node.js, pnpm, Xcode, EAS CLI +- [ ] Add Apple ID to Xcode (Settings → Accounts) +- [ ] Enable Developer Mode on iPhone +- [ ] Run `npx expo prebuild` +- [ ] Test on simulator: `npx expo run:ios` +- [ ] Test on real device: `npx expo run:ios --device` +- [ ] Trust developer certificate on device +- [ ] Verify camera/scanner functionality on real device + +### Pre-Release Phase +- [ ] Register Apple Developer Program ($99/year) +- [ ] Register Google Play Console ($25) +- [ ] Configure `app.json` (bundleIdentifier, permissions, icon, splash) +- [ ] Configure `eas.json` build profiles +- [ ] Prepare app icon (1024x1024 PNG) +- [ ] Prepare splash screen +- [ ] Take App Store screenshots (all required sizes) +- [ ] Write and host privacy policy URL +- [ ] Write app description (short + full) +- [ ] Set up support URL +- [ ] Implement in-app 
account deletion (if registration exists) + +### Submission Phase +- [ ] Run `eas build --platform all --profile production` +- [ ] iOS: `eas submit --platform ios` +- [ ] iOS: Fill metadata + privacy labels in App Store Connect +- [ ] iOS: Submit for App Review +- [ ] Android: Upload first AAB manually in Play Console +- [ ] Android: `eas submit --platform android` +- [ ] Android: Fill data safety form + metadata in Play Console +- [ ] Android: Submit for review +- [ ] Wait for review approval → app goes live + +### Post-Launch Phase +- [ ] Set up `eas update` for OTA updates +- [ ] Set up CI/CD workflow (optional) +- [ ] Configure Google Service Account Key for automated Android submissions (optional) + +--- + +## References + +- [Expo: Getting Started](https://docs.expo.dev/get-started/introduction/) +- [Expo: Development Builds](https://docs.expo.dev/develop/development-builds/introduction/) +- [Expo: Local App Development](https://docs.expo.dev/guides/local-app-development/) +- [Expo: Debugging Tools](https://docs.expo.dev/debugging/tools/) +- [Expo: Submit to App Stores](https://docs.expo.dev/deploy/submit-to-app-stores/) +- [Expo: EAS Submit](https://docs.expo.dev/submit/introduction/) +- [Expo: EAS Update](https://docs.expo.dev/eas-update/introduction/) +- [Apple App Review Guidelines](https://developer.apple.com/app-store/review/guidelines/) +- [Apple App Privacy Details](https://developer.apple.com/app-store/app-privacy-details/) +- [Google Play Data Safety](https://support.google.com/googleplay/android-developer/answer/10787469) +- [Google Play Developer Policy Center](https://play.google/developer-content-policy/) diff --git a/package.json b/package.json index ec334f55..72b0a54f 100644 --- a/package.json +++ b/package.json @@ -62,6 +62,7 @@ "@sinclair/typebox": "^0.34.41", "croner": "^10.0.1", "fast-glob": "^3.3.3", + "grammy": "^1.39.3", "json5": "^2.2.3", "linkedom": "^0.18.12", "nestjs-pino": "^4.5.0", diff --git 
a/packages/hooks/src/use-gateway-chat.ts b/packages/hooks/src/use-gateway-chat.ts index 4613bbfb..0986c7bb 100644 --- a/packages/hooks/src/use-gateway-chat.ts +++ b/packages/hooks/src/use-gateway-chat.ts @@ -50,6 +50,12 @@ export function useGatewayChat({ client, hubId, agentId }: UseGatewayChatOptions client.onMessage((msg) => { if (msg.action === StreamAction) { const payload = msg.payload as StreamPayload; + if (payload.event.type === "agent_error") { + const errorMsg = (payload.event as { message?: string }).message ?? "Unknown error"; + chat.setError({ code: "AGENT_ERROR", message: errorMsg }); + setIsLoading(false); + return; + } chat.handleStream(payload); if (payload.event.type === "message_start") setIsLoading(true); if (payload.event.type === "message_end") setIsLoading(false); diff --git a/packages/ui/src/components/chat-input.tsx b/packages/ui/src/components/chat-input.tsx index 28e01c72..fafbdfb2 100644 --- a/packages/ui/src/components/chat-input.tsx +++ b/packages/ui/src/components/chat-input.tsx @@ -4,7 +4,7 @@ import { useEditor, EditorContent } from "@tiptap/react"; import StarterKit from "@tiptap/starter-kit"; import Placeholder from "@tiptap/extension-placeholder"; import { Button } from "@multica/ui/components/ui/button"; -import { ArrowUpIcon } from "@hugeicons/core-free-icons"; +import { ArrowUp02Icon } from "@hugeicons/core-free-icons"; import { HugeiconsIcon } from "@hugeicons/react"; import { cn } from "@multica/ui/lib/utils"; import "./chat-input.css"; @@ -111,13 +111,13 @@ export const ChatInput = forwardRef( return (
- +
-
diff --git a/packages/ui/src/components/chat-view.tsx b/packages/ui/src/components/chat-view.tsx index b47edf0b..dcdded48 100644 --- a/packages/ui/src/components/chat-view.tsx +++ b/packages/ui/src/components/chat-view.tsx @@ -5,6 +5,7 @@ import { Button } from "@multica/ui/components/ui/button"; import { Skeleton } from "@multica/ui/components/ui/skeleton"; import { ChatInput } from "@multica/ui/components/chat-input"; import { MessageList } from "@multica/ui/components/message-list"; +import { MemoizedMarkdown } from "@multica/ui/components/markdown"; import { MulticaIcon } from "@multica/ui/components/multica-icon"; import { ExecApprovalItem } from "@multica/ui/components/exec-approval-item"; import { useScrollFade } from "@multica/ui/hooks/use-scroll-fade"; @@ -221,7 +222,11 @@ export function ChatView({ {error && (
- {error.message} + + + {error.message} + +
{errorAction && (