# Conflicts:
#	apps/desktop/electron/electron-env.d.ts
This commit is contained in:
Jiang Bohan 2026-02-09 14:34:48 +08:00
commit d6993000ca
41 changed files with 4667 additions and 272 deletions

View file

@ -131,6 +131,13 @@ interface CurrentProviderInfo {
available: boolean
}
interface ChannelAccountStateInfo {
channelId: string
accountId: string
status: 'stopped' | 'starting' | 'running' | 'error'
error?: string
}
interface ElectronAPI {
hub: {
init: () => Promise<unknown>
@ -188,6 +195,14 @@ interface ElectronAPI {
saveApiKey: (providerId: string, apiKey: string) => Promise<{ ok: boolean; error?: string }>
importOAuth: (providerId: string) => Promise<{ ok: boolean; expiresAt?: number; error?: string }>
}
channels: {
listStates: () => Promise<ChannelAccountStateInfo[]>
getConfig: () => Promise<Record<string, Record<string, Record<string, unknown>> | undefined>>
saveToken: (channelId: string, accountId: string, token: string) => Promise<{ ok: boolean; error?: string }>
removeToken: (channelId: string, accountId: string) => Promise<{ ok: boolean; error?: string }>
stop: (channelId: string, accountId: string) => Promise<{ ok: boolean; error?: string }>
start: (channelId: string, accountId: string) => Promise<{ ok: boolean; error?: string }>
}
cron: {
list: () => Promise<unknown[]>
toggle: (jobId: string) => Promise<{ ok: boolean }>

View file

@ -0,0 +1,178 @@
/**
* Channel IPC handlers for Electron main process.
*
* Manages channel account configuration, start/stop lifecycle.
* The Channels page in the renderer uses these to configure
* Telegram (and future channels) with immediate effect.
*/
import { ipcMain } from 'electron'
import { getCurrentHub } from './hub.js'
import { credentialManager } from '../../../../src/agent/credentials.js'
import { listChannels } from '../../../../src/channels/registry.js'
/** Validate that a string is a safe identifier (alphanumeric, dashes, underscores) */
function isValidId(value: unknown): value is string {
return typeof value === 'string' && /^[a-zA-Z0-9_-]+$/.test(value) && value.length <= 64
}
/**
* Mask a token string for safe display: show first 5 and last 5 chars.
* Returns undefined if the input is not a string.
*/
function maskToken(token: unknown): string | undefined {
if (typeof token !== 'string' || token.length === 0) return undefined
if (token.length <= 12) return '*'.repeat(token.length)
return `${token.slice(0, 5)}${'*'.repeat(10)}${token.slice(-5)}`
}
/**
* Register all Channel-related IPC handlers.
*/
export function registerChannelsIpcHandlers(): void {
/**
* List all channel account states (running / stopped / error).
*/
ipcMain.handle('channels:listStates', async () => {
const hub = getCurrentHub()
if (!hub) return []
return hub.channelManager.listAccountStates()
})
/**
* Get the channels config from credentials.json5.
* Returns a sanitized version with tokens masked (not the raw secret values).
*/
ipcMain.handle('channels:getConfig', async () => {
const raw = credentialManager.getChannelsConfig()
// Mask secret values before sending to renderer
const masked: Record<string, Record<string, Record<string, unknown>> | undefined> = {}
for (const [channelId, accounts] of Object.entries(raw)) {
if (!accounts) continue
const maskedAccounts: Record<string, Record<string, unknown>> = {}
for (const [accountId, accountConfig] of Object.entries(accounts)) {
const maskedConfig = { ...accountConfig }
if ('botToken' in maskedConfig) {
maskedConfig.botToken = maskToken(maskedConfig.botToken)
}
maskedAccounts[accountId] = maskedConfig
}
masked[channelId] = maskedAccounts
}
return masked
})
/**
* Save a channel account token and start the bot immediately.
* Flow: validate write to credentials.json5 start the channel account.
*/
ipcMain.handle(
'channels:saveToken',
async (_event, channelId: string, accountId: string, token: string): Promise<{ ok: boolean; error?: string }> => {
try {
// Validate inputs
if (!isValidId(channelId)) return { ok: false, error: 'Invalid channel ID' }
if (!isValidId(accountId)) return { ok: false, error: 'Invalid account ID' }
if (typeof token !== 'string' || token.trim().length === 0 || token.length > 256) {
return { ok: false, error: 'Invalid token' }
}
const hub = getCurrentHub()
if (!hub) return { ok: false, error: 'Hub not initialized' }
// Find the plugin to validate channelId
const plugin = listChannels().find((p) => p.id === channelId)
if (!plugin) return { ok: false, error: `Unknown channel: ${channelId}` }
// Persist config to credentials.json5
credentialManager.setChannelAccountConfig(channelId, accountId, { botToken: token })
console.log(`[IPC] Channel config saved: ${channelId}:${accountId}`)
// Stop existing account if running (e.g. token update)
hub.channelManager.stopAccount(channelId, accountId)
// Start the account with the new config
await hub.channelManager.startAccount(channelId, accountId, { botToken: token })
console.log(`[IPC] Channel started: ${channelId}:${accountId}`)
return { ok: true }
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
console.error(`[IPC] Failed to save channel token: ${message}`)
return { ok: false, error: message }
}
}
)
/**
* Remove a channel account token and stop the bot.
*/
ipcMain.handle(
'channels:removeToken',
async (_event, channelId: string, accountId: string): Promise<{ ok: boolean; error?: string }> => {
try {
if (!isValidId(channelId)) return { ok: false, error: 'Invalid channel ID' }
if (!isValidId(accountId)) return { ok: false, error: 'Invalid account ID' }
const hub = getCurrentHub()
if (!hub) return { ok: false, error: 'Hub not initialized' }
// Stop the account
hub.channelManager.stopAccount(channelId, accountId)
// Remove from credentials.json5
credentialManager.removeChannelAccountConfig(channelId, accountId)
console.log(`[IPC] Channel config removed: ${channelId}:${accountId}`)
return { ok: true }
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
console.error(`[IPC] Failed to remove channel token: ${message}`)
return { ok: false, error: message }
}
}
)
/**
* Stop a channel account without removing its config.
*/
ipcMain.handle(
'channels:stop',
async (_event, channelId: string, accountId: string): Promise<{ ok: boolean; error?: string }> => {
if (!isValidId(channelId)) return { ok: false, error: 'Invalid channel ID' }
if (!isValidId(accountId)) return { ok: false, error: 'Invalid account ID' }
const hub = getCurrentHub()
if (!hub) return { ok: false, error: 'Hub not initialized' }
hub.channelManager.stopAccount(channelId, accountId)
return { ok: true }
}
)
/**
* Start a channel account using its saved config.
*/
ipcMain.handle(
'channels:start',
async (_event, channelId: string, accountId: string): Promise<{ ok: boolean; error?: string }> => {
try {
if (!isValidId(channelId)) return { ok: false, error: 'Invalid channel ID' }
if (!isValidId(accountId)) return { ok: false, error: 'Invalid account ID' }
const hub = getCurrentHub()
if (!hub) return { ok: false, error: 'Hub not initialized' }
// Read config from credentials
const config = credentialManager.getChannelsConfig()
const accountConfig = config[channelId]?.[accountId]
if (!accountConfig) {
return { ok: false, error: `No config found for ${channelId}:${accountId}` }
}
await hub.channelManager.startAccount(channelId, accountId, accountConfig)
return { ok: true }
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
return { ok: false, error: message }
}
}
)
}

View file

@ -236,6 +236,7 @@ export function registerHubIpcHandlers(): void {
if (agent.closed) {
return { error: `Agent is closed: ${agentId}` }
}
h.channelManager.clearLastRoute()
agent.write(content)
return { ok: true }
})
@ -268,11 +269,11 @@ export function registerHubIpcHandlers(): void {
return
}
// Compaction events: forward with no stream tracking
const isCompactionEvent =
event.type === 'compaction_start' || event.type === 'compaction_end'
if (isCompactionEvent) {
safeLog(`[IPC] Sending compaction event to renderer: ${event.type}`)
// Compaction and error events: forward with no stream tracking
const isPassthroughEvent =
event.type === 'compaction_start' || event.type === 'compaction_end' || event.type === 'agent_error'
if (isPassthroughEvent) {
safeLog(`[IPC] Sending ${event.type} event to renderer`)
mainWindowRef.webContents.send('localChat:event', {
agentId,
streamId: null,
@ -281,16 +282,6 @@ export function registerHubIpcHandlers(): void {
return
}
// Agent error events: forward so the UI can display them
if (event.type === 'agent_error') {
safeLog(`[IPC] Sending agent_error event to renderer: ${(event as { message: string }).message}`)
mainWindowRef.webContents.send('localChat:event', {
agentId,
streamId: null,
event,
})
return
}
// Filter events same as Hub.consumeAgent()
const maybeMessage = (event as { message?: { role?: string } }).message
@ -398,6 +389,7 @@ export function registerHubIpcHandlers(): void {
return { error: 'Not subscribed to agent events. Call subscribe first.' }
}
h.channelManager.clearLastRoute()
agent.write(content)
safeLog(`[IPC] Local chat message sent to agent: ${agentId}`)
return { ok: true }

View file

@ -6,6 +6,7 @@ export { registerSkillsIpcHandlers } from './skills.js'
export { registerHubIpcHandlers, cleanupHub, initializeHub, setupDeviceConfirmation } from './hub.js'
export { registerProfileIpcHandlers } from './profile.js'
export { registerProviderIpcHandlers } from './provider.js'
export { registerChannelsIpcHandlers } from './channels.js'
export { registerCronIpcHandlers } from './cron.js'
export { registerHeartbeatIpcHandlers } from './heartbeat.js'
@ -14,6 +15,7 @@ import { registerSkillsIpcHandlers } from './skills.js'
import { registerHubIpcHandlers, cleanupHub, initializeHub } from './hub.js'
import { registerProfileIpcHandlers } from './profile.js'
import { registerProviderIpcHandlers } from './provider.js'
import { registerChannelsIpcHandlers } from './channels.js'
import { registerCronIpcHandlers } from './cron.js'
import { registerHeartbeatIpcHandlers } from './heartbeat.js'
@ -27,6 +29,7 @@ export function registerAllIpcHandlers(): void {
registerSkillsIpcHandlers()
registerProfileIpcHandlers()
registerProviderIpcHandlers()
registerChannelsIpcHandlers()
registerCronIpcHandlers()
registerHeartbeatIpcHandlers()
}

View file

@ -44,7 +44,7 @@ process.stderr?.on?.('error', (err: NodeJS.ErrnoException) => {
throw err
})
import { app, BrowserWindow } from 'electron'
import { app, BrowserWindow, shell } from 'electron'
import { fileURLToPath } from 'node:url'
import path from 'node:path'
import { registerAllIpcHandlers, initializeApp, cleanupAll, setupDeviceConfirmation } from './ipc/index.js'
@ -73,6 +73,12 @@ function createWindow() {
},
})
// Open external links in system browser instead of inside Electron
win.webContents.setWindowOpenHandler(({ url }) => {
shell.openExternal(url)
return { action: 'deny' }
})
if (VITE_DEV_SERVER_URL) {
win.loadURL(VITE_DEV_SERVER_URL)
} else {

View file

@ -195,6 +195,26 @@ const electronAPI = {
ipcRenderer.invoke('provider:importOAuth', providerId),
},
// Channel management (Telegram, Discord, etc.)
channels: {
/** List all channel account states */
listStates: () => ipcRenderer.invoke('channels:listStates'),
/** Get channels config from credentials.json5 */
getConfig: () => ipcRenderer.invoke('channels:getConfig'),
/** Save a channel token and start the bot immediately */
saveToken: (channelId: string, accountId: string, token: string) =>
ipcRenderer.invoke('channels:saveToken', channelId, accountId, token),
/** Remove a channel token and stop the bot */
removeToken: (channelId: string, accountId: string) =>
ipcRenderer.invoke('channels:removeToken', channelId, accountId),
/** Stop a channel account */
stop: (channelId: string, accountId: string) =>
ipcRenderer.invoke('channels:stop', channelId, accountId),
/** Start a channel account from saved config */
start: (channelId: string, accountId: string) =>
ipcRenderer.invoke('channels:start', channelId, accountId),
},
// Cron jobs management
cron: {
list: () => ipcRenderer.invoke('cron:list'),

View file

@ -1,8 +1,10 @@
import { createHashRouter, RouterProvider } from 'react-router-dom'
import Layout from './pages/layout'
import HomePage from './pages/home'
import ChatPage from './pages/chat'
import ToolsPage from './pages/tools'
import SkillsPage from './pages/skills'
import ChannelsPage from './pages/channels'
import CronsPage from './pages/crons'
const router = createHashRouter([
@ -11,9 +13,10 @@ const router = createHashRouter([
element: <Layout />,
children: [
{ index: true, element: <HomePage /> },
{ path: 'chat' },
{ path: 'chat', element: <ChatPage /> },
{ path: 'tools', element: <ToolsPage /> },
{ path: 'skills', element: <SkillsPage /> },
{ path: 'channels', element: <ChannelsPage /> },
{ path: 'crons', element: <CronsPage /> },
],
},

View file

@ -0,0 +1,135 @@
/**
* Hook for managing channel accounts (Telegram, Discord, etc.) in the Desktop App.
*
* Provides state and actions for the Channels settings page:
* - List channel account states (running / stopped / error)
* - Read channel config (tokens)
* - Save / remove tokens with immediate start/stop
*/
import { useState, useEffect, useCallback } from 'react'
export interface UseChannelsReturn {
/** Runtime states of all channel accounts */
states: ChannelAccountStateInfo[]
/** Raw channel config from credentials.json5 */
config: Record<string, Record<string, Record<string, unknown>> | undefined>
/** Loading state */
loading: boolean
/** Error message if any */
error: string | null
/** Refresh states and config */
refresh: () => Promise<void>
/** Save a bot token — persists to file and starts the bot immediately */
saveToken: (channelId: string, accountId: string, token: string) => Promise<{ ok: boolean; error?: string }>
/** Remove a bot token — stops the bot and removes from file */
removeToken: (channelId: string, accountId: string) => Promise<{ ok: boolean; error?: string }>
/** Stop a channel account without removing config */
stopChannel: (channelId: string, accountId: string) => Promise<void>
/** Start a channel account from saved config */
startChannel: (channelId: string, accountId: string) => Promise<void>
}
export function useChannels(): UseChannelsReturn {
const [states, setStates] = useState<ChannelAccountStateInfo[]>([])
const [config, setConfig] = useState<Record<string, Record<string, Record<string, unknown>> | undefined>>({})
const [loading, setLoading] = useState(true)
const [error, setError] = useState<string | null>(null)
const refresh = useCallback(async () => {
setLoading(true)
setError(null)
try {
const [stateList, channelConfig] = await Promise.all([
window.electronAPI.channels.listStates(),
window.electronAPI.channels.getConfig(),
])
setStates(stateList)
setConfig(channelConfig)
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
setError(message)
console.error('[useChannels] Failed to load:', message)
} finally {
setLoading(false)
}
}, [])
useEffect(() => {
refresh()
}, [refresh])
const saveToken = useCallback(async (channelId: string, accountId: string, token: string) => {
setError(null)
try {
const result = await window.electronAPI.channels.saveToken(channelId, accountId, token)
if (!result.ok) {
setError(result.error ?? 'Failed to save token')
}
// Refresh to pick up new state
await refresh()
return result
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
setError(message)
return { ok: false, error: message }
}
}, [refresh])
const removeToken = useCallback(async (channelId: string, accountId: string) => {
setError(null)
try {
const result = await window.electronAPI.channels.removeToken(channelId, accountId)
if (!result.ok) {
setError(result.error ?? 'Failed to remove token')
}
await refresh()
return result
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
setError(message)
return { ok: false, error: message }
}
}, [refresh])
const stopChannel = useCallback(async (channelId: string, accountId: string) => {
setError(null)
try {
const result = await window.electronAPI.channels.stop(channelId, accountId)
if (!result.ok) {
setError(result.error ?? 'Failed to stop channel')
}
await refresh()
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
setError(message)
}
}, [refresh])
const startChannel = useCallback(async (channelId: string, accountId: string) => {
setError(null)
try {
const result = await window.electronAPI.channels.start(channelId, accountId)
if (!result.ok) {
setError(result.error ?? 'Failed to start channel')
}
await refresh()
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
setError(message)
}
}, [refresh])
return {
states,
config,
loading,
error,
refresh,
saveToken,
removeToken,
stopChannel,
startChannel,
}
}

View file

@ -0,0 +1,177 @@
import { useState } from 'react'
import {
Card,
CardContent,
CardDescription,
CardHeader,
CardTitle,
} from '@multica/ui/components/ui/card'
import { Button } from '@multica/ui/components/ui/button'
import { Input } from '@multica/ui/components/ui/input'
import { Badge } from '@multica/ui/components/ui/badge'
import { useChannels, type UseChannelsReturn } from '../hooks/use-channels'
/** Status badge color mapping */
function statusVariant(status: string): 'default' | 'secondary' | 'destructive' | 'outline' {
switch (status) {
case 'running': return 'default'
case 'starting': return 'secondary'
case 'error': return 'destructive'
default: return 'outline'
}
}
function TelegramCard({ channels }: { channels: UseChannelsReturn }) {
const { states, config, saveToken, removeToken, startChannel, stopChannel } = channels
const [token, setToken] = useState('')
const [saving, setSaving] = useState(false)
const [localError, setLocalError] = useState<string | null>(null)
// Current state and config for telegram:default
const state = states.find((s) => s.channelId === 'telegram' && s.accountId === 'default')
const savedConfig = config['telegram']?.['default'] as { botToken?: string } | undefined
const hasToken = Boolean(savedConfig?.botToken)
const isRunning = state?.status === 'running'
const isStarting = state?.status === 'starting'
const handleSave = async () => {
if (!token.trim()) return
setSaving(true)
setLocalError(null)
const result = await saveToken('telegram', 'default', token.trim())
if (!result.ok) {
setLocalError(result.error ?? 'Failed to save')
} else {
setToken('') // Clear input on success
}
setSaving(false)
}
const handleRemove = async () => {
setSaving(true)
setLocalError(null)
const result = await removeToken('telegram', 'default')
if (!result.ok) {
setLocalError(result.error ?? 'Failed to remove')
}
setSaving(false)
}
const handleToggle = async () => {
setSaving(true)
setLocalError(null)
if (isRunning || isStarting) {
await stopChannel('telegram', 'default')
} else {
await startChannel('telegram', 'default')
}
setSaving(false)
}
// Mask the token for display: show first 5 and last 5 chars
const maskedToken = savedConfig?.botToken
? `${savedConfig.botToken.slice(0, 5)}${'*'.repeat(10)}${savedConfig.botToken.slice(-5)}`
: null
return (
<Card>
<CardHeader>
<div className="flex items-center justify-between">
<div>
<CardTitle>Telegram</CardTitle>
<CardDescription>
Connect a Telegram bot via Bot API long polling.
</CardDescription>
</div>
{state && (
<Badge variant={statusVariant(state.status)}>
{state.status}
</Badge>
)}
</div>
</CardHeader>
<CardContent className="space-y-4">
{hasToken ? (
// Token is configured — show masked token and actions
<div className="space-y-3">
<div className="flex items-center gap-2">
<code className="text-sm text-muted-foreground bg-muted px-2 py-1 rounded flex-1 truncate">
{maskedToken}
</code>
</div>
{state?.error && (
<p className="text-sm text-destructive">{state.error}</p>
)}
<div className="flex gap-2">
<Button
variant={isRunning ? 'outline' : 'default'}
size="sm"
onClick={handleToggle}
disabled={saving}
>
{isRunning ? 'Stop' : isStarting ? 'Starting...' : 'Start'}
</Button>
<Button
variant="destructive"
size="sm"
onClick={handleRemove}
disabled={saving || isRunning}
title={isRunning ? 'Stop the bot before removing' : undefined}
>
Remove
</Button>
</div>
</div>
) : (
// No token — show input form
<div className="space-y-3">
<Input
type="password"
placeholder="Bot Token (from @BotFather)"
value={token}
onChange={(e) => setToken(e.target.value)}
onKeyDown={(e) => e.key === 'Enter' && handleSave()}
/>
<Button
size="sm"
onClick={handleSave}
disabled={saving || !token.trim()}
>
{saving ? 'Saving...' : 'Save & Connect'}
</Button>
</div>
)}
{localError && (
<p className="text-sm text-destructive">{localError}</p>
)}
</CardContent>
</Card>
)
}
export default function ChannelsPage() {
const channels = useChannels()
const { loading, error } = channels
return (
<div className="max-w-4xl mx-auto space-y-4">
<div>
<h2 className="text-lg font-semibold">Channels</h2>
<p className="text-sm text-muted-foreground">
Connect messaging platforms to your Agent.
</p>
</div>
{loading ? (
<p className="text-sm text-muted-foreground">Loading...</p>
) : error ? (
<p className="text-sm text-destructive">{error}</p>
) : (
<TelegramCard channels={channels} />
)}
</div>
)
}

View file

@ -8,6 +8,7 @@ import {
CodeIcon,
PlugIcon,
Comment01Icon,
Share08Icon,
Time04Icon,
} from '@hugeicons/core-free-icons'
import { cn } from '@multica/ui/lib/utils'
@ -19,6 +20,7 @@ const tabs = [
{ path: '/chat', label: 'Chat', icon: Comment01Icon },
{ path: '/tools', label: 'Tools', icon: CodeIcon },
{ path: '/skills', label: 'Skills', icon: PlugIcon },
{ path: '/channels', label: 'Channels', icon: Share08Icon },
{ path: '/crons', label: 'Cron', icon: Time04Icon },
]

File diff suppressed because it is too large Load diff

175
docs/channels/README.md Normal file
View file

@ -0,0 +1,175 @@
# Channel System
The Channel system connects external messaging platforms (Telegram, Discord, etc.) to the Hub's agent. Each platform is a **plugin** that translates platform-specific APIs into a unified interface.
> For media handling details (audio transcription, image/video description), see [media-handling.md](./media-handling.md).
> For message flow across all three I/O paths (Desktop / Web / Channel), see [message-paths.md](../message-paths.md).
## Architecture
```
┌─────────────────────────────────────────────────────────────┐
│ credentials.json5 │
│ { channels: { telegram: { default: { botToken } } } } │
└──────────────────────┬──────────────────────────────────────┘
│ loadChannelsConfig()
┌─────────────────────────────────────────────────────────────┐
│ Channel Manager (manager.ts) │
│ │
│ startAll() → iterate plugins → startAccount() per account │
│ subscribeToAgent() → listen for AI replies │
│ │
│ Incoming: routeIncoming() → routeMedia() → agent.write() │
│ Outgoing: lastRoute → aggregator → plugin.outbound.*() │
└──────────┬──────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ Plugin Registry (registry.ts) │
│ registerChannel(plugin) / listChannels() / getChannel(id) │
└──────────┬──────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────┐
│ Channel Plugins (e.g. telegram.ts) │
│ │
│ config — resolve account credentials │
│ gateway — receive messages (polling / webhook) │
│ outbound — send replies back to platform │
│ downloadMedia() — download media files to local disk │
└─────────────────────────────────────────────────────────────┘
```
## Plugin Interface
Each channel plugin implements `ChannelPlugin` (defined in `types.ts`):
```typescript
interface ChannelPlugin {
readonly id: string; // "telegram", "discord", etc.
readonly meta: { name: string; description: string };
readonly chunkerConfig?: BlockChunkerConfig; // override text chunking per platform
readonly config: ChannelConfigAdapter; // credential resolution
readonly gateway: ChannelGatewayAdapter; // receive messages
readonly outbound: ChannelOutboundAdapter; // send replies
downloadMedia?(fileId: string, accountId: string): Promise<string>; // optional
}
```
### Three Adapters
| Adapter | Role | Key Methods |
|---------|------|-------------|
| **config** | Resolve credentials from `credentials.json5` | `listAccountIds()`, `resolveAccount()`, `isConfigured()` |
| **gateway** | Receive inbound messages from the platform | `start(accountId, config, onMessage, signal)` |
| **outbound** | Send replies back to the platform | `sendText()`, `replyText()`, `sendTyping?()` |
### downloadMedia (optional)
Platforms that support media (voice, image, video, document) implement `downloadMedia()` to download files to `~/.super-multica/cache/media/` with UUID filenames. The Manager calls this before processing media.
## Message Flow
### Inbound (Platform → Agent)
```
User sends message in Telegram
→ grammy long-polling → onMessage callback
→ ChannelManager.routeIncoming()
1. Update lastRoute (reply target)
2. Start typing indicator
3. If media: routeMedia() → download → transcribe/describe → text
4. agent.write(text)
```
All media is converted to text before the agent sees it. See [media-handling.md](./media-handling.md) for details.
### Outbound (Agent → Platform)
```
Agent produces reply
→ agent.subscribe() in ChannelManager
→ Check: if (!lastRoute) return // not from a channel, skip
→ message_start → create MessageAggregator
→ message_update → feed text to aggregator
→ message_end → aggregator flushes final block
→ Aggregator emits BlockReply chunks
→ Block 0: plugin.outbound.replyText() // Telegram reply format
→ Block N: plugin.outbound.sendText() // follow-up messages
```
The **MessageAggregator** buffers streaming LLM output and splits it into blocks at natural text boundaries (paragraphs, code blocks). This is necessary because messaging platforms cannot consume raw streaming deltas.
## lastRoute Pattern
The `lastRoute` tracks which channel last sent a message:
- **Channel message arrives**`lastRoute` is set to that plugin + conversation
- **Desktop/Web message arrives**`clearLastRoute()` is called
- **Agent replies** → if `lastRoute` is set, reply goes to that channel; otherwise skipped
This ensures replies go back to the originating channel. Desktop and Web always receive agent events independently via their own mechanisms (IPC / Gateway).
## Configuration
Channel credentials are stored in `~/.super-multica/credentials.json5` under the `channels` key:
```json5
{
channels: {
telegram: {
default: {
botToken: "123456:ABC-DEF..."
}
},
// discord: { default: { botToken: "..." } },
}
}
```
Each channel ID maps to accounts (keyed by account ID, typically `"default"`). The config adapter for each plugin knows how to extract and validate its credentials.
## Adding a New Plugin
1. Create `src/channels/plugins/<name>.ts` implementing `ChannelPlugin`
2. Register it in `src/channels/index.ts`:
```typescript
import { <name>Channel } from "./plugins/<name>.js";
registerChannel(<name>Channel);
```
3. Add the config shape to the `channels` section of `credentials.json5`
### Implementation Checklist
- [ ] `config` adapter: parse credentials from `credentials.json5`
- [ ] `gateway` adapter: connect to platform, normalize messages to `ChannelMessage`
- [ ] `outbound` adapter: `sendText`, `replyText`, optional `sendTyping`
- [ ] `downloadMedia` (if platform supports media): download to `MEDIA_CACHE_DIR`
- [ ] Group filtering: only respond to messages directed at the bot
- [ ] Graceful shutdown: respect the `AbortSignal` passed to `gateway.start()`
## File Map
| File | Role |
|------|------|
| `src/channels/types.ts` | All type definitions (`ChannelPlugin`, `ChannelMessage`, `DeliveryContext`, etc.) |
| `src/channels/manager.ts` | `ChannelManager` — bridges plugins to the Hub's agent |
| `src/channels/registry.ts` | Plugin registry (`registerChannel`, `listChannels`, `getChannel`) |
| `src/channels/config.ts` | Load channel config from `credentials.json5` |
| `src/channels/index.ts` | Bootstrap: register built-in plugins, re-export public API |
| `src/channels/plugins/telegram.ts` | Telegram plugin (grammy, long polling) |
| `src/channels/plugins/telegram-format.ts` | Markdown → Telegram HTML converter |
| `src/media/transcribe.ts` | Audio transcription (local whisper → OpenAI API) |
| `src/media/describe-image.ts` | Image description (OpenAI Vision API) |
| `src/media/describe-video.ts` | Video description (ffmpeg frame + Vision API) |
| `src/shared/paths.ts` | `MEDIA_CACHE_DIR` path constant |
| `src/hub/message-aggregator.ts` | Streaming text → block chunking for channel delivery |
## Current Plugins
| Plugin | Platform | Transport | Library |
|--------|----------|-----------|---------|
| `telegram` | Telegram | Long polling | grammy |
Planned: Discord, Feishu, LINE, etc.

View file

@ -0,0 +1,161 @@
# Channel Media Handling
How multimedia messages (voice, image, video, document) from messaging platforms are processed before reaching the Agent.
## Core Principle
All media is converted to text before the Agent sees it. The Agent only ever receives plain text via `agent.write()`.
```
Platform message (voice/image/video/doc)
→ Plugin: detect type + download file
→ Manager: convert to text (API transcription / vision description)
→ Agent receives text via agent.write()
```
## Reference Architecture (OpenClaw)
OpenClaw supports 6 platforms (Telegram, Discord, LINE, Signal, iMessage, Slack). All share the same media processing pipeline.
### Per-Platform Layer (different for each platform)
Each platform detects media type using its own API:
| Platform | Detection Method |
|----------|-----------------|
| Telegram | `msg.voice`, `msg.audio`, `msg.photo`, `msg.video`, `msg.document` |
| Discord | `attachment.content_type` MIME prefix (`audio/`, `image/`, `video/`) |
| LINE | `message.type` field (`"audio"`, `"image"`, `"video"`, `"file"`) |
| Signal | `attachment.contentType` MIME prefix |
| iMessage | `attachment.mime_type` MIME prefix |
| Slack | Any file attachment (MIME-based detection happens later) |
Each platform downloads the file using its own API, saves to local disk, and tags it:
- `<media:audio>` for voice/audio
- `<media:image>` for images
- `<media:video>` for video
- `<media:document>` for files
### Shared Layer (`applyMediaUnderstanding()`)
One function handles all conversions, called automatically before the Agent sees the message:
1. Reads local file path + MIME type
2. Selects conversion method based on type:
- **audio** → transcription (whisper local / OpenAI API / Groq / Deepgram / Google)
- **image** → vision model description (Gemini / OpenAI / Anthropic)
- **video** → vision model description
3. Replaces placeholder with formatted text:
- Audio: `[Audio]\nTranscript:\n<transcribed text>`
- Image: `[Image]\nDescription:\n<description text>`
4. If conversion fails (no provider configured), the raw placeholder stays in the message
### Transcription Provider Priority
Auto-detection order:
1. sherpa-onnx-offline (local)
2. whisper-cli / whisper.cpp (local)
3. whisper Python CLI (local)
4. gemini CLI (local)
5. API providers: OpenAI → Groq → Deepgram → Google
### Skill Integration
Whisper skills declare requirements in `SKILL.md` metadata:
```yaml
requires:
bins: ["whisper"] # must exist in PATH
```
If the binary is missing, the skill is filtered out — the Agent never sees it. If present, the Agent can use it for transcription.
---
## Our Implementation
All media is converted to text in the Manager layer (`routeMedia()`) before reaching the Agent, matching OpenClaw's `applyMediaUnderstanding()` pattern.
### Architecture
```
┌─────────────────────────────────────────────────────┐
│ Platform Plugin (e.g. telegram.ts) │
│ │
│ bot.on("message:voice") → detect type │
│ bot.api.getFile() → download to local disk │
│ Emit ChannelMessage with media attachment │
└──────────────────┬──────────────────────────────────┘
┌─────────────────────────────────────────────────────┐
│ Channel Manager (manager.ts → routeMedia()) │
│ │
│ Download file via plugin.downloadMedia() │
│ audio → transcribeAudio() → text │
│ image → describeImage() → text │
│ video → describeVideo() (ffmpeg frame + vision) → text │
│ document → file path info │
│ All results → agent.write(text) │
└──────────────────┬──────────────────────────────────┘
┌─────────────────────────────────────────────────────┐
│ Agent receives plain text only │
│ e.g. "[Voice Message]\nTranscript: ..." │
│ e.g. "[Image]\nDescription: ..." │
│ e.g. "[Video]\nDescription: ..." │
└─────────────────────────────────────────────────────┘
```
### Media Processing Modules
| Type | Module | Method | API |
|------|--------|--------|-----|
| audio | `src/media/transcribe.ts` | `transcribeAudio()` | Local whisper/whisper-cli → OpenAI Whisper API (`whisper-1`) |
| image | `src/media/describe-image.ts` | `describeImage()` | OpenAI Vision API (`gpt-4o-mini`) |
| video | `src/media/describe-video.ts` | `describeVideo()` | ffmpeg frame extraction + Vision API |
| document | (inline in manager) | — | File path info only |
### Agent Output Format
| Type | Success | No API Key |
|------|---------|------------|
| audio | `[Voice Message]\nTranscript: <text>` | `[audio message received]\nFile: <path>` |
| image | `[Image]\nDescription: <text>` | `[image message received]\nFile: <path>` |
| video | `[Video]\nDescription: <text>` | `[video message received]\nFile: <path>` |
| document | `[document message received]\nFile: <path>` | same |
### Audio Transcription Priority
`transcribeAudio()` tries providers in order, matching OpenClaw's local-first approach:
1. **Local whisper/whisper-cli** — Free, no latency, works offline. Detected via `which` and cached.
2. **OpenAI Whisper API** (`whisper-1`) — Requires API key in `credentials.json5`.
3. **null** — No provider available. Placeholder stays in message, agent naturally responds (e.g. suggests installing whisper).
### Whisper Skill (Agent Fallback)
The `skills/whisper/SKILL.md` skill is a secondary safety net. If transcription returned null (no local binary, no API key), the agent receives a placeholder with the file path. If whisper is installed, the skill tells the agent how to transcribe it via the exec tool.
### File Map
| File | Role |
|------|------|
| `src/channels/types.ts` | `ChannelMediaAttachment`, `ChannelMessage.media`, `ChannelPlugin.downloadMedia` |
| `src/channels/plugins/telegram.ts` | Detect voice/audio/photo/video/document + download via Grammy API |
| `src/channels/manager.ts` | `routeMedia()` — download, convert, `agent.write(text)` |
| `src/media/transcribe.ts` | Audio → text (local whisper → OpenAI Whisper API) |
| `src/media/describe-image.ts` | Image → text via OpenAI Vision API (gpt-4o-mini) |
| `src/media/describe-video.ts` | Video → extract frame (ffmpeg) → text via Vision API |
| `src/shared/paths.ts` | `MEDIA_CACHE_DIR` (`~/.super-multica/cache/media/`) |
| `skills/whisper/SKILL.md` | Local whisper CLI fallback skill |
### Future Work
| Task | Scope |
|------|-------|
| Groq / Deepgram fallback for audio | `src/media/transcribe.ts` |
| Multi-provider vision support (Gemini, Anthropic) | `src/media/describe-image.ts` |
| Document text extraction (PDF, DOCX) | `src/media/` |
| Media cache cleanup (delete old files) | `src/shared/` |
| Outbound media (send images/audio back to channels) | `types.ts`, plugins |

232
docs/message-paths.md Normal file
View file

@ -0,0 +1,232 @@
# Message Paths — Desktop / Web / Channel
Three independent paths deliver messages to and from the Hub's agent.
All three share the same `AsyncAgent` instance — they are just different I/O surfaces.
---
## Overview
```
Desktop (Electron IPC) Web (WebSocket via Gateway) Channel (Bot API, e.g. Telegram)
│ │ │
▼ ▼ ▼
localChat:send IPC client.send → Gateway WS plugin.gateway (polling/webhook)
│ │ │
▼ ▼ ▼
hub.ts / ipc/hub.ts hub.ts / onMessage manager.ts / routeIncoming
clearLastRoute() clearLastRoute() set lastRoute
│ │ │
└────────────────► agent.write(text) ◄──────────────────────────────┘
AsyncAgent.run()
┌────────────┴────────────────┐
▼ ▼
agent.subscribe() agent.read()
(multi-consumer) (single-consumer iterable)
│ │
┌────────┴────────┐ ▼
▼ ▼ hub.ts / consumeAgent()
Desktop IPC Channel Manager │
(ipc/hub.ts) (manager.ts) ▼
│ │ Gateway WS → Web client
▼ ▼
localChat:event Bot API reply
→ renderer (via lastRoute)
```
---
## Path 1: Desktop (Electron IPC)
### Send (User → Agent)
```
Renderer: sendMessage(text)
→ IPC: localChat:send
→ ipc/hub.ts handler
→ hub.channelManager.clearLastRoute() // reply stays in desktop
→ agent.write(text)
```
**File**: `apps/desktop/electron/ipc/hub.ts``localChat:send` handler (line ~373)
### Receive (Agent → User)
```
Agent runs LLM
→ pi-agent-core fires AgentEvent
→ Agent.subscribeAll() → AsyncAgent channel + subscribers
→ agent.subscribe() callback in ipc/hub.ts
→ Filter: assistant messages + tool_execution + passthrough (compaction, agent_error)
→ IPC: mainWindow.webContents.send('localChat:event', { agentId, streamId, event })
→ Renderer: use-local-chat.ts onEvent callback
→ chat.handleStream(payload)
```
**Files**:
- `apps/desktop/electron/ipc/hub.ts``localChat:subscribe` handler (line ~248)
- `apps/desktop/src/hooks/use-local-chat.ts``onEvent` listener (line ~54)
- `packages/hooks/src/use-chat.ts``handleStream()` (line ~133)
### Error Handling
```
Agent.run() throws / returns error
→ AsyncAgent.write() catch block
→ channel.send(legacy Message) // for read() consumers (Web)
→ agent.emitMulticaEvent({ type: "agent_error", error }) // for subscribe() consumers
→ ipc/hub.ts subscriber → passthrough event → localChat:event
→ use-local-chat.ts → chat.setError() + setIsLoading(false)
```
---
## Path 2: Web (WebSocket via Gateway)
### Send (User → Agent)
```
Web app: sendMessage(text)
→ GatewayClient.send(hubId, "message", { agentId, content })
→ Socket.io → Gateway server → routes to Hub device
→ hub.ts / onMessage handler
→ channelManager.clearLastRoute() // reply stays in gateway
→ agentSenders.set(agentId, deviceId)
→ agent.write(content)
```
**File**: `src/hub/hub.ts``onMessage` handler (line ~154)
### Receive (Agent → User)
```
Agent runs LLM
→ pi-agent-core fires AgentEvent
→ Agent.subscribeAll() → AsyncAgent channel + subscribers
→ agent.read() consumed by hub.ts / consumeAgent()
→ Filter: assistant messages + tool_execution + passthrough (compaction, agent_error)
→ client.send(targetDeviceId, StreamAction, { streamId, agentId, event })
→ Socket.io → Gateway → routes to Web client device
→ GatewayClient.onMessage callback
→ use-gateway-chat.ts → chat.handleStream(payload)
```
**Files**:
- `src/hub/hub.ts``consumeAgent()` (line ~314)
- `packages/hooks/src/use-gateway-chat.ts``onMessage` listener (line ~50)
- `packages/hooks/src/use-chat.ts``handleStream()` (line ~133)
### Error Handling
```
Agent.run() throws / returns error
→ AsyncAgent.write() catch block
→ channel.send(legacy Message) // consumed by consumeAgent() → sent as "message" action
→ agent.emitMulticaEvent({ type: "agent_error", error })
→ read() → consumeAgent() → passthrough event → StreamAction
→ GatewayClient → use-gateway-chat.ts → chat.setError() + setIsLoading(false)
```
**Note**: Legacy error Messages also reach the Web client as `"message"` action (a plain text fallback). The `agent_error` event provides structured error info for proper UI rendering.
---
## Path 3: Channel (Bot API, e.g. Telegram)
### Send (User → Agent)
```
User sends message in Telegram
→ grammy long-polling receives Update
→ plugin.gateway.start() callback: onMessage(channelMessage)
→ ChannelManager.routeIncoming()
→ Set lastRoute = { plugin, deliveryCtx } // reply goes back to Telegram
→ agent.write(text) // same as desktop/web
```
**File**: `src/channels/manager.ts``routeIncoming()` (line ~233)
### Receive (Agent → User)
```
Agent runs LLM
→ pi-agent-core fires AgentEvent
→ Agent.subscribeAll() → AsyncAgent channel + subscribers
→ agent.subscribe() callback in ChannelManager.subscribeToAgent()
→ Check: if (!lastRoute) return // no active channel route, skip
→ Filter: only assistant messages
→ message_start → createAggregator() // MessageAggregator buffers/chunks text
→ message_update → aggregator.handleEvent()
→ message_end → aggregator.handleEvent() → null aggregator
→ Aggregator emits text blocks
→ Block 0: plugin.outbound.replyText(deliveryCtx, text) // Telegram reply
→ Block N: plugin.outbound.sendText(deliveryCtx, text) // follow-up messages
```
**Files**:
- `src/channels/manager.ts``subscribeToAgent()` (line ~151), `createAggregator()` (line ~205)
- `src/hub/message-aggregator.ts` — text chunking/buffering logic
### Error Handling
```
Agent.run() throws / returns error
→ AsyncAgent.write() catch block
→ agent.emitMulticaEvent({ type: "agent_error", error })
→ subscribe() → ChannelManager subscriber
→ if lastRoute exists:
→ plugin.outbound.sendText(deliveryCtx, "[Error] ${errorMsg}")
```
---
## Comparison Table
| Aspect | Desktop (IPC) | Web (WebSocket) | Channel (Bot API) |
|---------------------|------------------------|---------------------------|--------------------------|
| **Transport** | Electron IPC | Socket.io via Gateway | Bot API (HTTP) |
| **Send entry** | `localChat:send` | `client.send` → Gateway | `routeIncoming` |
| **Receive method** | `agent.subscribe()` | `agent.read()` (iterable) | `agent.subscribe()` |
| **Consumer** | ipc/hub.ts subscriber | hub.ts `consumeAgent()` | manager.ts subscriber |
| **Frontend hook** | `use-local-chat.ts` | `use-gateway-chat.ts` | N/A (Bot API) |
| **State hook** | `use-chat.ts` | `use-chat.ts` | N/A |
| **Reply routing** | Always (IPC channel) | `agentSenders` Map | `lastRoute` pattern |
| **clearLastRoute** | Yes (on send) | Yes (on send) | No (sets lastRoute) |
| **Error display** | `agent_error` → UI | `agent_error` → UI | `agent_error` → Bot text |
| **Tool results** | Rendered in UI | Rendered in UI | Skipped (text only) |
| **Text chunking** | No (full stream) | No (full stream) | Yes (MessageAggregator) |
---
## lastRoute Pattern
The `lastRoute` tracks which channel last sent a message. When the agent replies:
- If `lastRoute` is set → reply goes to that channel (e.g. Telegram)
- If `lastRoute` is null → reply goes to Desktop/Web only (via their own mechanisms)
**Clearing**: Desktop and Web both call `channelManager.clearLastRoute()` before `agent.write()`, so channel replies stop when the user switches to desktop/web.
**Setting**: `routeIncoming()` sets `lastRoute` when a channel message arrives.
Desktop and Web always receive agent events regardless of `lastRoute` — they use their own independent delivery mechanisms (IPC subscribe / Gateway read).
---
## Event Filtering
All three paths filter raw agent events. Only these are forwarded to consumers:
| Event Type | Desktop | Web | Channel |
|-------------------------|---------|-----|---------|
| `message_start` | assistant only | assistant only | assistant only |
| `message_update` | assistant only | assistant only | assistant only |
| `message_end` | assistant only | assistant only | assistant only |
| `tool_execution_start` | Yes | Yes | No |
| `tool_execution_end` | Yes | Yes | No |
| `compaction_start` | Yes (passthrough) | Yes (passthrough) | No |
| `compaction_end` | Yes (passthrough) | Yes (passthrough) | No |
| `agent_error` | Yes (passthrough) | Yes (passthrough) | Yes (→ text) |
| User message events | Filtered out | Filtered out | Filtered out |

View file

@ -1,234 +0,0 @@
# App Store Submission Guide
Complete guide for publishing the Expo React Native app to Apple App Store and Google Play Store.
## 1. Prerequisites
### Accounts & Fees
| Platform | Cost | Notes |
|----------|------|-------|
| Apple Developer Program | $99/year | Required for App Store distribution |
| Google Play Console | $25 one-time | Developer registration |
| Expo Account | Free (paid plans available) | Required for EAS Build/Submit |
- Apple Developer account review: 1-2 days
- Google Play developer account review: days to weeks
### Tools
```bash
npm install -g eas-cli
eas login
eas whoami # verify login
```
## 2. Project Configuration
### Initialize EAS
```bash
eas build:configure
```
Generates `eas.json` with three build profiles: `development`, `preview`, `production`.
### Key `app.json` / `app.config.ts` Fields
```jsonc
{
"name": "Multica",
"slug": "multica",
"version": "1.0.0",
"ios": {
"bundleIdentifier": "com.multica.app",
"buildNumber": "1" // increment on each submission
},
"android": {
"package": "com.multica.app",
"versionCode": 1 // increment on each submission
},
"icon": "./assets/icon.png", // 1024x1024 PNG
"splash": {
"image": "./assets/splash.png"
}
}
```
## 3. App Signing & Credentials
### iOS
- EAS auto-manages credentials (recommended): Distribution Certificate + Provisioning Profile
- Or create manually in Apple Developer Portal
### Android
- EAS auto-generates Keystore (recommended), stored securely on EAS servers
- **Back up Keystore** — losing it means you cannot update the published app
- Play Store requires AAB (Android App Bundle) format
## 4. Production Build
```bash
# iOS
eas build --platform ios --profile production
# Android
eas build --platform android --profile production
# Both
eas build --platform all --profile production
```
Builds run in Expo cloud — no local Xcode or Android Studio needed.
## 5. Store Listing Preparation
### Required for Both Platforms
#### Privacy Policy
- **Mandatory** — must be a publicly accessible URL
- Must clearly state:
- What data the app collects and how
- Whether data is shared with third parties
- Data retention and deletion policies
- How users can request data deletion
- **2025 rule**: If data is sent to third-party AI, must disclose explicitly and obtain user consent
- Tools: Termly, PrivacyPolicies.com, or custom page
#### App Screenshots
- **iOS**: Multiple sizes required (6.7", 6.5", 5.5" iPhone + iPad)
- **Android**: 2-8 screenshots
- Must accurately reflect current app interface
#### App Icon
- 1024x1024 high-resolution PNG
- No alpha/transparency for iOS
#### App Description
- Short description (≤80 chars for Google Play)
- Full description
#### Support URL
- A link where users can get help
#### Account Deletion
- If the app supports registration, users **must** be able to delete their account and data in-app
- Both Apple and Google require this
### Apple App Store Connect — Additional Requirements
| Item | Details |
|------|---------|
| Privacy Nutrition Labels | Fill out data collection practices per category in App Store Connect |
| App Review Information | Reviewer contact info, demo/test account credentials |
| Content Rating | Age classification |
| Export Compliance | Encryption usage declaration |
| Info.plist Permission Strings | Clear purpose description for each permission (camera, location, etc.) |
### Google Play Console — Additional Requirements
| Item | Details |
|------|---------|
| Data Safety Form | Detail data collection and sharing (required even if no data is collected) |
| Content Rating Questionnaire | IARC rating questionnaire |
| Target Audience | Declare if the app targets children |
| First Upload | Must be done manually via Play Console (Google Play API limitation) |
## 6. Submit to Stores
### Apple App Store
```bash
eas submit --platform ios
```
This uploads the build to **App Store Connect / TestFlight**. Then you must:
1. Log into App Store Connect
2. Select the uploaded build
3. Associate it with a version
4. Fill in all metadata, screenshots, privacy labels
5. Submit for App Review
### Google Play Store
```bash
eas submit --platform android
```
**First time**: Must upload AAB manually in Play Console.
After initial upload:
1. Navigate to Production → Create new release
2. Upload AAB or use the EAS-submitted build
3. Fill in description, screenshots, data safety form
4. Submit for review
### Auto-Submit (Optional)
```bash
eas build --platform all --profile production --auto-submit
```
## 7. App Review
| | Apple | Google |
|---|---|---|
| Review time | Typically 24-48 hours | Hours to 7 days |
| Common rejections | Incomplete features, misleading screenshots, missing privacy policy, unclear permission strings | Data safety form mismatch, policy violations |
| After rejection | Fix issues, resubmit | Fix issues, resubmit |
## 8. Post-Launch
### OTA Updates (No Re-Review Needed)
```bash
eas update --branch production
```
- Only for JS/asset-level changes
- Native code changes still require a new build + review
### CI/CD Automation
Create `.eas/workflows/build-and-submit.yml` to auto-build and submit on push to main.
### Google Service Account Key (for Automated Android Submissions)
1. Go to EAS dashboard → Credentials → Android
2. Click Application identifier → Service Credentials
3. Add Google Service Account Key
## 9. Checklist
- [ ] Register Apple Developer + Google Play Console accounts
- [ ] Configure `app.json` and `eas.json`
- [ ] Prepare app icon, splash screen, screenshots
- [ ] Write and host privacy policy URL
- [ ] Implement in-app account deletion (if registration exists)
- [ ] Add Info.plist permission descriptions (iOS)
- [ ] Run `eas build --platform all --profile production`
- [ ] Create app in App Store Connect, fill metadata + privacy labels
- [ ] Create app in Google Play Console, fill data safety form, manual first AAB upload
- [ ] `eas submit` or submit manually for review
- [ ] Wait for review approval → live
- [ ] Set up `eas update` for OTA updates
## References
- [Expo: Submit to App Stores](https://docs.expo.dev/deploy/submit-to-app-stores/)
- [Expo: EAS Submit](https://docs.expo.dev/submit/introduction/)
- [Expo: Build Your Project](https://docs.expo.dev/deploy/build-project/)
- [Expo: App Stores Best Practices](https://docs.expo.dev/distribution/app-stores/)
- [Apple App Review Guidelines](https://developer.apple.com/app-store/review/guidelines/)
- [Apple App Privacy Details](https://developer.apple.com/app-store/app-privacy-details/)
- [Google Play Data Safety](https://support.google.com/googleplay/android-developer/answer/10787469)
- [Google Play Developer Policy Center](https://play.google/developer-content-policy/)

497
docs/mobile/guide.md Normal file
View file

@ -0,0 +1,497 @@
# Mobile Development Guide
Complete lifecycle guide for developing, testing, and publishing the Expo React Native app — from first line of code to App Store / Google Play.
## Overview
```
Phase 1: Environment Setup You are here if starting fresh
Phase 2: Development & Testing Daily work loop
Phase 3: Pre-Release Preparation Before your first submission
Phase 4: Build & Submit Ship to stores
Phase 5: Post-Launch Maintain and update
```
---
## Phase 1: Environment Setup
### 1.1 Required Software
| Tool | Purpose | Install |
|------|---------|---------|
| **Node.js** (LTS) | JS runtime | `brew install node` or [nodejs.org](https://nodejs.org) |
| **pnpm** | Package manager | `corepack enable && corepack prepare pnpm@latest --activate` |
| **Xcode** | iOS build toolchain | Mac App Store (free) |
| **Xcode Command Line Tools** | Compilers, simulators | `xcode-select --install` |
| **CocoaPods** | iOS dependency manager | `sudo gem install cocoapods` |
| **Android Studio** | Android emulator + SDK (optional, iOS-first) | [developer.android.com](https://developer.android.com/studio) |
| **EAS CLI** | Expo build & submit | `npm install -g eas-cli` |
| **Expo CLI** | Dev server | Bundled with `npx expo` |
### 1.2 Xcode First-Time Setup
1. Open Xcode at least once to accept the license and install components
2. **Add your Apple ID** (free account is enough for development):
- Xcode → Settings → Accounts → `+` → Apple ID
- This creates a "Personal Team" for free code signing
3. Verify simulators are installed:
- Xcode → Settings → Components → download an iOS Simulator runtime
### 1.3 iPhone First-Time Setup (for Real Device Testing)
1. **Enable Developer Mode** (required on iOS 16+):
- Settings → Privacy & Security → Developer Mode → ON
- Device will restart
2. Connect iPhone to Mac via USB/USB-C cable
3. When prompted "Trust This Computer?" → tap Trust
### 1.4 Project Setup
```bash
# Install dependencies
pnpm install
# Generate native project files (creates ios/ and android/ directories)
npx expo prebuild
# Initialize EAS configuration (creates eas.json)
eas build:configure
```
### 1.5 Expo Account
```bash
# Create account at expo.dev, then:
eas login
eas whoami # verify
```
**No paid accounts needed at this stage.** Free Apple ID + free Expo account is enough for development.
---
## Phase 2: Development & Testing
### 2.1 Running on iOS Simulator
```bash
# Start the app in iOS simulator (no real device needed)
npx expo run:ios
```
- Fastest iteration loop — code changes hot-reload instantly
- Good for: UI layout, navigation, business logic, API calls
- **Cannot test**: camera, barcode scanner, real push notifications, biometrics
### 2.2 Running on Real iPhone
```bash
# Connect iPhone via USB, then:
npx expo run:ios --device
```
Expo CLI will:
1. Detect your connected device
2. Sign the app with your Personal Team (free Apple ID)
3. Build, install, and launch the app
**First time only**: After installation, go to:
- Settings → General → VPN & Device Management → Trust your developer certificate
#### Free Signing Limitations
| Limitation | Detail |
|-----------|--------|
| 7-day expiry | App stops launching after 7 days — just re-run `npx expo run:ios --device` |
| 3 devices max | Can register up to 3 test devices per Apple ID |
| Some entitlements unavailable | Push notifications, Apple Pay, iCloud require paid account |
| Cannot distribute to others | Only works on your own registered devices |
**Camera, barcode scanner, GPS, sensors all work fine with free signing.**
### 2.3 Daily Development Workflow
```
First time (or after native config changes):
npx expo prebuild Generate/update native projects
npx expo run:ios --device Build and install on device
Every day after that:
npx expo start --dev-client Start dev server only (no rebuild)
→ Open the app on device It connects automatically
→ Edit code, save Hot-reload updates instantly
```
**When do you need to rebuild?**
| Change | Rebuild needed? |
|--------|----------------|
| JS/TS code, React components | No — hot-reload |
| Styles, images, assets | No — hot-reload |
| Added new Expo SDK module | **Yes**`npx expo prebuild && npx expo run:ios --device` |
| Changed `app.json` permissions | **Yes** — rebuild |
| Updated native dependency | **Yes** — rebuild |
| Upgraded Expo SDK version | **Yes** — rebuild |
### 2.4 Testing Native Features (Camera, Scanner)
| Feature | Simulator | Real Device |
|---------|-----------|-------------|
| Camera preview | Not available | Works |
| Barcode / QR scan | Not available | Works |
| GPS location | Simulated location via Xcode menu | Real GPS |
| Push notifications | Not available | Requires paid Apple Developer account |
| Haptic feedback | Not available | Works |
| Device sensors (accelerometer, gyroscope) | Not available | Works |
For camera/scanner features, **always test on a real device**.
### 2.5 Debugging Tools
#### Developer Menu
Press `m` in the terminal (or shake the device) to open:
- Toggle Performance Monitor
- Toggle Element Inspector
- Open React Native DevTools
#### React Native DevTools
The primary debugging tool (replaced Chrome DevTools since RN 0.76):
| Tab | Use |
|-----|-----|
| Console | View logs, execute JS in app context |
| Sources | Set breakpoints, step through code |
| Network | Inspect API requests (Expo only) |
| Components | Inspect React component tree and props |
| Profiler | Measure render performance |
#### VS Code Integration
Install the **Expo Tools** extension for:
- Breakpoint debugging directly in VS Code
- `app.json` / `app.config.ts` IntelliSense
#### Native Crash Debugging
For crashes in native modules (not JS):
- **iOS**: Open Xcode → Window → Devices and Simulators → View Device Logs
- **Android**: `adb logcat` in terminal
---
## Phase 3: Pre-Release Preparation
**This is when you need to start spending money.**
### 3.1 Accounts & Fees
| Platform | Cost | Registration Time | Required For |
|----------|------|-------------------|--------------|
| **Apple Developer Program** | $99/year | 1-2 days review | App Store distribution |
| **Google Play Console** | $25 one-time | Days to weeks review | Play Store distribution |
| **Expo Account** | Free tier sufficient | Instant | EAS Build & Submit |
Register early — account review takes time, especially Google.
### 3.2 App Configuration
Update `app.json` or `app.config.ts`:
```jsonc
{
"name": "Multica",
"slug": "multica",
"version": "1.0.0",
"ios": {
"bundleIdentifier": "com.multica.app",
"buildNumber": "1", // increment each submission
"infoPlist": {
"NSCameraUsageDescription": "Used to scan QR codes and take photos",
"NSPhotoLibraryUsageDescription": "Used to save scanned images"
}
},
"android": {
"package": "com.multica.app",
"versionCode": 1, // increment each submission
"permissions": ["CAMERA"]
},
"icon": "./assets/icon.png", // 1024x1024 PNG, no transparency
"splash": {
"image": "./assets/splash.png"
}
}
```
### 3.3 EAS Build Profiles
`eas.json`:
```json
{
"cli": { "version": ">= 10.0.0" },
"build": {
"development": {
"developmentClient": true,
"distribution": "internal"
},
"preview": {
"distribution": "internal"
},
"production": {}
},
"submit": {
"production": {}
}
}
```
### 3.4 App Signing & Credentials
#### iOS
EAS auto-manages credentials (recommended):
- Distribution Certificate
- Provisioning Profile
- Or create manually in [Apple Developer Portal](https://developer.apple.com)
#### Android
- EAS auto-generates Keystore, stored securely on EAS servers
- **Back up your Keystore** — losing it means you can never update the published app
- Play Store requires AAB (Android App Bundle) format
### 3.5 Required Assets
| Asset | Spec |
|-------|------|
| **App Icon** | 1024x1024 PNG, no alpha/transparency (iOS) |
| **Splash Screen** | Platform-appropriate sizes |
| **iOS Screenshots** | 6.7", 6.5", 5.5" iPhone sizes + iPad (if universal) |
| **Android Screenshots** | 2-8 screenshots |
### 3.6 Required Metadata
#### Both Platforms
| Item | Notes |
|------|-------|
| **Privacy Policy URL** | Publicly accessible. Must disclose data collection, third-party sharing, AI usage, deletion rights |
| **App Description** | Short (≤80 chars for Google) + full description |
| **Support URL** | Where users can get help |
| **Account Deletion** | If app has registration, must support in-app account + data deletion |
#### Apple App Store Connect
| Item | Details |
|------|---------|
| Privacy Nutrition Labels | Data collection practices per category |
| App Review Information | Reviewer contact info, demo/test account |
| Content Rating | Age classification |
| Export Compliance | Encryption usage declaration |
| Info.plist Permission Strings | Clear purpose description for each permission |
#### Google Play Console
| Item | Details |
|------|---------|
| Data Safety Form | Required even if no data is collected |
| Content Rating Questionnaire | IARC rating |
| Target Audience | Must declare if targeting children |
| First Upload | Must upload AAB manually (Google API limitation) |
---
## Phase 4: Build & Submit
### 4.1 Production Build
```bash
# iOS
eas build --platform ios --profile production
# Android
eas build --platform android --profile production
# Both platforms
eas build --platform all --profile production
```
Builds run in Expo cloud — no local Xcode or Android Studio needed for production builds.
### 4.2 Submit to Apple App Store
```bash
eas submit --platform ios
```
This uploads the build to **App Store Connect / TestFlight**. Then:
1. Log into [App Store Connect](https://appstoreconnect.apple.com)
2. Select the uploaded build
3. Associate it with a version
4. Fill in all metadata, screenshots, privacy nutrition labels
5. Submit for App Review
### 4.3 Submit to Google Play Store
```bash
eas submit --platform android
```
**First time**: Must upload AAB manually in [Play Console](https://play.google.com/console).
After initial upload:
1. Navigate to Production → Create new release
2. Upload AAB or use the EAS-submitted build
3. Fill in description, screenshots, data safety form
4. Submit for review
### 4.4 Auto-Submit (Optional)
Build and submit in one step:
```bash
eas build --platform all --profile production --auto-submit
```
### 4.5 App Review
| | Apple | Google |
|---|---|---|
| Review time | Typically 24-48 hours | Hours to 7 days |
| Common rejections | Incomplete features, misleading screenshots, missing privacy policy, unclear permission strings | Data safety form mismatch, policy violations |
| After rejection | Fix issues, resubmit | Fix issues, resubmit |
---
## Phase 5: Post-Launch
### 5.1 OTA Updates (No Re-Review)
For JS/asset-only changes, push updates without going through App Review:
```bash
eas update --branch production
```
- Instant delivery to users — no store review
- Only works for JavaScript and asset changes
- **Native code changes still require a new build + review**
### 5.2 Version Bumping
For each new store submission:
- iOS: increment `buildNumber` in `app.json`
- Android: increment `versionCode` in `app.json`
- Bump `version` for user-visible version changes
### 5.3 CI/CD Automation
Create `.eas/workflows/build-and-submit.yml` to auto-build and submit on push to main.
#### Google Service Account Key (Automated Android Submissions)
1. EAS dashboard → Credentials → Android
2. Click Application identifier → Service Credentials
3. Add Google Service Account Key
---
## Quick Reference
### Common Commands
```bash
# Development
npx expo prebuild # Generate native projects
npx expo run:ios # Run on iOS simulator
npx expo run:ios --device # Run on connected iPhone
npx expo start --dev-client # Start dev server (after initial install)
# Building
eas build --platform ios --profile development # Dev build (for device testing)
eas build --platform ios --profile production # Production build
eas build --platform all --profile production # Both platforms
# Submitting
eas submit --platform ios # Submit to App Store
eas submit --platform android # Submit to Play Store
# OTA Updates
eas update --branch production # Push JS update to users
```
### Cost Summary
| Phase | Cost |
|-------|------|
| Development + local testing | **Free** (free Apple ID + Xcode) |
| EAS cloud builds | Free tier: 30 iOS + 30 Android builds/month |
| App Store submission | **$99/year** (Apple Developer Program) |
| Play Store submission | **$25 one-time** (Google Play Console) |
---
## Master Checklist
### Development Phase
- [ ] Install Node.js, pnpm, Xcode, EAS CLI
- [ ] Add Apple ID to Xcode (Settings → Accounts)
- [ ] Enable Developer Mode on iPhone
- [ ] Run `npx expo prebuild`
- [ ] Test on simulator: `npx expo run:ios`
- [ ] Test on real device: `npx expo run:ios --device`
- [ ] Trust developer certificate on device
- [ ] Verify camera/scanner functionality on real device
### Pre-Release Phase
- [ ] Register Apple Developer Program ($99/year)
- [ ] Register Google Play Console ($25)
- [ ] Configure `app.json` (bundleIdentifier, permissions, icon, splash)
- [ ] Configure `eas.json` build profiles
- [ ] Prepare app icon (1024x1024 PNG)
- [ ] Prepare splash screen
- [ ] Take App Store screenshots (all required sizes)
- [ ] Write and host privacy policy URL
- [ ] Write app description (short + full)
- [ ] Set up support URL
- [ ] Implement in-app account deletion (if registration exists)
### Submission Phase
- [ ] Run `eas build --platform all --profile production`
- [ ] iOS: `eas submit --platform ios`
- [ ] iOS: Fill metadata + privacy labels in App Store Connect
- [ ] iOS: Submit for App Review
- [ ] Android: Upload first AAB manually in Play Console
- [ ] Android: `eas submit --platform android`
- [ ] Android: Fill data safety form + metadata in Play Console
- [ ] Android: Submit for review
- [ ] Wait for review approval → app goes live
### Post-Launch Phase
- [ ] Set up `eas update` for OTA updates
- [ ] Set up CI/CD workflow (optional)
- [ ] Configure Google Service Account Key for automated Android submissions (optional)
---
## References
- [Expo: Getting Started](https://docs.expo.dev/get-started/introduction/)
- [Expo: Development Builds](https://docs.expo.dev/develop/development-builds/introduction/)
- [Expo: Local App Development](https://docs.expo.dev/guides/local-app-development/)
- [Expo: Debugging Tools](https://docs.expo.dev/debugging/tools/)
- [Expo: Submit to App Stores](https://docs.expo.dev/deploy/submit-to-app-stores/)
- [Expo: EAS Submit](https://docs.expo.dev/submit/introduction/)
- [Expo: EAS Update](https://docs.expo.dev/eas-update/introduction/)
- [Apple App Review Guidelines](https://developer.apple.com/app-store/review/guidelines/)
- [Apple App Privacy Details](https://developer.apple.com/app-store/app-privacy-details/)
- [Google Play Data Safety](https://support.google.com/googleplay/android-developer/answer/10787469)
- [Google Play Developer Policy Center](https://play.google/developer-content-policy/)

View file

@ -62,6 +62,7 @@
"@sinclair/typebox": "^0.34.41",
"croner": "^10.0.1",
"fast-glob": "^3.3.3",
"grammy": "^1.39.3",
"json5": "^2.2.3",
"linkedom": "^0.18.12",
"nestjs-pino": "^4.5.0",

View file

@ -50,6 +50,12 @@ export function useGatewayChat({ client, hubId, agentId }: UseGatewayChatOptions
client.onMessage((msg) => {
if (msg.action === StreamAction) {
const payload = msg.payload as StreamPayload;
if (payload.event.type === "agent_error") {
const errorMsg = (payload.event as { message?: string }).message ?? "Unknown error";
chat.setError({ code: "AGENT_ERROR", message: errorMsg });
setIsLoading(false);
return;
}
chat.handleStream(payload);
if (payload.event.type === "message_start") setIsLoading(true);
if (payload.event.type === "message_end") setIsLoading(false);

View file

@ -4,7 +4,7 @@ import { useEditor, EditorContent } from "@tiptap/react";
import StarterKit from "@tiptap/starter-kit";
import Placeholder from "@tiptap/extension-placeholder";
import { Button } from "@multica/ui/components/ui/button";
import { ArrowUpIcon } from "@hugeicons/core-free-icons";
import { ArrowUp02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { cn } from "@multica/ui/lib/utils";
import "./chat-input.css";
@ -111,13 +111,13 @@ export const ChatInput = forwardRef<ChatInputRef, ChatInputProps>(
return (
<div className={cn(
"chat-input-editor bg-card rounded-xl p-3 border border-border transition-colors",
"chat-input-editor bg-card rounded-xl p-2 border border-border transition-colors",
disabled && "is-disabled cursor-not-allowed opacity-60",
)}>
<EditorContent editor={editor} />
<EditorContent className="min-h-12" editor={editor} />
<div className="flex items-center justify-end pt-2">
<Button size="icon-lg" onClick={handleSubmit} disabled={disabled}>
<HugeiconsIcon className="size-4.5" strokeWidth={2.5} icon={ArrowUpIcon} />
<Button size="icon" onClick={handleSubmit} disabled={disabled}>
<HugeiconsIcon strokeWidth={2.5} icon={ArrowUp02Icon} />
</Button>
</div>
</div>

View file

@ -5,6 +5,7 @@ import { Button } from "@multica/ui/components/ui/button";
import { Skeleton } from "@multica/ui/components/ui/skeleton";
import { ChatInput } from "@multica/ui/components/chat-input";
import { MessageList } from "@multica/ui/components/message-list";
import { MemoizedMarkdown } from "@multica/ui/components/markdown";
import { MulticaIcon } from "@multica/ui/components/multica-icon";
import { ExecApprovalItem } from "@multica/ui/components/exec-approval-item";
import { useScrollFade } from "@multica/ui/hooks/use-scroll-fade";
@ -221,7 +222,11 @@ export function ChatView({
{error && (
<div className="container px-4" role="alert" aria-live="polite">
<div className="rounded-lg bg-destructive/5 border border-destructive/15 text-xs px-3 py-2 flex items-center justify-between gap-3">
<span className="text-foreground leading-snug">{error.message}</span>
<span className="text-foreground leading-snug">
<MemoizedMarkdown mode="minimal" id={`error-${error.code}`}>
{error.message}
</MemoizedMarkdown>
</span>
<div className="flex items-center gap-2 shrink-0">
{errorAction && (
<Button

26
pnpm-lock.yaml generated
View file

@ -74,6 +74,9 @@ importers:
fast-glob:
specifier: ^3.3.3
version: 3.3.3
grammy:
specifier: ^1.39.3
version: 1.39.3
json5:
specifier: ^2.2.3
version: 2.2.3
@ -1832,6 +1835,9 @@ packages:
'@modelcontextprotocol/sdk':
optional: true
'@grammyjs/types@3.23.0':
resolution: {integrity: sha512-D3jQ4UWERPsyR3op/YFudMMIPNTU47vy7L51uO9/73tMELmjO/+LX5N36/Y0CG5IQfIsz43MxiHI5rgsK0/k+g==}
'@hono/node-server@1.19.9':
resolution: {integrity: sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==}
engines: {node: '>=18.14.1'}
@ -5797,6 +5803,10 @@ packages:
graceful-fs@4.2.11:
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
grammy@1.39.3:
resolution: {integrity: sha512-7arRRoOtOh9UwMwANZ475kJrWV6P3/EGNooeHlY0/SwZv4t3ZZ3Uiz9cAXK8Zg9xSdgmm8T21kx6n7SZaWvOcw==}
engines: {node: ^12.20.0 || >=14.13.1}
graphemer@1.4.0:
resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==}
@ -11228,6 +11238,8 @@ snapshots:
- supports-color
- utf-8-validate
'@grammyjs/types@3.23.0': {}
'@hono/node-server@1.19.9(hono@4.11.7)':
dependencies:
hono: 4.11.7
@ -14947,7 +14959,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.54.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1)):
eslint-module-utils@2.12.1(@typescript-eslint/parser@8.54.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1)):
dependencies:
debug: 3.2.7
optionalDependencies:
@ -14978,7 +14990,7 @@ snapshots:
doctrine: 2.1.0
eslint: 9.39.2(jiti@2.6.1)
eslint-import-resolver-node: 0.3.9
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.54.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.2(jiti@2.6.1))
eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.54.0(eslint@9.39.2(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.2(jiti@2.6.1)))(eslint@9.39.2(jiti@2.6.1))
hasown: 2.0.2
is-core-module: 2.16.1
is-glob: 4.0.3
@ -15893,6 +15905,16 @@ snapshots:
graceful-fs@4.2.11: {}
grammy@1.39.3:
dependencies:
'@grammyjs/types': 3.23.0
abort-controller: 3.0.0
debug: 4.4.3
node-fetch: 2.7.0
transitivePeerDependencies:
- encoding
- supports-color
graphemer@1.4.0: {}
graphql@16.12.0: {}

36
skills/whisper/SKILL.md Normal file
View file

@ -0,0 +1,36 @@
---
name: Audio Transcription
description: Transcribe audio files using local Whisper CLI (fallback when API is unavailable)
version: 1.0.0
metadata:
emoji: "🎙️"
requires:
anyBins:
- whisper
- whisper-cli
install:
- id: brew-whisper
kind: brew
formula: openai-whisper
bins: [whisper]
label: "Install OpenAI Whisper via Homebrew"
os: [darwin]
tags:
- audio
- transcription
- media
userInvocable: false
disableModelInvocation: false
---
## Audio Transcription (Local Fallback)
Voice messages from channels are normally transcribed automatically via the OpenAI Whisper API before reaching you. This skill is only needed when the API is unavailable.
If you receive `[audio message received]` with a `File:` path (instead of `[Voice Message]` with a transcript), it means the API transcription was not available. Use local whisper to transcribe:
```
whisper "<file_path>" --model base --output_format txt --output_dir /tmp
```
Then read the `.txt` file from `/tmp/` and respond based on the transcribed content.

View file

@ -71,12 +71,14 @@ export class AsyncAgent {
await this.agent.flushSession();
// Normal text is delivered via message_end event; only handle errors here
if (result.error) {
console.error(`[AsyncAgent] Agent run error: ${result.error}`);
this.channel.send({ id: uuidv7(), content: `[error] ${result.error}` });
this.agent.emitError(result.error);
}
})
.catch((err) => {
const message = err instanceof Error ? err.message : String(err);
console.error(`[AsyncAgent] Agent run exception: ${message}`);
this.channel.send({ id: uuidv7(), content: `[error] ${message}` });
// Also emit through subscriber mechanism so IPC listeners receive the error
this.agent.emitError(message);

View file

@ -1,4 +1,4 @@
import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
import { existsSync, readFileSync, writeFileSync, mkdirSync, statSync } from "node:fs";
import { join, dirname } from "node:path";
import { homedir } from "node:os";
import JSON5 from "json5";
@ -31,6 +31,8 @@ export type CredentialsConfig = {
order?: Record<string, string[]> | undefined;
} | undefined;
tools?: Record<string, ToolConfig> | undefined;
/** Channel plugin configs (telegram, discord, etc.) */
channels?: Record<string, Record<string, Record<string, unknown>> | undefined> | undefined;
};
type SkillsEnvConfig = {
@ -90,6 +92,8 @@ export class CredentialManager {
private coreConfig: CredentialsConfig | null = null;
private skillsConfig: SkillsEnvConfig | null = null;
private resolvedSkillsEnv: Record<string, string> | null = null;
private coreMtimeMs: number | null = null;
private skillsMtimeMs: number | null = null;
private isDisabled(): boolean {
if (process.env.SMC_CREDENTIALS_DISABLE === "1") return true;
@ -99,17 +103,32 @@ export class CredentialManager {
private loadCore(): void {
const path = getCredentialsPath();
const disabled = this.isDisabled();
let mtimeMs: number | null = null;
if (this.corePath === path && this.disabledState === disabled && this.coreConfig) {
if (!disabled && existsSync(path)) {
try {
mtimeMs = statSync(path).mtimeMs;
} catch {
mtimeMs = null;
}
}
if (
this.corePath === path
&& this.disabledState === disabled
&& this.coreConfig
&& this.coreMtimeMs === mtimeMs
) {
return;
}
this.corePath = path;
this.disabledState = disabled;
this.coreConfig = null;
this.coreMtimeMs = mtimeMs;
if (disabled) return;
if (!existsSync(path)) return;
if (mtimeMs === null) return;
const raw = readFileSync(path, "utf8");
try {
@ -123,8 +142,22 @@ export class CredentialManager {
private loadSkillsEnv(): void {
const path = getSkillsEnvPath();
const disabled = this.isDisabled();
let mtimeMs: number | null = null;
if (this.skillsPath === path && this.disabledState === disabled && this.resolvedSkillsEnv) {
if (!disabled && existsSync(path)) {
try {
mtimeMs = statSync(path).mtimeMs;
} catch {
mtimeMs = null;
}
}
if (
this.skillsPath === path
&& this.disabledState === disabled
&& this.resolvedSkillsEnv
&& this.skillsMtimeMs === mtimeMs
) {
return;
}
@ -132,9 +165,10 @@ export class CredentialManager {
this.disabledState = disabled;
this.skillsConfig = null;
this.resolvedSkillsEnv = null;
this.skillsMtimeMs = mtimeMs;
if (disabled) return;
if (!existsSync(path)) return;
if (mtimeMs === null) return;
const raw = readFileSync(path, "utf8");
try {
@ -217,6 +251,12 @@ export class CredentialManager {
);
}
/** Get channel plugin configs from credentials.json5 `channels` section. */
getChannelsConfig(): Record<string, Record<string, Record<string, unknown>> | undefined> {
this.loadCore();
return this.coreConfig?.channels ?? {};
}
getResolvedEnvSnapshot(): Record<string, string> {
return { ...this.getResolvedSkillsEnv() };
}
@ -228,6 +268,8 @@ export class CredentialManager {
this.coreConfig = null;
this.skillsConfig = null;
this.resolvedSkillsEnv = null;
this.coreMtimeMs = null;
this.skillsMtimeMs = null;
}
/**
@ -322,6 +364,75 @@ export class CredentialManager {
this.reset();
}
/**
* Set a channel account config and save to credentials.json5.
* Creates the channels section if it doesn't exist.
* Used by the desktop Channels page to persist bot tokens.
*/
setChannelAccountConfig(channelId: string, accountId: string, accountConfig: Record<string, unknown>): void {
const path = getCredentialsPath();
let config: CredentialsConfig = { version: 1 };
if (existsSync(path)) {
try {
const raw = readFileSync(path, "utf8");
config = JSON5.parse(raw) as CredentialsConfig;
} catch {
config = { version: 1 };
}
}
// Ensure channels.[channelId] structure exists
if (!config.channels) {
config.channels = {};
}
if (!config.channels[channelId]) {
config.channels[channelId] = {};
}
// Set or update the account config
config.channels[channelId]![accountId] = accountConfig;
mkdirSync(dirname(path), { recursive: true });
const content = JSON.stringify(config, null, 2);
writeFileSync(path, content, "utf8");
this.reset();
}
/**
* Remove a channel account config from credentials.json5.
* Cleans up the parent channel section if no accounts remain.
*/
removeChannelAccountConfig(channelId: string, accountId: string): void {
const path = getCredentialsPath();
if (!existsSync(path)) return;
let config: CredentialsConfig;
try {
const raw = readFileSync(path, "utf8");
config = JSON5.parse(raw) as CredentialsConfig;
} catch {
return;
}
const channelSection = config.channels?.[channelId];
if (!channelSection) return;
delete channelSection[accountId];
// Clean up empty channel section
if (Object.keys(channelSection).length === 0) {
delete config.channels![channelId];
}
mkdirSync(dirname(path), { recursive: true });
const content = JSON.stringify(config, null, 2);
writeFileSync(path, content, "utf8");
this.reset();
}
/**
* Set the default LLM provider and save to credentials.json5.
*/

View file

@ -264,7 +264,7 @@ export function getLoginInstructions(providerId: string): string {
if (info.authMethod === "oauth") {
if (info.loginCommand) {
return `Run: ${info.loginCommand}\nThen restart Super Multica to use the credentials.`;
return `Run: ${info.loginCommand}\nThen retry in Super Multica to use the credentials.`;
}
}

View file

@ -99,6 +99,7 @@ export class Agent {
private profileCandidates: string[];
private profileIndex: number;
private readonly pinnedProfile: boolean;
private readonly explicitApiKey: boolean;
/** Current session ID */
readonly sessionId: string;
@ -130,6 +131,7 @@ export class Agent {
// === Auth profile resolution ===
this.pinnedProfile = !!(options.authProfileId || options.apiKey);
this.explicitApiKey = !!options.apiKey;
if (options.apiKey) {
// Explicit API key — no rotation
@ -349,7 +351,7 @@ export class Agent {
};
}
private emitMulticaEvent(event: MulticaEvent): void {
emitMulticaEvent(event: MulticaEvent): void {
for (const fn of this.multicaListeners) {
try {
fn(event);
@ -408,6 +410,7 @@ export class Agent {
private async _run(prompt: string): Promise<AgentRunResult> {
await this.ensureInitialized();
this.refreshAuthState();
this.output.state.lastAssistantText = "";
// Early validation: check API key before calling PiAgentCore.prompt(),
@ -495,12 +498,67 @@ export class Agent {
this.currentApiKey = apiKey;
this.currentProfileId = candidateId;
this.profileIndex = nextIndex;
this.updateSessionApiKey();
return true;
}
return false;
}
private refreshAuthState(): void {
if (this.explicitApiKey) {
return;
}
const store = loadAuthProfileStore();
if (this.pinnedProfile) {
const profileId = this.currentProfileId ?? this.resolvedProvider;
this.currentApiKey = resolveApiKeyForProfile(profileId) ?? resolveApiKey(this.resolvedProvider);
this.currentProfileId = profileId;
this.profileCandidates = [];
this.profileIndex = 0;
this.updateSessionApiKey();
return;
}
const candidates = resolveAuthProfileOrder(this.resolvedProvider, store);
this.profileCandidates = candidates;
if (this.currentProfileId) {
const currentIndex = candidates.indexOf(this.currentProfileId);
if (currentIndex >= 0) {
const stats = store.usageStats?.[this.currentProfileId];
if (!stats || !isProfileInCooldown(stats)) {
const apiKey = resolveApiKeyForProfile(this.currentProfileId);
if (apiKey) {
this.currentApiKey = apiKey;
this.profileIndex = currentIndex;
this.updateSessionApiKey();
return;
}
}
}
}
const resolved = resolveApiKeyForProvider(this.resolvedProvider);
if (resolved) {
this.currentApiKey = resolved.apiKey;
this.currentProfileId = resolved.profileId;
this.profileIndex = Math.max(0, candidates.indexOf(resolved.profileId));
} else {
this.currentApiKey = undefined;
this.currentProfileId = undefined;
this.profileIndex = 0;
}
this.updateSessionApiKey();
}
private updateSessionApiKey(): void {
if (this.session.getCompactionMode() !== "summary") return;
this.session.setApiKey(this.currentApiKey);
}
private handleSessionEvent(event: AgentEvent) {
if (event.type === "message_end") {
const message = event.message as AgentMessage;
@ -794,6 +852,8 @@ export class Agent {
throw new Error(`No API key configured for provider: ${providerId}`);
}
this.updateSessionApiKey();
// Update the agent's model and API key
const baseUrl = resolveBaseUrl(actualProvider);
const modelWithBaseUrl = baseUrl ? { ...model, baseUrl } : model;

20
src/channels/config.ts Normal file
View file

@ -0,0 +1,20 @@
/**
* Channel configuration loader.
*
* Reads the `channels` section from ~/.super-multica/credentials.json5.
*/
import { credentialManager } from "../agent/credentials.js";
import type { ChannelsConfig } from "./types.js";
/** Load channels config from credentials.json5 `channels` section */
export function loadChannelsConfig(): ChannelsConfig {
const channels = credentialManager.getChannelsConfig();
const keys = Object.keys(channels);
if (keys.length === 0) {
console.log("[Channels] No channels configured in credentials.json5, skipping");
return {};
}
console.log(`[Channels] Loaded config for: ${keys.join(", ")}`);
return channels;
}

View file

@ -0,0 +1,93 @@
/**
* Inbound message debouncer batches rapid-fire messages from the same
* conversation into a single agent.write() call.
*
* When a message arrives:
* 1. Start a timer (delayMs, default 500ms)
* 2. If another message from the same conversationId arrives before timer fires,
* reset the timer and append the text
* 3. If maxWaitMs (default 2000ms) has elapsed since the first message,
* fire immediately regardless of timer
* 4. When timer fires, call the flush callback with all accumulated text
*
* This prevents rapid-fire messages from triggering multiple separate Agent
* runs. Instead, messages sent within a short window are concatenated with
* newlines and dispatched as one combined prompt.
*
* Inspired by OpenClaw's createInboundDebouncer pattern.
* @see docs/channel/openclaw-research.md Section 7.3 message preprocessing
*/
interface PendingBatch {
/** Accumulated message texts in arrival order */
texts: string[];
/** Timestamp of the first message in this batch */
firstArrival: number;
/** Idle timer — fires when no new message arrives within delayMs */
timer: ReturnType<typeof setTimeout>;
}
export class InboundDebouncer {
private pending = new Map<string, PendingBatch>();
/**
* @param flushFn - Called when a batch is ready; receives conversationId and combined text
* @param delayMs - Idle window: how long to wait after each message before flushing (default 500ms)
* @param maxWaitMs - Hard cap: max time since first message before force-flushing (default 2000ms)
*/
constructor(
private readonly flushFn: (conversationId: string, combinedText: string) => void,
private readonly delayMs = 500,
private readonly maxWaitMs = 2000,
) {}
/** Add a message to the buffer. May trigger an immediate flush if maxWaitMs exceeded. */
push(conversationId: string, text: string): void {
const existing = this.pending.get(conversationId);
if (existing) {
// Append to existing batch, reset idle timer
existing.texts.push(text);
clearTimeout(existing.timer);
// Check hard cap: if we've been buffering too long, flush now
const elapsed = Date.now() - existing.firstArrival;
if (elapsed >= this.maxWaitMs) {
this.flush(conversationId);
return;
}
// Reset idle timer
existing.timer = setTimeout(() => this.flush(conversationId), this.delayMs);
} else {
// Start a new batch
const timer = setTimeout(() => this.flush(conversationId), this.delayMs);
this.pending.set(conversationId, {
texts: [text],
firstArrival: Date.now(),
timer,
});
}
}
/** Flush all pending messages for a conversation, invoking the flush callback */
private flush(conversationId: string): void {
const batch = this.pending.get(conversationId);
if (!batch) return;
clearTimeout(batch.timer);
this.pending.delete(conversationId);
// Join multiple messages with newlines so the Agent sees them as one prompt
const combined = batch.texts.join("\n");
this.flushFn(conversationId, combined);
}
/** Clean up all pending timers (call on shutdown) */
dispose(): void {
for (const batch of this.pending.values()) {
clearTimeout(batch.timer);
}
this.pending.clear();
}
}

25
src/channels/index.ts Normal file
View file

@ -0,0 +1,25 @@
/**
* Channel system bootstrap and exports.
*/
export { ChannelManager } from "./manager.js";
export { registerChannel, getChannel, listChannels } from "./registry.js";
export { loadChannelsConfig } from "./config.js";
export type {
ChannelPlugin,
ChannelMessage,
DeliveryContext,
ChannelAccountState,
ChannelsConfig,
} from "./types.js";
// Built-in channel plugins
import { registerChannel } from "./registry.js";
import { telegramChannel } from "./plugins/telegram.js";
/** Register all built-in channel plugins. Call once at startup. */
export function initChannels(): void {
registerChannel(telegramChannel);
// Future: registerChannel(discordChannel);
// Future: registerChannel(feishuChannel);
}

503
src/channels/manager.ts Normal file
View file

@ -0,0 +1,503 @@
/**
* Channel Manager bridges messaging channels to the Hub's agent.
*
* Design: One Hub, one Agent. Channels are just alternative input/output surfaces.
* - Incoming: channel message agent.write(text) (same as desktop/gateway)
* - Outgoing: agent reply check lastRoute forward to originating channel
*
* Uses "last route" pattern: whoever sent the last message gets the reply.
*
* @see docs/channels/README.md Channel system overview
* @see docs/channels/media-handling.md Media processing pipeline
* @see docs/message-paths.md All three message paths (Desktop / Web / Channel)
*/
import type { Hub } from "../hub/hub.js";
import type {
ChannelPlugin,
ChannelMessage,
ChannelAccountState,
DeliveryContext,
} from "./types.js";
import { listChannels } from "./registry.js";
import { loadChannelsConfig } from "./config.js";
import { MessageAggregator, DEFAULT_CHUNKER_CONFIG } from "../hub/message-aggregator.js";
import type { AsyncAgent } from "../agent/async-agent.js";
import { transcribeAudio } from "../media/transcribe.js";
import { describeImage } from "../media/describe-image.js";
import { describeVideo } from "../media/describe-video.js";
import { InboundDebouncer } from "./inbound-debouncer.js";
interface AccountHandle {
channelId: string;
accountId: string;
abortController: AbortController;
state: ChannelAccountState;
}
/** Tracks where the last message came from, so replies go back there. */
interface LastRoute {
plugin: ChannelPlugin;
deliveryCtx: DeliveryContext;
}
export class ChannelManager {
private readonly hub: Hub;
/** Running accounts keyed by "channelId:accountId" */
private readonly accounts = new Map<string, AccountHandle>();
/** Where the last channel message came from (reply target) */
private lastRoute: LastRoute | null = null;
/** Unsubscribe function for the agent subscriber */
private agentUnsubscribe: (() => void) | null = null;
/** Session ID of the currently subscribed agent (for stale detection) */
private subscribedAgentId: string | null = null;
/** Current aggregator for buffering streaming responses */
private aggregator: MessageAggregator | null = null;
/** Typing indicator interval (repeats every 5s to keep Telegram typing visible) */
private typingTimer: ReturnType<typeof setInterval> | null = null;
/**
* Inbound message debouncer batches rapid-fire messages from the same
* conversation into a single agent.write() call.
* Initialized lazily on first message; uses the current agent reference.
*/
private debouncer: InboundDebouncer | null = null;
constructor(hub: Hub) {
this.hub = hub;
}
/** Start all configured channel accounts */
async startAll(): Promise<void> {
console.log("[Channels] Starting all channels...");
const config = loadChannelsConfig();
const plugins = listChannels();
if (plugins.length === 0) {
console.log("[Channels] No plugins registered");
return;
}
for (const plugin of plugins) {
const accountIds = plugin.config.listAccountIds(config);
if (accountIds.length === 0) {
console.log(`[Channels] Skipping ${plugin.id} (not configured)`);
continue;
}
for (const accountId of accountIds) {
const account = plugin.config.resolveAccount(config, accountId);
if (!account || !plugin.config.isConfigured(account)) {
console.log(`[Channels] Skipping ${plugin.id}:${accountId} (incomplete config)`);
continue;
}
await this.startAccount(plugin.id, accountId, account);
}
}
// Try to subscribe eagerly; if no agent yet, routeIncoming will retry lazily
this.ensureSubscribed();
}
/**
* Start a specific channel account.
* Public so the desktop IPC layer can call it after saving config.
*/
async startAccount(
channelId: string,
accountId: string,
accountConfig: Record<string, unknown>,
): Promise<void> {
const key = `${channelId}:${accountId}`;
if (this.accounts.has(key)) {
console.warn(`[Channels] ${key} is already running`);
return;
}
const plugin = listChannels().find((p) => p.id === channelId);
if (!plugin) {
console.error(`[Channels] Plugin "${channelId}" not found`);
return;
}
const abortController = new AbortController();
const handle: AccountHandle = {
channelId,
accountId,
abortController,
state: { channelId, accountId, status: "starting" },
};
this.accounts.set(key, handle);
console.log(`[Channels] Starting ${key}`);
try {
const startPromise = plugin.gateway.start(
accountId,
accountConfig,
(message: ChannelMessage) => {
this.routeIncoming(plugin, accountId, message);
},
abortController.signal,
);
await Promise.race([
startPromise,
new Promise<void>((resolve) => setTimeout(resolve, 3000)),
]);
handle.state = { channelId, accountId, status: "running" };
console.log(`[Channels] ${key} is running`);
} catch (err) {
const errorMsg = err instanceof Error ? err.message : String(err);
handle.state = { channelId, accountId, status: "error", error: errorMsg };
console.error(`[Channels] Failed to start ${key}: ${errorMsg}`);
}
}
/** Get the Hub's current agent (the first active one) */
private getHubAgent(): AsyncAgent | undefined {
const agentIds = this.hub.listAgents();
if (agentIds.length === 0) {
console.warn("[Channels] No agent available in Hub");
return undefined;
}
const agent = this.hub.getAgent(agentIds[0]!);
return agent;
}
/**
* Ensure we're subscribed to the current Hub agent for outbound routing.
* Lazily called from routeIncoming handles agent not yet available at
* startup and re-subscribes if the agent has changed.
*/
private ensureSubscribed(): void {
const agent = this.getHubAgent();
if (!agent) return;
// Already subscribed to the current agent
if (this.subscribedAgentId === agent.sessionId) return;
// Unsubscribe from stale agent
if (this.agentUnsubscribe) {
console.log(`[Channels] Agent changed, re-subscribing (${this.subscribedAgentId}${agent.sessionId})`);
this.agentUnsubscribe();
}
console.log(`[Channels] Subscribing to agent ${agent.sessionId} for outbound routing`);
this.subscribedAgentId = agent.sessionId;
this.agentUnsubscribe = agent.subscribe((event) => {
// No active channel route — skip (reply goes to desktop/gateway only)
if (!this.lastRoute) return;
// Handle agent errors — notify the channel user
if (event.type === "agent_error") {
this.stopTyping();
this.removeAckReaction();
const errorMsg = (event as { message?: string }).message ?? "Unknown error";
console.error(`[Channels] Agent error: ${errorMsg}`);
const route = this.lastRoute;
if (route) {
void route.plugin.outbound.sendText(route.deliveryCtx, `[Error] ${errorMsg}`).catch((err) => {
console.error(`[Channels] Failed to send error to channel: ${err}`);
});
}
return;
}
const maybeMessage = (event as { message?: { role?: string } }).message;
const role = maybeMessage?.role;
// Only forward assistant message events
if (event.type === "message_start" || event.type === "message_update" || event.type === "message_end") {
if (role !== "assistant") return;
} else {
// Non-message events (tool_execution etc.) — skip for channels
return;
}
// Ensure aggregator exists for this response
if (event.type === "message_start") {
this.createAggregator();
}
if (this.aggregator) {
this.aggregator.handleEvent(event);
}
// Clean up after response complete
if (event.type === "message_end" && role === "assistant") {
this.stopTyping();
this.removeAckReaction();
this.aggregator = null;
}
});
}
/** Create a fresh aggregator wired to the current lastRoute */
private createAggregator(): void {
const route = this.lastRoute;
if (!route) return;
const { plugin, deliveryCtx } = route;
const chunkerConfig = plugin.chunkerConfig ?? DEFAULT_CHUNKER_CONFIG;
this.aggregator = new MessageAggregator(
chunkerConfig,
async (block) => {
try {
console.log(`[Channels] Sending block ${block.index} (${block.text.length} chars${block.isFinal ? ", final" : ""}) → ${deliveryCtx.channel}:${deliveryCtx.conversationId}`);
if (block.index === 0) {
await plugin.outbound.replyText(deliveryCtx, block.text);
} else {
await plugin.outbound.sendText(deliveryCtx, block.text);
}
} catch (err) {
console.error(`[Channels] Failed to send reply: ${err}`);
}
},
() => {},
);
}
/**
* Incoming channel message update lastRoute forward to Hub's agent.
*/
private routeIncoming(
plugin: ChannelPlugin,
accountId: string,
message: ChannelMessage,
): void {
const { conversationId, senderId, text, messageId } = message;
console.log(
`[Channels] Incoming: channel=${plugin.id} conv=${conversationId} sender=${senderId} text="${text.slice(0, 50)}${text.length > 50 ? "..." : ""}"`,
);
const agent = this.getHubAgent();
if (!agent) {
console.error("[Channels] No agent available, dropping message");
return;
}
// Ensure we're subscribed to this agent (handles late startup / agent change)
this.ensureSubscribed();
// Update last route — replies will go back here
this.lastRoute = {
plugin,
deliveryCtx: {
channel: plugin.id,
accountId,
conversationId,
replyToMessageId: messageId,
},
};
console.log(`[Channels] lastRoute updated → ${plugin.id}:${conversationId}`);
console.log(`[Channels] Forwarding to agent ${agent.sessionId}`);
// Show typing indicator and ACK reaction while agent processes
this.startTyping();
this.addAckReaction();
// Handle media messages (processed async, then fed through debouncer)
if (message.media && plugin.downloadMedia) {
void this.routeMedia(plugin, accountId, message, agent);
} else {
// Text messages go through debouncer to batch rapid-fire sends
this.getDebouncer(agent).push(conversationId, text);
}
}
/**
* Download media file, process it (transcribe/describe), and forward
* the resulting text through the debouncer to the agent.
* Media results are also debounced so that a rapid "photo + text" combo
* from the same conversation gets batched into one agent prompt.
*/
private async routeMedia(
plugin: ChannelPlugin,
accountId: string,
message: ChannelMessage,
agent: AsyncAgent,
): Promise<void> {
const media = message.media!;
const debouncer = this.getDebouncer(agent);
try {
const filePath = await plugin.downloadMedia!(media.fileId, accountId);
if (media.type === "image") {
// Images: describe via Vision API before reaching agent
const description = await describeImage(filePath);
if (description) {
const parts = ["[Image]", `Description: ${description}`];
if (media.caption) parts.push(`Caption: ${media.caption}`);
debouncer.push(message.conversationId, parts.join("\n"));
} else {
// No API key — fall back to file path
const parts = ["[image message received]", `File: ${filePath}`];
if (media.caption) parts.push(`Caption: ${media.caption}`);
debouncer.push(message.conversationId, parts.join("\n"));
}
} else if (media.type === "audio") {
// Audio: transcribe via Whisper API before reaching agent
const transcript = await transcribeAudio(filePath);
if (transcript) {
const parts = ["[Voice Message]", `Transcript: ${transcript}`];
if (media.caption) parts.push(`Caption: ${media.caption}`);
debouncer.push(message.conversationId, parts.join("\n"));
} else {
// No API key configured — fall back to file path
const parts = ["[audio message received]", `File: ${filePath}`];
if (media.mimeType) parts.push(`Type: ${media.mimeType}`);
if (media.duration) parts.push(`Duration: ${media.duration}s`);
if (media.caption) parts.push(`Caption: ${media.caption}`);
debouncer.push(message.conversationId, parts.join("\n"));
}
} else if (media.type === "video") {
// Video: extract frame + describe via Vision API
const description = await describeVideo(filePath);
if (description) {
const parts = ["[Video]", `Description: ${description}`];
if (media.duration) parts.push(`Duration: ${media.duration}s`);
if (media.caption) parts.push(`Caption: ${media.caption}`);
debouncer.push(message.conversationId, parts.join("\n"));
} else {
// ffmpeg unavailable or no API key — fall back to file path
const parts = ["[video message received]", `File: ${filePath}`];
if (media.mimeType) parts.push(`Type: ${media.mimeType}`);
if (media.duration) parts.push(`Duration: ${media.duration}s`);
if (media.caption) parts.push(`Caption: ${media.caption}`);
debouncer.push(message.conversationId, parts.join("\n"));
}
} else {
// Document: tell agent the file path
const parts: string[] = [];
parts.push(`[document message received]`);
parts.push(`File: ${filePath}`);
if (media.mimeType) parts.push(`Type: ${media.mimeType}`);
if (media.caption) parts.push(`Caption: ${media.caption}`);
debouncer.push(message.conversationId, parts.join("\n"));
}
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[Channels] Failed to process media: ${msg}`);
debouncer.push(message.conversationId, message.text || `[Failed to process ${media.type}]`);
}
}
/**
* Get or create the inbound debouncer, wired to the given agent.
* The debouncer batches rapid-fire messages by conversationId, then
* calls agent.write() once with the combined text.
*/
private getDebouncer(agent: AsyncAgent): InboundDebouncer {
if (!this.debouncer) {
this.debouncer = new InboundDebouncer(
(_conversationId, combinedText) => {
console.log(`[Channels] Debouncer flushing ${combinedText.length} chars to agent`);
agent.write(combinedText);
},
);
}
return this.debouncer;
}
/** Add 👀 reaction to acknowledge message receipt */
private addAckReaction(): void {
const route = this.lastRoute;
if (!route?.plugin.outbound.addReaction) return;
void route.plugin.outbound.addReaction(route.deliveryCtx, "👀").catch(() => {});
}
/** Remove ACK reaction when processing completes */
private removeAckReaction(): void {
const route = this.lastRoute;
if (!route?.plugin.outbound.removeReaction) return;
void route.plugin.outbound.removeReaction(route.deliveryCtx).catch(() => {});
}
/** Start sending typing indicators (repeats every 5s until stopped) */
private startTyping(): void {
this.stopTyping();
const route = this.lastRoute;
if (!route?.plugin.outbound.sendTyping) return;
const send = () => route.plugin.outbound.sendTyping!(route.deliveryCtx).catch(() => {});
void send();
this.typingTimer = setInterval(send, 5000);
}
/** Stop typing indicator interval */
private stopTyping(): void {
if (this.typingTimer) {
clearInterval(this.typingTimer);
this.typingTimer = null;
}
}
/**
* Stop a specific channel account.
* Public so the desktop IPC layer can call it when removing config.
* Cleans up typing timer, debouncer, aggregator, and lastRoute if they
* belong to this account.
*/
stopAccount(channelId: string, accountId: string): void {
const key = `${channelId}:${accountId}`;
const handle = this.accounts.get(key);
if (!handle) return;
// Clean up shared resources if they target this account
if (this.lastRoute && this.lastRoute.plugin.id === channelId && this.lastRoute.deliveryCtx.accountId === accountId) {
this.stopTyping();
this.lastRoute = null;
this.aggregator = null;
}
handle.abortController.abort();
handle.state = { ...handle.state, status: "stopped" };
this.accounts.delete(key);
// Dispose debouncer if no accounts remain
if (this.accounts.size === 0 && this.debouncer) {
this.debouncer.dispose();
this.debouncer = null;
}
console.log(`[Channels] Stopped ${key}`);
}
/** Stop all running channel accounts */
stopAll(): void {
console.log("[Channels] Stopping all channels...");
this.stopTyping();
this.debouncer?.dispose();
this.debouncer = null;
if (this.agentUnsubscribe) {
this.agentUnsubscribe();
this.agentUnsubscribe = null;
}
for (const [key, handle] of this.accounts) {
handle.abortController.abort();
handle.state = { ...handle.state, status: "stopped" };
console.log(`[Channels] Stopped ${key}`);
}
this.accounts.clear();
this.lastRoute = null;
this.aggregator = null;
}
/** Clear the last route (e.g. when desktop user sends a message) */
clearLastRoute(): void {
if (this.lastRoute) {
this.stopTyping();
console.log("[Channels] lastRoute cleared (non-channel message received)");
this.lastRoute = null;
}
}
/** Get status of all accounts */
listAccountStates(): ChannelAccountState[] {
return Array.from(this.accounts.values()).map((h) => ({ ...h.state }));
}
}

View file

@ -0,0 +1,81 @@
/**
* Markdown Telegram HTML converter.
*
* Telegram supports a subset of HTML:
* <b>, <i>, <u>, <s>, <code>, <pre>, <a href="...">, <blockquote>
*
* Strategy:
* 1. Extract code blocks and inline code (protect from further processing)
* 2. Escape HTML entities in remaining text
* 3. Convert Markdown syntax to HTML tags
* 4. Restore code blocks
*/
/** Escape HTML special characters */
function escapeHtml(text: string): string {
return text
.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;");
}
/**
* Convert Markdown text to Telegram-compatible HTML.
* Handles: bold, italic, strikethrough, inline code, code blocks, links, blockquotes.
*/
export function markdownToTelegramHtml(markdown: string): string {
// Placeholder system: replace code blocks/inline code with placeholders,
// process markdown on the rest, then restore.
const placeholders: string[] = [];
const placeholder = (content: string): string => {
const idx = placeholders.length;
placeholders.push(content);
return `\x00PH${idx}\x00`;
};
let text = markdown;
// 1. Fenced code blocks: ```lang\n...\n```
text = text.replace(/```(\w*)\n([\s\S]*?)```/g, (_match, lang: string, code: string) => {
const escaped = escapeHtml(code.replace(/\n$/, ""));
const langAttr = lang ? ` class="language-${escapeHtml(lang)}"` : "";
return placeholder(`<pre><code${langAttr}>${escaped}</code></pre>`);
});
// 2. Inline code: `...`
text = text.replace(/`([^`\n]+)`/g, (_match, code: string) => {
return placeholder(`<code>${escapeHtml(code)}</code>`);
});
// 3. Escape HTML in remaining text
text = escapeHtml(text);
// 4. Links: [text](url) — escape quotes in URL to prevent attribute breakout
text = text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_m, label: string, url: string) =>
`<a href="${url.replace(/"/g, "&quot;")}">${label}</a>`,
);
// 5. Bold: **text** or __text__
text = text.replace(/\*\*(.+?)\*\*/g, "<b>$1</b>");
text = text.replace(/__(.+?)__/g, "<b>$1</b>");
// 6. Italic: *text* or _text_ (but not inside words with underscores)
text = text.replace(/(?<!\w)\*(?!\s)(.+?)(?<!\s)\*(?!\w)/g, "<i>$1</i>");
text = text.replace(/(?<!\w)_(?!\s)(.+?)(?<!\s)_(?!\w)/g, "<i>$1</i>");
// 7. Strikethrough: ~~text~~
text = text.replace(/~~(.+?)~~/g, "<s>$1</s>");
// 8. Blockquotes: > text (at line start)
text = text.replace(/^&gt; (.+)$/gm, "<blockquote>$1</blockquote>");
// Merge adjacent blockquotes
text = text.replace(/<\/blockquote>\n<blockquote>/g, "\n");
// 9. Headings: strip # markers, make bold
text = text.replace(/^#{1,6}\s+(.+)$/gm, "<b>$1</b>");
// Restore placeholders
text = text.replace(/\x00PH(\d+)\x00/g, (_match, idx: string) => placeholders[Number(idx)]!);
return text;
}

View file

@ -0,0 +1,348 @@
/**
* Telegram channel plugin.
*
* Uses grammy to connect to Telegram Bot API via long polling.
* - Private chats: all messages are processed
* - Group chats: only messages that @mention the bot or reply to the bot
*
* @see docs/channels/README.md Channel system overview
* @see docs/channels/media-handling.md Media processing pipeline
*/
import { writeFile, mkdir } from "node:fs/promises";
import { join, extname } from "node:path";
import { v7 as uuidv7 } from "uuid";
import { Bot, GrammyError } from "grammy";
import type { ChannelPlugin, ChannelMessage, ChannelConfigAdapter, ChannelsConfig, DeliveryContext } from "../types.js";
import { markdownToTelegramHtml } from "./telegram-format.js";
import { MEDIA_CACHE_DIR } from "../../shared/paths.js";
/** Telegram account config shape */
interface TelegramAccountConfig {
botToken: string;
}
/** Keep bot instances per account for outbound use */
const bots = new Map<string, Bot>();
/** Check if a GrammyError is an HTML parse failure */
function isParseError(err: unknown): boolean {
return err instanceof GrammyError && err.description.includes("can't parse entities");
}
/** Send a message with HTML formatting, fallback to plain text on parse error */
async function sendFormatted(
bot: Bot,
chatId: number,
text: string,
extra?: Record<string, unknown>,
): Promise<void> {
const html = markdownToTelegramHtml(text);
try {
await bot.api.sendMessage(chatId, html, { ...extra, parse_mode: "HTML" });
} catch (err) {
if (isParseError(err)) {
console.warn("[Telegram] HTML parse failed, retrying as plain text");
await bot.api.sendMessage(chatId, text, extra);
} else {
throw err;
}
}
}
export const telegramChannel: ChannelPlugin = {
id: "telegram",
meta: {
name: "Telegram",
description: "Telegram bot integration via long polling",
},
chunkerConfig: {
minChars: 3800, // Buffer the full response; only chunk when approaching platform limit
maxChars: 4000, // Telegram API limit: 4096; leave room for HTML formatting overhead
breakPreference: "paragraph",
},
config: {
listAccountIds(config: ChannelsConfig): string[] {
const section = config["telegram"];
return section ? Object.keys(section) : [];
},
resolveAccount(config: ChannelsConfig, accountId: string): Record<string, unknown> | undefined {
return config["telegram"]?.[accountId];
},
isConfigured(account: Record<string, unknown>): boolean {
return Boolean((account as unknown as TelegramAccountConfig).botToken);
},
} satisfies ChannelConfigAdapter,
gateway: {
async start(
accountId: string,
config: Record<string, unknown>,
onMessage: (message: ChannelMessage) => void,
signal: AbortSignal,
): Promise<void> {
const { botToken } = config as unknown as TelegramAccountConfig;
const bot = new Bot(botToken);
bots.set(accountId, bot);
// Get bot info for mention/reply detection
const botInfo = await bot.api.getMe();
const botId = botInfo.id;
const botUsername = botInfo.username;
console.log(`[Telegram] Starting bot: @${botUsername} (id=${botId})`);
// ── Sequentialize middleware ──
// Ensures updates from the same chat are processed one at a time,
// preventing race conditions on shared state (e.g. ChannelManager.lastRoute).
// Grammy processes updates concurrently by default — without this,
// two messages arriving near-simultaneously could interleave.
// Lightweight alternative to @grammyjs/runner's sequentialize().
// @see docs/channel/openclaw-research.md — Grammy middleware pipeline
const chatQueues = new Map<string, Promise<void>>();
bot.use(async (ctx, next) => {
const chatId = ctx.chat?.id;
if (!chatId) return next();
const key = String(chatId);
const prev = chatQueues.get(key) ?? Promise.resolve();
// Chain this handler onto the per-chat queue
const current = prev.then(() => next()).catch(() => {});
chatQueues.set(key, current);
await current;
// Clean up resolved entries to prevent memory leak
if (chatQueues.get(key) === current) {
chatQueues.delete(key);
}
});
// Handle text messages
bot.on("message:text", (ctx) => {
const msg = ctx.message;
const isGroup = msg.chat.type === "group" || msg.chat.type === "supergroup";
// In groups, only respond if bot is mentioned or replied to
if (isGroup) {
const isMentioned = msg.entities?.some(
(e) =>
e.type === "mention" &&
msg.text.substring(e.offset, e.offset + e.length).toLowerCase() === `@${botUsername?.toLowerCase()}`,
);
const isReplyToBot = msg.reply_to_message?.from?.id === botId;
if (!isMentioned && !isReplyToBot) {
return; // Ignore group messages not directed at bot
}
console.log(`[Telegram] Received message: chatId=${msg.chat.id} from=${msg.from?.id} type=group text="${msg.text.slice(0, 50)}"`);
} else {
console.log(`[Telegram] Received message: chatId=${msg.chat.id} from=${msg.from?.id} type=direct text="${msg.text.slice(0, 50)}"`);
}
// Strip @mention from text for cleaner agent input
let text = msg.text;
if (botUsername) {
text = text.replace(new RegExp(`@${botUsername}\\s*`, "gi"), "").trim();
}
if (!text) return;
onMessage({
messageId: String(msg.message_id),
conversationId: String(msg.chat.id),
senderId: String(msg.from?.id ?? "unknown"),
text,
chatType: isGroup ? "group" : "direct",
});
});
// Handle media messages (voice, audio, photo, video, document)
const mediaTypes = [
{ filter: "message:voice" as const, getMedia: (msg: any) => ({
type: "audio" as const,
fileId: msg.voice.file_id as string,
mimeType: msg.voice.mime_type as string | undefined,
duration: msg.voice.duration as number | undefined,
})},
{ filter: "message:audio" as const, getMedia: (msg: any) => ({
type: "audio" as const,
fileId: msg.audio.file_id as string,
mimeType: msg.audio.mime_type as string | undefined,
duration: msg.audio.duration as number | undefined,
})},
{ filter: "message:photo" as const, getMedia: (msg: any) => {
// Pick the largest photo size (last in array)
const photos = msg.photo as Array<{ file_id: string }>;
const largest = photos[photos.length - 1]!;
return {
type: "image" as const,
fileId: largest.file_id,
mimeType: "image/jpeg",
};
}},
{ filter: "message:video" as const, getMedia: (msg: any) => ({
type: "video" as const,
fileId: msg.video.file_id as string,
mimeType: msg.video.mime_type as string | undefined,
duration: msg.video.duration as number | undefined,
})},
{ filter: "message:document" as const, getMedia: (msg: any) => ({
type: "document" as const,
fileId: msg.document.file_id as string,
mimeType: msg.document.mime_type as string | undefined,
})},
] as const;
for (const { filter, getMedia } of mediaTypes) {
bot.on(filter, (ctx) => {
const msg = ctx.message;
const isGroup = msg.chat.type === "group" || msg.chat.type === "supergroup";
if (isGroup) {
const isReplyToBot = msg.reply_to_message?.from?.id === botId;
const caption = (msg as any).caption as string | undefined;
const isMentionedInCaption = caption && botUsername
? caption.toLowerCase().includes(`@${botUsername.toLowerCase()}`)
: false;
if (!isReplyToBot && !isMentionedInCaption) return;
}
const media = getMedia(msg);
const caption = (msg as any).caption as string | undefined;
console.log(`[Telegram] Received ${media.type}: chatId=${msg.chat.id} from=${msg.from?.id} fileId=${media.fileId}`);
onMessage({
messageId: String(msg.message_id),
conversationId: String(msg.chat.id),
senderId: String(msg.from?.id ?? "unknown"),
text: caption ?? "",
chatType: isGroup ? "group" : "direct",
media: {
type: media.type,
fileId: media.fileId,
mimeType: media.mimeType,
duration: (media as any).duration,
caption,
},
});
});
}
// Graceful shutdown on abort
signal.addEventListener("abort", () => {
console.log("[Telegram] Bot stopped");
bot.stop();
bots.delete(accountId);
});
// Start long polling (fire-and-forget, errors are caught here)
console.log("[Telegram] Bot is polling for messages");
bot.start({
onStart: () => {
// Already logged above
},
}).catch((err: unknown) => {
const msg = err instanceof Error ? err.message : String(err);
if (msg.includes("409") || msg.includes("Conflict")) {
console.error(`[Telegram] Bot conflict: another instance is already polling with this token. Stop the other process and restart.`);
} else {
console.error(`[Telegram] Bot polling error: ${msg}`);
}
bots.delete(accountId);
});
},
},
outbound: {
async sendText(ctx: DeliveryContext, text: string): Promise<void> {
const bot = bots.get(ctx.accountId);
if (!bot) throw new Error(`No Telegram bot for account ${ctx.accountId}`);
console.log(`[Telegram] Sending message to chatId=${ctx.conversationId}`);
await sendFormatted(bot, Number(ctx.conversationId), text);
},
async replyText(ctx: DeliveryContext, text: string): Promise<void> {
const bot = bots.get(ctx.accountId);
if (!bot) throw new Error(`No Telegram bot for account ${ctx.accountId}`);
if (ctx.replyToMessageId) {
console.log(`[Telegram] Sending reply to chatId=${ctx.conversationId} (replyTo=${ctx.replyToMessageId})`);
await sendFormatted(bot, Number(ctx.conversationId), text, {
reply_to_message_id: Number(ctx.replyToMessageId),
});
} else {
await telegramChannel.outbound.sendText(ctx, text);
}
},
async sendTyping(ctx: DeliveryContext): Promise<void> {
const bot = bots.get(ctx.accountId);
if (!bot) return;
try {
await bot.api.sendChatAction(Number(ctx.conversationId), "typing");
} catch {
// Best-effort — typing indicator failure is not critical
}
},
async addReaction(ctx: DeliveryContext, emoji: string): Promise<void> {
const bot = bots.get(ctx.accountId);
if (!bot || !ctx.replyToMessageId) return;
try {
await bot.api.setMessageReaction(
Number(ctx.conversationId),
Number(ctx.replyToMessageId),
// Grammy expects a specific emoji union type; cast since our interface accepts any string
[{ type: "emoji", emoji } as unknown as { type: "emoji"; emoji: "👀" }],
);
} catch {
// Best-effort — reaction failure is not critical
// (e.g. bot may lack permission in some groups)
}
},
async removeReaction(ctx: DeliveryContext): Promise<void> {
const bot = bots.get(ctx.accountId);
if (!bot || !ctx.replyToMessageId) return;
try {
await bot.api.setMessageReaction(
Number(ctx.conversationId),
Number(ctx.replyToMessageId),
[], // Empty array clears all bot reactions
);
} catch {
// Best-effort
}
},
},
async downloadMedia(fileId: string, accountId: string): Promise<string> {
const bot = bots.get(accountId);
if (!bot) throw new Error(`No Telegram bot for account ${accountId}`);
const file = await bot.api.getFile(fileId);
const filePath = file.file_path;
if (!filePath) throw new Error(`Telegram returned no file_path for fileId=${fileId}`);
const url = `https://api.telegram.org/file/bot${bot.token}/${filePath}`;
const ext = extname(filePath) || ".bin";
const localPath = join(MEDIA_CACHE_DIR, `${uuidv7()}${ext}`);
await mkdir(MEDIA_CACHE_DIR, { recursive: true });
const res = await fetch(url);
if (!res.ok) throw new Error(`Failed to download file: HTTP ${res.status}`);
const buffer = Buffer.from(await res.arrayBuffer());
await writeFile(localPath, buffer);
console.log(`[Telegram] Downloaded media: ${filePath}${localPath}`);
return localPath;
},
};

31
src/channels/registry.ts Normal file
View file

@ -0,0 +1,31 @@
/**
* Channel plugin registry.
*
* Simple array + Map registry. Plugins are registered at startup
* via registerChannel() and looked up by ID.
*/
import type { ChannelPlugin } from "./types.js";
const plugins: ChannelPlugin[] = [];
const pluginMap = new Map<string, ChannelPlugin>();
/** Register a channel plugin. Throws if ID is already registered. */
export function registerChannel(plugin: ChannelPlugin): void {
if (pluginMap.has(plugin.id)) {
throw new Error(`Channel plugin "${plugin.id}" is already registered`);
}
plugins.push(plugin);
pluginMap.set(plugin.id, plugin);
console.log(`[Channels] Registered plugin: ${plugin.id}`);
}
/** Get a registered channel plugin by ID */
export function getChannel(id: string): ChannelPlugin | undefined {
return pluginMap.get(id);
}
/** List all registered channel plugins */
export function listChannels(): readonly ChannelPlugin[] {
return plugins;
}

163
src/channels/types.ts Normal file
View file

@ -0,0 +1,163 @@
/**
* Channel plugin system types.
*
* Each messaging platform (Telegram, Discord, Feishu, etc.) implements the
* ChannelPlugin interface with three adapters: config, gateway, outbound.
*
* @see docs/channels/README.md Channel system overview and plugin guide
*/
import type { BlockChunkerConfig } from "../hub/block-chunker.js";
// ─── Media Attachment ───
/** Media type for incoming channel attachments */
export type ChannelMediaType = "audio" | "image" | "video" | "document";
/** Media attachment from a channel message */
export interface ChannelMediaAttachment {
/** Media type */
type: ChannelMediaType;
/** Platform-specific file ID (used for download) */
fileId: string;
/** MIME type if known (e.g. "audio/ogg", "image/jpeg") */
mimeType?: string | undefined;
/** Duration in seconds (for audio/video) */
duration?: number | undefined;
/** Caption text attached to the media */
caption?: string | undefined;
}
// ─── Normalized Incoming Message ───
/** Platform-agnostic incoming message */
export interface ChannelMessage {
/** Unique message ID from the platform */
messageId: string;
/** Conversation ID (group ID or DM chat ID) */
conversationId: string;
/** Sender identifier on the platform */
senderId: string;
/** Plain text content */
text: string;
/** Chat type: "direct" (1:1) or "group" */
chatType: "direct" | "group";
/** Optional media attachment (voice, image, video, document) */
media?: ChannelMediaAttachment | undefined;
}
// ─── Delivery Context ───
/** Context for sending a reply back to a specific conversation */
export interface DeliveryContext {
/** Channel plugin ID (e.g. "telegram", "discord") */
channel: string;
/** Account identifier (supports multi-account per channel) */
accountId: string;
/** Target conversation ID */
conversationId: string;
/** Original message ID (for reply-style responses) */
replyToMessageId?: string | undefined;
}
// ─── Config Adapter ───
/** Resolves and validates channel credentials from the config file */
export interface ChannelConfigAdapter<TAccount = Record<string, unknown>> {
/** List all configured account IDs for this channel */
listAccountIds(config: ChannelsConfig): string[];
/** Resolve a specific account's config */
resolveAccount(config: ChannelsConfig, accountId: string): TAccount | undefined;
/** Check if a given account config has all required credentials */
isConfigured(account: TAccount): boolean;
}
// ─── Gateway Adapter ───
/** Manages the lifecycle of a channel account connection (receiving messages) */
export interface ChannelGatewayAdapter {
/**
* Start receiving messages for an account.
* Must respect the AbortSignal for graceful shutdown.
*/
start(
accountId: string,
config: Record<string, unknown>,
onMessage: (message: ChannelMessage) => void,
signal: AbortSignal,
): Promise<void>;
}
// ─── Outbound Adapter ───
/** Sends messages back to the platform */
export interface ChannelOutboundAdapter {
/** Send a text message to a conversation */
sendText(ctx: DeliveryContext, text: string): Promise<void>;
/** Reply to a specific message */
replyText(ctx: DeliveryContext, text: string): Promise<void>;
/** Send "typing" indicator (optional, not all platforms support it) */
sendTyping?(ctx: DeliveryContext): Promise<void>;
/**
* Add a reaction emoji to the incoming message (optional).
* Used for ACK feedback e.g. 👀 to signal "processing started".
*/
addReaction?(ctx: DeliveryContext, emoji: string): Promise<void>;
/**
* Remove reaction from the incoming message (optional).
* Called when processing completes to clear the ACK indicator.
*/
removeReaction?(ctx: DeliveryContext): Promise<void>;
}
// ─── Channel Plugin ───
/** The main plugin interface. Each channel implements this. */
export interface ChannelPlugin {
/** Unique channel identifier (e.g. "telegram", "discord", "feishu") */
readonly id: string;
/** Display metadata */
readonly meta: {
name: string;
description: string;
};
/** Optional chunker config override per channel */
readonly chunkerConfig?: BlockChunkerConfig | undefined;
/** Config resolution adapter */
readonly config: ChannelConfigAdapter;
/** Connection lifecycle adapter (receive messages) */
readonly gateway: ChannelGatewayAdapter;
/** Message sending adapter */
readonly outbound: ChannelOutboundAdapter;
/** Download a media file to local disk (optional, platform-specific) */
downloadMedia?(fileId: string, accountId: string): Promise<string>;
}
// ─── Channels Config File Shape ───
/**
* Shape of ~/.super-multica/channels.json5
*
* Each top-level key is a channel ID. Under it, each key is an account ID.
* Example:
* {
* telegram: { default: { botToken: "xxx" } },
* discord: { default: { botToken: "xxx" } },
* }
*/
export interface ChannelsConfig {
[channelId: string]: {
[accountId: string]: Record<string, unknown>;
} | undefined;
}
// ─── Account State ───
export type ChannelAccountStatus = "stopped" | "starting" | "running" | "error";
export interface ChannelAccountState {
channelId: string;
accountId: string;
status: ChannelAccountStatus;
error?: string | undefined;
}

View file

@ -68,6 +68,36 @@ function isInsideFence(text: string, position: number): boolean {
return detectFenceAt(text, position) !== null;
}
/** A line that looks like a Markdown table row (starts with |) */
function isTableRow(line: string): boolean {
return line.startsWith("|");
}
/**
* Check if position falls between two Markdown table rows.
* Prevents breaking a table in the middle table rows lose their header
* context when split across messages.
*/
function isInsideTable(text: string, position: number): boolean {
// Find the last non-empty line before position
let i = position - 1;
while (i >= 0 && text[i] === "\n") i--;
if (i < 0) return false;
let lineStart = i;
while (lineStart > 0 && text[lineStart - 1] !== "\n") lineStart--;
const prevLine = text.slice(lineStart, i + 1).trim();
// Find the first non-empty line at/after position
let j = position;
while (j < text.length && text[j] === "\n") j++;
if (j >= text.length) return false;
let lineEnd = j;
while (lineEnd < text.length && text[lineEnd] !== "\n") lineEnd++;
const nextLine = text.slice(j, lineEnd).trim();
return isTableRow(prevLine) && isTableRow(nextLine);
}
/**
* If a chunk ends inside an open fence, close it in the chunk
* and reopen it in the remainder.
@ -152,7 +182,7 @@ export class BlockChunker {
for (const breaker of breakers) {
const index = breaker(buffer, searchStart, searchEnd, bufLen);
if (index !== -1 && !isInsideFence(buffer, index)) {
if (index !== -1 && !isInsideFence(buffer, index) && !isInsideTable(buffer, index)) {
return index;
}
}

View file

@ -33,6 +33,7 @@ import { evaluateCommandSafety, requiresApproval } from "../agent/tools/exec-saf
import { addAllowlistEntry, recordAllowlistUse, matchAllowlist } from "../agent/tools/exec-allowlist.js";
import type { ExecApprovalCallback, ExecApprovalConfig, ApprovalResult, ExecApprovalRequest } from "../agent/tools/exec-approval-types.js";
import { readProfileConfig, writeProfileConfig } from "../agent/profile/storage.js";
import { ChannelManager, initChannels } from "../channels/index.js";
import { getCronService, shutdownCronService, executeCronJob } from "../cron/index.js";
import {
getLastHeartbeatEvent,
@ -65,6 +66,7 @@ export class Hub {
readonly deviceStore: DeviceStore;
private _onConfirmDevice: ((deviceId: string, agentId: string, meta?: DeviceMeta) => Promise<boolean>) | null = null;
private _stateChangeListeners: ((state: ConnectionState) => void)[] = [];
readonly channelManager: ChannelManager;
url: string;
readonly path: string;
readonly hubId: string;
@ -132,6 +134,17 @@ export class Hub {
this.client = this.createClient(this.url);
this.client.connect();
this.restoreAgents();
// Initialize channel plugin system
console.log("[Hub] Initializing channel system...");
initChannels();
this.channelManager = new ChannelManager(this);
void this.channelManager.startAll().then(() => {
console.log("[Hub] Channel system started");
}).catch((err) => {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[Hub] Channel system failed to start: ${msg}`);
});
}
/** Initialize cron service with executor */
@ -252,6 +265,7 @@ export class Hub {
const agent = this.agents.get(agentId);
if (agent && !agent.closed) {
this.agentSenders.set(agentId, msg.from);
this.channelManager.clearLastRoute();
agent.write(content);
} else {
console.warn(`[Hub] Agent not found or closed: ${agentId}`);
@ -401,12 +415,12 @@ export class Hub {
continue;
}
// Compaction events: forward with synthetic streamId (no stream tracking)
const isCompactionEvent =
item.type === "compaction_start" || item.type === "compaction_end";
if (isCompactionEvent) {
// Passthrough events: forward with synthetic streamId (no stream tracking)
const isPassthroughEvent =
item.type === "compaction_start" || item.type === "compaction_end" || item.type === "agent_error";
if (isPassthroughEvent) {
this.client.send(targetDeviceId, StreamAction, {
streamId: `compaction:${agent.sessionId}`,
streamId: `system:${agent.sessionId}`,
agentId: agent.sessionId,
event: item,
});
@ -693,6 +707,9 @@ export class Hub {
}
shutdown(): void {
// Stop all channel connections
this.channelManager.stopAll();
// Stop cron service
shutdownCronService();
this.heartbeatRunner?.stop();

View file

@ -0,0 +1,87 @@
/**
* Image description via OpenAI Vision API.
*
* Called by ChannelManager before the message reaches the Agent,
* so the Agent only ever sees a text description of the image.
*
* @see docs/channels/media-handling.md Media processing pipeline
*/
import { readFile, stat } from "node:fs/promises";
import { extname } from "node:path";
import { credentialManager } from "../agent/credentials.js";
/** Max image file size: 20MB (OpenAI API limit) */
const MAX_IMAGE_SIZE = 20 * 1024 * 1024;
/** Map file extension to MIME type for common image formats */
function mimeFromExt(filePath: string): string {
const ext = extname(filePath).toLowerCase();
switch (ext) {
case ".png": return "image/png";
case ".gif": return "image/gif";
case ".webp": return "image/webp";
default: return "image/jpeg";
}
}
/**
* Describe an image using OpenAI Vision API (gpt-4o-mini).
*
* @param filePath - Local path to the image file
* @returns Text description, or null if no API key configured
*/
export async function describeImage(filePath: string): Promise<string | null> {
const config = credentialManager.getLlmProviderConfig("openai");
const apiKey = config?.apiKey;
if (!apiKey) return null;
// Check file size to avoid OOM and API payload limits
const fileStat = await stat(filePath);
if (fileStat.size > MAX_IMAGE_SIZE) {
console.warn(`[DescribeImage] File too large (${(fileStat.size / 1024 / 1024).toFixed(1)}MB), skipping`);
return null;
}
const buffer = await readFile(filePath);
const base64 = buffer.toString("base64");
const mimeType = mimeFromExt(filePath);
const dataUrl = `data:${mimeType};base64,${base64}`;
const res = await fetch("https://api.openai.com/v1/chat/completions", {
method: "POST",
headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: "gpt-4o-mini",
messages: [
{
role: "user",
content: [
{
type: "text",
text: "Describe this image concisely. Focus on the main content and any text visible in the image.",
},
{
type: "image_url",
image_url: { url: dataUrl },
},
],
},
],
max_tokens: 500,
}),
});
if (!res.ok) {
const errText = await res.text().catch(() => "");
throw new Error(`Vision API error: HTTP ${res.status} ${errText}`);
}
const result = (await res.json()) as {
choices: Array<{ message: { content: string } }>;
};
return result.choices[0]?.message.content ?? null;
}

View file

@ -0,0 +1,52 @@
/**
* Video description via frame extraction + Vision API.
*
* Extracts the first frame using ffmpeg, then describes it
* with the same Vision API used for images.
*
* @see docs/channels/media-handling.md Media processing pipeline
*/
import { join } from "node:path";
import { execFile } from "node:child_process";
import { mkdir, unlink } from "node:fs/promises";
import { v7 as uuidv7 } from "uuid";
import { MEDIA_CACHE_DIR } from "../shared/paths.js";
import { describeImage } from "./describe-image.js";
/**
* Describe a video by extracting the first frame and passing it to Vision API.
*
* @param filePath - Local path to the video file
* @returns Text description, or null if ffmpeg unavailable or no API key
*/
export async function describeVideo(filePath: string): Promise<string | null> {
const framePath = join(MEDIA_CACHE_DIR, `${uuidv7()}.jpg`);
try {
// Ensure output directory exists
await mkdir(MEDIA_CACHE_DIR, { recursive: true });
// Extract first frame with ffmpeg
await new Promise<void>((resolve, reject) => {
execFile(
"ffmpeg",
["-i", filePath, "-vframes", "1", "-f", "image2", "-y", framePath],
{ timeout: 10000 },
(err) => (err ? reject(err) : resolve()),
);
});
// Describe the extracted frame
const description = await describeImage(framePath);
// Clean up the frame file
await unlink(framePath).catch(() => {});
return description;
} catch {
// ffmpeg not available or extraction failed
await unlink(framePath).catch(() => {});
return null;
}
}

151
src/media/transcribe.ts Normal file
View file

@ -0,0 +1,151 @@
/**
* Audio transcription local whisper first, OpenAI API fallback.
*
* Priority:
* 1. Local whisper/whisper-cli binary (free, no latency, offline)
* 2. OpenAI Whisper API (requires API key)
* 3. null (no provider available placeholder stays for Agent)
*
* Called by ChannelManager before the message reaches the Agent,
* so the Agent only ever sees text.
*
* @see docs/channels/media-handling.md Media processing pipeline and provider priority
*/
import { readFile, unlink } from "node:fs/promises";
import { basename, join } from "node:path";
import { execFile, execFileSync } from "node:child_process";
import { tmpdir } from "node:os";
import { credentialManager } from "../agent/credentials.js";
/** Cached path to local whisper binary, or false if not found */
let cachedWhisperBin: string | false | undefined;
/** Find local whisper binary in PATH */
function findWhisperBin(): string | false {
if (cachedWhisperBin !== undefined) return cachedWhisperBin;
for (const bin of ["whisper", "whisper-cli"]) {
try {
execFileSync("which", [bin], { stdio: "pipe" });
cachedWhisperBin = bin;
return bin;
} catch {
// not found, try next
}
}
cachedWhisperBin = false;
return false;
}
/**
* Transcribe audio using local whisper CLI.
*
* Runs: whisper "<file>" --model base --output_format txt --output_dir <tmpdir>
* Reads the generated .txt file and returns its content.
*/
async function transcribeLocal(whisperBin: string, filePath: string): Promise<string> {
const outDir = tmpdir();
await new Promise<void>((resolve, reject) => {
execFile(
whisperBin,
[filePath, "--model", "base", "--output_format", "txt", "--output_dir", outDir],
{ timeout: 120000 },
(err) => (err ? reject(err) : resolve()),
);
});
// whisper outputs <basename_without_ext>.txt
const name = basename(filePath).replace(/\.[^.]+$/, "");
const txtPath = join(outDir, `${name}.txt`);
const text = (await readFile(txtPath, "utf-8")).trim();
// Clean up the txt file
await unlink(txtPath).catch(() => {});
return text;
}
/**
* Transcribe audio using OpenAI Whisper API.
*/
async function transcribeApi(apiKey: string, filePath: string): Promise<string> {
const fileBuffer = await readFile(filePath);
const fileName = basename(filePath);
// Build multipart form data manually (no external dependency)
const boundary = `----FormBoundary${Date.now()}`;
const parts: Buffer[] = [];
// file field
parts.push(Buffer.from(
`--${boundary}\r\nContent-Disposition: form-data; name="file"; filename="${fileName}"\r\nContent-Type: application/octet-stream\r\n\r\n`,
));
parts.push(fileBuffer);
parts.push(Buffer.from("\r\n"));
// model field
parts.push(Buffer.from(
`--${boundary}\r\nContent-Disposition: form-data; name="model"\r\n\r\nwhisper-1\r\n`,
));
// closing boundary
parts.push(Buffer.from(`--${boundary}--\r\n`));
const body = Buffer.concat(parts);
const res = await fetch("https://api.openai.com/v1/audio/transcriptions", {
method: "POST",
headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": `multipart/form-data; boundary=${boundary}`,
},
body,
});
if (!res.ok) {
const errText = await res.text().catch(() => "");
throw new Error(`Whisper API error: HTTP ${res.status} ${errText}`);
}
const result = (await res.json()) as { text: string };
return result.text;
}
/**
* Transcribe an audio file.
*
* Priority: local whisper OpenAI API null.
*
* @param filePath - Local path to the audio file
* @returns Transcribed text, or null if no provider available
*/
export async function transcribeAudio(filePath: string): Promise<string | null> {
// 1. Try local whisper
const whisperBin = findWhisperBin();
if (whisperBin) {
try {
return await transcribeLocal(whisperBin, filePath);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[Transcribe] Local whisper failed: ${msg}, trying API...`);
}
}
// 2. Try OpenAI API
const config = credentialManager.getLlmProviderConfig("openai");
const apiKey = config?.apiKey;
if (apiKey) {
try {
return await transcribeApi(apiKey, filePath);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[Transcribe] Whisper API failed: ${msg}`);
}
}
// 3. No provider available
return null;
}

View file

@ -3,3 +3,6 @@ import { homedir } from "node:os";
/** Root data directory: ~/.super-multica */
export const DATA_DIR = join(homedir(), ".super-multica");
/** Cache directory for downloaded media files */
export const MEDIA_CACHE_DIR = join(DATA_DIR, "cache", "media");

View file