diff --git a/README.md b/README.md index c5c959c..8fd575e 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ [![npm](https://img.shields.io/npm/v/9router.svg)](https://www.npmjs.com/package/9router) [![Downloads](https://img.shields.io/npm/dm/9router.svg)](https://www.npmjs.com/package/9router) [![License](https://img.shields.io/npm/l/9router.svg)](https://github.com/decolua/9router/blob/main/LICENSE) + + decolua%2F9router | Trendshift [๐Ÿš€ Quick Start](#-quick-start) โ€ข [๐Ÿ’ก Features](#-key-features) โ€ข [๐Ÿ“– Setup](#-setup-guide) โ€ข [๐ŸŒ Website](https://9router.com) diff --git a/open-sse/config/providerModels.js b/open-sse/config/providerModels.js index 86043b4..58e2d11 100644 --- a/open-sse/config/providerModels.js +++ b/open-sse/config/providerModels.js @@ -105,6 +105,9 @@ export const PROVIDER_MODELS = { { id: "grok-code-fast-1", name: "Grok Code Fast 1" }, { id: "oswe-vscode-prime", name: "Raptor Mini" }, { id: "goldeneye-free-auto", name: "GoldenEye" }, + // GitHub Copilot - Embedding models + { id: "text-embedding-3-small", name: "Text Embedding 3 Small (GitHub)", type: "embedding" }, + { id: "text-embedding-3-large", name: "Text Embedding 3 Large (GitHub)", type: "embedding" }, ], kr: [ // Kiro AI // { id: "claude-opus-4.5", name: "Claude Opus 4.5" }, @@ -378,6 +381,7 @@ export const PROVIDER_MODELS = { { id: "mistral-large-latest", name: "Mistral Large 3" }, { id: "codestral-latest", name: "Codestral" }, { id: "mistral-medium-latest", name: "Mistral Medium 3" }, + { id: "mistral-embed", name: "Mistral Embed", type: "embedding" }, ], perplexity: [ { id: "sonar-pro", name: "Sonar Pro" }, @@ -388,11 +392,14 @@ export const PROVIDER_MODELS = { { id: "deepseek-ai/DeepSeek-R1", name: "DeepSeek R1" }, { id: "Qwen/Qwen3-235B-A22B", name: "Qwen3 235B" }, { id: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", name: "Llama 4 Maverick" }, + { id: "BAAI/bge-large-en-v1.5", name: "BGE Large EN v1.5", type: "embedding" }, + { id: 
"togethercomputer/m2-bert-80M-8k-retrieval", name: "M2 BERT 80M 8K", type: "embedding" }, ], fireworks: [ { id: "accounts/fireworks/models/deepseek-v3p1", name: "DeepSeek V3.1" }, { id: "accounts/fireworks/models/llama-v3p3-70b-instruct", name: "Llama 3.3 70B" }, { id: "accounts/fireworks/models/qwen3-235b-a22b", name: "Qwen3 235B" }, + { id: "nomic-ai/nomic-embed-text-v1.5", name: "Nomic Embed Text v1.5", type: "embedding" }, ], cerebras: [ { id: "gpt-oss-120b", name: "GPT OSS 120B" }, @@ -410,9 +417,20 @@ export const PROVIDER_MODELS = { nvidia: [ { id: "moonshotai/kimi-k2.5", name: "Kimi K2.5" }, { id: "z-ai/glm4.7", name: "GLM 4.7" }, + { id: "nvidia/nv-embedqa-e5-v5", name: "NV EmbedQA E5 v5", type: "embedding" }, ], nebius: [ { id: "meta-llama/Llama-3.3-70B-Instruct", name: "Llama 3.3 70B Instruct" }, + { id: "Qwen/Qwen3-Embedding-8B", name: "Qwen3 Embedding 8B", type: "embedding" }, + ], + "voyage-ai": [ + { id: "voyage-3-large", name: "Voyage 3 Large", type: "embedding" }, + { id: "voyage-3.5", name: "Voyage 3.5", type: "embedding" }, + { id: "voyage-3.5-lite", name: "Voyage 3.5 Lite", type: "embedding" }, + { id: "voyage-code-3", name: "Voyage Code 3", type: "embedding" }, + { id: "voyage-finance-2", name: "Voyage Finance 2", type: "embedding" }, + { id: "voyage-law-2", name: "Voyage Law 2", type: "embedding" }, + { id: "voyage-multilingual-2", name: "Voyage Multilingual 2", type: "embedding" }, ], siliconflow: [ { id: "deepseek-ai/DeepSeek-V3.2", name: "DeepSeek V3.2" }, diff --git a/open-sse/handlers/embeddingsCore.js b/open-sse/handlers/embeddingsCore.js index e6e98fc..b210b68 100644 --- a/open-sse/handlers/embeddingsCore.js +++ b/open-sse/handlers/embeddingsCore.js @@ -7,6 +7,19 @@ import { refreshWithRetry } from "../services/tokenRefresh.js"; // Google AI (Gemini) provider aliases / identifiers const GEMINI_PROVIDERS = new Set(["gemini", "google_ai_studio"]); +// Static map: provider id โ†’ embeddings endpoint (OpenAI-compatible body format) +const 
EMBEDDING_URLS = { + openai: "https://api.openai.com/v1/embeddings", + openrouter: "https://openrouter.ai/api/v1/embeddings", + mistral: "https://api.mistral.ai/v1/embeddings", + "voyage-ai": "https://api.voyageai.com/v1/embeddings", + fireworks: "https://api.fireworks.ai/inference/v1/embeddings", + together: "https://api.together.xyz/v1/embeddings", + nebius: "https://api.tokenfactory.nebius.com/v1/embeddings", + github: "https://models.github.ai/inference/embeddings", + nvidia: "https://integrate.api.nvidia.com/v1/embeddings", +}; + /** * Check whether a provider targets the Google AI (Gemini) embeddings API. * @param {string} provider @@ -77,22 +90,16 @@ function buildEmbeddingsUrl(provider, model, credentials, input) { return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:embedContent?key=${encodeURIComponent(apiKey)}`; } - switch (provider) { - case "openai": - return "https://api.openai.com/v1/embeddings"; - case "openrouter": - return "https://openrouter.ai/api/v1/embeddings"; - default: - // openai-compatible & custom-embedding providers: use their baseUrl + /embeddings - if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) { - const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1"; - // Defensive: strip trailing slash and accidental /embeddings to avoid double-append - const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, ""); - return `${baseUrl}/embeddings`; - } - // For other providers, attempt to use their base URL pattern with /embeddings path - return null; + if (EMBEDDING_URLS[provider]) return EMBEDDING_URLS[provider]; + + // openai-compatible & custom-embedding providers: use their baseUrl + /embeddings + if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) { + const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1"; + // Defensive: strip trailing slash 
and accidental /embeddings to avoid double-append + const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, ""); + return `${baseUrl}/embeddings`; } + return null; } /** diff --git a/open-sse/handlers/ttsCore.js b/open-sse/handlers/ttsCore.js index 9618b7a..233719f 100644 --- a/open-sse/handlers/ttsCore.js +++ b/open-sse/handlers/ttsCore.js @@ -455,7 +455,209 @@ async function handleOpenAiTts({ model, input, credentials, responseFormat = "mp return createTtsResponse(base64, "mp3", responseFormat); } -// โ”€โ”€ TTS Provider Registry (DRY) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +// โ”€โ”€ Generic TTS Format Handlers (config-driven via ttsConfig.format) โ”€โ”€โ”€โ”€โ”€โ”€ +// Parse `model` string as "modelId/voiceId" or "modelId" (modelId may contain slashes โ€” match against known list) +function parseModelVoice(model, defaultModel = "", defaultVoice = "", knownModels = []) { + if (!model) return { modelId: defaultModel, voiceId: defaultVoice }; + // Find longest known model id that prefixes `model` + const known = knownModels.map((m) => m.id || m).filter(Boolean).sort((a, b) => b.length - a.length); + for (const id of known) { + if (model === id) return { modelId: id, voiceId: defaultVoice }; + if (model.startsWith(`${id}/`)) return { modelId: id, voiceId: model.slice(id.length + 1) }; + } + // Fallback: split on last "/" so "vendor/model/voice" โ†’ model="vendor/model", voice="voice" + const idx = model.lastIndexOf("/"); + if (idx > 0) return { modelId: model.slice(0, idx), voiceId: model.slice(idx + 1) }; + return { modelId: defaultModel || model, voiceId: defaultVoice || model }; +} + +// Convert upstream Response (binary audio) to { base64, format } +async function responseToBase64(res, defaultFormat = "mp3") { + const buf = await res.arrayBuffer(); + if (buf.byteLength < 100) throw new Error("Upstream returned empty audio"); + const ctype = res.headers.get("content-type") || ""; + let 
format = defaultFormat; + if (ctype.includes("wav")) format = "wav"; + else if (ctype.includes("mpeg") || ctype.includes("mp3")) format = "mp3"; + else if (ctype.includes("ogg")) format = "ogg"; + return { base64: Buffer.from(buf).toString("base64"), format }; +} + +async function throwUpstreamError(res) { + const text = await res.text().catch(() => ""); + let msg = `Upstream error (${res.status})`; + try { + const parsed = JSON.parse(text); + msg = parsed?.error?.message || parsed?.message || parsed?.detail?.message || (typeof parsed?.detail === "string" ? parsed.detail : null) || text || msg; + } catch { msg = text || msg; } + throw new Error(msg); +} + +// Hyperbolic: POST { text } โ†’ { audio: base64 } +async function ttsHyperbolic({ baseUrl, apiKey, text }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, + body: JSON.stringify({ text }), + }); + if (!res.ok) await throwUpstreamError(res); + const data = await res.json(); + return { base64: data.audio, format: "mp3" }; +} + +// Deepgram: model via query, Token auth, returns binary +async function ttsDeepgram({ baseUrl, apiKey, text, modelId }) { + const url = new URL(baseUrl); + url.searchParams.set("model", modelId || "aura-asteria-en"); + const res = await fetch(url.toString(), { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Token ${apiKey}` }, + body: JSON.stringify({ text }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "mp3"); +} + +// Nvidia NIM: POST { input: { text }, voice, model } โ†’ binary +async function ttsNvidia({ baseUrl, apiKey, text, modelId, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, + body: JSON.stringify({ input: { text }, voice: voiceId || "default", model: modelId }), + }); + if (!res.ok) await 
throwUpstreamError(res); + return responseToBase64(res, "wav"); +} + +// HuggingFace: POST {baseUrl}/{modelId} { inputs: text } โ†’ binary +async function ttsHuggingFace({ baseUrl, apiKey, text, modelId }) { + if (!modelId || modelId.includes("..")) throw new Error("Invalid HuggingFace model ID"); + const res = await fetch(`${baseUrl}/${modelId}`, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, + body: JSON.stringify({ inputs: text }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "wav"); +} + +// Inworld: POST { text, voiceId, modelId, audioConfig } โ†’ JSON { audioContent } +async function ttsInworld({ baseUrl, apiKey, text, modelId, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Basic ${apiKey}` }, + body: JSON.stringify({ + text, + voiceId: voiceId || "Alex", + modelId: modelId || "inworld-tts-1.5-mini", + audioConfig: { audioEncoding: "MP3" }, + }), + }); + if (!res.ok) await throwUpstreamError(res); + const data = await res.json(); + if (!data.audioContent) throw new Error("Inworld TTS returned no audio"); + return { base64: data.audioContent, format: "mp3" }; +} + +// Cartesia: POST { model_id, transcript, voice, output_format } โ†’ binary +async function ttsCartesia({ baseUrl, apiKey, text, modelId, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-API-Key": apiKey, + "Cartesia-Version": "2024-06-10", + }, + body: JSON.stringify({ + model_id: modelId || "sonic-2", + transcript: text, + ...(voiceId ? 
{ voice: { mode: "id", id: voiceId } } : {}), + output_format: { container: "mp3", bit_rate: 128000, sample_rate: 44100 }, + }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "mp3"); +} + +// PlayHT: token format "userId:apiKey", voice = s3 URL +async function ttsPlayHt({ baseUrl, apiKey, text, modelId, voiceId }) { + const [userId, key] = (apiKey || ":").split(":"); + const res = await fetch(baseUrl, { + method: "POST", + headers: { + "Content-Type": "application/json", + "Accept": "audio/mpeg", + "X-USER-ID": userId || "", + "Authorization": `Bearer ${key || apiKey}`, + }, + body: JSON.stringify({ + text, + voice: voiceId || "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json", + voice_engine: modelId || "PlayDialog", + output_format: "mp3", + speed: 1, + }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "mp3"); +} + +// Coqui (local, noAuth): POST { text, speaker_id } โ†’ WAV +async function ttsCoqui({ baseUrl, text, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text, ...(voiceId ? 
{ speaker_id: voiceId } : {}) }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "wav"); +} + +// Tortoise (local, noAuth): POST { text, voice } โ†’ binary +async function ttsTortoise({ baseUrl, text, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text, voice: voiceId || "random" }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "wav"); +} + +// OpenAI-compatible (qwen3-tts, openai-compat): POST { model, input, voice } โ†’ binary +async function ttsOpenAiCompat({ baseUrl, apiKey, text, modelId, voiceId }) { + const headers = { "Content-Type": "application/json" }; + if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`; + const res = await fetch(baseUrl, { + method: "POST", + headers, + body: JSON.stringify({ + model: modelId, + input: text, + voice: voiceId || "alloy", + response_format: "mp3", + speed: 1.0, + }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "mp3"); +} + +// Format โ†’ handler dispatcher (DRY) +const FORMAT_HANDLERS = { + hyperbolic: ttsHyperbolic, + deepgram: ttsDeepgram, + "nvidia-tts": ttsNvidia, + "huggingface-tts": ttsHuggingFace, + inworld: ttsInworld, + cartesia: ttsCartesia, + playht: ttsPlayHt, + coqui: ttsCoqui, + tortoise: ttsTortoise, + openai: ttsOpenAiCompat, +}; + +// โ”€โ”€ TTS Provider Registry (legacy noAuth + special providers) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ const TTS_PROVIDERS = { "google-tts": { synthesize: async (text, model) => { @@ -480,15 +682,10 @@ const TTS_PROVIDERS = { }, "elevenlabs": { synthesize: async (text, model, credentials) => { - if (!credentials?.apiKey) { - throw new Error("ElevenLabs API key required"); - } - // model format: "voice_id" or "model_id/voice_id" + if (!credentials?.apiKey) throw new Error("ElevenLabs API key required"); let modelId = "eleven_flash_v2_5"; let voiceId = model; - if 
(model && model.includes("/")) { - [modelId, voiceId] = model.split("/"); - } + if (model && model.includes("/")) [modelId, voiceId] = model.split("/"); const base64 = await elevenlabsTts(text, voiceId, credentials.apiKey, modelId); return { base64, format: "mp3" }; }, @@ -508,15 +705,24 @@ const TTS_PROVIDERS = { }, }; +// โ”€โ”€ Generic dispatcher: providers with ttsConfig.format โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +// Resolves to TTS_PROVIDERS first; falls back to ttsConfig.format dispatch. +async function synthesizeViaConfig(provider, text, model, credentials) { + const { AI_PROVIDERS } = await import("@/shared/constants/providers"); + const cfg = AI_PROVIDERS[provider]?.ttsConfig; + if (!cfg) return null; + const handler = FORMAT_HANDLERS[cfg.format]; + if (!handler) return null; + const apiKey = credentials?.apiKey; + if (cfg.authType !== "none" && !apiKey) throw new Error(`${provider} API key required`); + const defaultModel = cfg.models?.[0]?.id || ""; + const { modelId, voiceId } = parseModelVoice(model, defaultModel, "", cfg.models || []); + return handler({ baseUrl: cfg.baseUrl, apiKey, text, modelId, voiceId }); +} + // โ”€โ”€ Core handler โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ /** * Synthesize text to audio. 
- * @param {object} options - * @param {string} options.provider - "google-tts" | "edge-tts" | "local-device" | "openai" - * @param {string} options.model - voice/lang id - * @param {string} options.input - text to synthesize - * @param {object} [options.credentials] - required for openai - * @param {string} [options.responseFormat] - "mp3" (default) | "json" (base64) * @returns {Promise<{success, response, status?, error?}>} */ export async function handleTtsCore({ provider, model, input, credentials, responseFormat = "mp3" }) { @@ -525,18 +731,20 @@ export async function handleTtsCore({ provider, model, input, credentials, respo } const ttsProvider = TTS_PROVIDERS[provider]; - if (!ttsProvider) { - return createErrorResult(HTTP_STATUS.BAD_REQUEST, `Provider '${provider}' does not support TTS via this route.`); - } try { - const result = await ttsProvider.synthesize(input.trim(), model, credentials, responseFormat); - - // OpenAI returns full response object - if (result.success !== undefined) return result; - - // Other providers return { base64, format } - return createTtsResponse(result.base64, result.format, responseFormat); + // Legacy/special providers (google-tts, edge-tts, local-device, elevenlabs, openai, openrouter) + if (ttsProvider) { + const result = await ttsProvider.synthesize(input.trim(), model, credentials, responseFormat); + if (result.success !== undefined) return result; + return createTtsResponse(result.base64, result.format, responseFormat); + } + + // Generic config-driven dispatcher (hyperbolic, deepgram, nvidia, huggingface, inworld, cartesia, playht, coqui, tortoise, qwen, ...) 
+ const result = await synthesizeViaConfig(provider, input.trim(), model, credentials); + if (result) return createTtsResponse(result.base64, result.format, responseFormat); + + return createErrorResult(HTTP_STATUS.BAD_REQUEST, `Provider '${provider}' does not support TTS via this route.`); } catch (err) { return createErrorResult(HTTP_STATUS.BAD_GATEWAY, err.message || "TTS synthesis failed"); } diff --git a/package.json b/package.json index 238fe47..cfe1017 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "9router-app", - "version": "0.4.8", + "version": "0.4.9", "description": "9Router web dashboard", "private": true, "scripts": { diff --git a/public/providers/coqui.png b/public/providers/coqui.png new file mode 100644 index 0000000..6e2471e Binary files /dev/null and b/public/providers/coqui.png differ diff --git a/public/providers/inworld.png b/public/providers/inworld.png new file mode 100644 index 0000000..579a6c2 Binary files /dev/null and b/public/providers/inworld.png differ diff --git a/public/providers/tortoise.png b/public/providers/tortoise.png new file mode 100644 index 0000000..70e93fc Binary files /dev/null and b/public/providers/tortoise.png differ diff --git a/public/providers/voyage-ai.png b/public/providers/voyage-ai.png new file mode 100644 index 0000000..72c4196 Binary files /dev/null and b/public/providers/voyage-ai.png differ diff --git a/src/app/(dashboard)/dashboard/media-providers/[kind]/[id]/page.js b/src/app/(dashboard)/dashboard/media-providers/[kind]/[id]/page.js index 3cc8c56..13d2e81 100644 --- a/src/app/(dashboard)/dashboard/media-providers/[kind]/[id]/page.js +++ b/src/app/(dashboard)/dashboard/media-providers/[kind]/[id]/page.js @@ -364,6 +364,8 @@ function TtsExampleCard({ providerId }) { const [countryVoices, setCountryVoices] = useState([]); const [selectedLang, setSelectedLang] = useState(""); const [selectedModel, setSelectedModel] = useState(() => { + const cfgModels = 
AI_PROVIDERS[providerId]?.ttsConfig?.models; + if (cfgModels?.length) return cfgModels[0].id; if (config.hasModelSelector && config.modelKey) { const models = getModelsByProviderId(config.modelKey); return models?.[0]?.id || ""; @@ -430,6 +432,8 @@ function TtsExampleCard({ providerId }) { } } // api-language (edge-tts, local-device, elevenlabs): NO default load, wait for user to pick language + // config (nvidia, hyperbolic, deepgram, huggingface, cartesia, playht, coqui, tortoise, inworld, qwen): + // use ttsConfig.models for model selector; voice is empty by default (backend uses provider default) }, [providerId]); // Update voices when model changes (voicesPerModel providers) @@ -501,11 +505,14 @@ function TtsExampleCard({ providerId }) { : languages; const endpoint = useTunnel ? tunnelEndpoint : localEndpoint; - // For ElevenLabs: use voiceId (editable) instead of selectedVoice - const activeVoiceId = config.hasVoiceIdInput ? voiceId : selectedVoice; - const modelFull = config.hasModelSelector && activeVoiceId && selectedModel - ? `${providerAlias}/${selectedModel}/${activeVoiceId}` - : activeVoiceId ? `${providerAlias}/${activeVoiceId}` : ""; + // For ElevenLabs/config-driven: prefer manual voiceId (if any), else fall back to selectedVoice + const activeVoiceId = config.hasVoiceIdInput ? (voiceId || selectedVoice) : selectedVoice; + const modelFull = (() => { + if (config.hasModelSelector && selectedModel && activeVoiceId) return `${providerAlias}/${selectedModel}/${activeVoiceId}`; + if (config.hasModelSelector && selectedModel) return `${providerAlias}/${selectedModel}`; + if (activeVoiceId) return `${providerAlias}/${activeVoiceId}`; + return ""; + })(); const curlSnippet = `curl -X POST ${endpoint}/v1/audio/speech${responseFormat === "json" ? 
"?response_format=json" : ""} \\ -H "Content-Type: application/json" \\ @@ -584,15 +591,17 @@ function TtsExampleCard({ providerId }) { - {/* Model selector (OpenAI, ElevenLabs) */} - {config.hasModelSelector && config.modelKey && ( + {/* Model selector โ€” prefer ttsConfig.models, else providerModels via modelKey */} + {config.hasModelSelector && (config.modelKey || AI_PROVIDERS[providerId]?.ttsConfig?.models?.length) && ( @@ -1446,13 +1455,14 @@ export default function MediaProviderDetailPage() { /> )} - {/* Provider Info โ€” config-driven, supports searchConfig, fetchConfig, searchViaChat */} - {!isCustom && (provider.searchConfig || provider.fetchConfig || provider.searchViaChat) && ( + {/* Provider Info โ€” config-driven, supports searchConfig, fetchConfig, ttsConfig, embeddingConfig, searchViaChat */} + {!isCustom && (provider.searchConfig || provider.fetchConfig || provider.ttsConfig || provider.embeddingConfig || provider.searchViaChat) && ( ""); + return NextResponse.json({ error: `Deepgram API ${res.status}: ${text || "Failed"}` }, { status: 502 }); + } + const data = await res.json(); + const ttsModels = data.tts || []; + + const byLang = {}; + for (const m of ttsModels) { + // Deepgram returns `languages: ["en"]` or sometimes language inferred from canonical_name suffix + const langs = Array.isArray(m.languages) && m.languages.length + ? 
m.languages + : [m.canonical_name?.split("-").pop() || "en"]; + for (const code of langs) { + if (!byLang[code]) { + byLang[code] = { + code, + name: (() => { try { return langNames.of(code); } catch { return code; } })(), + voices: [], + }; + } + const voiceId = m.canonical_name || m.name; + if (!byLang[code].voices.find((x) => x.id === voiceId)) { + byLang[code].voices.push({ + id: voiceId, + name: m.name || voiceId, + gender: m.metadata?.tags?.find((t) => t === "masculine" || t === "feminine") || "", + lang: code, + }); + } + } + } + + const languages = Object.values(byLang).sort((a, b) => a.name.localeCompare(b.name)); + + if (langFilter) { + return NextResponse.json({ voices: byLang[langFilter]?.voices || [] }); + } + return NextResponse.json({ languages, byLang }); + } catch (err) { + return NextResponse.json({ error: err.message || "Failed to fetch voices" }, { status: 502 }); + } +} diff --git a/src/app/api/media-providers/tts/inworld/voices/route.js b/src/app/api/media-providers/tts/inworld/voices/route.js new file mode 100644 index 0000000..fb904e5 --- /dev/null +++ b/src/app/api/media-providers/tts/inworld/voices/route.js @@ -0,0 +1,61 @@ +import { NextResponse } from "next/server"; +import { getProviderConnections } from "@/lib/localDb"; + +const langNames = new Intl.DisplayNames(["en"], { type: "language" }); + +/** + * GET /api/media-providers/tts/inworld/voices[?lang=en] + * Returns { languages, byLang } grouped by language code (same shape as edge-tts/elevenlabs) + */ +export async function GET(request) { + try { + const { searchParams } = new URL(request.url); + const langFilter = searchParams.get("lang"); + + const connections = await getProviderConnections({ provider: "inworld", isActive: true }); + const apiKey = connections[0]?.apiKey; + if (!apiKey) return NextResponse.json({ error: "No Inworld connection found" }, { status: 400 }); + + const res = await fetch("https://api.inworld.ai/tts/v1/voices", { + headers: { "Authorization": `Basic 
${apiKey}` }, + }); + if (!res.ok) { + const text = await res.text().catch(() => ""); + return NextResponse.json({ error: `Inworld API ${res.status}: ${text || "Failed"}` }, { status: 502 }); + } + const data = await res.json(); + const voices = data.voices || []; + + const byLang = {}; + for (const v of voices) { + // Each voice has `languages: ["en", "es", ...]` + const langs = Array.isArray(v.languages) && v.languages.length ? v.languages : ["en"]; + for (const code of langs) { + if (!byLang[code]) { + byLang[code] = { + code, + name: (() => { try { return langNames.of(code); } catch { return code; } })(), + voices: [], + }; + } + if (!byLang[code].voices.find((x) => x.id === v.voiceId)) { + byLang[code].voices.push({ + id: v.voiceId, + name: v.displayName || v.voiceId, + gender: v.gender || "", + lang: code, + }); + } + } + } + + const languages = Object.values(byLang).sort((a, b) => a.name.localeCompare(b.name)); + + if (langFilter) { + return NextResponse.json({ voices: byLang[langFilter]?.voices || [] }); + } + return NextResponse.json({ languages, byLang }); + } catch (err) { + return NextResponse.json({ error: err.message || "Failed to fetch voices" }, { status: 502 }); + } +} diff --git a/src/app/api/providers/validate/route.js b/src/app/api/providers/validate/route.js index db9cbb5..1c28327 100644 --- a/src/app/api/providers/validate/route.js +++ b/src/app/api/providers/validate/route.js @@ -40,6 +40,43 @@ async function probeWebProvider(provider, apiKey) { return res.status !== 401 && res.status !== 403; } +// Probe a tts/embedding provider using ttsConfig/embeddingConfig. +// Returns true if API key is accepted (status !== 401 && !== 403); null to skip. 
+async function probeMediaProvider(provider, apiKey) { + const p = AI_PROVIDERS[provider]; + if (!p) return null; + // Only probe providers that are media-only (not LLM dual-purpose, let LLM validate handle those) + const kinds = p.serviceKinds || ["llm"]; + const isMediaOnly = kinds.every((k) => k === "tts" || k === "embedding" || k === "stt"); + if (!isMediaOnly) return null; + const cfg = p.ttsConfig || p.embeddingConfig; + if (!cfg) return null; + if (p.noAuth || cfg.authType === "none") return true; + // Skip auth schemes that need provider-specific data + if (cfg.authHeader === "playht" || cfg.authHeader === "aws-sigv4") return null; + + const headers = { "Content-Type": "application/json" }; + + // Apply auth based on authHeader + switch (cfg.authHeader) { + case "bearer": headers["Authorization"] = `Bearer ${apiKey}`; break; + case "x-api-key": headers["x-api-key"] = apiKey; break; + case "xi-api-key": headers["xi-api-key"] = apiKey; break; + case "token": headers["Authorization"] = `Token ${apiKey}`; break; + case "basic": headers["Authorization"] = `Basic ${apiKey}`; break; + default: return null; + } + + // Minimal POST body โ€” server will reject auth before validating body + const res = await fetch(cfg.baseUrl, { + method: "POST", + headers, + body: JSON.stringify({ input: "ping", text: "ping", model: cfg.models?.[0]?.id || "test" }), + signal: AbortSignal.timeout(8000), + }); + return res.status !== 401 && res.status !== 403; +} + // POST /api/providers/validate - Validate API key with provider export async function POST(request) { try { @@ -192,6 +229,15 @@ export async function POST(request) { }); } + // Generic probe for tts/embedding providers (config-driven) + const mediaResult = await probeMediaProvider(provider, apiKey); + if (mediaResult !== null) { + return NextResponse.json({ + valid: mediaResult, + error: mediaResult ? 
null : "Invalid API key", + }); + } + switch (provider) { case "openai": const openaiRes = await fetch("https://api.openai.com/v1/models", { diff --git a/src/mitm/config.js b/src/mitm/config.js index 29229d9..f8dc7a0 100644 --- a/src/mitm/config.js +++ b/src/mitm/config.js @@ -15,6 +15,11 @@ const URL_PATTERNS = { cursor: ["/BidiAppend", "/RunSSE", "/RunPoll", "/Run"], }; +// Synonym map: rawModel from request โ†’ canonical alias key in mitmAlias DB +const MODEL_SYNONYMS = { + antigravity: { "gemini-default": "gemini-3-flash" }, +}; + function getToolForHost(host) { const h = (host || "").split(":")[0]; if (h === "api.individual.githubcopilot.com") return "copilot"; @@ -24,4 +29,4 @@ function getToolForHost(host) { return null; } -module.exports = { TARGET_HOSTS, URL_PATTERNS, getToolForHost }; +module.exports = { TARGET_HOSTS, URL_PATTERNS, MODEL_SYNONYMS, getToolForHost }; diff --git a/src/mitm/server.js b/src/mitm/server.js index 78e3362..cc81497 100644 --- a/src/mitm/server.js +++ b/src/mitm/server.js @@ -5,14 +5,14 @@ const dns = require("dns"); const { promisify } = require("util"); const { execSync } = require("child_process"); const { log, err } = require("./logger"); -const { TARGET_HOSTS, URL_PATTERNS, getToolForHost } = require("./config"); +const { TARGET_HOSTS, URL_PATTERNS, MODEL_SYNONYMS, getToolForHost } = require("./config"); const { DATA_DIR, MITM_DIR } = require("./paths"); const { getCertForDomain } = require("./cert/generate"); const DB_FILE = path.join(DATA_DIR, "db.json"); const LOCAL_PORT = 443; const IS_WIN = process.platform === "win32"; -const ENABLE_FILE_LOG = false; +const ENABLE_FILE_LOG = true; const LOG_DIR = path.join(DATA_DIR, "logs", "mitm"); const INTERNAL_REQUEST_HEADER = { name: "x-request-source", value: "local" }; @@ -107,9 +107,11 @@ function getMappedModel(tool, model) { const db = JSON.parse(fs.readFileSync(DB_FILE, "utf-8")); const aliases = db.mitmAlias?.[tool]; if (!aliases) return null; - if (aliases[model]) return 
aliases[model]; + // Normalize via synonym map (e.g., gemini-default โ†’ gemini-3-flash) + const lookup = MODEL_SYNONYMS?.[tool]?.[model] || model; + if (aliases[lookup]) return aliases[lookup]; // Prefix match fallback - const prefixKey = Object.keys(aliases).find(k => k && aliases[k] && (model.startsWith(k) || k.startsWith(model))); + const prefixKey = Object.keys(aliases).find(k => k && aliases[k] && (lookup.startsWith(k) || k.startsWith(lookup))); return prefixKey ? aliases[prefixKey] : null; } catch { return null; } } diff --git a/src/shared/components/ProviderInfoCard.js b/src/shared/components/ProviderInfoCard.js index 6150fdd..1957f89 100644 --- a/src/shared/components/ProviderInfoCard.js +++ b/src/shared/components/ProviderInfoCard.js @@ -8,6 +8,8 @@ const FIELD_SCHEMA = { defaultModel: { label: "Model", format: (v) => v, mono: true }, baseUrl: { label: "Endpoint", format: (v) => v, isLink: true, mono: true }, costPerQuery: { label: "Cost / call", format: (v) => v === 0 ? "Free" : `$${v.toFixed(4)}` }, + pricingUrl: { label: "Pricing", format: () => "View pricing", isLink: true }, + freeTier: { label: "Free tier", format: (v) => v }, freeMonthlyQuota: { label: "Free quota", format: (v) => v === 0 ? "โ€”" : v >= 999999 ? "Unlimited" : `${v.toLocaleString()} / mo` }, searchTypes: { label: "Types", format: (v) => v.join(", ") }, formats: { label: "Formats", format: (v) => v.join(", ") }, @@ -30,6 +32,7 @@ export default function ProviderInfoCard({ config, provider, title = "Provider I })); const signupUrl = provider?.notice?.apiKeyUrl || provider?.website; + const noticeText = provider?.notice?.text; return ( @@ -67,6 +70,12 @@ export default function ProviderInfoCard({ config, provider, title = "Provider I )} ))} + {noticeText && ( +
+ Notice + {noticeText} +
+ )}
); diff --git a/src/shared/constants/cliTools.js b/src/shared/constants/cliTools.js index 43cb41c..8ff1f11 100644 --- a/src/shared/constants/cliTools.js +++ b/src/shared/constants/cliTools.js @@ -12,7 +12,7 @@ export const MITM_TOOLS = { defaultModels: [ { id: "gemini-3.1-pro-high", name: "Gemini 3.1 Pro High", alias: "gemini-3.1-pro-high" }, { id: "gemini-3.1-pro-low", name: "Gemini 3.1 Pro Low", alias: "gemini-3.1-pro-low" }, - { id: "gemini-3-flash", name: "Gemini 3 Flash", alias: "gemini-3-flash" }, + { id: "gemini-3-flash", name: "Gemini 3 Flash / Default", alias: "gemini-3-flash" }, { id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", alias: "claude-sonnet-4-6" }, { id: "claude-opus-4-6-thinking", name: "Claude Opus 4.6 Thinking", alias: "claude-opus-4-6-thinking" }, { id: "gpt-oss-120b-medium", name: "GPT OSS 120B Medium", alias: "gpt-oss-120b-medium" }, diff --git a/src/shared/constants/providers.js b/src/shared/constants/providers.js index 54f0bf5..ec03057 100644 --- a/src/shared/constants/providers.js +++ b/src/shared/constants/providers.js @@ -3,7 +3,7 @@ // Free Providers (kiro first, iflow last) export const FREE_PROVIDERS = { kiro: { id: "kiro", alias: "kr", name: "Kiro AI", icon: "psychology_alt", color: "#FF6B35" }, - qwen: { id: "qwen", alias: "qw", name: "Qwen Code", icon: "psychology", color: "#10B981", deprecated: true, deprecationNotice: "Qwen OAuth free tier was discontinued by Alibaba on 2026-04-15. New connections will not work." }, + qwen: { id: "qwen", alias: "qw", name: "Qwen Code", icon: "psychology", color: "#10B981", deprecated: true, deprecationNotice: "Qwen OAuth free tier was discontinued by Alibaba on 2026-04-15. 
New connections will not work.", serviceKinds: ["llm", "tts", "stt"], ttsConfig: { baseUrl: "http://localhost:8000/v1/audio/speech", authType: "none", authHeader: "none", format: "openai", models: [{ id: "qwen3-tts", name: "Qwen3 TTS" }] } }, "gemini-cli": { id: "gemini-cli", alias: "gc", name: "Gemini CLI", icon: "terminal", color: "#4285F4", deprecated: true, deprecationNotice: "Gemini CLI is designed exclusively for Gemini CLI. Using it with other tools (OpenClaw, Claude, Codex...) may result in account restrictions or bans." }, // gitlab: { id: "gitlab", alias: "gl", name: "GitLab Duo", icon: "code", color: "#FC6D26" }, // codebuddy: { id: "codebuddy", alias: "cb", name: "CodeBuddy", icon: "smart_toy", color: "#006EFF" }, @@ -14,11 +14,11 @@ export const FREE_PROVIDERS = { // Free Tier Providers (has free access but may require account/API key) export const FREE_TIER_PROVIDERS = { - openrouter: { id: "openrouter", alias: "openrouter", name: "OpenRouter", icon: "router", color: "#F97316", textIcon: "OR", website: "https://openrouter.ai", notice: { text: "Free tier: 27+ free models, no credit card needed, 200 req/day. After $10 credit: 1,000 req/day.", apiKeyUrl: "https://openrouter.ai/settings/keys" }, modelsFetcher: { url: "https://openrouter.ai/api/v1/models", type: "openrouter-free" }, passthroughModels: true, serviceKinds: ["llm", "embedding", "tts", "imageToText"] }, - nvidia: { id: "nvidia", alias: "nvidia", name: "NVIDIA NIM", icon: "developer_board", color: "#76B900", textIcon: "NV", website: "https://developer.nvidia.com/nim", notice: { text: "Free access for NVIDIA Developer Program members (prototyping & testing).", apiKeyUrl: "https://build.nvidia.com/settings/api-keys" } }, + openrouter: { id: "openrouter", alias: "openrouter", name: "OpenRouter", icon: "router", color: "#F97316", textIcon: "OR", website: "https://openrouter.ai", notice: { text: "Free tier: 27+ free models, no credit card needed, 200 req/day. 
After $10 credit: 1,000 req/day.", apiKeyUrl: "https://openrouter.ai/settings/keys" }, modelsFetcher: { url: "https://openrouter.ai/api/v1/models", type: "openrouter-free" }, passthroughModels: true, serviceKinds: ["llm", "embedding", "tts", "imageToText"], embeddingConfig: { baseUrl: "https://openrouter.ai/api/v1/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "openai/text-embedding-3-small", name: "Text Embedding 3 Small (OpenRouter)", dimensions: 1536 }, { id: "openai/text-embedding-3-large", name: "Text Embedding 3 Large (OpenRouter)", dimensions: 3072 }, { id: "openai/text-embedding-ada-002", name: "Text Embedding Ada 002 (OpenRouter)", dimensions: 1536 }] } }, + nvidia: { id: "nvidia", alias: "nvidia", name: "NVIDIA NIM", icon: "developer_board", color: "#76B900", textIcon: "NV", website: "https://developer.nvidia.com/nim", notice: { text: "Free access for NVIDIA Developer Program members (prototyping & testing).", apiKeyUrl: "https://build.nvidia.com/settings/api-keys" }, serviceKinds: ["llm", "tts", "embedding", "stt"], ttsConfig: { baseUrl: "https://integrate.api.nvidia.com/v1/audio/speech", authType: "apikey", authHeader: "bearer", format: "nvidia-tts", models: [{ id: "fastpitch", name: "FastPitch" }, { id: "tacotron2", name: "Tacotron2" }] }, embeddingConfig: { baseUrl: "https://integrate.api.nvidia.com/v1/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "nvidia/nv-embedqa-e5-v5", name: "NV EmbedQA E5 v5", dimensions: 1024 }] } }, ollama: { id: "ollama", alias: "ollama", name: "Ollama Cloud", icon: "cloud", color: "#ffffffff", textIcon: "OL", website: "https://ollama.com", notice: { text: "Free tier: light usage, 1 cloud model at a time (limits reset every 5h & 7d). 
Pro $20/mo ยท Max $100/mo.", apiKeyUrl: "https://ollama.com/settings/keys" } }, vertex: { id: "vertex", alias: "vx", name: "Vertex AI", icon: "cloud", color: "#4285F4", textIcon: "VX", website: "https://cloud.google.com/vertex-ai", notice: { text: "New Google Cloud accounts get $300 free credits. Requires GCP project + Service Account with Vertex AI API enabled.", apiKeyUrl: "https://console.cloud.google.com/iam-admin/serviceaccounts" } }, - gemini: { id: "gemini", alias: "gemini", name: "Gemini", icon: "diamond", color: "#4285F4", textIcon: "GE", website: "https://ai.google.dev", serviceKinds: ["llm", "embedding", "image", "imageToText", "webSearch"], searchViaChat: { defaultModel: "gemini-2.5-flash" } }, + gemini: { id: "gemini", alias: "gemini", name: "Gemini", icon: "diamond", color: "#4285F4", textIcon: "GE", website: "https://ai.google.dev", serviceKinds: ["llm", "embedding", "image", "imageToText", "webSearch"], searchViaChat: { defaultModel: "gemini-2.5-flash", pricingUrl: "https://ai.google.dev/pricing", freeTier: "Free tier: 15 RPM, 1M tokens/day on gemini-2.5-flash via AI Studio." }, embeddingConfig: { baseUrl: "https://generativelanguage.googleapis.com/v1beta/models", authType: "apikey", authHeader: "key", models: [{ id: "text-embedding-004", name: "Text Embedding 004", dimensions: 768 }, { id: "embedding-001", name: "Embedding 001", dimensions: 768 }] } }, byteplus: { id: "byteplus", alias: "bpm", name: "BytePlus ModelArk", icon: "cloud", color: "#2563EB", textIcon: "BP", website: "https://console.byteplus.com/ark", notice: { text: "Free credits for new accounts. 
Access to Seed 2.0, Kimi K2 Thinking, GLM 4.7, GPT-OSS-120B models.", apiKeyUrl: "https://console.byteplus.com/ark/region:ark+ap-southeast-1/apiKey" }, serviceKinds: ["llm"] }, }; @@ -44,7 +44,7 @@ export const OAUTH_PROVIDERS = { claude: { id: "claude", alias: "cc", name: "Claude Code", icon: "smart_toy", color: "#D97757" }, antigravity: { id: "antigravity", alias: "ag", name: "Antigravity", icon: "rocket_launch", color: "#F59E0B", deprecated: true, deprecationNotice: "AG is designed exclusively for Antigravity IDE. Using it with other tools (OpenClaw, Claude, Codex...) may result in account restrictions or bans." }, codex: { id: "codex", alias: "cx", name: "OpenAI Codex", icon: "code", color: "#3B82F6", thinkingConfig: THINKING_CONFIG.effort, serviceKinds: ["llm", "image"], kindNotice: { image: "Requires a ChatGPT Plus (or higher) account. Free accounts are not supported for image generation." } }, - github: { id: "github", alias: "gh", name: "GitHub Copilot", icon: "code", color: "#333333" }, + github: { id: "github", alias: "gh", name: "GitHub Copilot", icon: "code", color: "#333333", serviceKinds: ["llm", "embedding"], embeddingConfig: { baseUrl: "https://models.github.ai/inference/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "text-embedding-3-small", name: "Text Embedding 3 Small (GitHub)", dimensions: 1536 }, { id: "text-embedding-3-large", name: "Text Embedding 3 Large (GitHub)", dimensions: 3072 }] } }, cursor: { id: "cursor", alias: "cu", name: "Cursor IDE", icon: "edit_note", color: "#00D4AA" }, // "kimi-coding": { id: "kimi-coding", alias: "kmc", name: "Kimi Coding", icon: "psychology", color: "#1E40AF", textIcon: "KC" }, kilocode: { id: "kilocode", alias: "kc", name: "Kilo Code", icon: "code", color: "#FF6B35", textIcon: "KC" }, @@ -55,41 +55,45 @@ export const OAUTH_PROVIDERS = { export const APIKEY_PROVIDERS = { glm: { id: "glm", alias: "glm", name: "GLM Coding", icon: "code", color: "#2563EB", textIcon: "GL", website: 
"https://open.bigmodel.cn" }, "glm-cn": { id: "glm-cn", alias: "glm-cn", name: "GLM (China)", icon: "code", color: "#DC2626", textIcon: "GC", website: "https://open.bigmodel.cn" }, - kimi: { id: "kimi", alias: "kimi", name: "Kimi", icon: "psychology", color: "#1E3A8A", textIcon: "KM", website: "https://kimi.moonshot.cn", serviceKinds: ["llm", "webSearch"], searchViaChat: { defaultModel: "kimi-k2.5" } }, - minimax: { id: "minimax", alias: "minimax", name: "Minimax Coding", icon: "memory", color: "#7C3AED", textIcon: "MM", website: "https://www.minimaxi.com", serviceKinds: ["llm", "image", "imageToText", "webSearch"], searchViaChat: { defaultModel: "MiniMax-M2.7" } }, + kimi: { id: "kimi", alias: "kimi", name: "Kimi", icon: "psychology", color: "#1E3A8A", textIcon: "KM", website: "https://kimi.moonshot.cn", serviceKinds: ["llm", "webSearch"], searchViaChat: { defaultModel: "kimi-k2.5", pricingUrl: "https://platform.moonshot.ai/docs/pricing/chat" } }, + minimax: { id: "minimax", alias: "minimax", name: "Minimax Coding", icon: "memory", color: "#7C3AED", textIcon: "MM", website: "https://www.minimaxi.com", serviceKinds: ["llm", "image", "imageToText", "webSearch"], searchViaChat: { defaultModel: "MiniMax-M2.7", pricingUrl: "https://www.minimaxi.com/document/price" } }, "minimax-cn": { id: "minimax-cn", alias: "minimax-cn", name: "Minimax (China)", icon: "memory", color: "#DC2626", textIcon: "MC", website: "https://www.minimaxi.com" }, alicode: { id: "alicode", alias: "alicode", name: "Alibaba", icon: "cloud", color: "#FF6A00", textIcon: "ALi" }, "alicode-intl": { id: "alicode-intl", alias: "alicode-intl", name: "Alibaba Intl", icon: "cloud", color: "#FF6A00", textIcon: "ALi" }, "volcengine-ark": { id: "volcengine-ark", alias: "ark", name: "Volcengine Ark", icon: "cloud", color: "#1677FF", textIcon: "ARK", website: "https://ark.cn-beijing.volces.com" }, - openai: { id: "openai", alias: "openai", name: "OpenAI", icon: "auto_awesome", color: "#10A37F", textIcon: "OA", 
website: "https://platform.openai.com", serviceKinds: ["llm", "embedding", "tts", "image", "imageToText", "webSearch"], thinkingConfig: THINKING_CONFIG.effort, searchViaChat: { defaultModel: "gpt-4o-mini" } }, + openai: { id: "openai", alias: "openai", name: "OpenAI", icon: "auto_awesome", color: "#10A37F", textIcon: "OA", website: "https://platform.openai.com", serviceKinds: ["llm", "embedding", "tts", "image", "imageToText", "webSearch"], thinkingConfig: THINKING_CONFIG.effort, searchViaChat: { defaultModel: "gpt-4o-mini", pricingUrl: "https://openai.com/api/pricing" }, ttsConfig: { baseUrl: "https://api.openai.com/v1/audio/speech", authType: "apikey", authHeader: "bearer", format: "openai", models: [{ id: "tts-1", name: "TTS-1" }, { id: "tts-1-hd", name: "TTS-1 HD" }, { id: "gpt-4o-mini-tts", name: "GPT-4o Mini TTS" }] }, embeddingConfig: { baseUrl: "https://api.openai.com/v1/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "text-embedding-3-small", name: "Text Embedding 3 Small", dimensions: 1536 }, { id: "text-embedding-3-large", name: "Text Embedding 3 Large", dimensions: 3072 }, { id: "text-embedding-ada-002", name: "Text Embedding Ada 002", dimensions: 1536 }] } }, anthropic: { id: "anthropic", alias: "anthropic", name: "Anthropic", icon: "smart_toy", color: "#D97757", textIcon: "AN", website: "https://console.anthropic.com", serviceKinds: ["llm", "imageToText"] }, "opencode-go": { id: "opencode-go", alias: "ocg", name: "OpenCode Go", icon: "terminal", color: "#E87040", textIcon: "OC", website: "https://opencode.ai/auth", notice: { text: "OpenCode Go subscription: $5/mo (then $10/mo). 
Access to Kimi, GLM, Qwen, MiMo, MiniMax models.", apiKeyUrl: "https://opencode.ai/auth" } }, azure: { id: "azure", alias: "azure", name: "Azure OpenAI", icon: "cloud", color: "#0078D4", textIcon: "AZ", website: "https://azure.microsoft.com/en-us/products/ai-services/openai-service", hasProviderSpecificData: true }, deepseek: { id: "deepseek", alias: "ds", name: "DeepSeek", icon: "bolt", color: "#4D6BFE", textIcon: "DS", website: "https://deepseek.com" }, groq: { id: "groq", alias: "groq", name: "Groq", icon: "speed", color: "#F55036", textIcon: "GQ", website: "https://groq.com", serviceKinds: ["llm", "imageToText"] }, - xai: { id: "xai", alias: "xai", name: "xAI (Grok)", icon: "auto_awesome", color: "#1DA1F2", textIcon: "XA", website: "https://x.ai", serviceKinds: ["llm", "imageToText", "webSearch"], searchViaChat: { defaultModel: "grok-4.20-reasoning" } }, - mistral: { id: "mistral", alias: "mistral", name: "Mistral", icon: "air", color: "#FF7000", textIcon: "MI", website: "https://mistral.ai", serviceKinds: ["llm", "imageToText"] }, + xai: { id: "xai", alias: "xai", name: "xAI (Grok)", icon: "auto_awesome", color: "#1DA1F2", textIcon: "XA", website: "https://x.ai", serviceKinds: ["llm", "imageToText", "webSearch"], searchViaChat: { defaultModel: "grok-4.20-reasoning", pricingUrl: "https://x.ai/api#pricing" } }, + mistral: { id: "mistral", alias: "mistral", name: "Mistral", icon: "air", color: "#FF7000", textIcon: "MI", website: "https://mistral.ai", serviceKinds: ["llm", "imageToText", "embedding"], embeddingConfig: { baseUrl: "https://api.mistral.ai/v1/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "mistral-embed", name: "Mistral Embed", dimensions: 1024 }] } }, perplexity: { id: "perplexity", alias: "pplx", name: "Perplexity", icon: "search", color: "#20808D", textIcon: "PP", website: "https://www.perplexity.ai", serviceKinds: ["llm", "webSearch"], searchConfig: { baseUrl: "https://api.perplexity.ai/search", method: "POST", authType: 
"apikey", authHeader: "bearer", costPerQuery: 0.005, freeMonthlyQuota: 0, searchTypes: ["web"], defaultMaxResults: 5, maxMaxResults: 20, timeoutMs: 10000, cacheTTLMs: 300000 } }, - together: { id: "together", alias: "together", name: "Together AI", icon: "group_work", color: "#0F6FFF", textIcon: "TG", website: "https://www.together.ai" }, - fireworks: { id: "fireworks", alias: "fireworks", name: "Fireworks AI", icon: "local_fire_department", color: "#7B2EF2", textIcon: "FW", website: "https://fireworks.ai" }, + together: { id: "together", alias: "together", name: "Together AI", icon: "group_work", color: "#0F6FFF", textIcon: "TG", website: "https://www.together.ai", serviceKinds: ["llm", "embedding"], embeddingConfig: { baseUrl: "https://api.together.xyz/v1/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "BAAI/bge-large-en-v1.5", name: "BGE Large EN v1.5", dimensions: 1024 }, { id: "togethercomputer/m2-bert-80M-8k-retrieval", name: "M2 BERT 80M 8K", dimensions: 768 }] } }, + fireworks: { id: "fireworks", alias: "fireworks", name: "Fireworks AI", icon: "local_fire_department", color: "#7B2EF2", textIcon: "FW", website: "https://fireworks.ai", serviceKinds: ["llm", "embedding"], embeddingConfig: { baseUrl: "https://api.fireworks.ai/inference/v1/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "nomic-ai/nomic-embed-text-v1.5", name: "Nomic Embed Text v1.5", dimensions: 768 }] } }, cerebras: { id: "cerebras", alias: "cerebras", name: "Cerebras", icon: "memory", color: "#FF4F00", textIcon: "CB", website: "https://www.cerebras.ai" }, cohere: { id: "cohere", alias: "cohere", name: "Cohere", icon: "hub", color: "#39594D", textIcon: "CO", website: "https://cohere.com" }, - nebius: { id: "nebius", alias: "nebius", name: "Nebius AI", icon: "cloud", color: "#6C5CE7", textIcon: "NB", website: "https://nebius.com" }, + nebius: { id: "nebius", alias: "nebius", name: "Nebius AI", icon: "cloud", color: "#6C5CE7", textIcon: "NB", website: 
"https://nebius.com", serviceKinds: ["llm", "embedding"], embeddingConfig: { baseUrl: "https://api.tokenfactory.nebius.com/v1/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "Qwen/Qwen3-Embedding-8B", name: "Qwen3 Embedding 8B", dimensions: 4096 }] } }, siliconflow: { id: "siliconflow", alias: "siliconflow", name: "SiliconFlow", icon: "cloud_queue", color: "#5B6EF5", textIcon: "SF", website: "https://cloud.siliconflow.com" }, - hyperbolic: { id: "hyperbolic", alias: "hyp", name: "Hyperbolic", icon: "bolt", color: "#00D4FF", textIcon: "HY", website: "https://hyperbolic.xyz" }, - deepgram: { id: "deepgram", alias: "dg", name: "Deepgram", icon: "mic", color: "#13EF93", textIcon: "DG", website: "https://deepgram.com", serviceKinds: ["stt", "imageToText"] }, + hyperbolic: { id: "hyperbolic", alias: "hyp", name: "Hyperbolic", icon: "bolt", color: "#00D4FF", textIcon: "HY", website: "https://hyperbolic.xyz", serviceKinds: ["llm", "tts"], ttsConfig: { baseUrl: "https://api.hyperbolic.xyz/v1/audio/generation", authType: "apikey", authHeader: "bearer", format: "hyperbolic", models: [{ id: "melo-tts", name: "Melo TTS" }] } }, + deepgram: { id: "deepgram", alias: "dg", name: "Deepgram", icon: "mic", color: "#13EF93", textIcon: "DG", website: "https://deepgram.com", notice: { text: "$200 free credit on signup (no card required). 
Aura-1: $0.015/1k chars, Aura-2: $0.030/1k chars (Pay-As-You-Go).", apiKeyUrl: "https://console.deepgram.com/api-keys" }, serviceKinds: ["stt", "imageToText", "tts"], ttsConfig: { baseUrl: "https://api.deepgram.com/v1/speak", authType: "apikey", authHeader: "token", format: "deepgram", models: [] } }, assemblyai: { id: "assemblyai", alias: "aai", name: "AssemblyAI", icon: "record_voice_over", color: "#0062FF", textIcon: "AA", website: "https://assemblyai.com", serviceKinds: ["stt"] }, nanobanana: { id: "nanobanana", alias: "nb", name: "NanoBanana", icon: "image", color: "#FFD700", textIcon: "NB", website: "https://nanobananaapi.ai", serviceKinds: ["image"] }, - elevenlabs: { id: "elevenlabs", alias: "el", name: "ElevenLabs", icon: "record_voice_over", color: "#6C47FF", textIcon: "EL", website: "https://elevenlabs.io", serviceKinds: ["tts"] }, - cartesia: { id: "cartesia", alias: "cartesia", name: "Cartesia", icon: "spatial_audio", color: "#FF4F8B", textIcon: "CA", website: "https://cartesia.ai", serviceKinds: ["tts"], hidden: true }, - playht: { id: "playht", alias: "playht", name: "PlayHT", icon: "play_circle", color: "#00B4D8", textIcon: "PH", website: "https://play.ht", serviceKinds: ["tts"], hidden: true }, - "local-device": { id: "local-device", alias: "local-device", name: "Local Device", icon: "speaker", color: "#64748B", textIcon: "LD", serviceKinds: ["tts"], noAuth: true }, - "google-tts": { id: "google-tts", alias: "google-tts", name: "Google TTS", icon: "record_voice_over", color: "#4285F4", textIcon: "GT", serviceKinds: ["tts"], noAuth: true }, - "edge-tts": { id: "edge-tts", alias: "edge-tts", name: "Edge TTS", icon: "record_voice_over", color: "#0078D4", textIcon: "ET", serviceKinds: ["tts"], noAuth: true }, + elevenlabs: { id: "elevenlabs", alias: "el", name: "ElevenLabs", icon: "record_voice_over", color: "#6C47FF", textIcon: "EL", website: "https://elevenlabs.io", serviceKinds: ["tts"], ttsConfig: { baseUrl: 
"https://api.elevenlabs.io/v1/text-to-speech", authType: "apikey", authHeader: "xi-api-key", format: "elevenlabs", models: [{ id: "eleven_multilingual_v2", name: "Eleven Multilingual v2" }, { id: "eleven_turbo_v2_5", name: "Eleven Turbo v2.5" }] } }, + cartesia: { id: "cartesia", alias: "cartesia", name: "Cartesia", icon: "spatial_audio", color: "#FF4F8B", textIcon: "CA", website: "https://cartesia.ai", serviceKinds: ["tts"], hidden: true, ttsConfig: { baseUrl: "https://api.cartesia.ai/tts/bytes", authType: "apikey", authHeader: "x-api-key", format: "cartesia", models: [{ id: "sonic-2", name: "Sonic 2" }, { id: "sonic-3", name: "Sonic 3" }] } }, + playht: { id: "playht", alias: "playht", name: "PlayHT", icon: "play_circle", color: "#00B4D8", textIcon: "PH", website: "https://play.ht", serviceKinds: ["tts"], hidden: true, ttsConfig: { baseUrl: "https://api.play.ht/api/v2/tts/stream", authType: "apikey", authHeader: "playht", format: "playht", models: [{ id: "PlayDialog", name: "PlayDialog" }, { id: "Play3.0-mini", name: "Play 3.0 Mini" }] } }, + "local-device": { id: "local-device", alias: "local-device", name: "Local Device", icon: "speaker", color: "#64748B", textIcon: "LD", serviceKinds: ["tts"], noAuth: true, ttsConfig: { baseUrl: "local-device", authType: "none", authHeader: "none", format: "local-device", models: [] } }, + "google-tts": { id: "google-tts", alias: "google-tts", name: "Google TTS", icon: "record_voice_over", color: "#4285F4", textIcon: "GT", serviceKinds: ["tts"], noAuth: true, ttsConfig: { baseUrl: "google-tts", authType: "none", authHeader: "none", format: "google-tts", models: [] } }, + "edge-tts": { id: "edge-tts", alias: "edge-tts", name: "Edge TTS", icon: "record_voice_over", color: "#0078D4", textIcon: "ET", serviceKinds: ["tts"], noAuth: true, ttsConfig: { baseUrl: "edge-tts", authType: "none", authHeader: "none", format: "edge-tts", models: [] } }, + coqui: { id: "coqui", alias: "coqui", name: "Coqui TTS", icon: "record_voice_over", 
color: "#10B981", textIcon: "CQ", website: "https://github.com/coqui-ai/TTS", serviceKinds: ["tts"], hidden: true, noAuth: true, ttsConfig: { baseUrl: "http://localhost:5002/api/tts", authType: "none", authHeader: "none", format: "coqui", models: [{ id: "tts_models/en/ljspeech/tacotron2-DDC", name: "Tacotron2 DDC (LJSpeech)" }] } }, + tortoise: { id: "tortoise", alias: "tortoise", name: "Tortoise TTS", icon: "record_voice_over", color: "#7C3AED", textIcon: "TT", website: "https://github.com/neonbjb/tortoise-tts", serviceKinds: ["tts"], hidden: true, noAuth: true, ttsConfig: { baseUrl: "http://localhost:5000/api/tts", authType: "none", authHeader: "none", format: "tortoise", models: [{ id: "tortoise-v2", name: "Tortoise v2" }] } }, + inworld: { id: "inworld", alias: "inworld", name: "Inworld TTS", icon: "record_voice_over", color: "#FF6B6B", textIcon: "IW", website: "https://inworld.ai", notice: { text: "Free tier: 40 minutes/month TTS. Paid: TTS-1.5 Mini $0.01/min ($15/1M chars), TTS-1.5 Max $0.025/min ($30/1M chars). 
270+ voices, 15 languages.", apiKeyUrl: "https://platform.inworld.ai/api-keys" }, serviceKinds: ["tts"], ttsConfig: { baseUrl: "https://api.inworld.ai/tts/v1/voice", authType: "apikey", authHeader: "basic", format: "inworld", models: [{ id: "inworld-tts-1.5-mini", name: "Inworld TTS 1.5 Mini ($0.01/min)" }, { id: "inworld-tts-1.5-max", name: "Inworld TTS 1.5 Max ($0.025/min)" }] } }, + "voyage-ai": { id: "voyage-ai", alias: "voyage", name: "Voyage AI", icon: "data_array", color: "#0EA5E9", textIcon: "VG", website: "https://www.voyageai.com", notice: { apiKeyUrl: "https://dash.voyageai.com/api-keys" }, serviceKinds: ["embedding"], embeddingConfig: { baseUrl: "https://api.voyageai.com/v1/embeddings", authType: "apikey", authHeader: "bearer", models: [{ id: "voyage-3-large", name: "Voyage 3 Large", dimensions: 1024 }, { id: "voyage-3.5", name: "Voyage 3.5", dimensions: 1024 }, { id: "voyage-3.5-lite", name: "Voyage 3.5 Lite", dimensions: 1024 }, { id: "voyage-code-3", name: "Voyage Code 3", dimensions: 1024 }, { id: "voyage-finance-2", name: "Voyage Finance 2", dimensions: 1024 }, { id: "voyage-law-2", name: "Voyage Law 2", dimensions: 1024 }, { id: "voyage-multilingual-2", name: "Voyage Multilingual 2", dimensions: 1024 }] } }, sdwebui: { id: "sdwebui", alias: "sdwebui", name: "SD WebUI", icon: "brush", color: "#FF7043", textIcon: "SD", website: "https://github.com/AUTOMATIC1111/stable-diffusion-webui", serviceKinds: ["image"] }, comfyui: { id: "comfyui", alias: "comfyui", name: "ComfyUI", icon: "account_tree", color: "#4CAF50", textIcon: "CF", website: "https://github.com/comfyanonymous/ComfyUI", serviceKinds: ["image"] }, - huggingface: { id: "huggingface", alias: "hf", name: "HuggingFace", icon: "face", color: "#FFD21E", textIcon: "HF", website: "https://huggingface.co", serviceKinds: ["image", "imageToText", "tts"], hiddenKinds: ["tts"] }, + huggingface: { id: "huggingface", alias: "hf", name: "HuggingFace", icon: "face", color: "#FFD21E", textIcon: "HF", 
website: "https://huggingface.co", serviceKinds: ["image", "imageToText", "tts"], hiddenKinds: ["tts"], ttsConfig: { baseUrl: "https://api-inference.huggingface.co/models", authType: "apikey", authHeader: "bearer", format: "huggingface-tts", models: [{ id: "facebook/mms-tts-eng", name: "MMS TTS English" }, { id: "microsoft/speecht5_tts", name: "SpeechT5 TTS" }] } }, blackbox: { id: "blackbox", alias: "bb", name: "Blackbox AI", icon: "smart_toy", color: "#5B5FEF", textIcon: "BB", website: "https://blackbox.ai", serviceKinds: ["llm"] }, chutes: { id: "chutes", alias: "ch", name: "Chutes AI", icon: "water_drop", color: "#ffffffff", textIcon: "CH", website: "https://chutes.ai" }, "ollama-local": { id: "ollama-local", alias: "ollama-local", name: "Ollama Local", icon: "cloud", color: "#ffffffff", textIcon: "OL", website: "https://ollama.com" }, diff --git a/src/shared/constants/ttsProviders.js b/src/shared/constants/ttsProviders.js index b75bfe7..bc2e032 100644 --- a/src/shared/constants/ttsProviders.js +++ b/src/shared/constants/ttsProviders.js @@ -48,4 +48,65 @@ export const TTS_PROVIDER_CONFIG = { hasBrowseButton: true, voiceSource: "api-language", // from API with language picker }, + // ── Config-driven providers (load models from providers.js → ttsConfig.models) ── + "nvidia": { + hasModelSelector: true, + hasBrowseButton: false, + hasVoiceIdInput: true, + voiceSource: "config", + }, + "hyperbolic": { + hasModelSelector: true, + hasBrowseButton: false, + voiceSource: "config", + }, + "deepgram": { + hasModelSelector: false, + hasBrowseButton: true, + voiceSource: "api-language", + apiEndpoint: "/api/media-providers/tts/deepgram/voices", + }, + "huggingface": { + hasModelSelector: true, + hasBrowseButton: false, + voiceSource: "config", + }, + "cartesia": { + hasModelSelector: true, + hasBrowseButton: false, + hasVoiceIdInput: true, + voiceSource: "config", + }, + "playht": { + hasModelSelector: true, + hasBrowseButton: false, + hasVoiceIdInput: true, + 
voiceSource: "config", + }, + "coqui": { + hasModelSelector: true, + hasBrowseButton: false, + hasVoiceIdInput: true, + voiceSource: "config", + }, + "tortoise": { + hasModelSelector: true, + hasBrowseButton: false, + hasVoiceIdInput: true, + voiceSource: "config", + }, + "inworld": { + hasModelSelector: true, + hasBrowseButton: true, + hasVoiceIdInput: true, + voiceSource: "api-language", + modelKey: "inworld-tts-models", + apiEndpoint: "/api/media-providers/tts/inworld/voices", + }, + "qwen": { + hasModelSelector: true, + hasBrowseButton: false, + hasVoiceIdInput: true, + voiceSource: "config", + }, }; diff --git a/src/sse/handlers/tts.js b/src/sse/handlers/tts.js index f861209..c8a6c73 100644 --- a/src/sse/handlers/tts.js +++ b/src/sse/handlers/tts.js @@ -7,10 +7,15 @@ import { getModelInfo } from "../services/model.js"; import { handleTtsCore } from "open-sse/handlers/ttsCore.js"; import { errorResponse, unavailableResponse } from "open-sse/utils/error.js"; import { HTTP_STATUS } from "open-sse/config/runtimeConfig.js"; +import { AI_PROVIDERS } from "@/shared/constants/providers"; import * as log from "../utils/logger.js"; -// Providers that require stored credentials (not noAuth) -const CREDENTIALED_PROVIDERS = new Set(["openai", "elevenlabs", "openrouter"]); +// Derived from providers.js: any TTS provider not noAuth requires stored credentials +const CREDENTIALED_PROVIDERS = new Set( + Object.entries(AI_PROVIDERS) + .filter(([, p]) => p.serviceKinds?.includes("tts") && !p.noAuth && p.ttsConfig?.authType !== "none") + .map(([id]) => id) +); export async function handleTts(request) { let body;