diff --git a/agent_reach/channels/twitter.py b/agent_reach/channels/twitter.py index 87b0a84..77edc51 100644 --- a/agent_reach/channels/twitter.py +++ b/agent_reach/channels/twitter.py @@ -20,8 +20,14 @@ def _bird_cmd(): def _bird_env(config=None): - """Build env dict with Twitter cookies for bird CLI.""" + """Build env dict with Twitter cookies and proxy support for bird CLI. + + Node.js native fetch() doesn't respect HTTP_PROXY/HTTPS_PROXY. + We inject undici's EnvHttpProxyAgent via NODE_OPTIONS so bird + automatically routes through the user's proxy. + """ import os + import tempfile env = os.environ.copy() if config: auth_token = config.get("twitter_auth_token") @@ -30,9 +36,49 @@ def _bird_env(config=None): env["AUTH_TOKEN"] = auth_token if ct0: env["CT0"] = ct0 + + # Auto-inject undici proxy support if HTTP_PROXY/HTTPS_PROXY is set + has_proxy = env.get("HTTPS_PROXY") or env.get("HTTP_PROXY") or env.get("https_proxy") or env.get("http_proxy") + if has_proxy: + bootstrap = _get_proxy_bootstrap_path() + if bootstrap: + npm_root = subprocess.run( + ["npm", "root", "-g"], + capture_output=True, text=True, timeout=5, + ).stdout.strip() + existing_opts = env.get("NODE_OPTIONS", "") + env["NODE_OPTIONS"] = f"--require {bootstrap} {existing_opts}".strip() + env["NODE_PATH"] = npm_root + return env +def _get_proxy_bootstrap_path(): + """Create/return a bootstrap JS file that sets up undici proxy for fetch.""" + import os + import tempfile + bootstrap_path = os.path.join(tempfile.gettempdir(), "agent-reach-undici-proxy.js") + if not os.path.exists(bootstrap_path): + # Check if undici is available + npm_root = subprocess.run( + ["npm", "root", "-g"], + capture_output=True, text=True, timeout=5, + ).stdout.strip() + undici_path = os.path.join(npm_root, "undici", "index.js") + if not os.path.exists(undici_path): + return None + with open(bootstrap_path, "w") as f: + f.write( + "try {\n" + " const { EnvHttpProxyAgent, setGlobalDispatcher } = require('undici');\n" + " if 
(process.env.HTTPS_PROXY || process.env.HTTP_PROXY) {\n" + " setGlobalDispatcher(new EnvHttpProxyAgent());\n" + " }\n" + "} catch(e) {}\n" + ) + return bootstrap_path + + class TwitterChannel(Channel): name = "twitter" description = "Twitter/X 推文" @@ -45,8 +91,31 @@ class TwitterChannel(Channel): def check(self, config=None): # Basic reading always works (Jina fallback) - if _bird_cmd(): - return "ok", "搜索、时间线、发推全部可用" + bird = _bird_cmd() + if bird: + # Actually test bird connectivity + try: + result = subprocess.run( + [bird, "whoami"], + capture_output=True, timeout=15, + encoding='utf-8', errors='replace', + env=_bird_env(config), + ) + if result.returncode == 0 and "fetch failed" not in result.stdout.lower() and "fetch failed" not in result.stderr.lower(): + return "ok", "搜索、时间线、发推全部可用" + else: + error_hint = (result.stderr or result.stdout).strip()[:100] + if "fetch failed" in (error_hint + result.stdout).lower(): + return "warn", ( + f"bird 已安装但连接失败(fetch failed)。可能原因:\n" + " 1. Cookie 无效或过期 → 重新导出 Cookie\n" + " 2. 需要代理但 Node.js fetch 不走系统代理 → 使用全局/透明代理(如 Clash TUN 模式、Proxifier)\n" + " 3. 
网络无法直连 x.com\n" + " 搜索功能暂不可用,将使用 Exa 搜索作为替代" + ) + return "warn", f"bird 连接异常:{error_hint}。搜索将使用 Exa 替代" + except (subprocess.TimeoutExpired, FileNotFoundError): + return "warn", "bird 已安装但连接超时。搜索将使用 Exa 替代" return "ok", "可读取推文。安装 bird + 配置 Cookie 可解锁搜索和发推" async def read(self, url: str, config=None) -> ReadResult: @@ -153,11 +222,19 @@ class TwitterChannel(Channel): env=_bird_env(config), ) if result.returncode != 0: - return [] + stderr = (result.stderr or "").strip() + if "fetch failed" in stderr.lower() or "fetch failed" in (result.stdout or "").lower(): + # bird can't connect — fall back to Exa silently + return await self._search_exa(query, limit, config) + return await self._search_exa(query, limit, config) - return self._parse_bird_output(result.stdout) + parsed = self._parse_bird_output(result.stdout) + if not parsed: + # bird returned nothing — try Exa + return await self._search_exa(query, limit, config) + return parsed except (subprocess.TimeoutExpired, FileNotFoundError): - return [] + return await self._search_exa(query, limit, config) def _parse_bird_output(self, text: str) -> List[SearchResult]: """Parse bird text output into SearchResults.""" diff --git a/agent_reach/cli.py b/agent_reach/cli.py index f02f7fd..9d7d89e 100644 --- a/agent_reach/cli.py +++ b/agent_reach/cli.py @@ -423,6 +423,19 @@ def _install_system_deps(): else: print(" ⬜ bird CLI requires Node.js (optional — Twitter reading still works via Jina)") + # ── undici (proxy support for Node.js fetch) ── + if shutil.which("npm"): + npm_root = subprocess.run(["npm", "root", "-g"], capture_output=True, text=True, timeout=5).stdout.strip() + undici_path = os.path.join(npm_root, "undici", "index.js") if npm_root else "" + if os.path.exists(undici_path): + print(" ✅ undici already installed (Node.js proxy support)") + else: + try: + subprocess.run(["npm", "install", "-g", "undici"], capture_output=True, text=True, timeout=60) + print(" ✅ undici installed (Node.js proxy support)") + except 
Exception: + print(" ⬜ undici install failed (optional — bird may not work behind proxies)") + # ── instaloader (for Instagram) ── if shutil.which("instaloader"): print(" ✅ instaloader already installed") diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 0000000..880c7a0 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,83 @@ +# Troubleshooting / 常见问题 + +## Twitter/X: bird CLI "fetch failed" + +**症状:** `bird whoami` 或 `bird search` 返回 "fetch failed" + +**原因:** bird CLI 使用 Node.js 原生 `fetch()` 发请求,而 Node.js 的 fetch **不走系统代理**(不读取 `HTTP_PROXY`/`HTTPS_PROXY` 环境变量)。如果你的网络环境需要代理才能访问 x.com,bird 就连不上。 + +**解决方案(按推荐顺序):** + +### 方案 1:使用透明代理 / TUN 模式(推荐) + +让代理工具接管所有网络流量,这样 bird 的 fetch 也会走代理: + +- **Clash Verge / Clash for Windows:** 开启 TUN 模式或系统代理 +- **Proxifier(Windows):** 添加规则让 Node.js 进程走代理 +- **macOS:** 在 Surge/ClashX Pro 中开启增强模式 + +### 方案 2:验证 Cookie 有效性 + +确认 Cookie 没过期: + +1. 在浏览器里正常登录 x.com +2. 用 [Cookie-Editor](https://chromewebstore.google.com/detail/cookie-editor/hlkenndednhfkekhgcdicdfddnkalmdm) 重新导出 Header String +3. 
重新配置:`agent-reach configure twitter-cookies "新的Cookie"` + +### 方案 3:不用 bird,用 Exa 搜索替代 + +Agent Reach 在 bird 失败时会自动 fallback 到 Exa 搜索。Exa 支持搜索 x.com 上的内容,虽然不如 bird 实时,但不受代理限制: + +```bash +agent-reach search-twitter "query" # bird 失败时自动用 Exa +agent-reach search "site:x.com query" # 直接用 Exa 搜索 +``` + +### 方案 4:配置 Node.js 全局代理(高级) + +安装 `global-agent` 让 Node.js 的 fetch 走代理: + +```bash +npm install -g global-agent +``` + +然后在运行 bird 前设置环境变量: + +```bash +# Linux / macOS +export GLOBAL_AGENT_HTTP_PROXY=http://127.0.0.1:7890 +export NODE_OPTIONS="--require global-agent/bootstrap" +bird search "test" + +# Windows (PowerShell) +$env:GLOBAL_AGENT_HTTP_PROXY = "http://127.0.0.1:7890" +$env:NODE_OPTIONS = "--require global-agent/bootstrap" +bird search "test" +``` + +> ⚠️ 注意:这个方案需要每次运行 bird 前都设置环境变量,不太方便;另外 `global-agent` 接管的是 Node.js 的 `http`/`https` 模块,对上文所说的原生 `fetch()` 不一定生效(请先用 `bird whoami` 验证)。推荐用方案 1。 + +--- + +## Boss直聘: "访问行为异常" + +**症状:** mcp-bosszp 登录成功,但 API 请求返回"您的访问行为异常" + +**原因:** Boss直聘的反爬机制会检测请求指纹(不只是 IP),Python requests 库的特征与真实浏览器不同。 + +**解决方案:** +- **本地电脑:** 正常使用,一般不会被拦 +- **服务器:** 使用 Jina Reader 读取职位页面 + Exa 搜索职位信息作为替代 + +--- + +## Instagram: Checkpoint / 安全验证 + +**症状:** `instaloader --login` 触发 Instagram 安全验证 + +**原因:** Instagram 检测到从未见过的设备/位置登录。 + +**解决方案:** +1. 在自己的浏览器登录 Instagram +2. 用 Cookie-Editor 导出 Cookie +3. 配置:`agent-reach configure instagram-cookies "sessionid=xxx; csrftoken=yyy; ..."`