Merge pull request #10 from Panniantong/fix/twitter-fetch-failed
fix: Twitter bird CLI fetch failed — 自动 fallback + 连通性检测
This commit is contained in:
commit
0be4f097d0
3 changed files with 179 additions and 6 deletions
|
|
@ -20,8 +20,14 @@ def _bird_cmd():
|
|||
|
||||
|
||||
def _bird_env(config=None):
|
||||
"""Build env dict with Twitter cookies for bird CLI."""
|
||||
"""Build env dict with Twitter cookies and proxy support for bird CLI.
|
||||
|
||||
Node.js native fetch() doesn't respect HTTP_PROXY/HTTPS_PROXY.
|
||||
We inject undici's EnvHttpProxyAgent via NODE_OPTIONS so bird
|
||||
automatically routes through the user's proxy.
|
||||
"""
|
||||
import os
|
||||
import tempfile
|
||||
env = os.environ.copy()
|
||||
if config:
|
||||
auth_token = config.get("twitter_auth_token")
|
||||
|
|
@ -30,9 +36,49 @@ def _bird_env(config=None):
|
|||
env["AUTH_TOKEN"] = auth_token
|
||||
if ct0:
|
||||
env["CT0"] = ct0
|
||||
|
||||
# Auto-inject undici proxy support if HTTP_PROXY/HTTPS_PROXY is set
|
||||
has_proxy = env.get("HTTPS_PROXY") or env.get("HTTP_PROXY") or env.get("https_proxy") or env.get("http_proxy")
|
||||
if has_proxy:
|
||||
bootstrap = _get_proxy_bootstrap_path()
|
||||
if bootstrap:
|
||||
npm_root = subprocess.run(
|
||||
["npm", "root", "-g"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
existing_opts = env.get("NODE_OPTIONS", "")
|
||||
env["NODE_OPTIONS"] = f"--require {bootstrap} {existing_opts}".strip()
|
||||
env["NODE_PATH"] = npm_root
|
||||
|
||||
return env
|
||||
|
||||
|
||||
def _get_proxy_bootstrap_path():
|
||||
"""Create/return a bootstrap JS file that sets up undici proxy for fetch."""
|
||||
import os
|
||||
import tempfile
|
||||
bootstrap_path = os.path.join(tempfile.gettempdir(), "agent-reach-undici-proxy.js")
|
||||
if not os.path.exists(bootstrap_path):
|
||||
# Check if undici is available
|
||||
npm_root = subprocess.run(
|
||||
["npm", "root", "-g"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
undici_path = os.path.join(npm_root, "undici", "index.js")
|
||||
if not os.path.exists(undici_path):
|
||||
return None
|
||||
with open(bootstrap_path, "w") as f:
|
||||
f.write(
|
||||
"try {\n"
|
||||
" const { EnvHttpProxyAgent, setGlobalDispatcher } = require('undici');\n"
|
||||
" if (process.env.HTTPS_PROXY || process.env.HTTP_PROXY) {\n"
|
||||
" setGlobalDispatcher(new EnvHttpProxyAgent());\n"
|
||||
" }\n"
|
||||
"} catch(e) {}\n"
|
||||
)
|
||||
return bootstrap_path
|
||||
|
||||
|
||||
class TwitterChannel(Channel):
|
||||
name = "twitter"
|
||||
description = "Twitter/X 推文"
|
||||
|
|
@ -45,8 +91,31 @@ class TwitterChannel(Channel):
|
|||
|
||||
def check(self, config=None):
    """Report whether Twitter/X features are usable.

    When the bird CLI is found, ``bird whoami`` is run (15 second timeout)
    with the environment built by ``_bird_env(config)`` to verify that it
    can actually connect.

    Args:
        config: Optional configuration object forwarded to ``_bird_env``.

    Returns:
        A ``(status, message)`` tuple where ``status`` is ``"ok"`` or
        ``"warn"`` and ``message`` is a user-facing explanation.
    """
    bird = _bird_cmd()
    if not bird:
        # Basic reading always works (Jina fallback)
        return "ok", "可读取推文。安装 bird + 配置 Cookie 可解锁搜索和发推"

    # Actually test bird connectivity
    try:
        result = subprocess.run(
            [bird, "whoami"],
            capture_output=True, timeout=15,
            encoding='utf-8', errors='replace',
            env=_bird_env(config),
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return "warn", "bird 已安装但连接超时。搜索将使用 Exa 替代"

    stdout = result.stdout or ""
    stderr = result.stderr or ""
    # Classify on the full output. The hint shown to the user is truncated
    # to 100 characters, so searching the hint could miss a "fetch failed"
    # that appears after other stderr output.
    fetch_failed = "fetch failed" in (stdout + "\n" + stderr).lower()

    if result.returncode == 0 and not fetch_failed:
        return "ok", "搜索、时间线、发推全部可用"

    if fetch_failed:
        return "warn", (
            "bird 已安装但连接失败(fetch failed)。可能原因:\n"
            " 1. Cookie 无效或过期 → 重新导出 Cookie\n"
            " 2. 需要代理但 Node.js fetch 不走系统代理 → 使用全局/透明代理(如 Clash TUN 模式、Proxifier)\n"
            " 3. 网络无法直连 x.com\n"
            " 搜索功能暂不可用,将使用 Exa 搜索作为替代"
        )

    error_hint = (stderr or stdout).strip()[:100]
    return "warn", f"bird 连接异常:{error_hint}。搜索将使用 Exa 替代"
|
||||
|
||||
async def read(self, url: str, config=None) -> ReadResult:
|
||||
|
|
@ -153,11 +222,19 @@ class TwitterChannel(Channel):
|
|||
env=_bird_env(config),
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return []
|
||||
stderr = (result.stderr or "").strip()
|
||||
if "fetch failed" in stderr.lower() or "fetch failed" in (result.stdout or "").lower():
|
||||
# bird can't connect — fall back to Exa silently
|
||||
return await self._search_exa(query, limit, config)
|
||||
return await self._search_exa(query, limit, config)
|
||||
|
||||
return self._parse_bird_output(result.stdout)
|
||||
parsed = self._parse_bird_output(result.stdout)
|
||||
if not parsed:
|
||||
# bird returned nothing — try Exa
|
||||
return await self._search_exa(query, limit, config)
|
||||
return parsed
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError):
|
||||
return []
|
||||
return await self._search_exa(query, limit, config)
|
||||
|
||||
def _parse_bird_output(self, text: str) -> List[SearchResult]:
|
||||
"""Parse bird text output into SearchResults."""
|
||||
|
|
|
|||
|
|
@ -423,6 +423,19 @@ def _install_system_deps():
|
|||
else:
|
||||
print(" ⬜ bird CLI requires Node.js (optional — Twitter reading still works via Jina)")
|
||||
|
||||
# ── undici (proxy support for Node.js fetch) ──
|
||||
if shutil.which("npm"):
|
||||
npm_root = subprocess.run(["npm", "root", "-g"], capture_output=True, text=True, timeout=5).stdout.strip()
|
||||
undici_path = os.path.join(npm_root, "undici", "index.js") if npm_root else ""
|
||||
if os.path.exists(undici_path):
|
||||
print(" ✅ undici already installed (Node.js proxy support)")
|
||||
else:
|
||||
try:
|
||||
subprocess.run(["npm", "install", "-g", "undici"], capture_output=True, text=True, timeout=60)
|
||||
print(" ✅ undici installed (Node.js proxy support)")
|
||||
except Exception:
|
||||
print(" ⬜ undici install failed (optional — bird may not work behind proxies)")
|
||||
|
||||
# ── instaloader (for Instagram) ──
|
||||
if shutil.which("instaloader"):
|
||||
print(" ✅ instaloader already installed")
|
||||
|
|
|
|||
83
docs/troubleshooting.md
Normal file
83
docs/troubleshooting.md
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
# Troubleshooting / 常见问题
|
||||
|
||||
## Twitter/X: bird CLI "fetch failed"
|
||||
|
||||
**症状:** `bird whoami` 或 `bird search` 返回 "fetch failed"
|
||||
|
||||
**原因:** bird CLI 使用 Node.js 原生 `fetch()` 发请求,而 Node.js 的 fetch **不走系统代理**(不读取 `HTTP_PROXY`/`HTTPS_PROXY` 环境变量)。如果你的网络环境需要代理才能访问 x.com,bird 就连不上。
|
||||
|
||||
**解决方案(按推荐顺序):**
|
||||
|
||||
### 方案 1:使用透明代理 / TUN 模式(推荐)
|
||||
|
||||
让代理工具接管所有网络流量,这样 bird 的 fetch 也会走代理:
|
||||
|
||||
- **Clash Verge / Clash for Windows:** 开启 TUN 模式(仅开启"系统代理"通常无效,因为 Node.js 的 fetch 不读取系统代理设置)
|
||||
- **Proxifier(Windows):** 添加规则让 Node.js 进程走代理
|
||||
- **macOS:** 在 Surge/ClashX Pro 中开启增强模式
|
||||
|
||||
### 方案 2:验证 Cookie 有效性
|
||||
|
||||
确认 Cookie 没过期:
|
||||
|
||||
1. 在浏览器里正常登录 x.com
|
||||
2. 用 [Cookie-Editor](https://chromewebstore.google.com/detail/cookie-editor/hlkenndednhfkekhgcdicdfddnkalmdm) 重新导出 Header String
|
||||
3. 重新配置:`agent-reach configure twitter-cookies "新的Cookie"`
|
||||
|
||||
### 方案 3:不用 bird,用 Exa 搜索替代
|
||||
|
||||
Agent Reach 在 bird 失败时会自动 fallback 到 Exa 搜索。Exa 支持搜索 x.com 上的内容,虽然不如 bird 实时,但不受代理限制:
|
||||
|
||||
```bash
|
||||
agent-reach search-twitter "query" # bird 失败时自动用 Exa
|
||||
agent-reach search "site:x.com query" # 直接用 Exa 搜索
|
||||
```
|
||||
|
||||
### 方案 4:配置 Node.js 全局代理(高级)
|
||||
|
||||
安装 `global-agent` 让 Node.js 的 fetch 走代理:
|
||||
|
||||
```bash
|
||||
npm install -g global-agent
|
||||
```
|
||||
|
||||
然后在运行 bird 前设置环境变量:
|
||||
|
||||
```bash
|
||||
# Linux / macOS
|
||||
export GLOBAL_AGENT_HTTP_PROXY=http://127.0.0.1:7890
|
||||
export NODE_OPTIONS="--require global-agent/bootstrap"
|
||||
bird search "test"
|
||||
|
||||
# Windows (PowerShell)
|
||||
$env:GLOBAL_AGENT_HTTP_PROXY = "http://127.0.0.1:7890"
|
||||
$env:NODE_OPTIONS = "--require global-agent/bootstrap"
|
||||
bird search "test"
|
||||
```
|
||||
|
||||
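> ⚠️ 注意:这个方案需要每次运行 bird 前都设置环境变量,不太方便。另外,`global-agent` 接管的是 Node.js 的 `http`/`https` 模块,对原生 `fetch()` 可能不生效;如果设置后仍然出现 "fetch failed",请改用方案 1。推荐用方案 1。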
|
||||
|
||||
---
|
||||
|
||||
## Boss直聘: "访问行为异常"
|
||||
|
||||
**症状:** mcp-bosszp 登录成功,但 API 请求返回"您的访问行为异常"
|
||||
|
||||
**原因:** Boss直聘的反爬机制会检测请求指纹(不只是 IP),Python requests 库的特征与真实浏览器不同。
|
||||
|
||||
**解决方案:**
|
||||
- **本地电脑:** 正常使用,一般不会被拦
|
||||
- **服务器:** 使用 Jina Reader 读取职位页面 + Exa 搜索职位信息作为替代
|
||||
|
||||
---
|
||||
|
||||
## Instagram: Checkpoint / 安全验证
|
||||
|
||||
**症状:** `instaloader --login` 触发 Instagram 安全验证
|
||||
|
||||
**原因:** Instagram 检测到从未见过的设备/位置登录。
|
||||
|
||||
**解决方案:**
|
||||
1. 在自己的浏览器登录 Instagram
|
||||
2. 用 Cookie-Editor 导出 Cookie
|
||||
3. 配置:`agent-reach configure instagram-cookies "sessionid=xxx; csrftoken=yyy; ..."`
|
||||
Loading…
Add table
Add a link
Reference in a new issue