birdx 从来不是 PyPI 包,pip install birdx 必然失败。 实际工具是 npm 包 @steipete/bird,一个 Twitter GraphQL CLI。 变更: - 安装器改用 npm install -g @steipete/bird - twitter.py 直接调 bird,通过环境变量传 AUTH_TOKEN/CT0 - 兼容已有的 birdx wrapper(shutil.which 回退) - 更新所有文档引用 - 重写 setup-twitter.md 指南
207 lines
7.5 KiB
Python
207 lines
7.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""Twitter/X — via bird CLI (free) or Jina Reader fallback.
|
|
|
|
Backend: bird (@steipete/bird npm package) for search/timeline
|
|
Jina Reader for single tweets
|
|
Swap to: any Twitter access tool
|
|
"""
|
|
|
|
import shutil
|
|
import subprocess
|
|
from urllib.parse import urlparse
|
|
from .base import Channel, ReadResult, SearchResult
|
|
from typing import List
|
|
import requests
|
|
|
|
|
|
def _bird_cmd():
    """Locate the bird CLI executable on PATH.

    The current binary name ``bird`` is tried first; the legacy ``birdx``
    wrapper name is accepted as a fallback.

    Returns:
        The path reported by ``shutil.which`` for the first name that
        resolves, or ``None`` when neither is installed.
    """
    for candidate in ("bird", "birdx"):
        location = shutil.which(candidate)
        if location:
            return location
    return None
|
|
|
|
|
|
def _bird_env(config=None):
    """Build the environment passed to the bird CLI subprocess.

    Starts from a copy of the current process environment and, when
    ``config`` provides them, adds the Twitter cookie values under the
    variable names ``AUTH_TOKEN`` and ``CT0``.

    Args:
        config: Optional mapping-like object exposing ``get``; the keys
            ``twitter_auth_token`` and ``twitter_ct0`` are read from it.
            Falsy values are ignored.

    Returns:
        A new ``dict`` of environment variables; ``os.environ`` itself is
        never modified.
    """
    import os

    child_env = dict(os.environ)
    if not config:
        return child_env

    # (environment variable, config key) pairs.
    cookie_vars = (
        ("AUTH_TOKEN", "twitter_auth_token"),
        ("CT0", "twitter_ct0"),
    )
    for var_name, config_key in cookie_vars:
        cookie = config.get(config_key)
        if cookie:
            child_env[var_name] = cookie
    return child_env
|
|
|
|
|
|
class TwitterChannel(Channel):
    """Channel for reading and searching Twitter/X content.

    Reading prefers the ``bird`` CLI when it is on PATH and falls back to
    Jina Reader. Searching prefers ``bird`` and falls back to the Exa
    search channel.
    """

    name = "twitter"
    description = "Twitter/X 推文"
    backends = ["bird", "Jina Reader"]
    tier = 0  # Single tweet reading is zero-config

    # Registrable domains served by this channel; subdomains also match.
    _HOSTS = ("x.com", "twitter.com")

    # Substrings that mark a Jina response as a login wall / error page
    # rather than real tweet content (X requires JavaScript).
    _UNUSABLE_INDICATORS = (
        "page doesn",  # "this page doesn't exist" (straight or curly apostrophe)
        "miss what",  # "Don't miss what's happening"
        "Something went wrong. Try reloading",
        "Log in](",  # Markdown link: [Log in](...)
    )

    def can_handle(self, url: str) -> bool:
        """Return True when ``url`` points at x.com / twitter.com.

        The host must equal one of the known domains or be a subdomain of
        it. A plain substring test would also claim unrelated hosts such
        as ``netflix.com`` or ``dropbox.com``, which contain ``x.com``.

        Args:
            url: Absolute URL to test. URLs without a network location
                (for example a missing scheme) are rejected.
        """
        host = (urlparse(url).hostname or "").rstrip(".").lower()
        if not host:
            return False
        return any(
            host == domain or host.endswith("." + domain)
            for domain in self._HOSTS
        )

    def check(self, config=None):
        """Report channel availability as a ``(status, message)`` tuple.

        The status is always ``"ok"`` because basic reading works through
        the Jina fallback; the message differs depending on whether the
        bird CLI is installed.
        """
        if _bird_cmd():
            return "ok", "搜索、时间线、发推全部可用"
        return "ok", "可读取推文。安装 bird + 配置 Cookie 可解锁搜索和发推"

    async def read(self, url: str, config=None) -> ReadResult:
        """Read a tweet, preferring the bird CLI over Jina Reader.

        Args:
            url: Tweet URL.
            config: Optional configuration forwarded to the bird
                environment builder.
        """
        bird = _bird_cmd()
        if bird:
            return await self._read_bird(url, bird, config)
        return await self._read_jina(url)

    async def _read_bird(self, url: str, bird: str, config=None) -> ReadResult:
        """Read a tweet through ``bird read``; fall back to Jina on failure.

        The fallback is used when the process cannot be started, times
        out, exits non-zero, or prints nothing.

        Args:
            url: Tweet URL.
            bird: Path of the bird executable.
            config: Optional configuration supplying Twitter cookies.
        """
        # NOTE(review): subprocess.run is synchronous and blocks the event
        # loop for up to the timeout; consider an executor if that matters.
        try:
            result = subprocess.run(
                [bird, "read", url],
                capture_output=True, text=True, timeout=30,
                env=_bird_env(config),
            )
        except (subprocess.TimeoutExpired, OSError):
            # A hung or vanished binary must not break reading altogether.
            return await self._read_jina(url)

        text = result.stdout.strip()
        if result.returncode != 0 or not text:
            return await self._read_jina(url)

        # The author handle, when present, is the first token of line one.
        first_line = text.split("\n", 1)[0]
        author = first_line.split()[0] if first_line.startswith("@") else ""

        return ReadResult(
            title=text[:100],
            content=text,
            url=url,
            author=author,
            platform="twitter",
        )

    @staticmethod
    def _unreadable_result(url: str) -> ReadResult:
        """Build the user-facing result shown when a tweet cannot be read."""
        return ReadResult(
            title="Twitter/X",
            content="⚠️ Could not read this tweet.\n"
            "The tweet may have been deleted, or the account is private.\n\n"
            "Tips:\n"
            "- Make sure the URL is correct\n"
            "- Try: bird read <url> (if bird CLI is installed)\n"
            "- For protected tweets, configure Twitter cookies: "
            "agent-reach configure twitter-cookies AUTH_TOKEN CT0",
            url=url,
            platform="twitter",
        )

    async def _read_jina(self, url: str) -> ReadResult:
        """Read a tweet through Jina Reader (``https://r.jina.ai/<url>``).

        Never raises: request failures and responses that look like a
        login wall or error page both yield an explanatory result.

        Args:
            url: Tweet URL.
        """
        try:
            resp = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "text/markdown"},
                timeout=15,
            )
            resp.raise_for_status()
            text = resp.text
        except Exception:
            # Deliberately broad: this is the last-resort reader, so any
            # failure is turned into a readable message instead of a crash.
            return self._unreadable_result(url)

        if any(marker in text for marker in self._UNUSABLE_INDICATORS):
            return self._unreadable_result(url)

        return ReadResult(
            title=text[:100] if text else url,
            content=text,
            url=url,
            platform="twitter",
        )

    async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
        """Search tweets via bird when installed, otherwise via Exa.

        Args:
            query: Search query.
            config: Optional configuration forwarded to the backend.
            **kwargs: ``limit`` (default 10) caps the number of results
                requested.
        """
        limit = kwargs.get("limit", 10)

        bird = _bird_cmd()
        if bird:
            return await self._search_bird(query, limit, bird, config)

        # Fallback to Exa
        return await self._search_exa(query, limit, config)

    async def _search_bird(self, query: str, limit: int, bird: str, config=None) -> List[SearchResult]:
        """Run ``bird search`` and parse its text output.

        Returns an empty list when the process cannot be started, times
        out, or exits non-zero.
        """
        try:
            result = subprocess.run(
                [bird, "search", query, "-n", str(limit)],
                capture_output=True, text=True, timeout=30,
                env=_bird_env(config),
            )
        except (subprocess.TimeoutExpired, OSError):
            return []

        if result.returncode != 0:
            return []
        return self._parse_bird_output(result.stdout)

    @staticmethod
    def _to_search_result(fields: dict, body_lines: list) -> SearchResult:
        """Combine parsed metadata and body lines into a SearchResult."""
        body = "\n".join(body_lines).strip()
        return SearchResult(
            title=body[:80],
            url=fields.get("url", ""),
            snippet=body,
            author=fields.get("author", ""),
            date=fields.get("date", ""),
        )

    def _parse_bird_output(self, text: str) -> List[SearchResult]:
        """Parse bird text output into SearchResults.

        Lines starting with ``─`` separate records. Within a record,
        ``@handle (...):`` gives the author, ``date:`` and ``url:`` lines
        give metadata, and every other line is tweet text.

        Args:
            text: Raw stdout of ``bird search``.

        Returns:
            One SearchResult per record that carried at least one
            metadata field.
        """
        results = []
        fields = {}
        body_lines = []

        for raw_line in text.strip().split("\n"):
            line = raw_line.strip()

            if line.startswith("─"):
                if fields:
                    results.append(self._to_search_result(fields, body_lines))
                # Always start the next record clean so stray text cannot
                # bleed into the following tweet.
                fields = {}
                body_lines = []
                continue

            if line.startswith("@") and line.endswith(":") and "(" in line:
                fields["author"] = line.split()[0]
            elif line.startswith("date:"):
                fields["date"] = line[5:].strip()
            elif line.startswith("url:"):
                fields["url"] = line[4:].strip()
            else:
                body_lines.append(line)

        # The last record may not be followed by a separator.
        if fields and body_lines:
            results.append(self._to_search_result(fields, body_lines))
        return results

    async def _search_exa(self, query: str, limit: int, config=None) -> List[SearchResult]:
        """Search x.com through the Exa channel using a ``site:`` filter."""
        # Imported lazily, as in the original, so the Exa channel is only
        # loaded when this fallback is actually used.
        from agent_reach.channels.exa_search import ExaSearchChannel

        exa = ExaSearchChannel()
        return await exa.search(f"site:x.com {query}", config=config, limit=limit)
|