# Source: Agent-Reach/agent_reach/channels/twitter.py
# (repository listing metadata at capture time: 184 lines, 6.9 KiB, Python)

# -*- coding: utf-8 -*-
"""Twitter/X — via birdx CLI (free) or Jina Reader fallback.

Backend: birdx (https://github.com/runesleo/birdx) for search/timeline and,
         when installed, for reading tweets; Jina Reader as the zero-config
         fallback for single tweets.
Swap to: any Twitter access tool.
"""
import shutil
import subprocess
from urllib.parse import urlparse
from .base import Channel, ReadResult, SearchResult
from typing import List
import requests
class TwitterChannel(Channel):
    """Channel for reading and searching Twitter/X content.

    Single tweets are read with the ``birdx`` CLI when it is on ``PATH`` and
    with Jina Reader otherwise. Search uses ``birdx`` when available and
    falls back to the Exa search channel.
    """

    name = "twitter"
    description = "Twitter/X 推文"
    backends = ["birdx", "Jina Reader"]
    tier = 0  # Single tweet reading is zero-config

    # Hosts served by this channel; any subdomain of these is accepted too.
    _DOMAINS = ("x.com", "twitter.com")

    # Substrings that mark a Jina response as a JS wall / login page rather
    # than real tweet content.
    _UNUSABLE_MARKERS = (
        "page doesn",  # "this page doesn't exist" (straight or curly apostrophe)
        "miss what",  # "Don't miss what's happening"
        "Something went wrong. Try reloading",
        "Log in](",  # Markdown link: [Log in](...)
    )

    # User-facing text returned whenever a tweet cannot be read.
    _UNREADABLE_MESSAGE = (
        "⚠️ Could not read this tweet.\n"
        "The tweet may have been deleted, or the account is private.\n\n"
        "Tips:\n"
        "- Make sure the URL is correct\n"
        "- Try: birdx read <url> (if birdx is installed)\n"
        "- For protected tweets, configure Twitter cookies: "
        "agent-reach configure twitter-cookies AUTH_TOKEN CT0"
    )

    # Characters a horizontal rule between tweets may be drawn with.
    # NOTE(review): the exact separator birdx prints is not visible in this
    # file — confirm against real birdx output.
    _RULE_CHARS = frozenset("─━═-=_*—")

    def can_handle(self, url: str) -> bool:
        """Return True when *url* points at x.com / twitter.com or a subdomain.

        The host is compared exactly (or as a dot-separated suffix) so that
        unrelated hosts which merely contain "x.com" — e.g. ``netflix.com`` —
        are not claimed by this channel.
        """
        try:
            host = urlparse(url).hostname or ""
        except ValueError:
            # urlparse rejects some malformed netlocs (e.g. broken IPv6).
            return False
        host = host.lower().rstrip(".")
        return any(
            host == domain or host.endswith("." + domain)
            for domain in self._DOMAINS
        )

    def check(self, config=None):
        """Return a ``(status, message)`` pair describing channel availability.

        The status is always ``"ok"`` because reading works without any
        setup; the message depends on whether ``birdx`` is found on ``PATH``.
        """
        # Basic reading always works (Jina fallback)
        if shutil.which("birdx"):
            return "ok", "搜索、时间线、发推全部可用"
        return "ok", "可读取推文。安装 birdx + 配置 Cookie 可解锁搜索和发推"

    async def read(self, url: str, config=None) -> ReadResult:
        """Read a single tweet, preferring birdx and falling back to Jina."""
        if shutil.which("birdx"):
            return await self._read_birdx(url)
        return await self._read_jina(url)

    async def _read_birdx(self, url: str) -> ReadResult:
        """Read *url* with ``birdx read``; fall back to Jina on any failure.

        Failure means: the process cannot be started, it times out, it exits
        non-zero, or it prints nothing.
        """
        try:
            result = subprocess.run(
                ["birdx", "read", url],
                capture_output=True, text=True, timeout=30,
            )
        except (subprocess.TimeoutExpired, OSError):
            # Same best-effort policy as _search_birdx: never raise because
            # the optional CLI misbehaved.
            return await self._read_jina(url)

        text = result.stdout.strip()
        if result.returncode != 0 or not text:
            return await self._read_jina(url)

        # The author handle, when present, is the first token of the first line.
        first_line = text.split("\n", 1)[0]
        author = first_line.split()[0] if first_line.startswith("@") else ""

        return ReadResult(
            title=text[:100],
            content=text,
            url=url,
            author=author,
            platform="twitter",
        )

    def _unreadable_result(self, url: str) -> ReadResult:
        """Build the standard "could not read this tweet" result for *url*."""
        return ReadResult(
            title="Twitter/X",
            content=self._UNREADABLE_MESSAGE,
            url=url,
            platform="twitter",
        )

    async def _read_jina(self, url: str) -> ReadResult:
        """Fetch *url* as markdown through Jina Reader (``r.jina.ai``).

        Returns the standard "could not read" result when the request fails
        or when the response looks like a login / error page instead of a
        tweet. This method does not raise for network or HTTP errors.
        """
        try:
            resp = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "text/markdown"},
                timeout=15,
            )
            resp.raise_for_status()
            text = resp.text
        except Exception:
            # Deliberate best-effort boundary: any fetch failure becomes a
            # readable hint for the user rather than an exception.
            return self._unreadable_result(url)

        # Detect unusable Jina responses for X/Twitter (JS-required pages)
        if any(marker in text for marker in self._UNUSABLE_MARKERS):
            return self._unreadable_result(url)

        return ReadResult(
            title=text[:100] if text else url,
            content=text,
            url=url,
            platform="twitter",
        )

    async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
        """Search tweets for *query*.

        Accepts an optional ``limit`` keyword (default 10). Uses birdx when it
        is on ``PATH``, otherwise the Exa search channel.
        """
        limit = kwargs.get("limit", 10)
        if shutil.which("birdx"):
            return await self._search_birdx(query, limit)
        # Fallback to Exa
        return await self._search_exa(query, limit, config)

    async def _search_birdx(self, query: str, limit: int) -> List[SearchResult]:
        """Run ``birdx search`` and parse its output; return ``[]`` on failure."""
        try:
            result = subprocess.run(
                ["birdx", "search", query, "-n", str(limit)],
                capture_output=True, text=True, timeout=30,
            )
        except (subprocess.TimeoutExpired, OSError):
            return []
        if result.returncode != 0:
            return []
        return self._parse_birdx_output(result.stdout)

    @classmethod
    def _is_separator(cls, line: str) -> bool:
        """Return True when *line* is a horizontal rule (3+ rule characters only)."""
        return len(line) >= 3 and all(ch in cls._RULE_CHARS for ch in line)

    @staticmethod
    def _to_search_result(record: dict, body_lines: List[str]) -> SearchResult:
        """Turn collected metadata and body lines into a ``SearchResult``."""
        body = "\n".join(body_lines).strip()
        return SearchResult(
            title=body[:80],
            url=record.get("url", ""),
            snippet=body,
            author=record.get("author", ""),
            date=record.get("date", ""),
        )

    def _parse_birdx_output(self, text: str) -> List[SearchResult]:
        """Parse birdx text output into SearchResults.

        Recognised lines (after stripping whitespace):

        * a rule line (see ``_is_separator``) ends the current tweet;
        * ``@handle (...):`` sets the author to the first token;
        * ``date:...`` / ``url:...`` set the date and URL;
        * anything else is tweet text.

        A tweet is emitted only if at least one metadata field was seen, so
        banner or summary text without metadata is ignored.
        """
        results: List[SearchResult] = []
        record: dict = {}
        body_lines: List[str] = []

        for raw_line in text.strip().splitlines():
            line = raw_line.strip()

            # An empty-prefix startswith() check matches every line, which
            # would make every line a separator and yield no results; test
            # for an actual rule line instead.
            if self._is_separator(line):
                if record:
                    results.append(self._to_search_result(record, body_lines))
                record, body_lines = {}, []
                continue

            if line.startswith("@") and line.endswith(":") and "(" in line:
                record["author"] = line.split()[0]
            elif line.startswith("date:"):
                record["date"] = line[len("date:"):].strip()
            elif line.startswith("url:"):
                record["url"] = line[len("url:"):].strip()
            else:
                body_lines.append(line)

        # Output may not end with a rule; flush the trailing tweet.
        if record:
            results.append(self._to_search_result(record, body_lines))
        return results

    async def _search_exa(self, query: str, limit: int, config=None) -> List[SearchResult]:
        """Search x.com through the Exa channel by prefixing ``site:x.com``."""
        # Imported at call time, as in the original code.
        from agent_reach.channels.exa_search import ExaSearchChannel

        exa = ExaSearchChannel()
        return await exa.search(f"site:x.com {query}", config=config, limit=limit)