feat: 新增 YouTube/Bilibili/XHS 搜索 CLI 命令 + 修复 B站搜索

- cli.py: 新增 search-youtube / search-bilibili / search-xhs 子命令
- core.py: 新增 search_youtube / search_bilibili / search_xhs 方法
- bilibili.py: 搜索策略改为 yt-dlp → Exa fallback(服务器 IP 请求 B站 时会被 HTTP 412 拒绝)
  - 本地机器走 yt-dlp bilisearch(有 title/uploader)
  - 服务器自动 fallback 到 Exa site:bilibili.com
  - 修复 mcporter 输出解析(Title:/URL: 格式)

测试结果: 15/15 全通(8 read + 7 search)
This commit is contained in:
Panniantong 2026-02-24 13:39:21 +01:00
parent 88eaebe263
commit 7d0da09222
3 changed files with 101 additions and 6 deletions

View file

@ -72,35 +72,59 @@ class BilibiliChannel(Channel):
)
async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
    """Search Bilibili.

    Strategy:
    1. Try yt-dlp bilisearch (works on local machines).
    2. Fall back to Exa ``site:bilibili.com`` (works on servers).

    Args:
        query: Search terms.
        config: Optional object with a ``.get`` method; only the
            ``bilibili_proxy`` key is read, and it is forwarded to yt-dlp.
        **kwargs: ``limit`` (default 5) is the number of results requested
            from each strategy.

    Returns:
        The results of the first strategy that yields any, otherwise ``[]``.

    Raises:
        RuntimeError: If yt-dlp is not installed and the Exa fallback
            produced nothing either.
    """
    limit = kwargs.get("limit", 5)
    proxy = config.get("bilibili_proxy") if config else None

    # A missing yt-dlp must not abort the search up front: the Exa fallback
    # does not need it, so only skip strategy 1 in that case.
    has_ytdlp = shutil.which("yt-dlp") is not None

    # Strategy 1: yt-dlp bilisearch
    if has_ytdlp:
        results = self._search_ytdlp(query, limit, proxy)
        if results:
            return results

    # Strategy 2: Exa fallback (server-friendly)
    results = self._search_exa(query, limit)
    if results:
        return results

    if not has_ytdlp:
        # Keep the original diagnostic for callers once every strategy failed.
        raise RuntimeError("yt-dlp not installed. Install: pip install yt-dlp")
    return []
def _search_ytdlp(self, query: str, limit: int, proxy: str = None) -> List[SearchResult]:
"""Search via yt-dlp bilisearch (needs local/Chinese IP)."""
cmd = [
"yt-dlp", "--dump-json", "--flat-playlist",
"yt-dlp", "--dump-json", "--no-download",
f"bilisearch{limit}:{query}",
]
if proxy:
cmd += ["--proxy", proxy]
try:
r = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
r = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if r.returncode != 0:
return []
results = []
for line in r.stdout.strip().split("\n"):
if not line.strip():
continue
try:
d = json.loads(line)
vid = d.get("id", "")
url = d.get("webpage_url", f"https://www.bilibili.com/video/av{vid}")
results.append(SearchResult(
title=d.get("title", ""),
url=f"https://www.bilibili.com/video/{d.get('id', '')}",
title=d.get("title", f"av{vid}"),
url=url,
snippet=f"👤 {d.get('uploader', '?')} · 👁 {d.get('view_count', '?')}",
extra={
"view_count": d.get("view_count"),
"uploader": d.get("uploader"),
"duration": d.get("duration_string"),
},
))
except json.JSONDecodeError:
@ -109,6 +133,38 @@ class BilibiliChannel(Channel):
except subprocess.TimeoutExpired:
return []
def _search_exa(self, query: str, limit: int) -> List[SearchResult]:
"""Fallback: search via Exa (site:bilibili.com). Works on any IP."""
try:
r = subprocess.run(
["mcporter", "call",
f'exa.web_search_exa(query: "site:bilibili.com {query}", numResults: {limit})'],
capture_output=True, text=True, timeout=30,
)
if r.returncode != 0:
return []
results = []
# Parse mcporter output: Title: / Author: / URL: / Text: blocks
title, author, url = "", "", ""
for line in r.stdout.split("\n"):
if line.startswith("Title: "):
title = line[7:].strip()
elif line.startswith("Author: "):
author = line[8:].strip()
elif line.startswith("URL: "):
url = line[5:].strip()
if url and "bilibili.com" in url:
results.append(SearchResult(
title=title or url,
url=url,
snippet=f"👤 {author}" if author else "(via Exa search)",
))
title, author, url = "", "", ""
return results
except Exception:
return []
def _get_info(self, url: str, proxy: str = None) -> dict:
cmd = ["yt-dlp", "--dump-json", "--no-download", url]
if proxy:

View file

@ -66,6 +66,21 @@ def main():
p_st.add_argument("query", nargs="+", help="Search query")
p_st.add_argument("-n", "--num", type=int, default=10, help="Number of results")
# ── search-youtube ──
p_sy = sub.add_parser("search-youtube", help="Search YouTube")
p_sy.add_argument("query", nargs="+", help="Search query")
p_sy.add_argument("-n", "--num", type=int, default=5, help="Number of results")
# ── search-bilibili ──
p_sb = sub.add_parser("search-bilibili", help="Search Bilibili")
p_sb.add_argument("query", nargs="+", help="Search query")
p_sb.add_argument("-n", "--num", type=int, default=5, help="Number of results")
# ── search-xhs ──
p_sx = sub.add_parser("search-xhs", help="Search XiaoHongShu")
p_sx.add_argument("query", nargs="+", help="Search query")
p_sx.add_argument("-n", "--num", type=int, default=10, help="Number of results")
# ── setup ──
sub.add_parser("setup", help="Interactive configuration wizard")
@ -575,6 +590,12 @@ async def _cmd_search(args):
results = await eyes.search_github(query, language=getattr(args, "lang", None), limit=num)
elif args.command == "search-twitter":
results = await eyes.search_twitter(query, limit=num)
elif args.command == "search-youtube":
results = await eyes.search_youtube(query, limit=num)
elif args.command == "search-bilibili":
results = await eyes.search_bilibili(query, limit=num)
elif args.command == "search-xhs":
results = await eyes.search_xhs(query, limit=num)
else:
print(f"Unknown command: {args.command}", file=sys.stderr)
sys.exit(1)

View file

@ -83,6 +83,24 @@ class AgentReach:
results = await ch.search(query, config=self.config, limit=limit)
return [r.to_dict() for r in results]
async def search_youtube(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Run a search on the ``"youtube"`` channel and return plain dicts.

    ``query`` and ``limit`` are passed straight to the channel's ``search``
    together with ``self.config``; every result is converted with its
    ``to_dict()`` method.
    """
    hits = await get_channel("youtube").search(query, config=self.config, limit=limit)
    as_dicts = []
    for hit in hits:
        as_dicts.append(hit.to_dict())
    return as_dicts
async def search_bilibili(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Run a search on the ``"bilibili"`` channel and return plain dicts.

    ``query`` and ``limit`` are passed straight to the channel's ``search``
    together with ``self.config``; every result is converted with its
    ``to_dict()`` method.
    """
    hits = await get_channel("bilibili").search(query, config=self.config, limit=limit)
    as_dicts = []
    for hit in hits:
        as_dicts.append(hit.to_dict())
    return as_dicts
async def search_xhs(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Run a search on the ``"xiaohongshu"`` channel and return plain dicts.

    ``query`` and ``limit`` are passed straight to the channel's ``search``
    together with ``self.config``; every result is converted with its
    ``to_dict()`` method.
    """
    hits = await get_channel("xiaohongshu").search(query, config=self.config, limit=limit)
    as_dicts = []
    for hit in hits:
        as_dicts.append(hit.to_dict())
    return as_dicts
# ── Health ──────────────────────────────────────────
def doctor(self) -> Dict[str, dict]: