feat: 新增 YouTube/Bilibili/XHS 搜索 CLI 命令 + 修复 B站搜索
- cli.py: 新增 search-youtube / search-bilibili / search-xhs 子命令
- core.py: 新增 search_youtube / search_bilibili / search_xhs 方法
- bilibili.py: 搜索策略改为 yt-dlp → Exa fallback(服务器 IP 被 B站 412)
  - 本地机器走 yt-dlp bilisearch(有 title/uploader)
  - 服务器自动 fallback 到 Exa site:bilibili.com
  - 修复 mcporter 输出解析(Title:/URL: 格式)

测试结果: 15/15 全通(8 read + 7 search)
This commit is contained in:
parent
88eaebe263
commit
7d0da09222
3 changed files with 101 additions and 6 deletions
|
|
@ -72,35 +72,59 @@ class BilibiliChannel(Channel):
|
|||
)
|
||||
|
||||
async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
    """Search Bilibili.

    Strategy:
    1. Try yt-dlp bilisearch (works on local machines)
    2. Fallback to Exa site:bilibili.com (works on servers)

    Args:
        query: Free-text search query.
        config: Optional mapping-like object; its ``bilibili_proxy`` entry
            (if any) is passed to the yt-dlp strategy as the proxy.
        **kwargs: ``limit`` sets the maximum number of results (default 5).

    Returns:
        The results of the first strategy that yields any, or an empty
        list when both strategies come back empty.

    Raises:
        RuntimeError: If neither ``yt-dlp`` nor ``mcporter`` is on PATH,
            i.e. no search backend can run at all.
    """
    limit = kwargs.get("limit", 5)
    proxy = config.get("bilibili_proxy") if config else None

    have_ytdlp = shutil.which("yt-dlp") is not None
    # A missing yt-dlp must not block the Exa fallback; only give up (with
    # the install hint) when there is no backend available whatsoever.
    if not have_ytdlp and shutil.which("mcporter") is None:
        raise RuntimeError("yt-dlp not installed. Install: pip install yt-dlp")

    # Strategy 1: yt-dlp bilisearch
    if have_ytdlp:
        results = self._search_ytdlp(query, limit, proxy)
        if results:
            return results

    # Strategy 2: Exa fallback (server-friendly)
    results = self._search_exa(query, limit)
    if results:
        return results

    return []
|
||||
|
||||
def _search_ytdlp(self, query: str, limit: int, proxy: str = None) -> List[SearchResult]:
|
||||
"""Search via yt-dlp bilisearch (needs local/Chinese IP)."""
|
||||
cmd = [
|
||||
"yt-dlp", "--dump-json", "--flat-playlist",
|
||||
"yt-dlp", "--dump-json", "--no-download",
|
||||
f"bilisearch{limit}:{query}",
|
||||
]
|
||||
if proxy:
|
||||
cmd += ["--proxy", proxy]
|
||||
|
||||
try:
|
||||
r = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
|
||||
r = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
results = []
|
||||
for line in r.stdout.strip().split("\n"):
|
||||
if not line.strip():
|
||||
continue
|
||||
try:
|
||||
d = json.loads(line)
|
||||
vid = d.get("id", "")
|
||||
url = d.get("webpage_url", f"https://www.bilibili.com/video/av{vid}")
|
||||
results.append(SearchResult(
|
||||
title=d.get("title", ""),
|
||||
url=f"https://www.bilibili.com/video/{d.get('id', '')}",
|
||||
title=d.get("title", f"av{vid}"),
|
||||
url=url,
|
||||
snippet=f"👤 {d.get('uploader', '?')} · 👁 {d.get('view_count', '?')}",
|
||||
extra={
|
||||
"view_count": d.get("view_count"),
|
||||
"uploader": d.get("uploader"),
|
||||
"duration": d.get("duration_string"),
|
||||
},
|
||||
))
|
||||
except json.JSONDecodeError:
|
||||
|
|
@ -109,6 +133,38 @@ class BilibiliChannel(Channel):
|
|||
except subprocess.TimeoutExpired:
|
||||
return []
|
||||
|
||||
def _search_exa(self, query: str, limit: int) -> List[SearchResult]:
|
||||
"""Fallback: search via Exa (site:bilibili.com). Works on any IP."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["mcporter", "call",
|
||||
f'exa.web_search_exa(query: "site:bilibili.com {query}", numResults: {limit})'],
|
||||
capture_output=True, text=True, timeout=30,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return []
|
||||
|
||||
results = []
|
||||
# Parse mcporter output: Title: / Author: / URL: / Text: blocks
|
||||
title, author, url = "", "", ""
|
||||
for line in r.stdout.split("\n"):
|
||||
if line.startswith("Title: "):
|
||||
title = line[7:].strip()
|
||||
elif line.startswith("Author: "):
|
||||
author = line[8:].strip()
|
||||
elif line.startswith("URL: "):
|
||||
url = line[5:].strip()
|
||||
if url and "bilibili.com" in url:
|
||||
results.append(SearchResult(
|
||||
title=title or url,
|
||||
url=url,
|
||||
snippet=f"👤 {author}" if author else "(via Exa search)",
|
||||
))
|
||||
title, author, url = "", "", ""
|
||||
return results
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
def _get_info(self, url: str, proxy: str = None) -> dict:
|
||||
cmd = ["yt-dlp", "--dump-json", "--no-download", url]
|
||||
if proxy:
|
||||
|
|
|
|||
|
|
@ -66,6 +66,21 @@ def main():
|
|||
p_st.add_argument("query", nargs="+", help="Search query")
|
||||
p_st.add_argument("-n", "--num", type=int, default=10, help="Number of results")
|
||||
|
||||
# ── search-youtube ──
|
||||
p_sy = sub.add_parser("search-youtube", help="Search YouTube")
|
||||
p_sy.add_argument("query", nargs="+", help="Search query")
|
||||
p_sy.add_argument("-n", "--num", type=int, default=5, help="Number of results")
|
||||
|
||||
# ── search-bilibili ──
|
||||
p_sb = sub.add_parser("search-bilibili", help="Search Bilibili")
|
||||
p_sb.add_argument("query", nargs="+", help="Search query")
|
||||
p_sb.add_argument("-n", "--num", type=int, default=5, help="Number of results")
|
||||
|
||||
# ── search-xhs ──
|
||||
p_sx = sub.add_parser("search-xhs", help="Search XiaoHongShu")
|
||||
p_sx.add_argument("query", nargs="+", help="Search query")
|
||||
p_sx.add_argument("-n", "--num", type=int, default=10, help="Number of results")
|
||||
|
||||
# ── setup ──
|
||||
sub.add_parser("setup", help="Interactive configuration wizard")
|
||||
|
||||
|
|
@ -575,6 +590,12 @@ async def _cmd_search(args):
|
|||
results = await eyes.search_github(query, language=getattr(args, "lang", None), limit=num)
|
||||
elif args.command == "search-twitter":
|
||||
results = await eyes.search_twitter(query, limit=num)
|
||||
elif args.command == "search-youtube":
|
||||
results = await eyes.search_youtube(query, limit=num)
|
||||
elif args.command == "search-bilibili":
|
||||
results = await eyes.search_bilibili(query, limit=num)
|
||||
elif args.command == "search-xhs":
|
||||
results = await eyes.search_xhs(query, limit=num)
|
||||
else:
|
||||
print(f"Unknown command: {args.command}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -83,6 +83,24 @@ class AgentReach:
|
|||
results = await ch.search(query, config=self.config, limit=limit)
|
||||
return [r.to_dict() for r in results]
|
||||
|
||||
async def search_youtube(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Search YouTube via yt-dlp.

    Resolves the "youtube" channel, awaits its ``search`` with this
    instance's config and the given ``limit``, and returns each result
    converted with ``to_dict()``.
    """
    channel = get_channel("youtube")
    hits = await channel.search(query, config=self.config, limit=limit)
    payload = []
    for hit in hits:
        payload.append(hit.to_dict())
    return payload
|
||||
|
||||
async def search_bilibili(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Search Bilibili. Tries yt-dlp first, falls back to Exa.

    Resolves the "bilibili" channel, awaits its ``search`` with this
    instance's config and the given ``limit``, and returns each result
    converted with ``to_dict()``.
    """
    channel = get_channel("bilibili")
    hits = await channel.search(query, config=self.config, limit=limit)
    payload = []
    for hit in hits:
        payload.append(hit.to_dict())
    return payload
|
||||
|
||||
async def search_xhs(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Search XiaoHongShu via mcporter.

    Resolves the "xiaohongshu" channel, awaits its ``search`` with this
    instance's config and the given ``limit``, and returns each result
    converted with ``to_dict()``.
    """
    channel = get_channel("xiaohongshu")
    hits = await channel.search(query, config=self.config, limit=limit)
    payload = []
    for hit in hits:
        payload.append(hit.to_dict())
    return payload
|
||||
|
||||
# ── Health ──────────────────────────────────────────
|
||||
|
||||
def doctor(self) -> Dict[str, dict]:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue