# -*- coding: utf-8 -*-
"""V2EX — public API channel for topics, nodes, users, and replies."""

import json
import logging
import urllib.request
from typing import Any, Dict, List, Tuple
from urllib.parse import quote, urlencode, urlparse

from .base import Channel

_UA = "agent-reach/1.0"
_TIMEOUT = 10
_API_BASE = "https://www.v2ex.com/api"
_SITE_HOST = "v2ex.com"
# Topic list entries carry only a short preview of the body.
_PREVIEW_CHARS = 200

_log = logging.getLogger(__name__)


def _get_json(url: str) -> Any:
    """Fetch *url* and return the parsed JSON body.

    The request carries the module's User-Agent and uses the module-level
    timeout.  Errors raised by ``urllib`` (HTTP/network) and by JSON or
    UTF-8 decoding propagate to the caller.
    """
    req = urllib.request.Request(url, headers={"User-Agent": _UA})
    with urllib.request.urlopen(req, timeout=_TIMEOUT) as resp:
        return json.loads(resp.read().decode("utf-8"))


def _api_url(endpoint: str, **params: Any) -> str:
    """Build an API URL for *endpoint* with percent-encoded query params."""
    return f"{_API_BASE}/{endpoint}?{urlencode(params)}"


def _dict_items(payload: Any) -> List[Dict[str, Any]]:
    """Return the dict elements of *payload* if it is a list, else ``[]``."""
    if not isinstance(payload, list):
        return []
    return [entry for entry in payload if isinstance(entry, dict)]


def _summarize_topic(item: Dict[str, Any], default_node_name: str = "") -> Dict[str, Any]:
    """Project a raw API topic object onto the summary dict used in lists."""
    node = item.get("node") or {}
    content = item.get("content") or ""
    return {
        "id": item.get("id", 0),
        "title": item.get("title", ""),
        "url": item.get("url", ""),
        "replies": item.get("replies", 0),
        "node_name": node.get("name", default_node_name),
        "node_title": node.get("title", ""),
        "content": content[:_PREVIEW_CHARS],
        "created": item.get("created", 0),
    }


class V2EXChannel(Channel):
    """Channel that reads V2EX through its unauthenticated JSON API."""

    name = "v2ex"
    description = "V2EX 节点、主题与回复"
    backends = ["V2EX API (public)"]
    tier = 0

    # ------------------------------------------------------------------ #
    # URL routing
    # ------------------------------------------------------------------ #

    def can_handle(self, url: str) -> bool:
        """Return True when *url* points at ``v2ex.com`` or a subdomain of it.

        The host is compared exactly (or by ``.v2ex.com`` suffix) rather than
        by substring, so look-alike hosts such as ``notv2ex.com`` or
        ``v2ex.com.example.org`` are rejected.  URLs that cannot be parsed,
        or that have no host part, yield False.
        """
        try:
            host = urlparse(url).hostname or ""
        except ValueError:
            # urlparse rejects some malformed netlocs (e.g. bad IPv6 literal).
            return False
        host = host.lower().rstrip(".")
        return host == _SITE_HOST or host.endswith("." + _SITE_HOST)

    # ------------------------------------------------------------------ #
    # Health check
    # ------------------------------------------------------------------ #

    def check(self, config=None) -> Tuple[str, str]:
        """Probe the public API once and report ``(status, message)``.

        Returns ``("ok", ...)`` when the probe request succeeds and
        ``("warn", ...)`` with the error text when it raises.  *config* is
        accepted for interface compatibility and is not read here.
        """
        try:
            _get_json(
                "https://www.v2ex.com/api/topics/show.json?node_name=python&page=1"
            )
            return "ok", "公开 API 可用(热门主题、节点浏览、主题详情、用户信息)"
        except Exception as e:
            return "warn", f"V2EX API 连接失败(可能需要代理):{e}"

    # ------------------------------------------------------------------ #
    # Data-fetching methods
    # ------------------------------------------------------------------ #

    def get_hot_topics(self, limit: int = 20) -> list:
        """Return the current hot topics.

        Args:
            limit: Maximum number of topics to return; values below zero are
                treated as zero.

        Returns:
            A list of dicts with keys ``id``, ``title``, ``url``,
            ``replies``, ``node_name``, ``node_title``, ``content`` (first
            200 characters) and ``created``.  A payload that is not a JSON
            array yields an empty list.

        Raises:
            Whatever ``_get_json`` raises on HTTP, network or decoding errors.
        """
        data = _get_json(f"{_API_BASE}/topics/hot.json")
        items = _dict_items(data)[: max(limit, 0)]
        return [_summarize_topic(item) for item in items]

    def get_node_topics(self, node_name: str, limit: int = 20) -> list:
        """Return the latest topics of one node.

        Args:
            node_name: Node identifier, e.g. ``"python"``, ``"tech"``,
                ``"jobs"``.  It is percent-encoded before being sent.
            limit: Maximum number of topics to return; values below zero are
                treated as zero.

        Returns:
            A list of dicts with the same keys as ``get_hot_topics``.  When
            a topic carries no node name, *node_name* is used instead.

        Raises:
            Whatever ``_get_json`` raises on HTTP, network or decoding errors.
        """
        data = _get_json(_api_url("topics/show.json", node_name=node_name, page=1))
        items = _dict_items(data)[: max(limit, 0)]
        return [_summarize_topic(item, node_name) for item in items]

    def get_topic(self, topic_id: int) -> dict:
        """Return one topic together with the first page of its replies.

        Args:
            topic_id: Numeric topic id (the number in
                ``https://www.v2ex.com/t/<id>``).

        Returns:
            A dict with keys ``id``, ``title``, ``url``, ``content``,
            ``replies_count``, ``node_name``, ``node_title``, ``author``,
            ``created`` and ``replies`` (a list of dicts with ``author``,
            ``content``, ``created``).  Missing fields fall back to empty
            values; ``id`` and ``url`` fall back to values derived from
            *topic_id*.

        Raises:
            Whatever ``_get_json`` raises while fetching the topic itself.
            Failures while fetching replies are not raised (see below).
        """
        topic_data = _get_json(_api_url("topics/show.json", id=topic_id))
        # The endpoint answers with a list even for a single-id query.
        if isinstance(topic_data, list):
            topic_data = topic_data[0] if topic_data else {}
        topic = topic_data if isinstance(topic_data, dict) else {}

        node = topic.get("node") or {}
        member = topic.get("member") or {}

        # Replies are best-effort: the topic is still useful without them,
        # so a failure here is logged instead of propagated.
        try:
            replies_raw = _get_json(
                _api_url("replies/show.json", topic_id=topic_id, page=1)
            )
        except Exception as exc:
            _log.debug("V2EX replies fetch failed for topic %s: %s", topic_id, exc)
            replies_raw = []

        replies = [
            {
                "author": (reply.get("member") or {}).get("username", ""),
                "content": reply.get("content", ""),
                "created": reply.get("created", 0),
            }
            for reply in _dict_items(replies_raw)
        ]

        return {
            "id": topic.get("id", topic_id),
            "title": topic.get("title", ""),
            "url": topic.get("url", f"https://www.v2ex.com/t/{topic_id}"),
            "content": topic.get("content", ""),
            "replies_count": topic.get("replies", 0),
            "node_name": node.get("name", ""),
            "node_title": node.get("title", ""),
            "author": member.get("username", ""),
            "created": topic.get("created", 0),
            "replies": replies,
        }

    def get_user(self, username: str) -> dict:
        """Return the public profile of *username*.

        Args:
            username: V2EX user name.  It is percent-encoded before being
                sent.

        Returns:
            A dict with keys ``id``, ``username``, ``url``, ``website``,
            ``twitter``, ``psn``, ``github``, ``btc``, ``location``,
            ``bio``, ``avatar`` and ``created``.  ``avatar`` prefers the
            large avatar and falls back to the normal one when the large one
            is missing or empty.

        Raises:
            Whatever ``_get_json`` raises on HTTP, network or decoding errors.
        """
        data = _get_json(_api_url("members/show.json", username=username))
        if not isinstance(data, dict):
            data = {}
        return {
            "id": data.get("id", 0),
            "username": data.get("username", username),
            "url": data.get("url", f"https://www.v2ex.com/member/{quote(username)}"),
            "website": data.get("website", ""),
            "twitter": data.get("twitter", ""),
            "psn": data.get("psn", ""),
            "github": data.get("github", ""),
            "btc": data.get("btc", ""),
            "location": data.get("location", ""),
            "bio": data.get("bio", ""),
            "avatar": data.get("avatar_large") or data.get("avatar_normal") or "",
            "created": data.get("created", 0),
        }

    def search(self, query: str, limit: int = 10) -> list:
        """Report that search is unavailable and suggest alternatives.

        This method performs no network request.  It always returns a
        single-element list whose only dict has an ``"error"`` key; the
        message points at the site search URL for *query* (percent-encoded
        so the link is usable as-is) and at the Exa channel with a
        ``site:v2ex.com`` filter.

        Args:
            query: Search text, used only to build the suggested URL.
            limit: Accepted for interface compatibility; not used.

        Returns:
            ``[{"error": <message>}]``.
        """
        return [
            {
                "error": (
                    "V2EX 公开 API 不提供搜索端点。"
                    f"建议改用:https://www.v2ex.com/?q={quote(query)} "
                    "或通过 Exa channel 使用 site:v2ex.com 搜索。"
                )
            }
        ]
- Search and read 14 platforms: Twitter/X, Reddit, YouTube, GitHub, Bilibili, + Search and read 15 platforms: Twitter/X, Reddit, YouTube, GitHub, Bilibili, XiaoHongShu (小红书), Douyin (抖音), Weibo (微博), WeChat Articles (微信公众号), - LinkedIn, Instagram, RSS, Exa web search, and any web page. + LinkedIn, Instagram, V2EX, RSS, Exa web search, and any web page. One command install, zero config for 8 channels, agent-reach doctor for diagnostics. Use when: (1) user asks to search or read any of these platforms, (2) user shares a URL from any supported platform, @@ -15,6 +15,7 @@ description: > "search twitter", "read tweet", "youtube transcript", "search reddit", "read this link", "看这个链接", "B站", "bilibili", "抖音视频", "微信文章", "公众号", "LinkedIn", "GitHub issue", "RSS", "微博", + "V2EX", "v2ex", "节点", "看主题", "技术社区", "search online", "web search", "find information", "research", "帮我配", "configure twitter", "configure proxy", "帮我安装". metadata: @@ -141,6 +142,65 @@ mcporter call 'linkedin.search_people(keyword: "AI engineer", limit: 10)' Fallback: `curl -s "https://r.jina.ai/https://linkedin.com/in/username"` +## V2EX (public API) + +```bash +# 热门主题 +curl -s "https://www.v2ex.com/api/topics/hot.json" -H "User-Agent: agent-reach/1.0" + +# 节点主题(node_name 如 python、tech、jobs、qna) +curl -s "https://www.v2ex.com/api/topics/show.json?node_name=python&page=1" -H "User-Agent: agent-reach/1.0" + +# 主题详情(topic_id 从 URL 获取,如 https://www.v2ex.com/t/1234567) +curl -s "https://www.v2ex.com/api/topics/show.json?id=TOPIC_ID" -H "User-Agent: agent-reach/1.0" + +# 主题回复 +curl -s "https://www.v2ex.com/api/replies/show.json?topic_id=TOPIC_ID&page=1" -H "User-Agent: agent-reach/1.0" + +# 用户信息 +curl -s "https://www.v2ex.com/api/members/show.json?username=USERNAME" -H "User-Agent: agent-reach/1.0" +``` + +Python 调用示例(V2EXChannel): + +```python +from agent_reach.channels.v2ex import V2EXChannel + +ch = V2EXChannel() + +# 获取热门帖子(默认 20 条) +# 返回字段:id, title, url, replies, node_name, node_title, content(前200字), 
created +topics = ch.get_hot_topics(limit=10) +for t in topics: + print(f"[{t['node_title']}] {t['title']} ({t['replies']} 回复) {t['url']}") + print(f" id={t['id']} created={t['created']}") + +# 获取指定节点的最新帖子 +# 返回字段:id, title, url, replies, node_name, node_title, content(前200字), created +node_topics = ch.get_node_topics("python", limit=5) +for t in node_topics: + print(t["id"], t["title"], t["url"]) + +# 获取单个帖子详情 + 回复列表 +# 返回字段:id, title, url, content, replies_count, node_name, node_title, +# author, created, replies (list of {author, content, created}) +topic = ch.get_topic(1234567) +print(topic["title"], "—", topic["author"]) +for r in topic["replies"]: + print(f" {r['author']}: {r['content'][:80]}") + +# 获取用户信息 +# 返回字段:id, username, url, website, twitter, psn, github, btc, location, bio, avatar, created +user = ch.get_user("Livid") +print(user["username"], user["bio"], user["github"]) + +# 搜索(V2EX 公开 API 不支持,会返回说明信息) +result = ch.search("asyncio") +print(result[0]["error"]) # 提示使用站内搜索或 Exa channel +``` + +> No auth required. Results are public JSON. 
V2EX 节点名见 https://www.v2ex.com/planes + ## RSS (feedparser) ## RSS diff --git a/tests/test_channels.py b/tests/test_channels.py index f8cac6b..1785de8 100644 --- a/tests/test_channels.py +++ b/tests/test_channels.py @@ -1,11 +1,14 @@ # -*- coding: utf-8 -*- """Tests for channel registry basics and health checks.""" +import json import shutil import subprocess +from urllib.error import URLError from agent_reach.channels import get_all_channels, get_channel from agent_reach.channels.xiaohongshu import XiaoHongShuChannel +from agent_reach.channels.v2ex import V2EXChannel class TestChannelRegistry: @@ -23,6 +26,305 @@ class TestChannelRegistry: assert "web" in names assert "github" in names assert "twitter" in names + assert "v2ex" in names + + +class TestV2EXChannel: + def test_can_handle_v2ex_urls(self): + ch = V2EXChannel() + assert ch.can_handle("https://www.v2ex.com/t/1234567") + assert ch.can_handle("https://v2ex.com/go/python") + assert not ch.can_handle("https://github.com/user/repo") + assert not ch.can_handle("https://reddit.com/r/Python") + + def test_check_ok_when_api_reachable(self, monkeypatch): + import urllib.request + + class FakeResponse: + status = 200 + + def __enter__(self): + return self + + def __exit__(self, *args): + pass + + def read(self): + return b"[]" + + monkeypatch.setattr( + urllib.request, + "urlopen", + lambda req, timeout=None: FakeResponse(), + ) + status, msg = V2EXChannel().check() + assert status == "ok" + assert "公开 API 可用" in msg + + def test_check_warn_when_api_unreachable(self, monkeypatch): + import urllib.request + + def raise_error(req, timeout=None): + raise URLError("connection refused") + + monkeypatch.setattr(urllib.request, "urlopen", raise_error) + status, msg = V2EXChannel().check() + assert status == "warn" + assert "失败" in msg + + # ------------------------------------------------------------------ # + # get_hot_topics + # ------------------------------------------------------------------ # + + def 
test_get_hot_topics_returns_list(self, monkeypatch): + import urllib.request + + fake_data = [ + { + "id": 111, + "title": "Python 3.13 发布了", + "url": "https://www.v2ex.com/t/111", + "replies": 42, + "content": "发布公告内容", + "created": 1700000000, + "node": {"name": "python", "title": "Python"}, + }, + { + "id": 222, + "title": "Rust 好学吗", + "url": "https://www.v2ex.com/t/222", + "replies": 10, + "content": "", + "created": 1700000001, + "node": {"name": "rust", "title": "Rust"}, + }, + ] + + class FakeResponse: + status = 200 + + def __enter__(self): + return self + + def __exit__(self, *_): + pass + + def read(self): + return json.dumps(fake_data).encode() + + monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse()) + topics = V2EXChannel().get_hot_topics(limit=5) + assert len(topics) == 2 + assert topics[0]["id"] == 111 + assert topics[0]["title"] == "Python 3.13 发布了" + assert topics[0]["replies"] == 42 + assert topics[0]["node_name"] == "python" + assert topics[0]["node_title"] == "Python" + assert topics[0]["created"] == 1700000000 + + def test_get_hot_topics_respects_limit(self, monkeypatch): + import urllib.request + + fake_data = [ + {"id": i, "title": f"Topic {i}", "url": f"https://v2ex.com/t/{i}", "replies": i, + "content": "", "created": 1700000000 + i, "node": {"name": "tech", "title": "Tech"}} + for i in range(10) + ] + + class FakeResponse: + def __enter__(self): return self + def __exit__(self, *_): pass + def read(self): return json.dumps(fake_data).encode() + + monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse()) + topics = V2EXChannel().get_hot_topics(limit=3) + assert len(topics) == 3 + + def test_get_hot_topics_truncates_content(self, monkeypatch): + import urllib.request + + long_content = "A" * 300 + fake_data = [ + {"id": 1, "title": "Long post", "url": "https://v2ex.com/t/1", "replies": 0, + "content": long_content, "created": 1700000000, "node": {"name": "tech", "title": 
"Tech"}} + ] + + class FakeResponse: + def __enter__(self): return self + def __exit__(self, *_): pass + def read(self): return json.dumps(fake_data).encode() + + monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse()) + topics = V2EXChannel().get_hot_topics(limit=1) + assert len(topics[0]["content"]) == 200 + + # ------------------------------------------------------------------ # + # get_node_topics + # ------------------------------------------------------------------ # + + def test_get_node_topics(self, monkeypatch): + import urllib.request + + fake_data = [ + { + "id": 333, + "title": "Flask 部署问题", + "url": "https://www.v2ex.com/t/333", + "replies": 5, + "content": "求帮助", + "created": 1710000000, + "node": {"name": "python", "title": "Python"}, + } + ] + + class FakeResponse: + def __enter__(self): return self + def __exit__(self, *_): pass + def read(self): return json.dumps(fake_data).encode() + + monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse()) + topics = V2EXChannel().get_node_topics("python") + assert len(topics) == 1 + assert topics[0]["id"] == 333 + assert topics[0]["node_name"] == "python" + assert topics[0]["title"] == "Flask 部署问题" + assert topics[0]["created"] == 1710000000 + + # ------------------------------------------------------------------ # + # get_topic + # ------------------------------------------------------------------ # + + def test_get_topic_returns_detail_and_replies(self, monkeypatch): + import urllib.request + + topic_data = [ + { + "id": 999, + "title": "测试帖子", + "url": "https://www.v2ex.com/t/999", + "content": "帖子正文", + "replies": 2, + "node": {"name": "qna", "title": "问与答"}, + "member": {"username": "alice"}, + "created": 1700000000, + } + ] + replies_data = [ + { + "member": {"username": "bob"}, + "content": "第一条回复", + "created": 1700000100, + }, + { + "member": {"username": "carol"}, + "content": "第二条回复", + "created": 1700000200, + }, + ] + + call_count = 
{"n": 0} + + class FakeResponse: + def __init__(self, payload): + self._payload = payload + + def __enter__(self): return self + def __exit__(self, *_): pass + def read(self): return json.dumps(self._payload).encode() + + def fake_urlopen(req, timeout=None): + url = req.full_url + if "replies" in url: + return FakeResponse(replies_data) + return FakeResponse(topic_data) + + monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) + result = V2EXChannel().get_topic(999) + + assert result["id"] == 999 + assert result["title"] == "测试帖子" + assert result["author"] == "alice" + assert result["node_name"] == "qna" + assert len(result["replies"]) == 2 + assert result["replies"][0]["author"] == "bob" + assert result["replies"][1]["content"] == "第二条回复" + + def test_get_topic_handles_empty_replies(self, monkeypatch): + import urllib.request + + topic_data = [ + { + "id": 1, + "title": "孤独帖子", + "url": "https://www.v2ex.com/t/1", + "content": "", + "replies": 0, + "node": {"name": "offtopic", "title": "水"}, + "member": {"username": "dave"}, + "created": 0, + } + ] + + class FakeResponse: + def __init__(self, payload): self._payload = payload + def __enter__(self): return self + def __exit__(self, *_): pass + def read(self): return json.dumps(self._payload).encode() + + def fake_urlopen(req, timeout=None): + if "replies" in req.full_url: + return FakeResponse([]) + return FakeResponse(topic_data) + + monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen) + result = V2EXChannel().get_topic(1) + assert result["replies"] == [] + + # ------------------------------------------------------------------ # + # get_user + # ------------------------------------------------------------------ # + + def test_get_user_returns_profile(self, monkeypatch): + import urllib.request + + fake_user = { + "id": 42, + "username": "alice", + "url": "https://www.v2ex.com/member/alice", + "website": "https://alice.dev", + "twitter": "alice_tw", + "psn": "", + "github": "alice", + "btc": "", + 
"location": "Shanghai", + "bio": "Python dev", + "avatar_large": "https://cdn.v2ex.com/avatars/alice_large.png", + "created": 1500000000, + } + + class FakeResponse: + def __enter__(self): return self + def __exit__(self, *_): pass + def read(self): return json.dumps(fake_user).encode() + + monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse()) + user = V2EXChannel().get_user("alice") + + assert user["id"] == 42 + assert user["username"] == "alice" + assert user["github"] == "alice" + assert user["location"] == "Shanghai" + assert "alice_large.png" in user["avatar"] + + # ------------------------------------------------------------------ # + # search + # ------------------------------------------------------------------ # + + def test_search_returns_unavailable_notice(self): + result = V2EXChannel().search("python asyncio") + assert len(result) == 1 + assert "error" in result[0] + assert "V2EX" in result[0]["error"] class TestXiaoHongShuChannel: