feat(channels): add V2EX support via public API (zero-config, tier=0)

* feat(channels): add V2EX support via public API V2EX provides a public JSON API that requires no authentication. This PR adds: - agent_reach/channels/v2ex.py: V2EXChannel (tier=0, zero-config) - can_handle() matches v2ex.com URLs - check() verifies API reachability via urllib (no extra deps) - Register V2EXChannel in channels/__init__.py - SKILL.md: add V2EX section with curl examples for hot topics, node browsing, topic detail, replies, and user info - tests/test_channels.py: URL matching + mocked ok/warn check tests V2EX API endpoints used: GET /api/topics/hot.json — hot topics GET /api/topics/show.json — node topics / topic detail GET /api/replies/show.json — topic replies GET /api/members/show.json — user info * feat(channels): expand V2EX channel with data-fetching methods Add get_hot_topics, get_node_topics, get_topic, get_user, and search methods to V2EXChannel using stdlib urllib only (no new dependencies). Update unit tests and SKILL.md with Python call examples. * feat(v2ex): add data fetching methods to V2EXChannel
2026-03-12 14:29:07 +08:00 · 2026-03-12 14:29:07 +08:00 · 31f00b8d78
commit 31f00b8d78
parent ba565bd096
4 changed files with 579 additions and 3 deletions
--- a/agent_reach/channels/__init__.py
+++ b/agent_reach/channels/__init__.py
@ -21,9 +21,10 @@ from .linkedin import LinkedInChannel
 from .wechat import WeChatChannel
 from .weibo import WeiboChannel
 from .xiaoyuzhou import XiaoyuzhouChannel
+from .v2ex import V2EXChannel
+


-# Channel registry
 ALL_CHANNELS: List[Channel] = [
    GitHubChannel(),
    TwitterChannel(),
@ -36,6 +37,7 @@ ALL_CHANNELS: List[Channel] = [
    WeChatChannel(),
    WeiboChannel(),
    XiaoyuzhouChannel(),
+    V2EXChannel(),
    RSSChannel(),
    ExaSearchChannel(),
    WebChannel(),
--- a/agent_reach/channels/v2ex.py
+++ b/agent_reach/channels/v2ex.py
@ -0,0 +1,212 @@
+# -*- coding: utf-8 -*-
+"""V2EX — public API channel for topics, nodes, users, and replies."""
+
+import json
+import urllib.request
+from typing import Any
+from .base import Channel
+
+# User-Agent header value sent with every request made by _get_json.
+_UA = "agent-reach/1.0"
+# Timeout handed to urllib.request.urlopen for each request (seconds).
+_TIMEOUT = 10
+
+
+def _get_json(url: str) -> Any:
+    """Download *url* and decode its body as JSON.
+
+    The request carries the module's User-Agent and is bounded by the
+    module's timeout. HTTP and network errors are not caught here; they
+    propagate to the caller, as does a failure to decode or parse the body.
+    """
+    request = urllib.request.Request(url, headers={"User-Agent": _UA})
+    with urllib.request.urlopen(request, timeout=_TIMEOUT) as response:
+        raw_body = response.read()
+    text = raw_body.decode("utf-8")
+    return json.loads(text)
+
+
+class V2EXChannel(Channel):
+    """Read-only channel for V2EX backed by its public JSON API.
+
+    Every data method issues unauthenticated GET requests through the
+    module-level ``_get_json`` helper and reshapes the response into plain
+    dicts and lists. Errors raised by ``_get_json`` propagate to the caller
+    unless a method documents otherwise.
+    """
+
+    name = "v2ex"
+    description = "V2EX 节点、主题与回复"
+    backends = ["V2EX API (public)"]
+    # Tier 0 = zero-config: the public API needs no credentials.
+    tier = 0
+
+    # ------------------------------------------------------------------ #
+    # URL routing
+    # ------------------------------------------------------------------ #
+
+    def can_handle(self, url: str) -> bool:
+        """Return True when *url* points at v2ex.com or one of its subdomains.
+
+        The host is compared exactly (or by ``.v2ex.com`` suffix) rather than
+        by substring, so look-alike hosts such as ``notv2ex.com`` or
+        ``v2ex.com.example.org`` are rejected. URLs without a scheme have no
+        host component and therefore do not match.
+        """
+        from urllib.parse import urlparse
+
+        # .hostname strips any port/userinfo and is already lower-cased.
+        host = (urlparse(url).hostname or "").lower()
+        return host == "v2ex.com" or host.endswith(".v2ex.com")
+
+    # ------------------------------------------------------------------ #
+    # Health check
+    # ------------------------------------------------------------------ #
+
+    def check(self, config=None):
+        """Probe the public API and report whether it is reachable.
+
+        Args:
+            config: Accepted for interface compatibility; not used here.
+
+        Returns:
+            A ``(status, message)`` tuple: ``"ok"`` when the probe request
+            succeeds, ``"warn"`` (including the error text) when it fails.
+        """
+        try:
+            _get_json(
+                "https://www.v2ex.com/api/topics/show.json?node_name=python&page=1"
+            )
+        except Exception as e:
+            # Report every failure as a warning instead of raising.
+            return "warn", f"V2EX API 连接失败（可能需要代理）：{e}"
+        return "ok", "公开 API 可用（热门主题、节点浏览、主题详情、用户信息）"
+
+    # ------------------------------------------------------------------ #
+    # Data-fetching methods
+    # ------------------------------------------------------------------ #
+
+    @staticmethod
+    def _summarize_topic(item: dict, default_node_name: str = "") -> dict:
+        """Reduce one raw API topic object to the summary dict used in listings.
+
+        ``default_node_name`` is used for ``node_name`` when the topic carries
+        no node name of its own. ``content`` is cut to its first 200 characters.
+        """
+        node = item.get("node") or {}
+        content = item.get("content", "") or ""
+        return {
+            "id": item.get("id", 0),
+            "title": item.get("title", ""),
+            "url": item.get("url", ""),
+            "replies": item.get("replies", 0),
+            "node_name": node.get("name", default_node_name),
+            "node_title": node.get("title", ""),
+            "content": content[:200],
+            "created": item.get("created", 0),
+        }
+
+    def get_hot_topics(self, limit: int = 20) -> list:
+        """Return the current hot topics.
+
+        Args:
+            limit: Maximum number of topics to return.
+
+        Returns:
+            A list of dicts with keys: id, title, url, replies, node_name,
+            node_title, content (first 200 characters), created.
+        """
+        data = _get_json("https://www.v2ex.com/api/topics/hot.json")
+        return [self._summarize_topic(item) for item in data[:limit]]
+
+    def get_node_topics(self, node_name: str, limit: int = 20) -> list:
+        """Return the first page of topics of one node.
+
+        Args:
+            node_name: Node name, e.g. "python", "tech", "jobs".
+            limit: Maximum number of topics to return.
+
+        Returns:
+            A list of dicts with keys: id, title, url, replies, node_name,
+            node_title, content (first 200 characters), created.
+        """
+        from urllib.parse import urlencode
+
+        # Encode the node name so characters such as "&" or spaces cannot
+        # corrupt the query string.
+        query = urlencode({"node_name": node_name, "page": 1})
+        data = _get_json(f"https://www.v2ex.com/api/topics/show.json?{query}")
+        return [self._summarize_topic(item, node_name) for item in data[:limit]]
+
+    def get_topic(self, topic_id: int) -> dict:
+        """Return one topic together with the first page of its replies.
+
+        Fetching the replies is best-effort: if that request fails or returns
+        something other than a list, ``replies`` is an empty list. A failure
+        of the topic request itself propagates.
+
+        Args:
+            topic_id: Topic ID (the number in https://www.v2ex.com/t/<id>).
+
+        Returns:
+            A dict with keys: id, title, url, content, replies_count,
+            node_name, node_title, author, created, replies (a list of dicts
+            with keys author, content, created).
+        """
+        from urllib.parse import urlencode
+
+        topic_data = _get_json(
+            "https://www.v2ex.com/api/topics/show.json?"
+            + urlencode({"id": topic_id})
+        )
+        # API returns a list even for single-ID queries
+        if isinstance(topic_data, list):
+            topic = topic_data[0] if topic_data else {}
+        else:
+            topic = topic_data
+
+        node = topic.get("node") or {}
+        member = topic.get("member") or {}
+
+        # Fetch replies (first page)
+        try:
+            replies_raw = _get_json(
+                "https://www.v2ex.com/api/replies/show.json?"
+                + urlencode({"topic_id": topic_id, "page": 1})
+            )
+        except Exception:
+            # Deliberate best-effort: the topic is still useful without replies.
+            replies_raw = []
+        if not isinstance(replies_raw, list):
+            # e.g. null or an error object instead of a list of replies
+            replies_raw = []
+
+        replies = [
+            {
+                "author": (r.get("member") or {}).get("username", ""),
+                "content": r.get("content", ""),
+                "created": r.get("created", 0),
+            }
+            for r in replies_raw
+        ]
+
+        return {
+            "id": topic.get("id", topic_id),
+            "title": topic.get("title", ""),
+            "url": topic.get("url", f"https://www.v2ex.com/t/{topic_id}"),
+            "content": topic.get("content", ""),
+            "replies_count": topic.get("replies", 0),
+            "node_name": node.get("name", ""),
+            "node_title": node.get("title", ""),
+            "author": member.get("username", ""),
+            "created": topic.get("created", 0),
+            "replies": replies,
+        }
+
+    def get_user(self, username: str) -> dict:
+        """Return the public profile of a user.
+
+        Args:
+            username: V2EX user name.
+
+        Returns:
+            A dict with keys: id, username, url, website, twitter, psn,
+            github, btc, location, bio, avatar, created. ``avatar`` is the
+            large avatar when available, otherwise the normal one.
+        """
+        from urllib.parse import quote, urlencode
+
+        data = _get_json(
+            "https://www.v2ex.com/api/members/show.json?"
+            + urlencode({"username": username})
+        )
+        # Used only when the API response carries no "url" of its own.
+        fallback_url = "https://www.v2ex.com/member/" + quote(username, safe="")
+        return {
+            "id": data.get("id", 0),
+            "username": data.get("username", username),
+            "url": data.get("url", fallback_url),
+            "website": data.get("website", ""),
+            "twitter": data.get("twitter", ""),
+            "psn": data.get("psn", ""),
+            "github": data.get("github", ""),
+            "btc": data.get("btc", ""),
+            "location": data.get("location", ""),
+            "bio": data.get("bio", ""),
+            # Fall through empty/None values, not just missing keys.
+            "avatar": data.get("avatar_large") or data.get("avatar_normal") or "",
+            "created": data.get("created", 0),
+        }
+
+    def search(self, query: str, limit: int = 10) -> list:
+        """Explain that search is unavailable and point to alternatives.
+
+        This method performs no network request; it only returns a notice
+        that suggests the V2EX site search URL for *query* or the Exa channel
+        with ``site:v2ex.com``.
+
+        Args:
+            query: Search terms; URL-encoded into the suggested link.
+            limit: Unused; kept so the signature matches a capped search.
+
+        Returns:
+            A one-element list ``[{"error": message}]``.
+        """
+        from urllib.parse import quote_plus
+
+        return [
+            {
+                "error": (
+                    "V2EX 公开 API 不提供搜索端点。"
+                    f"建议改用：https://www.v2ex.com/?q={quote_plus(query)} "
+                    "或通过 Exa channel 使用 site:v2ex.com 搜索。"
+                )
+            }
+        ]
--- a/agent_reach/skill/SKILL.md
+++ b/agent_reach/skill/SKILL.md
@ -2,9 +2,9 @@
 name: agent-reach
 description: >
  Give your AI agent eyes to see the entire internet. 7500+ GitHub stars.
-  Search and read 14 platforms: Twitter/X, Reddit, YouTube, GitHub, Bilibili,
+  Search and read 15 platforms: Twitter/X, Reddit, YouTube, GitHub, Bilibili,
  XiaoHongShu (小红书), Douyin (抖音), Weibo (微博), WeChat Articles (微信公众号),
-  LinkedIn, Instagram, RSS, Exa web search, and any web page.
+  LinkedIn, Instagram, V2EX, RSS, Exa web search, and any web page.
  One command install, zero config for 8 channels, agent-reach doctor for diagnostics.
  Use when: (1) user asks to search or read any of these platforms,
  (2) user shares a URL from any supported platform,
@ -15,6 +15,7 @@ description: >
  "search twitter", "read tweet", "youtube transcript", "search reddit",
  "read this link", "看这个链接", "B站", "bilibili", "抖音视频",
  "微信文章", "公众号", "LinkedIn", "GitHub issue", "RSS", "微博",
+  "V2EX", "v2ex", "节点", "看主题", "技术社区",
  "search online", "web search", "find information", "research",
  "帮我配", "configure twitter", "configure proxy", "帮我安装".
 metadata:
@ -141,6 +142,65 @@ mcporter call 'linkedin.search_people(keyword: "AI engineer", limit: 10)'

 Fallback: `curl -s "https://r.jina.ai/https://linkedin.com/in/username"`

+## V2EX (public API)
+
+```bash
+# 热门主题
+curl -s "https://www.v2ex.com/api/topics/hot.json" -H "User-Agent: agent-reach/1.0"
+
+# 节点主题（node_name 如 python、tech、jobs、qna）
+curl -s "https://www.v2ex.com/api/topics/show.json?node_name=python&page=1" -H "User-Agent: agent-reach/1.0"
+
+# 主题详情（topic_id 从 URL 获取，如 https://www.v2ex.com/t/1234567）
+curl -s "https://www.v2ex.com/api/topics/show.json?id=TOPIC_ID" -H "User-Agent: agent-reach/1.0"
+
+# 主题回复
+curl -s "https://www.v2ex.com/api/replies/show.json?topic_id=TOPIC_ID&page=1" -H "User-Agent: agent-reach/1.0"
+
+# 用户信息
+curl -s "https://www.v2ex.com/api/members/show.json?username=USERNAME" -H "User-Agent: agent-reach/1.0"
+```
+
+Python 调用示例（V2EXChannel）：
+
+```python
+from agent_reach.channels.v2ex import V2EXChannel
+
+ch = V2EXChannel()
+
+# 获取热门帖子（默认 20 条）
+# 返回字段：id, title, url, replies, node_name, node_title, content(前200字), created
+topics = ch.get_hot_topics(limit=10)
+for t in topics:
+    print(f"[{t['node_title']}] {t['title']} ({t['replies']} 回复) {t['url']}")
+    print(f"  id={t['id']} created={t['created']}")
+
+# 获取指定节点的最新帖子
+# 返回字段：id, title, url, replies, node_name, node_title, content(前200字), created
+node_topics = ch.get_node_topics("python", limit=5)
+for t in node_topics:
+    print(t["id"], t["title"], t["url"])
+
+# 获取单个帖子详情 + 回复列表
+# 返回字段：id, title, url, content, replies_count, node_name, node_title,
+#           author, created, replies (list of {author, content, created})
+topic = ch.get_topic(1234567)
+print(topic["title"], "—", topic["author"])
+for r in topic["replies"]:
+    print(f"  {r['author']}: {r['content'][:80]}")
+
+# 获取用户信息
+# 返回字段：id, username, url, website, twitter, psn, github, btc, location, bio, avatar, created
+user = ch.get_user("Livid")
+print(user["username"], user["bio"], user["github"])
+
+# 搜索（V2EX 公开 API 不支持，会返回说明信息）
+result = ch.search("asyncio")
+print(result[0]["error"])  # 提示使用站内搜索或 Exa channel
+```
+
+> No auth required. Results are public JSON. V2EX 节点名见 https://www.v2ex.com/planes
+
 ## RSS (feedparser)

 ## RSS
--- a/tests/test_channels.py
+++ b/tests/test_channels.py
@ -1,11 +1,14 @@
 # -*- coding: utf-8 -*-
 """Tests for channel registry basics and health checks."""

+import json
 import shutil
 import subprocess
+from urllib.error import URLError

 from agent_reach.channels import get_all_channels, get_channel
 from agent_reach.channels.xiaohongshu import XiaoHongShuChannel
+from agent_reach.channels.v2ex import V2EXChannel


 class TestChannelRegistry:
@ -23,6 +26,305 @@ class TestChannelRegistry:
        assert "web" in names
        assert "github" in names
        assert "twitter" in names
+        assert "v2ex" in names
+
+
+class TestV2EXChannel:
+    """Unit tests for V2EXChannel; every HTTP call is served by a stub."""
+
+    @staticmethod
+    def _stub_api(monkeypatch, responder):
+        """Patch ``urllib.request.urlopen`` so no real request is made.
+
+        ``responder`` is called with the requested URL and returns the object
+        to serve as the JSON body; it may raise to simulate a network failure.
+        """
+        import urllib.request
+
+        class FakeResponse:
+            def __init__(self, payload):
+                self._body = json.dumps(payload).encode()
+
+            def __enter__(self):
+                return self
+
+            def __exit__(self, *_):
+                return False
+
+            def read(self):
+                return self._body
+
+        def fake_urlopen(req, timeout=None):
+            return FakeResponse(responder(req.full_url))
+
+        monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)
+
+    @staticmethod
+    def _topic(topic_id, **overrides):
+        """Build a raw API topic object with sensible defaults for list tests."""
+        topic = {
+            "id": topic_id,
+            "title": f"Topic {topic_id}",
+            "url": f"https://v2ex.com/t/{topic_id}",
+            "replies": 0,
+            "content": "",
+            "created": 1700000000,
+            "node": {"name": "tech", "title": "Tech"},
+        }
+        topic.update(overrides)
+        return topic
+
+    def test_can_handle_v2ex_urls(self):
+        ch = V2EXChannel()
+        assert ch.can_handle("https://www.v2ex.com/t/1234567")
+        assert ch.can_handle("https://v2ex.com/go/python")
+        assert not ch.can_handle("https://github.com/user/repo")
+        assert not ch.can_handle("https://reddit.com/r/Python")
+
+    def test_check_ok_when_api_reachable(self, monkeypatch):
+        self._stub_api(monkeypatch, lambda url: [])
+        status, msg = V2EXChannel().check()
+        assert status == "ok"
+        assert "公开 API 可用" in msg
+
+    def test_check_warn_when_api_unreachable(self, monkeypatch):
+        def refuse(url):
+            raise URLError("connection refused")
+
+        self._stub_api(monkeypatch, refuse)
+        status, msg = V2EXChannel().check()
+        assert status == "warn"
+        assert "失败" in msg
+
+    # ------------------------------------------------------------------ #
+    # get_hot_topics
+    # ------------------------------------------------------------------ #
+
+    def test_get_hot_topics_returns_list(self, monkeypatch):
+        fake_data = [
+            {
+                "id": 111,
+                "title": "Python 3.13 发布了",
+                "url": "https://www.v2ex.com/t/111",
+                "replies": 42,
+                "content": "发布公告内容",
+                "created": 1700000000,
+                "node": {"name": "python", "title": "Python"},
+            },
+            {
+                "id": 222,
+                "title": "Rust 好学吗",
+                "url": "https://www.v2ex.com/t/222",
+                "replies": 10,
+                "content": "",
+                "created": 1700000001,
+                "node": {"name": "rust", "title": "Rust"},
+            },
+        ]
+        self._stub_api(monkeypatch, lambda url: fake_data)
+
+        topics = V2EXChannel().get_hot_topics(limit=5)
+
+        assert len(topics) == 2
+        assert topics[0]["id"] == 111
+        assert topics[0]["title"] == "Python 3.13 发布了"
+        assert topics[0]["replies"] == 42
+        assert topics[0]["node_name"] == "python"
+        assert topics[0]["node_title"] == "Python"
+        assert topics[0]["created"] == 1700000000
+
+    def test_get_hot_topics_respects_limit(self, monkeypatch):
+        fake_data = [
+            self._topic(i, replies=i, created=1700000000 + i) for i in range(10)
+        ]
+        self._stub_api(monkeypatch, lambda url: fake_data)
+
+        topics = V2EXChannel().get_hot_topics(limit=3)
+
+        assert len(topics) == 3
+
+    def test_get_hot_topics_truncates_content(self, monkeypatch):
+        fake_data = [self._topic(1, title="Long post", content="A" * 300)]
+        self._stub_api(monkeypatch, lambda url: fake_data)
+
+        topics = V2EXChannel().get_hot_topics(limit=1)
+
+        assert len(topics[0]["content"]) == 200
+
+    # ------------------------------------------------------------------ #
+    # get_node_topics
+    # ------------------------------------------------------------------ #
+
+    def test_get_node_topics(self, monkeypatch):
+        fake_data = [
+            {
+                "id": 333,
+                "title": "Flask 部署问题",
+                "url": "https://www.v2ex.com/t/333",
+                "replies": 5,
+                "content": "求帮助",
+                "created": 1710000000,
+                "node": {"name": "python", "title": "Python"},
+            }
+        ]
+        self._stub_api(monkeypatch, lambda url: fake_data)
+
+        topics = V2EXChannel().get_node_topics("python")
+
+        assert len(topics) == 1
+        assert topics[0]["id"] == 333
+        assert topics[0]["node_name"] == "python"
+        assert topics[0]["title"] == "Flask 部署问题"
+        assert topics[0]["created"] == 1710000000
+
+    # ------------------------------------------------------------------ #
+    # get_topic
+    # ------------------------------------------------------------------ #
+
+    def test_get_topic_returns_detail_and_replies(self, monkeypatch):
+        topic_data = [
+            {
+                "id": 999,
+                "title": "测试帖子",
+                "url": "https://www.v2ex.com/t/999",
+                "content": "帖子正文",
+                "replies": 2,
+                "node": {"name": "qna", "title": "问与答"},
+                "member": {"username": "alice"},
+                "created": 1700000000,
+            }
+        ]
+        replies_data = [
+            {
+                "member": {"username": "bob"},
+                "content": "第一条回复",
+                "created": 1700000100,
+            },
+            {
+                "member": {"username": "carol"},
+                "content": "第二条回复",
+                "created": 1700000200,
+            },
+        ]
+        # The replies endpoint is the only URL containing "replies".
+        self._stub_api(
+            monkeypatch,
+            lambda url: replies_data if "replies" in url else topic_data,
+        )
+
+        result = V2EXChannel().get_topic(999)
+
+        assert result["id"] == 999
+        assert result["title"] == "测试帖子"
+        assert result["author"] == "alice"
+        assert result["node_name"] == "qna"
+        assert len(result["replies"]) == 2
+        assert result["replies"][0]["author"] == "bob"
+        assert result["replies"][1]["content"] == "第二条回复"
+
+    def test_get_topic_handles_empty_replies(self, monkeypatch):
+        topic_data = [
+            {
+                "id": 1,
+                "title": "孤独帖子",
+                "url": "https://www.v2ex.com/t/1",
+                "content": "",
+                "replies": 0,
+                "node": {"name": "offtopic", "title": "水"},
+                "member": {"username": "dave"},
+                "created": 0,
+            }
+        ]
+        self._stub_api(
+            monkeypatch,
+            lambda url: [] if "replies" in url else topic_data,
+        )
+
+        result = V2EXChannel().get_topic(1)
+
+        assert result["replies"] == []
+
+    # ------------------------------------------------------------------ #
+    # get_user
+    # ------------------------------------------------------------------ #
+
+    def test_get_user_returns_profile(self, monkeypatch):
+        fake_user = {
+            "id": 42,
+            "username": "alice",
+            "url": "https://www.v2ex.com/member/alice",
+            "website": "https://alice.dev",
+            "twitter": "alice_tw",
+            "psn": "",
+            "github": "alice",
+            "btc": "",
+            "location": "Shanghai",
+            "bio": "Python dev",
+            "avatar_large": "https://cdn.v2ex.com/avatars/alice_large.png",
+            "created": 1500000000,
+        }
+        self._stub_api(monkeypatch, lambda url: fake_user)
+
+        user = V2EXChannel().get_user("alice")
+
+        assert user["id"] == 42
+        assert user["username"] == "alice"
+        assert user["github"] == "alice"
+        assert user["location"] == "Shanghai"
+        assert "alice_large.png" in user["avatar"]
+
+    # ------------------------------------------------------------------ #
+    # search
+    # ------------------------------------------------------------------ #
+
+    def test_search_returns_unavailable_notice(self):
+        result = V2EXChannel().search("python asyncio")
+        assert len(result) == 1
+        assert "error" in result[0]
+        assert "V2EX" in result[0]["error"]


 class TestXiaoHongShuChannel: