Agent-Reach/agent_reach/channels/v2ex.py
Kada Liao 31f00b8d78
feat(channels): add V2EX support via public API (zero-config, tier=0)
* feat(channels): add V2EX support via public API

V2EX provides a public JSON API that requires no authentication.
This PR adds:
- agent_reach/channels/v2ex.py: V2EXChannel (tier=0, zero-config)
  - can_handle() matches v2ex.com URLs
  - check() verifies API reachability via urllib (no extra deps)
- Register V2EXChannel in channels/__init__.py
- SKILL.md: add V2EX section with curl examples for hot topics,
  node browsing, topic detail, replies, and user info
- tests/test_channels.py: URL matching + mocked ok/warn check tests

V2EX API endpoints used:
  GET /api/topics/hot.json        — hot topics
  GET /api/topics/show.json       — node topics / topic detail
  GET /api/replies/show.json      — topic replies
  GET /api/members/show.json      — user info

* feat(channels): expand V2EX channel with data-fetching methods

Add get_hot_topics, get_node_topics, get_topic, get_user, and search
methods to V2EXChannel using stdlib urllib only (no new dependencies).
Update unit tests and SKILL.md with Python call examples.

* feat(v2ex): add data fetching methods to V2EXChannel
2026-03-12 14:29:07 +08:00

212 lines
7.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""V2EX — public API channel for topics, nodes, users, and replies."""
import json
import urllib.request
from typing import Any
from .base import Channel
_UA = "agent-reach/1.0"
_TIMEOUT = 10
def _get_json(url: str) -> Any:
"""Fetch *url* and return parsed JSON. Raises on HTTP/network errors."""
req = urllib.request.Request(url, headers={"User-Agent": _UA})
with urllib.request.urlopen(req, timeout=_TIMEOUT) as resp:
return json.loads(resp.read().decode("utf-8"))
class V2EXChannel(Channel):
    """V2EX channel backed by the site's public, unauthenticated JSON API.

    Every data-fetching method goes through the module-level ``_get_json``
    helper, so HTTP and network errors propagate to the caller unless a
    method documents otherwise.
    """

    name = "v2ex"
    description = "V2EX 节点、主题与回复"
    backends = ["V2EX API (public)"]
    tier = 0

    # Topic bodies are cut to this many characters in list summaries.
    _EXCERPT_CHARS = 200

    # ------------------------------------------------------------------ #
    # URL routing
    # ------------------------------------------------------------------ #
    def can_handle(self, url: str) -> bool:
        """Return True when *url* points at v2ex.com or one of its subdomains.

        The host is compared exactly (or as a dot-separated suffix) rather
        than by substring, so look-alike hosts such as ``notv2ex.com`` or
        ``v2ex.com.evil.example`` are rejected. URLs without a network
        location (for example a bare ``v2ex.com/t/1``) return False.
        """
        from urllib.parse import urlparse

        try:
            # ``hostname`` strips userinfo and port and is already lowercased.
            host = urlparse(url).hostname
        except ValueError:
            # Malformed netloc (e.g. an unbalanced IPv6 bracket): not ours.
            return False
        # A fully-qualified host may carry a trailing dot.
        host = (host or "").lower().rstrip(".")
        return host == "v2ex.com" or host.endswith(".v2ex.com")

    # ------------------------------------------------------------------ #
    # Health check
    # ------------------------------------------------------------------ #
    def check(self, config=None):
        """Probe the public API and report channel health.

        Args:
            config: Accepted for interface compatibility; not used here.

        Returns:
            A ``(status, message)`` tuple: ``"ok"`` when the probe request
            succeeds, ``"warn"`` with the error text when it fails.
        """
        try:
            _get_json(
                "https://www.v2ex.com/api/topics/show.json?node_name=python&page=1"
            )
        except Exception as e:
            # A health check must never raise; every failure becomes a warning.
            return "warn", f"V2EX API 连接失败（可能需要代理）：{e}"
        return "ok", "公开 API 可用（热门主题、节点浏览、主题详情、用户信息）"

    # ------------------------------------------------------------------ #
    # Data-fetching methods
    # ------------------------------------------------------------------ #
    @classmethod
    def _topic_summary(cls, item: dict, default_node_name: str = "") -> dict:
        """Reduce one raw API topic object to the summary dict used in lists."""
        node = item.get("node") or {}
        content = item.get("content", "") or ""
        return {
            "id": item.get("id", 0),
            "title": item.get("title", ""),
            "url": item.get("url", ""),
            "replies": item.get("replies", 0),
            "node_name": node.get("name", default_node_name),
            "node_title": node.get("title", ""),
            "content": content[: cls._EXCERPT_CHARS],
            "created": item.get("created", 0),
        }

    @classmethod
    def _topic_summaries(cls, data, limit, default_node_name: str = "") -> list:
        """Summarise at most *limit* topics from a raw API list payload."""
        if not isinstance(data, list):
            # Anything other than a JSON array carries no topics.
            return []
        if limit is not None:
            # A negative slice bound would silently drop items from the end.
            limit = max(limit, 0)
        return [
            cls._topic_summary(item, default_node_name)
            for item in data[:limit]
            if isinstance(item, dict)
        ]

    def get_hot_topics(self, limit: int = 20) -> list:
        """Fetch the list of currently hot topics.

        Args:
            limit: Maximum number of topics to return; negative values are
                treated as 0.

        Returns:
            A list of dicts with keys: id, title, url, replies, node_name,
            node_title, content (first 200 characters), created.

        Raises:
            Exception: Whatever ``_get_json`` raises on HTTP/network errors.
        """
        data = _get_json("https://www.v2ex.com/api/topics/hot.json")
        return self._topic_summaries(data, limit)

    def get_node_topics(self, node_name: str, limit: int = 20) -> list:
        """Fetch the first page of topics for one node.

        Args:
            node_name: Node name, e.g. "python", "tech" or "jobs".
            limit: Maximum number of topics to return; negative values are
                treated as 0.

        Returns:
            A list of dicts with keys: id, title, url, replies, node_name,
            node_title, content (first 200 characters), created. When a topic
            carries no node name, *node_name* is used instead.

        Raises:
            Exception: Whatever ``_get_json`` raises on HTTP/network errors.
        """
        from urllib.parse import urlencode

        # Encode the caller-supplied value so it cannot break the query string.
        query = urlencode({"node_name": node_name, "page": 1})
        data = _get_json(f"https://www.v2ex.com/api/topics/show.json?{query}")
        return self._topic_summaries(data, limit, default_node_name=node_name)

    def get_topic(self, topic_id: int) -> dict:
        """Fetch a single topic together with its first page of replies.

        Args:
            topic_id: Topic ID, taken from a URL of the form
                https://www.v2ex.com/t/<id>.

        Returns:
            A dict with keys: id, title, url, content, replies_count,
            node_name, node_title, author, created, replies (a list of dicts
            with keys: author, content, created). Missing fields fall back to
            empty/zero defaults; ``replies`` is empty when the replies request
            fails.

        Raises:
            Exception: Whatever ``_get_json`` raises while fetching the topic
                itself. Failures of the replies request are not raised.
        """
        from urllib.parse import urlencode

        topic_query = urlencode({"id": topic_id})
        topic_data = _get_json(
            f"https://www.v2ex.com/api/topics/show.json?{topic_query}"
        )
        # The payload may be a list even for a single-ID query.
        if isinstance(topic_data, list):
            topic = topic_data[0] if topic_data else {}
        else:
            topic = topic_data
        if not isinstance(topic, dict):
            # Unexpected payload shape (e.g. null): fall back to defaults.
            topic = {}
        node = topic.get("node") or {}
        member = topic.get("member") or {}

        # Replies are best-effort: a failure here must not lose the topic.
        replies_query = urlencode({"topic_id": topic_id, "page": 1})
        try:
            replies_raw = _get_json(
                f"https://www.v2ex.com/api/replies/show.json?{replies_query}"
            )
        except Exception:
            replies_raw = []
        if not isinstance(replies_raw, list):
            # A non-array payload (e.g. an error object) carries no replies.
            replies_raw = []
        replies = [
            {
                "author": (r.get("member") or {}).get("username", ""),
                "content": r.get("content", ""),
                "created": r.get("created", 0),
            }
            for r in replies_raw
            if isinstance(r, dict)
        ]

        return {
            "id": topic.get("id", topic_id),
            "title": topic.get("title", ""),
            "url": topic.get("url", f"https://www.v2ex.com/t/{topic_id}"),
            "content": topic.get("content", ""),
            "replies_count": topic.get("replies", 0),
            "node_name": node.get("name", ""),
            "node_title": node.get("title", ""),
            "author": member.get("username", ""),
            "created": topic.get("created", 0),
            "replies": replies,
        }

    def get_user(self, username: str) -> dict:
        """Fetch profile information for one user.

        Args:
            username: V2EX username.

        Returns:
            A dict with keys: id, username, url, website, twitter, psn,
            github, btc, location, bio, avatar, created. ``avatar`` prefers
            the large image and falls back to the normal one.

        Raises:
            Exception: Whatever ``_get_json`` raises on HTTP/network errors.
        """
        from urllib.parse import quote, urlencode

        # Encode the caller-supplied value so it cannot break the query string.
        query = urlencode({"username": username})
        data = _get_json(f"https://www.v2ex.com/api/members/show.json?{query}")
        if not isinstance(data, dict):
            # Unexpected payload shape: fall back to defaults.
            data = {}
        fallback_url = f"https://www.v2ex.com/member/{quote(username, safe='')}"
        return {
            "id": data.get("id", 0),
            "username": data.get("username", username),
            "url": data.get("url", fallback_url),
            "website": data.get("website", ""),
            "twitter": data.get("twitter", ""),
            "psn": data.get("psn", ""),
            "github": data.get("github", ""),
            "btc": data.get("btc", ""),
            "location": data.get("location", ""),
            "bio": data.get("bio", ""),
            # Also fall back when the large avatar is present but null/empty.
            "avatar": data.get("avatar_large") or data.get("avatar_normal", ""),
            "created": data.get("created", 0),
        }

    def search(self, query: str, limit: int = 10) -> list:
        """Report that topic search is unavailable through this channel.

        No request is made: the method only returns a single error entry
        whose message says the public API offers no search endpoint and
        suggests https://www.v2ex.com/?q=<query> or the Exa channel with a
        ``site:v2ex.com`` filter instead.

        Args:
            query: Search text; only echoed into the suggested URL.
            limit: Accepted for interface compatibility; currently unused.

        Returns:
            A one-element list containing ``{"error": <message>}``.
        """
        return [
            {
                "error": (
                    "V2EX 公开 API 不提供搜索端点。"
                    f"建议改用https://www.v2ex.com/?q={query} "
                    "或通过 Exa channel 使用 site:v2ex.com 搜索。"
                )
            }
        ]