* feat(channels): add V2EX support via public API

  V2EX provides a public JSON API that requires no authentication. This PR adds:

  - agent_reach/channels/v2ex.py: V2EXChannel (tier=0, zero-config)
    - can_handle() matches v2ex.com URLs
    - check() verifies API reachability via urllib (no extra deps)
  - Register V2EXChannel in channels/__init__.py
  - SKILL.md: add V2EX section with curl examples for hot topics, node browsing, topic detail, replies, and user info
  - tests/test_channels.py: URL matching + mocked ok/warn check tests

  V2EX API endpoints used:

      GET /api/v2/topics/hot — hot topics
      GET /api/topics/show.json — node topics / topic detail
      GET /api/replies/show.json — topic replies
      GET /api/members/show.json — user info

* feat(channels): expand V2EX channel with data-fetching methods

  Add get_hot_topics, get_node_topics, get_topic, get_user, and search methods to V2EXChannel using stdlib urllib only (no new dependencies). Update unit tests and SKILL.md with Python call examples.

* feat(v2ex): add data fetching methods to V2EXChannel
212 lines
7.5 KiB
Python
212 lines
7.5 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""V2EX — public API channel for topics, nodes, users, and replies."""
|
||
|
||
import json
|
||
import urllib.request
|
||
from typing import Any
|
||
from .base import Channel
|
||
|
||
_UA = "agent-reach/1.0"
|
||
_TIMEOUT = 10
|
||
|
||
|
||
def _get_json(url: str) -> Any:
|
||
"""Fetch *url* and return parsed JSON. Raises on HTTP/network errors."""
|
||
req = urllib.request.Request(url, headers={"User-Agent": _UA})
|
||
with urllib.request.urlopen(req, timeout=_TIMEOUT) as resp:
|
||
return json.loads(resp.read().decode("utf-8"))
|
||
|
||
|
||
class V2EXChannel(Channel):
    """Channel for V2EX topics, nodes, users and replies via its public API.

    Every data method goes through the module-level ``_get_json`` helper, so
    HTTP / network / JSON errors propagate to the caller unless a method
    documents otherwise.
    """

    name = "v2ex"
    description = "V2EX 节点、主题与回复"
    backends = ["V2EX API (public)"]
    tier = 0

    # Maximum number of characters of a topic body kept in list results.
    _EXCERPT_CHARS = 200

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #

    @staticmethod
    def _api_url(endpoint: str, **params: Any) -> str:
        """Return the API URL for *endpoint* with *params* percent-encoded.

        Encoding the query string keeps caller-supplied values (node names,
        usernames) from breaking the URL or injecting extra parameters.
        Parameters are emitted in the order they are passed.
        """
        from urllib.parse import urlencode

        url = f"https://www.v2ex.com/api/{endpoint}"
        return f"{url}?{urlencode(params)}" if params else url

    @classmethod
    def _summarize_topics(cls, data: Any, limit: int, fallback_node: str = "") -> list:
        """Convert a raw topic-list payload into compact summary dicts.

        Args:
            data: Decoded JSON payload; anything other than a list yields ``[]``.
            limit: Maximum number of items to take. Negative values are
                treated as 0; ``None`` means no limit.
            fallback_node: Node name used when an item carries no node name.

        Returns:
            A list of dicts with keys: id, title, url, replies, node_name,
            node_title, content (truncated excerpt), created.
        """
        if not isinstance(data, list):
            return []
        if limit is not None:
            # A negative slice bound would drop items from the tail instead
            # of limiting the result, so clamp it.
            limit = max(limit, 0)

        summaries = []
        for item in data[:limit]:
            if not isinstance(item, dict):
                continue
            node = item.get("node") or {}
            content = item.get("content", "") or ""
            summaries.append(
                {
                    "id": item.get("id", 0),
                    "title": item.get("title", ""),
                    "url": item.get("url", ""),
                    "replies": item.get("replies", 0),
                    "node_name": node.get("name", fallback_node),
                    "node_title": node.get("title", ""),
                    "content": content[: cls._EXCERPT_CHARS],
                    "created": item.get("created", 0),
                }
            )
        return summaries

    # ------------------------------------------------------------------ #
    # URL routing
    # ------------------------------------------------------------------ #

    def can_handle(self, url: str) -> bool:
        """Return True when *url* points at v2ex.com or one of its subdomains.

        The parsed hostname is compared exactly, so look-alike hosts such as
        ``notv2ex.com`` or ``v2ex.com.evil.example`` are rejected, and ports
        or userinfo in the URL do not affect the result. URLs without a
        network location (e.g. no scheme) are not handled.
        """
        from urllib.parse import urlparse

        try:
            host = urlparse(url).hostname
        except ValueError:
            # Malformed authority (e.g. an unbalanced IPv6 bracket).
            return False
        if not host:
            return False
        host = host.lower().rstrip(".")
        return host == "v2ex.com" or host.endswith(".v2ex.com")

    # ------------------------------------------------------------------ #
    # Health check
    # ------------------------------------------------------------------ #

    def check(self, config=None):
        """Probe the public API and report channel health.

        Args:
            config: Accepted for interface compatibility; not used here.

        Returns:
            A ``(status, message)`` tuple: ``"ok"`` when a sample request
            succeeds, ``"warn"`` (with the error text) when it fails.
        """
        try:
            _get_json(
                "https://www.v2ex.com/api/topics/show.json?node_name=python&page=1"
            )
        except Exception as e:
            # Any failure is reported as a warning rather than raised, so the
            # health check itself never crashes.
            return "warn", f"V2EX API 连接失败（可能需要代理）：{e}"
        return "ok", "公开 API 可用（热门主题、节点浏览、主题详情、用户信息）"

    # ------------------------------------------------------------------ #
    # Data-fetching methods
    # ------------------------------------------------------------------ #

    def get_hot_topics(self, limit: int = 20) -> list:
        """Fetch the list of hot topics.

        Args:
            limit: Maximum number of topics to return.

        Returns:
            A list of dicts with keys: id, title, url, replies, node_name,
            node_title, content (first 200 characters), created.
        """
        data = _get_json(self._api_url("topics/hot.json"))
        return self._summarize_topics(data, limit)

    def get_node_topics(self, node_name: str, limit: int = 20) -> list:
        """Fetch the latest topics of one node (first page).

        Args:
            node_name: Node name, e.g. "python", "tech", "jobs".
            limit: Maximum number of topics to return.

        Returns:
            A list of dicts with keys: id, title, url, replies, node_name,
            node_title, content (first 200 characters), created.
            ``node_name`` falls back to the requested name when the item
            does not carry one.
        """
        data = _get_json(
            self._api_url("topics/show.json", node_name=node_name, page=1)
        )
        return self._summarize_topics(data, limit, fallback_node=node_name)

    def get_topic(self, topic_id: int) -> dict:
        """Fetch one topic together with the first page of its replies.

        Args:
            topic_id: Topic ID (taken from https://www.v2ex.com/t/<id>).

        Returns:
            A dict with keys: id, title, url, content, replies_count,
            node_name, node_title, author, created, replies (list of dicts
            with: author, content, created).

        Fetching replies is best effort: if that request fails, ``replies``
        is an empty list. A failure of the topic request itself propagates.
        """
        topic_data = _get_json(self._api_url("topics/show.json", id=topic_id))
        # API returns a list even for single-ID queries
        if isinstance(topic_data, list):
            topic_data = topic_data[0] if topic_data else {}
        topic = topic_data if isinstance(topic_data, dict) else {}

        node = topic.get("node") or {}
        member = topic.get("member") or {}

        # Fetch replies (first page)
        try:
            replies_raw = _get_json(
                self._api_url("replies/show.json", topic_id=topic_id, page=1)
            )
        except Exception:
            # Deliberately broad: the topic is still useful without replies.
            replies_raw = []
        if not isinstance(replies_raw, list):
            # An unexpected payload shape must not break the topic itself.
            replies_raw = []

        replies = [
            {
                "author": (r.get("member") or {}).get("username", ""),
                "content": r.get("content", ""),
                "created": r.get("created", 0),
            }
            for r in replies_raw
            if isinstance(r, dict)
        ]

        return {
            "id": topic.get("id", topic_id),
            "title": topic.get("title", ""),
            "url": topic.get("url", f"https://www.v2ex.com/t/{topic_id}"),
            "content": topic.get("content", ""),
            "replies_count": topic.get("replies", 0),
            "node_name": node.get("name", ""),
            "node_title": node.get("title", ""),
            "author": member.get("username", ""),
            "created": topic.get("created", 0),
            "replies": replies,
        }

    def get_user(self, username: str) -> dict:
        """Fetch a user's public profile.

        Args:
            username: V2EX username.

        Returns:
            A dict with keys: id, username, url, website, twitter, psn,
            github, btc, location, bio, avatar, created.
        """
        data = _get_json(self._api_url("members/show.json", username=username))
        if not isinstance(data, dict):
            data = {}
        return {
            "id": data.get("id", 0),
            "username": data.get("username", username),
            "url": data.get("url", f"https://www.v2ex.com/member/{username}"),
            "website": data.get("website", ""),
            "twitter": data.get("twitter", ""),
            "psn": data.get("psn", ""),
            "github": data.get("github", ""),
            "btc": data.get("btc", ""),
            "location": data.get("location", ""),
            "bio": data.get("bio", ""),
            # Fall back to the normal avatar when the large one is missing
            # *or* empty.
            "avatar": data.get("avatar_large") or data.get("avatar_normal") or "",
            "created": data.get("created", 0),
        }

    def search(self, query: str, limit: int = 10) -> list:
        """Report that search is unavailable and suggest alternatives.

        No request is made: this method always returns a one-element list
        whose only item is ``{"error": <hint>}``. The hint says the public
        API has no search endpoint and points to
        https://www.v2ex.com/?q=<query> and to the Exa channel with a
        ``site:v2ex.com`` query.

        Args:
            query: Search terms; echoed verbatim into the suggested URL.
            limit: Accepted for interface compatibility; currently unused.

        Returns:
            A list containing a single ``{"error": str}`` dict.
        """
        return [
            {
                "error": (
                    "V2EX 公开 API 不提供搜索端点。"
                    f"建议改用：https://www.v2ex.com/?q={query} "
                    "或通过 Exa channel 使用 site:v2ex.com 搜索。"
                )
            }
        ]
|