全局重命名: - 包名: agent_eyes → agent_reach - CLI: agent-eyes → agent-reach - 类名: AgentEyes → AgentReach - 显示名: Agent Eyes → Agent Reach - GitHub: Panniantong/agent-eyes → Panniantong/Agent-Reach 所有 36 个测试通过,CLI/doctor/read/search 全部正常。
49 lines
1.3 KiB
Python
49 lines
1.3 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""Web pages — via Jina Reader API (free, no config needed).
|
||
|
||
Backend: Jina Reader (https://r.jina.ai)
|
||
Swap to: Firecrawl, Trafilatura, or any other reader API
|
||
"""
|
||
|
||
import asyncio
import functools

import requests

from .base import Channel, ReadResult
|
||
|
||
|
||
class WebChannel(Channel):
    """Catch-all channel that reads a URL through the Jina Reader API.

    ``can_handle`` accepts every URL, so this channel acts as the fallback
    for URLs that no other channel claims.
    """

    name = "web"
    description = "网页(任意 URL)"
    backends = ["Jina Reader API"]
    tier = 0

    # Reader endpoint; the target URL is appended verbatim to this prefix.
    JINA_URL = "https://r.jina.ai/"

    def can_handle(self, url: str) -> bool:
        """Return True for every URL."""
        # Fallback — handles any URL not matched by other channels
        return True

    async def read(self, url: str, config=None) -> ReadResult:
        """Fetch ``url`` as markdown via Jina Reader and wrap it in a ReadResult.

        Args:
            url: Address of the page to read.
            config: Accepted for interface compatibility; not used here.

        Returns:
            A ReadResult whose ``content`` is the full response text and whose
            ``title`` is taken from that text, or ``url`` if no title line is
            found.

        Raises:
            requests.HTTPError: If the reader responds with an error status.
            requests.RequestException: On connection failures or timeout.
        """
        fetch = functools.partial(
            requests.get,
            f"{self.JINA_URL}{url}",
            headers={"Accept": "text/markdown"},
            timeout=15,
        )
        # requests is synchronous: run it in the default executor so a slow
        # page does not stall every other coroutine on the event loop.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(None, fetch)
        resp.raise_for_status()
        text = resp.text

        return ReadResult(
            title=self._extract_title(text, fallback=url),
            content=text,
            url=url,
            platform="web",
        )

    @staticmethod
    def _extract_title(text: str, fallback: str) -> str:
        """Return the first ``# `` heading or ``Title:`` line, else ``fallback``."""
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if line.startswith("# "):
                return line[2:].strip()
            if line.startswith("Title:"):
                return line[6:].strip()
        return fallback
|