From 3a3a0101cfbb068588dfecd4d51b19e378f3bd6b Mon Sep 17 00:00:00 2001 From: Panniantong Date: Tue, 24 Feb 2026 03:07:50 +0100 Subject: [PATCH] =?UTF-8?q?Agent=20Eyes=20v1.0.0=20=E2=80=94=20search=20+?= =?UTF-8?q?=20read=20the=20entire=20internet?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on x-reader by @runes_leo (MIT License). Extended with: - Reddit support (posts + comments, proxy support) - GitHub support (repos, issues, PRs) - Web search via Exa semantic search - Reddit search (bypasses IP blocks via Exa) - GitHub search (repos by stars) - Renamed package: x_reader → agent_eyes - New MCP tools: search, search_reddit, search_github - Agent-first positioning and documentation --- README.md | 295 ++++++------------ {x_reader => agent_eyes}/__init__.py | 0 {x_reader => agent_eyes}/cli.py | 6 +- {x_reader => agent_eyes}/fetchers/__init__.py | 0 {x_reader => agent_eyes}/fetchers/bilibili.py | 0 {x_reader => agent_eyes}/fetchers/browser.py | 0 agent_eyes/fetchers/github.py | 190 +++++++++++ {x_reader => agent_eyes}/fetchers/jina.py | 0 agent_eyes/fetchers/reddit.py | 132 ++++++++ {x_reader => agent_eyes}/fetchers/rss.py | 0 agent_eyes/fetchers/search.py | 94 ++++++ {x_reader => agent_eyes}/fetchers/telegram.py | 0 {x_reader => agent_eyes}/fetchers/twitter.py | 4 +- {x_reader => agent_eyes}/fetchers/wechat.py | 4 +- {x_reader => agent_eyes}/fetchers/xhs.py | 6 +- {x_reader => agent_eyes}/fetchers/youtube.py | 2 +- {x_reader => agent_eyes}/login.py | 0 {x_reader => agent_eyes}/reader.py | 54 +++- {x_reader => agent_eyes}/schema.py | 8 +- {x_reader => agent_eyes}/utils/__init__.py | 0 {x_reader => agent_eyes}/utils/storage.py | 2 +- mcp_server.py | 71 ++++- pyproject.toml | 22 +- 23 files changed, 659 insertions(+), 231 deletions(-) rename {x_reader => agent_eyes}/__init__.py (100%) rename {x_reader => agent_eyes}/cli.py (96%) rename {x_reader => agent_eyes}/fetchers/__init__.py (100%) rename {x_reader => 
agent_eyes}/fetchers/bilibili.py (100%) rename {x_reader => agent_eyes}/fetchers/browser.py (100%) create mode 100644 agent_eyes/fetchers/github.py rename {x_reader => agent_eyes}/fetchers/jina.py (100%) create mode 100644 agent_eyes/fetchers/reddit.py rename {x_reader => agent_eyes}/fetchers/rss.py (100%) create mode 100644 agent_eyes/fetchers/search.py rename {x_reader => agent_eyes}/fetchers/telegram.py (100%) rename {x_reader => agent_eyes}/fetchers/twitter.py (98%) rename {x_reader => agent_eyes}/fetchers/wechat.py (93%) rename {x_reader => agent_eyes}/fetchers/xhs.py (92%) rename {x_reader => agent_eyes}/fetchers/youtube.py (99%) rename {x_reader => agent_eyes}/login.py (100%) rename {x_reader => agent_eyes}/reader.py (69%) rename {x_reader => agent_eyes}/schema.py (98%) rename {x_reader => agent_eyes}/utils/__init__.py (100%) rename {x_reader => agent_eyes}/utils/storage.py (98%) diff --git a/README.md b/README.md index d28fc57..2e86eb2 100644 --- a/README.md +++ b/README.md @@ -1,247 +1,160 @@ -# x-reader +# đŸ‘ī¸ Agent Eyes + +**Give your AI Agent eyes to see the entire internet.** [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE) -Universal content reader — fetch, transcribe, and digest content from any platform. +Agent Eyes is an open-source infrastructure tool that gives any AI Agent the ability to **search** and **read** the entire internet. One install, 10+ platforms, unified output. -Give it a URL (article, video, podcast, tweet), get back structured content. Works as CLI, Python library, MCP server, or Claude Code skills. +> 🙏 Built on the shoulders of [x-reader](https://github.com/runesleo/x-reader) by [@runes_leo](https://x.com/runes_leo). Thank you for the foundation! -## What It Does +## Why Agent Eyes? 
-``` -Any URL → Platform Detection → Fetch Content → Unified Output - ↓ ↓ - auto-detect text: Jina Reader - 7+ platforms video: yt-dlp subtitles - audio: Whisper transcription - API: Bilibili / RSS / Telegram -``` +Your AI Agent is blind. It can only see what you manually feed it. -The Python layer handles text fetching and YouTube subtitle extraction. The **Claude Code skills** (optional) add full Whisper transcription for video/podcast and AI-powered content analysis. +Agent Eyes gives it **eyes** — the ability to: +- **Search** the web, Reddit, GitHub with a single command +- **Read** any URL from 10+ platforms (articles, videos, tweets, posts) +- **Transcribe** videos and podcasts to text -## Three Layers +Without this, your Agent is a chatbot waiting for instructions. +With this, it can autonomously find and consume information — just like you do. -x-reader is composable. Use the layers you need: - -| Layer | What | Format | Install | -|-------|------|--------|---------| -| **Python CLI/Library** | Basic content fetching + unified schema | See [Install](#install) | Required | -| **Claude Code Skills** | Video transcription + AI analysis | Copy `skills/` to `~/.claude/skills/` | Optional | -| **MCP Server** | Expose reading as MCP tools | `python mcp_server.py` | Optional | - -### Layer 1: Python CLI +## Quick Start ```bash -# Fetch any URL -x-reader https://mp.weixin.qq.com/s/abc123 +# Install +pip install git+https://github.com/Panniantong/agent-eyes.git -# Fetch a tweet -x-reader https://x.com/elonmusk/status/123456 +# Search the web +agent-eyes search "AI agent framework 2026" -# Fetch multiple URLs -x-reader https://url1.com https://url2.com +# Read any URL +agent-eyes read https://reddit.com/r/LocalLLaMA/comments/xxx +agent-eyes read https://github.com/openai/codex +agent-eyes read https://mp.weixin.qq.com/s/xxx +agent-eyes read https://x.com/elonmusk/status/xxx -# Login to a platform (one-time, for browser fallback) -x-reader login xhs - -# View inbox 
-x-reader list -``` - -### Layer 2: Claude Code Skills - -> Requires cloning the repo (not included in pip install). - -For video/podcast transcription and content analysis: - -``` -skills/ -├── video/ # YouTube/Bilibili/podcast → full transcript via Whisper -└── analyzer/ # Any content → structured analysis report -``` - -Install: -```bash -cp -r skills/video ~/.claude/skills/video -cp -r skills/analyzer ~/.claude/skills/analyzer -``` - -Then in Claude Code, just send a YouTube/Bilibili/podcast link — the video skill auto-triggers and produces a full transcript + summary. - -### Layer 3: MCP Server - -> Requires cloning the repo (mcp_server.py is not included in pip install). - -```bash -git clone https://github.com/runesleo/x-reader.git -cd x-reader -pip install -e ".[mcp]" -python mcp_server.py -``` - -Tools exposed: -- `read_url(url)` — fetch any URL -- `read_batch(urls)` — fetch multiple URLs concurrently -- `list_inbox()` — view previously fetched content -- `detect_platform(url)` — identify platform from URL - -Claude Code config (`~/.claude/claude_desktop_config.json`): -```json -{ - "mcpServers": { - "x-reader": { - "command": "python", - "args": ["/path/to/x-reader/mcp_server.py"] - } - } -} +# Your Agent now has eyes đŸ‘ī¸ ``` ## Supported Platforms -| Platform | Text Fetch | Video/Audio Transcript | -|----------|-----------|----------------------| -| YouTube | ✅ Jina | ✅ yt-dlp subtitles → Groq Whisper fallback | -| Bilibili (BįĢ™) | ✅ API | ✅ via Claude Code skill | -| X / Twitter | ✅ Jina → Playwright | — | -| WeChat (垎äŋĄå…Ŧäŧ—åˇ) | ✅ Jina → Playwright | — | -| Xiaohongshu (小įēĸäšĻ) | ✅ Jina → Playwright* | — | -| Telegram | ✅ Telethon | — | -| RSS | ✅ feedparser | — | -| 小厇厙 (Xiaoyuzhou) | — | ✅ via Claude Code skill | -| Apple Podcasts | — | ✅ via Claude Code skill | -| Any web page | ✅ Jina fallback | — | +| Platform | Read URL | Search | Notes | +|----------|:--------:|:------:|-------| +| 🔍 Web (any) | ✅ | ✅ Exa | Semantic search across the 
entire web | +| 🟠 Reddit | ✅ | ✅ | Posts + comments. Proxy support via `REDDIT_PROXY` | +| 🐙 GitHub | ✅ | ✅ | Repos (README), Issues, PRs | +| đŸĻ X / Twitter | ✅ | — | Tweets and threads | +| đŸ’Ŧ WeChat (垎äŋĄå…Ŧäŧ—åˇ) | ✅ | — | Anti-scraping bypass via Playwright | +| 📕 Xiaohongshu (小įēĸäšĻ) | ✅ | — | Session persistence for login-gated content | +| â–ļī¸ YouTube | ✅ | — | Subtitles + Whisper transcription | +| đŸ“ē Bilibili (BįĢ™) | ✅ | — | Official API | +| âœˆī¸ Telegram | ✅ | — | Channel message sync | +| 📡 RSS | ✅ | — | Any RSS/Atom feed | +| đŸŽ™ī¸ Podcasts | ✅ | — | 小厇厙, Apple Podcasts (via Whisper) | -> \*XHS requires a one-time login: `x-reader login xhs` (saves session for Playwright fallback) -> -> YouTube Whisper transcription requires `GROQ_API_KEY` — get a free key from [Groq](https://console.groq.com/keys) +## Three Layers -## Install +Use the layers you need: + +| Layer | What | For | +|-------|------|-----| +| **CLI** | `agent-eyes read/search` | Quick command-line use | +| **MCP Server** | 7 tools for any AI Agent | OpenClaw, Claude Code, etc. 
| +| **Python Library** | `from agent_eyes import UniversalReader` | Custom integrations | + +### As MCP Server (recommended for Agents) ```bash -# From GitHub (recommended) -pip install git+https://github.com/runesleo/x-reader.git +# Start the server +python mcp_server.py -# With Telegram support -pip install "x-reader[telegram] @ git+https://github.com/runesleo/x-reader.git" - -# With browser fallback (Playwright — for XHS/WeChat anti-scraping) -pip install "x-reader[browser] @ git+https://github.com/runesleo/x-reader.git" -playwright install chromium - -# With all optional dependencies -pip install "x-reader[all] @ git+https://github.com/runesleo/x-reader.git" -playwright install chromium +# Or with SSE transport +python mcp_server.py --transport sse ``` -Or clone and install locally: -```bash -git clone https://github.com/runesleo/x-reader.git -cd x-reader -pip install -e ".[all]" -playwright install chromium -``` +MCP Tools exposed: -### Dependencies for video/audio (optional) +| Tool | Description | +|------|-------------| +| `read_url(url)` | Read any URL → structured content | +| `read_batch(urls)` | Read multiple URLs concurrently | +| `search(query)` | Semantic web search (Exa) | +| `search_reddit(query, subreddit?)` | Search Reddit | +| `search_github(query)` | Search GitHub repos | +| `list_inbox()` | View previously fetched content | +| `detect_platform(url)` | Identify platform from URL | -```bash -# macOS -brew install yt-dlp ffmpeg - -# Linux -pip install yt-dlp -apt install ffmpeg -``` - -For Whisper transcription, get a free API key from [Groq](https://console.groq.com/keys) and set: -```bash -export GROQ_API_KEY=your_key_here -``` - -## Use as Library +### As Python Library ```python import asyncio -from x_reader.reader import UniversalReader +from agent_eyes.reader import UniversalReader async def main(): reader = UniversalReader() - content = await reader.read("https://mp.weixin.qq.com/s/abc123") + + # Read any URL + content = await 
reader.read("https://github.com/openai/codex") print(content.title) - print(content.content[:200]) + print(content.content[:500]) asyncio.run(main()) ``` -## Configuration - -Copy `.env.example` to `.env`: +## Install ```bash -cp .env.example .env +# Basic install +pip install git+https://github.com/Panniantong/agent-eyes.git + +# With browser fallback (for WeChat/XHS anti-scraping) +pip install "agent-eyes[browser] @ git+https://github.com/Panniantong/agent-eyes.git" +playwright install chromium + +# With Telegram support +pip install "agent-eyes[telegram] @ git+https://github.com/Panniantong/agent-eyes.git" + +# Everything +pip install "agent-eyes[all] @ git+https://github.com/Panniantong/agent-eyes.git" +playwright install chromium ``` +## Configuration + | Variable | Required | Description | |----------|----------|-------------| +| `EXA_API_KEY` | For search | Free key from [exa.ai](https://exa.ai) | +| `REDDIT_PROXY` | For Reddit (if IP blocked) | `http://user:pass@host:port` | +| `GITHUB_TOKEN` | No (higher rate limits) | GitHub personal access token | +| `GROQ_API_KEY` | For Whisper | Free key from [groq.com](https://console.groq.com/keys) | | `TG_API_ID` | Telegram only | From https://my.telegram.org | | `TG_API_HASH` | Telegram only | From https://my.telegram.org | -| `GROQ_API_KEY` | Whisper only | From https://console.groq.com/keys (free) | -| `INBOX_FILE` | No | Path to inbox JSON (default: `./unified_inbox.json`) | -| `OUTPUT_DIR` | No | Directory for Markdown output (default: disabled) | -| `OBSIDIAN_VAULT` | No | Path to Obsidian vault (writes to `01-æ”ļé›†įŽą/x-reader-inbox.md`) | -## Architecture +## What's New (vs x-reader) -``` -x-reader/ -├── x_reader/ # Python package -│ ├── cli.py # CLI entry point -│ ├── reader.py # URL dispatcher (UniversalReader) -│ ├── schema.py # Unified data model (UnifiedContent + Inbox) -│ ├── login.py # Browser login manager (saves sessions) -│ ├── fetchers/ -│ │ ├── jina.py # Jina Reader (universal fallback) -│ │ ├── 
browser.py # Playwright headless (anti-scraping fallback) -│ │ ├── bilibili.py # Bilibili API -│ │ ├── youtube.py # yt-dlp subtitle extraction -│ │ ├── rss.py # feedparser -│ │ ├── telegram.py # Telethon -│ │ ├── twitter.py # Jina-based -│ │ ├── wechat.py # Jina → Playwright fallback -│ │ └── xhs.py # Jina → Playwright + session fallback -│ └── utils/ -│ └── storage.py # JSON + Markdown dual output -├── skills/ # Claude Code skills -│ ├── video/ # Video/podcast → transcript + summary -│ └── analyzer/ # Content → structured analysis -├── mcp_server.py # MCP server entry point -└── pyproject.toml -``` +Agent Eyes extends x-reader with: -## How the Layers Work Together +- 🟠 **Reddit support** — Read posts + comments, search subreddits. Proxy support for blocked IPs. +- 🐙 **GitHub support** — Read repos (README), issues, PRs. Search repositories. +- 🔍 **Web search** — Semantic search across the entire web via Exa. +- đŸŽ¯ **Agent-first design** — MCP Server with 7 tools, ready to plug into any AI Agent. -``` -User sends URL - │ - ├─ Text content (article, tweet, WeChat) - │ └─ Python fetcher → UnifiedContent → inbox - │ - ├─ Video (YouTube, Bilibili, X video) - │ ├─ Python fetcher → metadata (title, description) - │ └─ Video skill → full transcript via subtitles/Whisper - │ - ├─ Podcast (小厇厙, Apple Podcasts) - │ └─ Video skill → full transcript via Whisper - │ - └─ Analysis requested - └─ Analyzer skill → structured report + action items -``` +## Philosophy -## Author +This is **Agent infrastructure**. In the Web 4.0 era where AI Agents act on behalf of humans, the first capability they need is the ability to **see the world**. -Built by [@runes_leo](https://x.com/runes_leo) — more AI tools at [leolabs.me](https://leolabs.me) +Agent Eyes is the sensory layer — the eyes — that every Agent needs. 
+ +## Credits + +- [x-reader](https://github.com/runesleo/x-reader) by [@runes_leo](https://x.com/runes_leo) — the original universal content reader that inspired and powers the core of Agent Eyes +- [Jina Reader](https://jina.ai/reader/) — universal web content extraction +- [Exa](https://exa.ai) — semantic web search API +- [yt-dlp](https://github.com/yt-dlp/yt-dlp) — video/audio extraction ## License -MIT +MIT — use it, fork it, build on it. diff --git a/x_reader/__init__.py b/agent_eyes/__init__.py similarity index 100% rename from x_reader/__init__.py rename to agent_eyes/__init__.py diff --git a/x_reader/cli.py b/agent_eyes/cli.py similarity index 96% rename from x_reader/cli.py rename to agent_eyes/cli.py index c0b28d4..3a64656 100644 --- a/x_reader/cli.py +++ b/agent_eyes/cli.py @@ -17,8 +17,8 @@ from pathlib import Path from dotenv import load_dotenv load_dotenv() -from x_reader.reader import UniversalReader -from x_reader.schema import UnifiedInbox, SourceType +from agent_eyes.reader import UniversalReader +from agent_eyes.schema import UnifiedInbox, SourceType def get_inbox_path() -> str: @@ -87,7 +87,7 @@ def cmd_clear(): def cmd_login(platform: str): """Open browser for manual login to a platform.""" - from x_reader.login import login + from agent_eyes.login import login login(platform) diff --git a/x_reader/fetchers/__init__.py b/agent_eyes/fetchers/__init__.py similarity index 100% rename from x_reader/fetchers/__init__.py rename to agent_eyes/fetchers/__init__.py diff --git a/x_reader/fetchers/bilibili.py b/agent_eyes/fetchers/bilibili.py similarity index 100% rename from x_reader/fetchers/bilibili.py rename to agent_eyes/fetchers/bilibili.py diff --git a/x_reader/fetchers/browser.py b/agent_eyes/fetchers/browser.py similarity index 100% rename from x_reader/fetchers/browser.py rename to agent_eyes/fetchers/browser.py diff --git a/agent_eyes/fetchers/github.py b/agent_eyes/fetchers/github.py new file mode 100644 index 0000000..bf54a74 --- /dev/null 
# -*- coding: utf-8 -*-
"""GitHub fetcher — extracts repo info, issues, PRs, and README content.

Uses GitHub public API (no token needed for public repos).
For higher rate limits, set GITHUB_TOKEN env var.
"""

import asyncio
import base64
import os
import re

import requests
from loguru import logger
from typing import Dict, Any, List, Optional


API_BASE = "https://api.github.com"

# Path segments after ``owner/repo`` that carry a reference we understand.
_REF_TYPES = ("issues", "pull", "tree", "blob")

# owner / repo / optional remainder of the path (query and fragment are
# stripped before this pattern is applied).
_REPO_URL_RE = re.compile(r"github\.com/([^/]+)/([^/]+)(?:/(.*))?$")

# GitHub caps ``per_page`` at 100 for list/search endpoints.
_MAX_PER_PAGE = 100


def _get_headers() -> Dict[str, str]:
    """Build the request headers, adding a Bearer token when GITHUB_TOKEN is set."""
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "AgentEyes/1.0",
    }
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    return headers


async def _api_get(
    path: str,
    headers: Dict[str, str],
    params: Optional[Dict[str, Any]] = None,
) -> requests.Response:
    """GET ``API_BASE + path`` without blocking the event loop.

    ``requests`` is synchronous; running it in a worker thread lets several
    fetches (e.g. a batch read) actually overlap.
    """
    return await asyncio.to_thread(
        requests.get,
        f"{API_BASE}{path}",
        headers=headers,
        params=params,
        timeout=10,
    )


def _leading_number(ref: str, kind: str) -> str:
    """Return the leading digits of *ref* (e.g. ``"12/files"`` -> ``"12"``).

    Raises:
        ValueError: if *ref* does not start with a number (e.g. ``issues/new``).
    """
    match = re.match(r"\d+", ref)
    if match is None:
        raise ValueError(f"Not a GitHub {kind} number: {ref!r}")
    return match.group(0)


def _parse_github_url(url: str) -> Dict[str, Optional[str]]:
    """Split a GitHub URL into owner, repo, reference type and reference.

    Query strings, fragments, trailing slashes and a ``.git`` suffix are
    ignored. Sub-pages that are not one of ``issues/pull/tree/blob`` followed
    by a reference (``/issues``, ``/pulls``, ``/wiki`` ...) resolve to the
    repository itself, i.e. ``type`` and ``ref`` are ``None``.

    Returns:
        Dict with keys ``owner``, ``repo``, ``type`` and ``ref``.

    Raises:
        ValueError: if the URL does not contain ``github.com/<owner>/<repo>``.
    """
    # Drop ?query / #fragment first so they never leak into repo or ref.
    path = re.split(r"[?#]", url.strip(), maxsplit=1)[0].rstrip("/")
    match = _REPO_URL_RE.search(path)
    if not match:
        raise ValueError(f"Cannot parse GitHub URL: {url}")

    owner, repo, rest = match.groups()
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    if not repo:
        raise ValueError(f"Cannot parse GitHub URL: {url}")

    content_type: Optional[str] = None
    ref: Optional[str] = None
    if rest:
        head, _, tail = rest.partition("/")
        if head in _REF_TYPES and tail:
            content_type, ref = head, tail

    return {
        "owner": owner,
        "repo": repo,
        "type": content_type,  # issues, pull, tree, blob, or None
        "ref": ref,            # issue number, branch, file path, or None
    }


async def fetch_github(url: str) -> Dict[str, Any]:
    """Fetch content from a GitHub URL.

    Issue and pull-request URLs are fetched as such; every other URL
    (including tree/blob links) falls back to the repository overview.

    Raises:
        ValueError: if the URL cannot be parsed or the issue/PR number is invalid.
        requests.HTTPError: if the GitHub API answers with an error status.
    """
    logger.info(f"Fetching GitHub: {url}")

    parsed = _parse_github_url(url)
    owner = parsed["owner"]
    repo = parsed["repo"]
    content_type = parsed["type"]
    ref = parsed["ref"]

    headers = _get_headers()

    if content_type == "issues" and ref:
        return await _fetch_issue(owner, repo, ref, headers)
    if content_type == "pull" and ref:
        return await _fetch_pull(owner, repo, ref, headers)
    return await _fetch_repo(owner, repo, headers)


async def _fetch_repo(owner: str, repo: str, headers: Dict) -> Dict[str, Any]:
    """Fetch repo metadata plus the decoded README (best effort)."""
    repo_resp = await _api_get(f"/repos/{owner}/{repo}", headers)
    repo_resp.raise_for_status()
    repo_data = repo_resp.json()

    # The API returns null (not a missing key) for repos without a
    # description/language, so ``.get(key, "")`` alone would yield None.
    description = repo_data.get("description") or ""

    readme_content = ""
    try:
        readme_resp = await _api_get(f"/repos/{owner}/{repo}/readme", headers)
        if readme_resp.status_code == 200:
            encoded = readme_resp.json().get("content") or ""
            readme_content = base64.b64decode(encoded).decode("utf-8")
    except Exception as e:
        # Deliberately best-effort: a missing/undecodable README must not
        # fail the whole fetch, the description is used instead.
        logger.warning(f"Could not fetch README: {e}")

    return {
        "title": f"{owner}/{repo}",
        "content": readme_content or description,
        "description": description,
        "author": owner,
        "url": repo_data.get("html_url", ""),
        "stars": repo_data.get("stargazers_count", 0),
        "forks": repo_data.get("forks_count", 0),
        "language": repo_data.get("language") or "",
        "topics": repo_data.get("topics", []),
        "license": (repo_data.get("license") or {}).get("spdx_id", ""),
        "platform": "github",
    }


async def _fetch_issue(owner: str, repo: str, number: str, headers: Dict) -> Dict[str, Any]:
    """Fetch a GitHub issue together with up to 20 of its comments.

    Raises:
        ValueError: if *number* does not start with digits.
        requests.HTTPError: if the issue request fails.
    """
    issue_num = _leading_number(number, "issue")

    resp = await _api_get(f"/repos/{owner}/{repo}/issues/{issue_num}", headers)
    resp.raise_for_status()
    issue = resp.json()

    comments_text = ""
    if issue.get("comments", 0) > 0:
        c_resp = await _api_get(
            f"/repos/{owner}/{repo}/issues/{issue_num}/comments",
            headers,
            params={"per_page": 20},
        )
        # Comments are optional extras: a failed request just omits them.
        if c_resp.status_code == 200:
            parts = ["\n---\n## Comments\n"]
            for c in c_resp.json():
                # "user" can be null for deleted accounts.
                login = (c.get("user") or {}).get("login", "?")
                parts.append(f"**@{login}**:\n{c.get('body') or ''}\n")
            comments_text = "\n".join(parts)

    return {
        "title": f"[{owner}/{repo}#{issue_num}] {issue.get('title', '')}",
        "content": (issue.get("body") or "") + comments_text,
        "author": (issue.get("user") or {}).get("login", ""),
        "url": issue.get("html_url", ""),
        "state": issue.get("state", ""),
        "labels": [label.get("name", "") for label in issue.get("labels", [])],
        "platform": "github",
    }


async def _fetch_pull(owner: str, repo: str, number: str, headers: Dict) -> Dict[str, Any]:
    """Fetch a GitHub pull request (description and change statistics).

    Raises:
        ValueError: if *number* does not start with digits.
        requests.HTTPError: if the pull request cannot be fetched.
    """
    pr_num = _leading_number(number, "pull request")

    resp = await _api_get(f"/repos/{owner}/{repo}/pulls/{pr_num}", headers)
    resp.raise_for_status()
    pr = resp.json()

    return {
        "title": f"[{owner}/{repo}#{pr_num}] {pr.get('title', '')}",
        "content": pr.get("body") or "",
        "author": (pr.get("user") or {}).get("login", ""),
        "url": pr.get("html_url", ""),
        "state": pr.get("state", ""),
        "merged": pr.get("merged", False),
        "additions": pr.get("additions", 0),
        "deletions": pr.get("deletions", 0),
        "changed_files": pr.get("changed_files", 0),
        "platform": "github",
    }


async def search_github(query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Search GitHub repositories, sorted by stars.

    Args:
        query: GitHub search query.
        limit: Number of results; clamped to 1..100 (the API page-size cap).

    Raises:
        requests.HTTPError: if the search request fails (e.g. rate limited).
    """
    logger.info(f"Searching GitHub: {query}")

    per_page = max(1, min(limit, _MAX_PER_PAGE))
    resp = await _api_get(
        "/search/repositories",
        _get_headers(),
        params={"q": query, "sort": "stars", "per_page": per_page},
    )
    resp.raise_for_status()
    data = resp.json()

    return [
        {
            "title": item.get("full_name", ""),
            "description": item.get("description") or "",
            "url": item.get("html_url", ""),
            "stars": item.get("stargazers_count", 0),
            "language": item.get("language") or "",
            "updated_at": item.get("updated_at", ""),
        }
        for item in data.get("items", [])
    ]
# -*- coding: utf-8 -*-
"""Reddit fetcher — extracts posts and comments via JSON API.

Supports optional proxy via REDDIT_PROXY env var (many IPs are blocked by Reddit).
Example: REDDIT_PROXY=http://user:pass@host:port
"""

import asyncio
import os
import re

import requests
from loguru import logger
from typing import Dict, Any, List, Optional


HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
}


def _get_proxies() -> Optional[Dict[str, str]]:
    """Return a ``requests`` proxy mapping from REDDIT_PROXY, or None if unset."""
    proxy = os.environ.get("REDDIT_PROXY")
    if proxy:
        return {"http": proxy, "https": proxy}
    return None


async def _get_json(url: str, params: Optional[Dict[str, Any]] = None) -> Any:
    """GET *url* and return the decoded JSON body.

    ``requests`` is synchronous, so the call runs in a worker thread to keep
    the event loop free for other fetches.

    Raises:
        requests.HTTPError: on a non-2xx response (e.g. 403 for blocked IPs).
    """
    resp = await asyncio.to_thread(
        requests.get,
        url,
        headers=HEADERS,
        params=params,
        proxies=_get_proxies(),
        timeout=15,
    )
    resp.raise_for_status()
    return resp.json()


def _to_json_url(url: str) -> str:
    """Turn a post URL into its ``.json`` endpoint.

    The query string and fragment are removed *before* the trailing slash, so
    ``.../title/?utm=x`` becomes ``.../title.json`` rather than
    ``.../title/.json``. A URL that already ends in ``.json`` is left as is.
    """
    base = re.split(r"[?#]", url.strip(), maxsplit=1)[0].rstrip("/")
    return base if base.endswith(".json") else f"{base}.json"


def _clean_subreddit(subreddit: str) -> str:
    """Normalize a subreddit name: accepts ``Name``, ``r/Name`` and ``/r/Name/``."""
    name = subreddit.strip().strip("/")
    if name.lower().startswith("r/"):
        name = name[2:]
    return name


def _extract_post(post_data: Dict) -> Dict[str, Any]:
    """Flatten one listing child (``{"kind": ..., "data": {...}}``) into a post dict."""
    data = post_data.get("data", {})
    return {
        "title": data.get("title", ""),
        "author": data.get("author", "[deleted]"),
        "selftext": data.get("selftext", ""),
        "score": data.get("score", 0),
        "num_comments": data.get("num_comments", 0),
        "url": f"https://www.reddit.com{data.get('permalink', '')}",
        "created_utc": data.get("created_utc", 0),
        "subreddit": data.get("subreddit", ""),
    }


def _extract_comments(comments_data: Dict, limit: int = 20) -> List[Dict[str, Any]]:
    """Extract top-level comments from a comments listing.

    Only the first *limit* children are inspected, and children whose kind is
    not ``t1`` are skipped, so fewer than *limit* comments may be returned.
    """
    children = comments_data.get("data", {}).get("children", [])
    comments = []
    for child in children[:limit]:
        if child.get("kind") != "t1":
            continue
        data = child.get("data", {})
        comments.append({
            "author": data.get("author", "[deleted]"),
            "body": data.get("body", ""),
            "score": data.get("score", 0),
        })
    return comments


async def fetch_reddit(url: str) -> Dict[str, Any]:
    """Fetch a Reddit post and its top comments via the JSON API.

    Raises:
        ValueError: if the response is not the expected two-listing shape or
            contains no post.
        requests.HTTPError: if Reddit answers with an error status.
    """
    logger.info(f"Fetching Reddit: {url}")

    data = await _get_json(_to_json_url(url))

    # Reddit returns [post_listing, comments_listing]
    if not isinstance(data, list) or len(data) < 2:
        raise ValueError("Unexpected Reddit response format")

    post_listing = data[0].get("data", {}).get("children", [])
    if not post_listing:
        raise ValueError("No post found")

    post = _extract_post(post_listing[0])
    comments = _extract_comments(data[1])

    # Build readable content: post body first, then the comments section.
    content_parts = [post["selftext"]] if post["selftext"] else []
    if comments:
        content_parts.append("\n---\n## Top Comments\n")
        content_parts.extend(
            f"**u/{c['author']}** ({c['score']} pts):\n{c['body']}\n"
            for c in comments
        )

    return {
        "title": post["title"],
        "content": "\n".join(content_parts),
        "author": f"u/{post['author']}",
        "url": post["url"],
        "subreddit": post["subreddit"],
        "score": post["score"],
        "num_comments": post["num_comments"],
        "platform": "reddit",
    }


async def search_reddit(
    query: str,
    subreddit: Optional[str] = None,
    limit: int = 10,
) -> List[Dict[str, Any]]:
    """Search Reddit posts, optionally restricted to one subreddit.

    Args:
        query: Search query.
        subreddit: Subreddit name; an ``r/`` prefix is tolerated.
        limit: Maximum number of results requested from Reddit.

    Raises:
        requests.HTTPError: if Reddit answers with an error status.
    """
    logger.info(f"Searching Reddit: {query} (sub={subreddit})")

    params: Dict[str, Any] = {"q": query, "limit": limit, "sort": "relevance"}
    sub = _clean_subreddit(subreddit) if subreddit else ""
    if sub:
        search_url = f"https://www.reddit.com/r/{sub}/search.json"
        params["restrict_sr"] = "on"
    else:
        search_url = "https://www.reddit.com/search.json"

    data = await _get_json(search_url, params=params)

    return [
        _extract_post(child)
        for child in data.get("data", {}).get("children", [])
    ]
# -*- coding: utf-8 -*-
"""Search fetcher — semantic web search via Exa API.

Requires EXA_API_KEY env var. Get a free key at https://exa.ai
"""

import asyncio
import os

import requests
from loguru import logger
from typing import Dict, Any, List, Optional


EXA_API_URL = "https://api.exa.ai/search"

# Upper bound on results requested per query (documented on search_web).
_MAX_RESULTS = 10


async def search_web(
    query: str,
    num_results: int = 5,
    search_type: str = "auto",
) -> List[Dict[str, Any]]:
    """
    Search the web using Exa semantic search.

    Args:
        query: Search query (supports site: prefix, e.g. "site:reddit.com AI agent")
        num_results: Number of results to return (default 5, clamped to 1..10)
        search_type: "auto" (default) or "neural" or "keyword"

    Returns:
        List of search results with title, url, snippet, published_date, score

    Raises:
        ValueError: if EXA_API_KEY is not set.
        requests.HTTPError: if the Exa API answers with an error status.
    """
    api_key = os.environ.get("EXA_API_KEY")
    if not api_key:
        raise ValueError(
            "EXA_API_KEY not set. Get a free key at https://exa.ai\n"
            "Then: export EXA_API_KEY=your_key_here"
        )

    logger.info(f"Exa search: {query} (n={num_results})")

    payload = {
        "query": query,
        # Clamp on both sides: 0 or a negative count is never a valid request.
        "numResults": max(1, min(num_results, _MAX_RESULTS)),
        "type": search_type,
        "contents": {
            "text": {"maxCharacters": 500},
        },
    }

    # requests is synchronous; run it in a worker thread so concurrent
    # searches/fetches are not serialized on the event loop.
    resp = await asyncio.to_thread(
        requests.post,
        EXA_API_URL,
        headers={
            "Content-Type": "application/json",
            "x-api-key": api_key,
        },
        json=payload,
        timeout=15,
    )
    resp.raise_for_status()
    data = resp.json()

    return [
        {
            "title": item.get("title", ""),
            "url": item.get("url", ""),
            "snippet": item.get("text", ""),
            "published_date": item.get("publishedDate", ""),
            "score": item.get("score", 0),
        }
        for item in data.get("results", [])
    ]


async def search_reddit_via_exa(
    query: str,
    subreddit: Optional[str] = None,
    num_results: int = 10,
) -> List[Dict[str, Any]]:
    """
    Search Reddit content via Exa (bypasses Reddit IP blocks).

    Args:
        query: Search query
        subreddit: Optional subreddit to limit search (e.g. "LocalLLaMA");
            an "r/" prefix is tolerated
        num_results: Number of results

    Returns:
        List of Reddit posts found (same shape as search_web results)
    """
    sub = (subreddit or "").strip().strip("/")
    # Accept "r/Name" as well as "Name" so the query never becomes ".../r/r/Name".
    if sub.lower().startswith("r/"):
        sub = sub[2:]

    if sub:
        full_query = f"site:reddit.com/r/{sub} {query}"
    else:
        full_query = f"site:reddit.com {query}"

    return await search_web(full_query, num_results=num_results)
Dict[str, Any]: # Tier 2: Playwright headless (no session needed) try: logger.info(f"[WeChat] Tier 2 — Playwright headless: {url}") - from x_reader.fetchers.browser import fetch_via_browser + from agent_eyes.fetchers.browser import fetch_via_browser data = await fetch_via_browser(url) return { diff --git a/x_reader/fetchers/xhs.py b/agent_eyes/fetchers/xhs.py similarity index 92% rename from x_reader/fetchers/xhs.py rename to agent_eyes/fetchers/xhs.py index 215a679..12faceb 100644 --- a/x_reader/fetchers/xhs.py +++ b/agent_eyes/fetchers/xhs.py @@ -13,7 +13,7 @@ from loguru import logger from typing import Dict, Any from pathlib import Path -from x_reader.fetchers.jina import fetch_via_jina +from agent_eyes.fetchers.jina import fetch_via_jina async def fetch_xhs(url: str) -> Dict[str, Any]: @@ -43,7 +43,7 @@ async def fetch_xhs(url: str) -> Dict[str, Any]: logger.warning(f"[XHS] Jina failed ({e}), falling back to browser") # Tier 2: Playwright with session - from x_reader.fetchers.browser import get_session_path, SESSION_DIR + from agent_eyes.fetchers.browser import get_session_path, SESSION_DIR session_path = get_session_path("xhs") if not Path(session_path).exists(): @@ -56,7 +56,7 @@ async def fetch_xhs(url: str) -> Dict[str, Any]: try: logger.info(f"[XHS] Tier 2 — Playwright with session: {url}") - from x_reader.fetchers.browser import fetch_via_browser + from agent_eyes.fetchers.browser import fetch_via_browser data = await fetch_via_browser(url, storage_state=session_path) return { diff --git a/x_reader/fetchers/youtube.py b/agent_eyes/fetchers/youtube.py similarity index 99% rename from x_reader/fetchers/youtube.py rename to agent_eyes/fetchers/youtube.py index 1282760..257911c 100644 --- a/x_reader/fetchers/youtube.py +++ b/agent_eyes/fetchers/youtube.py @@ -17,7 +17,7 @@ import tempfile from loguru import logger from typing import Dict, Any -from x_reader.fetchers.jina import fetch_via_jina +from agent_eyes.fetchers.jina import fetch_via_jina def 
_extract_video_id(url: str) -> str: diff --git a/x_reader/login.py b/agent_eyes/login.py similarity index 100% rename from x_reader/login.py rename to agent_eyes/login.py diff --git a/x_reader/reader.py b/agent_eyes/reader.py similarity index 69% rename from x_reader/reader.py rename to agent_eyes/reader.py index 150f3df..430790d 100644 --- a/x_reader/reader.py +++ b/agent_eyes/reader.py @@ -10,12 +10,12 @@ from urllib.parse import urlparse from loguru import logger from typing import Dict, Any, Optional -from x_reader.schema import ( - UnifiedContent, UnifiedInbox, SourceType, +from agent_eyes.schema import ( + UnifiedContent, UnifiedInbox, SourceType, MediaType, from_bilibili, from_twitter, from_wechat, from_xiaohongshu, from_youtube, from_rss, from_telegram, ) -from x_reader.fetchers.jina import fetch_via_jina +from agent_eyes.fetchers.jina import fetch_via_jina class UniversalReader: @@ -47,6 +47,10 @@ class UniversalReader: return "podcast" if "t.me" in domain or "telegram.org" in domain: return "telegram" + if "reddit.com" in domain or "redd.it" in domain: + return "reddit" + if "github.com" in domain: + return "github" if url.endswith(".xml") or "/rss" in url or "/feed" in url or "/atom" in url: return "rss" return "generic" @@ -74,7 +78,7 @@ class UniversalReader: logger.info(f"Saved to inbox: {content.title[:50]}") # Save to markdown output if configured - from x_reader.utils.storage import save_to_markdown + from agent_eyes.utils.storage import save_to_markdown save_to_markdown(content) return content @@ -87,39 +91,67 @@ class UniversalReader: """Dispatch to platform-specific fetcher.""" if platform == "bilibili": - from x_reader.fetchers.bilibili import fetch_bilibili + from agent_eyes.fetchers.bilibili import fetch_bilibili data = await fetch_bilibili(url) return from_bilibili(data) if platform == "twitter": - from x_reader.fetchers.twitter import fetch_twitter + from agent_eyes.fetchers.twitter import fetch_twitter data = await fetch_twitter(url) 
return from_twitter(data) if platform == "wechat": - from x_reader.fetchers.wechat import fetch_wechat + from agent_eyes.fetchers.wechat import fetch_wechat data = await fetch_wechat(url) return from_wechat(data) if platform == "xhs": - from x_reader.fetchers.xhs import fetch_xhs + from agent_eyes.fetchers.xhs import fetch_xhs data = await fetch_xhs(url) return from_xiaohongshu(data) if platform == "youtube": - from x_reader.fetchers.youtube import fetch_youtube + from agent_eyes.fetchers.youtube import fetch_youtube data = await fetch_youtube(url) return from_youtube(data) if platform == "rss": - from x_reader.fetchers.rss import fetch_rss + from agent_eyes.fetchers.rss import fetch_rss articles = await fetch_rss(url, limit=1) if articles: return from_rss(articles[0]) raise ValueError(f"No articles found in RSS feed: {url}") + if platform == "reddit": + from agent_eyes.fetchers.reddit import fetch_reddit + data = await fetch_reddit(url) + return UnifiedContent( + source_type=SourceType.REDDIT, + source_name=f"r/{data.get('subreddit', '')}", + title=data["title"], + content=data.get("content", ""), + url=data["url"], + author=data.get("author", ""), + media_type=MediaType.TEXT, + metadata={"score": data.get("score", 0), "num_comments": data.get("num_comments", 0)}, + ) + + if platform == "github": + from agent_eyes.fetchers.github import fetch_github + data = await fetch_github(url) + return UnifiedContent( + source_type=SourceType.GITHUB, + source_name=data.get("title", ""), + title=data["title"], + content=data.get("content", ""), + url=data["url"], + author=data.get("author", ""), + media_type=MediaType.TEXT, + metadata={k: v for k, v in data.items() if k not in ("title", "content", "url", "author", "platform")}, + ) + if platform == "telegram": - from x_reader.fetchers.telegram import fetch_telegram + from agent_eyes.fetchers.telegram import fetch_telegram # Extract channel username from t.me URL path = urlparse(url).path.strip("/").split("/")[0] channel = path 
if path else url diff --git a/x_reader/schema.py b/agent_eyes/schema.py similarity index 98% rename from x_reader/schema.py rename to agent_eyes/schema.py index 4885d18..1582eb5 100644 --- a/x_reader/schema.py +++ b/agent_eyes/schema.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Unified content schema for x-reader. +Unified content schema for Agent Eyes. Defines the standard data format for all content sources: - Telegram channels @@ -10,6 +10,9 @@ Defines the standard data format for all content sources: - WeChat articles - X/Twitter posts - YouTube videos +- Reddit posts +- GitHub repos/issues/PRs +- Web search results - Manual input """ @@ -30,6 +33,9 @@ class SourceType(str, Enum): TWITTER = "twitter" WECHAT = "wechat" YOUTUBE = "youtube" + REDDIT = "reddit" + GITHUB = "github" + SEARCH = "search" MANUAL = "manual" diff --git a/x_reader/utils/__init__.py b/agent_eyes/utils/__init__.py similarity index 100% rename from x_reader/utils/__init__.py rename to agent_eyes/utils/__init__.py diff --git a/x_reader/utils/storage.py b/agent_eyes/utils/storage.py similarity index 98% rename from x_reader/utils/storage.py rename to agent_eyes/utils/storage.py index 9de0bf9..4b481c0 100644 --- a/x_reader/utils/storage.py +++ b/agent_eyes/utils/storage.py @@ -13,7 +13,7 @@ from datetime import datetime from pathlib import Path from loguru import logger -from x_reader.schema import UnifiedContent +from agent_eyes.schema import UnifiedContent def save_to_json(item: UnifiedContent, filepath: str = "unified_inbox.json"): diff --git a/mcp_server.py b/mcp_server.py index 1cde552..b7195da 100644 --- a/mcp_server.py +++ b/mcp_server.py @@ -9,7 +9,7 @@ Usage: Claude Code config (~/.claude/claude_desktop_config.json): { "mcpServers": { - "x-reader": { + "agent-eyes": { "command": "python", "args": ["/path/to/x-reader/mcp_server.py"] } @@ -23,12 +23,12 @@ from mcp.server.fastmcp import FastMCP load_dotenv() -from x_reader.reader import UniversalReader -from x_reader.schema import 
UnifiedInbox +from agent_eyes.reader import UniversalReader +from agent_eyes.schema import UnifiedInbox mcp = FastMCP( - "x-reader", - instructions="Universal content reader — give it any URL, get structured content back.", + "agent-eyes", + instructions="Give your AI Agent eyes to see the entire internet. Search, read, and extract content from any platform.", ) reader = UniversalReader(inbox=UnifiedInbox()) @@ -85,11 +85,70 @@ async def detect_platform(url: str) -> str: Detect which platform a URL belongs to. Returns the platform name: youtube, bilibili, twitter, wechat, - xhs, telegram, rss, or generic. + xhs, reddit, github, telegram, rss, or generic. """ return reader._detect_platform(url) +# ==================== Search Tools (NEW in Agent Eyes) ==================== + +@mcp.tool() +async def search(query: str, num_results: int = 5) -> str: + """ + Search the entire web using semantic search (powered by Exa). + + Great for finding articles, blog posts, discussions on any topic. + Supports site: prefix, e.g. "site:reddit.com AI agent" to limit to specific sites. + + Requires EXA_API_KEY env var. Get a free key at https://exa.ai + + Args: + query: Search query + num_results: Number of results (1-10, default 5) + """ + import json + from agent_eyes.fetchers.search import search_web + + results = await search_web(query, num_results=num_results) + return json.dumps(results, ensure_ascii=False, indent=2) + + +@mcp.tool() +async def search_reddit(query: str, subreddit: str = "", limit: int = 10) -> str: + """ + Search Reddit posts. Bypasses Reddit IP blocks via Exa. + + Args: + query: Search query + subreddit: Optional subreddit name (e.g. "LocalLLaMA"). Empty = all of Reddit. 
+ limit: Number of results (default 10) + """ + import json + from agent_eyes.fetchers.search import search_reddit_via_exa + + sub = subreddit if subreddit else None + results = await search_reddit_via_exa(query, subreddit=sub, num_results=limit) + return json.dumps(results, ensure_ascii=False, indent=2) + + +@mcp.tool() +async def search_github(query: str, limit: int = 5) -> str: + """ + Search GitHub repositories by keyword. + + Returns repos sorted by stars. No API key needed for public repos. + + Args: + query: Search query (e.g. "LLM agent framework", "language:python RAG") + limit: Number of results (default 5) + """ + import json + from agent_eyes.fetchers.github import search_github as _search_gh + + results = await _search_gh(query, limit=limit) + return json.dumps(results, ensure_ascii=False, indent=2) + + if __name__ == "__main__": import sys diff --git a/pyproject.toml b/pyproject.toml index 3155637..98d5f50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,15 @@ [project] -name = "x-reader" -version = "0.2.0" -description = "Universal content reader — fetch, normalize, and digest content from 7+ platforms" +name = "agent-eyes" +version = "1.0.0" +description = "Give your AI Agent eyes to see the entire internet — search, read, and extract content from 10+ platforms" readme = "README.md" license = {text = "MIT"} requires-python = ">=3.10" authors = [ - {name = "Leo", email = "runes.leo@gmail.com"} + {name = "Neo Reid"}, + {name = "Leo (x-reader)", email = "runes.leo@gmail.com"}, ] -keywords = ["content-reader", "rss", "telegram", "bilibili", "xiaohongshu", "digest"] +keywords = ["ai-agent", "web-reader", "search", "reddit", "github", "content-extraction", "mcp", "agent-infrastructure"] dependencies = [ "requests>=2.28", "feedparser>=6.0", @@ -17,9 +18,9 @@ dependencies = [ ] [project.urls] -Homepage = "https://github.com/runesleo/x-reader" -Repository = "https://github.com/runesleo/x-reader" -Issues = 
"https://github.com/runesleo/x-reader/issues" +Homepage = "https://github.com/Panniantong/agent-eyes" +Repository = "https://github.com/Panniantong/agent-eyes" +Issues = "https://github.com/Panniantong/agent-eyes/issues" [project.optional-dependencies] telegram = ["telethon>=1.34"] @@ -28,11 +29,12 @@ browser = ["playwright>=1.40"] all = ["telethon>=1.34", "mcp[cli]>=1.0", "playwright>=1.40"] [project.scripts] -x-reader = "x_reader.cli:main" +agent-eyes = "agent_eyes.cli:main" +x-reader = "agent_eyes.cli:main" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["x_reader"] +packages = ["agent_eyes"]