From e3804108fe82f46eacb9e91b3aff1e60a565d65f Mon Sep 17 00:00:00 2001 From: Panniantong Date: Wed, 25 Feb 2026 10:25:30 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=20Instagram=E3=80=81?= =?UTF-8?q?LinkedIn=E3=80=81Boss=E7=9B=B4=E8=81=98=20=E4=B8=89=E4=B8=AA?= =?UTF-8?q?=E6=B8=A0=E9=81=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增渠道: - Instagram: 基于 instaloader (⭐9.8K),读取帖子/Profile,Cookie 登录 - LinkedIn: 基于 linkedin-scraper-mcp (⭐900+) MCP 服务,Jina Reader fallback - Boss直聘: 基于 mcp-bosszp MCP 服务,Jina Reader fallback 代码改动: - 新建 channels/instagram.py, linkedin.py, bosszhipin.py - 注册到 channels/__init__.py - cli.py 添加 search-instagram/linkedin/bosszhipin 子命令 - cli.py 安装逻辑添加 instaloader 自动安装 - core.py 添加 search_instagram/linkedin/bosszhipin 方法 - README.md + docs/README_en.md 更新平台表格和选型表格 - docs/install.md 添加三个新渠道的配置说明和 Quick Reference --- README.md | 9 + agent_reach/channels/__init__.py | 6 + agent_reach/channels/bosszhipin.py | 183 +++++++++++++++++++++ agent_reach/channels/instagram.py | 216 ++++++++++++++++++++++++ agent_reach/channels/linkedin.py | 255 +++++++++++++++++++++++++++++ agent_reach/cli.py | 38 +++++ agent_reach/core.py | 18 ++ docs/README_en.md | 9 + docs/install.md | 42 ++++- 9 files changed, 775 insertions(+), 1 deletion(-) create mode 100644 agent_reach/channels/bosszhipin.py create mode 100644 agent_reach/channels/instagram.py create mode 100644 agent_reach/channels/linkedin.py diff --git a/README.md b/README.md index f5ca33b..a260357 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,9 @@ AI Agent 已经能帮你写代码、改文档、管项目——但你让它去 | 📺 **B站** | 本地:字幕提取 + 搜索 | 服务器也能用 | 告诉 Agent「帮我配代理」 | | 📖 **Reddit** | 搜索(通过 Exa 免费) | 读帖子和评论 | 告诉 Agent「帮我配代理」 | | 📕 **小红书** | — | 阅读、搜索、发帖、评论、点赞 | `docker run -d -p 18060:18060 xpzouying/xiaohongshu-mcp` 然后告诉 Agent「帮我配置小红书」 | +| 📷 **Instagram** | — | 读取帖子和 Profile | 告诉 Agent「帮我配 Instagram」 | +| 💼 **LinkedIn** | — | 读取 Profile、公司、职位搜索 | 告诉 Agent「帮我配 LinkedIn」 | +| 🏢 
**Boss直聘** | — | 搜索职位、向 HR 打招呼 | 告诉 Agent「帮我配 Boss直聘」 | > **不知道怎么配?不用查文档。** 直接告诉 Agent「帮我配 XXX」,它知道需要什么、会一步一步引导你。 > @@ -139,6 +142,9 @@ channels/ ├── bilibili.py → yt-dlp ← 可以换成 bilibili-api…… ├── reddit.py → JSON API + Exa ← 可以换成 PRAW、Pushshift…… ├── xiaohongshu.py → mcporter MCP ← 可以换成其他 XHS 工具…… +├── instagram.py → instaloader ← 可以换成 instagrapi、官方 API…… +├── linkedin.py → linkedin-mcp ← 可以换成 LinkedIn API…… +├── bosszhipin.py → mcp-bosszp ← 可以换成其他招聘工具…… ├── rss.py → feedparser ← 可以换成 atoma…… ├── exa_search.py → mcporter MCP ← 可以换成 Tavily、SerpAPI…… └── __init__.py → 渠道注册 @@ -155,6 +161,9 @@ channels/ | GitHub | [gh CLI](https://cli.github.com) | 官方工具,认证后完整 API 能力 | | 读 RSS | [feedparser](https://github.com/kurtmckee/feedparser) | Python 生态标准选择,2.3K Star | | 小红书 | [xiaohongshu-mcp](https://github.com/xpzouying/xiaohongshu-mcp) | ⭐9K+,Go 语言,Docker 一键部署 | +| Instagram | [instaloader](https://github.com/instaloader/instaloader) | ⭐9.8K,Python CLI,Cookie 登录,免费 | +| LinkedIn | [linkedin-scraper-mcp](https://github.com/stickerdaniel/linkedin-mcp-server) | ⭐900+,MCP 服务,浏览器自动化 | +| Boss直聘 | [mcp-bosszp](https://github.com/mucsbr/mcp-bosszp) | MCP 服务,支持职位搜索和打招呼 | > 📌 这些都是「当前选型」。不满意?换掉对应文件就行。这正是脚手架的意义。 diff --git a/agent_reach/channels/__init__.py b/agent_reach/channels/__init__.py index d70a98e..f9aee55 100644 --- a/agent_reach/channels/__init__.py +++ b/agent_reach/channels/__init__.py @@ -20,6 +20,9 @@ from .rss import RSSChannel from .bilibili import BilibiliChannel from .exa_search import ExaSearchChannel from .xiaohongshu import XiaoHongShuChannel +from .instagram import InstagramChannel +from .linkedin import LinkedInChannel +from .bosszhipin import BossZhipinChannel # Channel registry — order matters (first match wins, web is last as fallback) @@ -30,6 +33,9 @@ ALL_CHANNELS: List[Channel] = [ RedditChannel(), BilibiliChannel(), XiaoHongShuChannel(), + InstagramChannel(), + LinkedInChannel(), + BossZhipinChannel(), RSSChannel(), ExaSearchChannel(), WebChannel(), # 
# -*- coding: utf-8 -*-
"""Boss直聘 (BOSS Zhipin) — via mcp-bosszp (MCP) or Jina Reader fallback.

Backend: mcp-bosszp (161 stars, FastMCP + Playwright)
Swap to: any Boss直聘 access tool
"""

import json
import shutil
import subprocess
from typing import List, Optional, Tuple
from urllib.parse import urlparse

import requests

from .base import Channel, ReadResult, SearchResult

# Server name used when mcporter's listing cannot be parsed.
_DEFAULT_SERVER = "bosszhipin"
# Substrings that identify a Boss直聘 entry in `mcporter list` output.
_SERVER_MARKERS: Tuple[str, ...] = ("boss", "zhipin")
# Hosts handled by this channel (exact host or any subdomain).
# NOTE(review): "boss.com" is kept for backward compatibility — confirm it
# really belongs to Boss直聘.
_DOMAINS: Tuple[str, ...] = ("zhipin.com", "boss.com")
# Failures of the mcporter subprocess that should trigger a fallback.
_MCP_ERRORS = (RuntimeError, OSError, subprocess.SubprocessError)


def _host_matches(url: str, domains: Tuple[str, ...]) -> bool:
    """Return True if the URL's host is one of *domains* or a subdomain of it.

    A plain substring test would also accept look-alike hosts such as
    ``hugoboss.com`` or ``notzhipin.com``.
    """
    host = (urlparse(url).hostname or "").lower()
    return any(host == d or host.endswith("." + d) for d in domains)


def _first_line(text: str, fallback: str) -> str:
    """Return the first non-empty line of *text* (max 100 chars) as a title."""
    for raw in text.splitlines():
        line = raw.strip()
        if line.startswith("Title:"):
            line = line[len("Title:"):].strip()
        line = line.lstrip("#").strip()
        if line:
            return line[:100]
    return fallback


def _find_mcp_server() -> Optional[str]:
    """Return the configured Boss直聘 MCP server name, or None if absent.

    Runs ``mcporter list`` once and scans each line for a marker. The name is
    the first marker-bearing token that is not a path/URL; leading list
    bullets are ignored.
    """
    if not shutil.which("mcporter"):
        return None
    try:
        listing = subprocess.run(
            ["mcporter", "list"], capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError):
        return None

    for line in listing.stdout.splitlines():
        if not any(marker in line.lower() for marker in _SERVER_MARKERS):
            continue
        tokens = [t.strip("-•*:,()") for t in line.split()]
        tokens = [t for t in tokens if t]
        for token in tokens:
            lowered = token.lower()
            if "/" not in token and any(m in lowered for m in _SERVER_MARKERS):
                return token
        # The marker only appeared in a URL/path: assume the first token on
        # the line is the server name.
        if tokens:
            return tokens[0]
    return None


def _mcporter_has_bosszhipin() -> bool:
    """Check if mcporter has a Boss直聘 MCP server configured."""
    return _find_mcp_server() is not None


def _mcporter_call(expr: str, timeout: int = 30) -> str:
    """Call a Boss直聘 MCP tool via mcporter and return its stdout.

    Raises:
        RuntimeError: if mcporter exits with a non-zero status.
        subprocess.TimeoutExpired: if the call exceeds *timeout* seconds.
    """
    r = subprocess.run(
        ["mcporter", "call", expr],
        capture_output=True, text=True, timeout=timeout,
    )
    if r.returncode != 0:
        raise RuntimeError(r.stderr or r.stdout)
    return r.stdout


def _get_mcp_name() -> str:
    """Get the MCP server name configured in mcporter (default: bosszhipin)."""
    return _find_mcp_server() or _DEFAULT_SERVER


class BossZhipinChannel(Channel):
    """Channel for Boss直聘 job pages and job search."""

    name = "bosszhipin"
    description = "Boss直聘职位搜索"
    backends = ["mcp-bosszp", "Jina Reader"]
    tier = 2

    def can_handle(self, url: str) -> bool:
        """Return True for URLs hosted on a Boss直聘 domain."""
        return _host_matches(url, _DOMAINS)

    def check(self, config=None):
        """Report ("ok" | "off", message) depending on MCP availability."""
        if _mcporter_has_bosszhipin():
            return "ok", "可搜索职位、向 HR 打招呼"

        return "off", (
            "可通过 Jina Reader 读取职位页面。完整功能需要:\n"
            " 1. git clone https://github.com/mucsbr/mcp-bosszp.git\n"
            " 2. cd mcp-bosszp && pip install -r requirements.txt && playwright install chromium\n"
            " 3. python boss_zhipin_fastmcp_v2.py(启动 MCP 服务)\n"
            " 4. mcporter config add bosszhipin http://localhost:8000/mcp\n"
            " 或用 Docker:docker-compose up -d\n"
            " 详见 https://github.com/mucsbr/mcp-bosszp"
        )

    async def read(self, url: str, config=None) -> ReadResult:
        """Read a Boss直聘 page (always through Jina Reader)."""
        return await self._read_jina(url)

    async def _read_jina(self, url: str) -> ReadResult:
        """Read a Boss直聘 page via Jina Reader.

        Never raises for network/HTTP failures; returns a ReadResult with a
        hint message instead.
        """
        try:
            resp = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "text/markdown"},
                timeout=15,
            )
            resp.raise_for_status()
        except requests.RequestException:
            return ReadResult(
                title="Boss直聘",
                content=(
                    f"⚠️ 无法读取此 Boss直聘页面: {url}\n\n"
                    "提示:\n"
                    "- Boss直聘部分页面需要登录\n"
                    "- 安装 mcp-bosszp 可解锁完整功能\n"
                    "- 详见 https://github.com/mucsbr/mcp-bosszp"
                ),
                url=url,
                platform="bosszhipin",
            )

        text = resp.text
        # Very short bodies are treated as "nothing useful was extracted".
        if len(text.strip()) < 50:
            return ReadResult(
                title="Boss直聘",
                content=(
                    f"⚠️ 无法读取此页面内容: {url}\n\n"
                    "提示:\n"
                    "- 安装 mcp-bosszp 可解锁职位搜索和自动打招呼\n"
                    "- 详见 https://github.com/mucsbr/mcp-bosszp"
                ),
                url=url,
                platform="bosszhipin",
            )

        return ReadResult(
            title=_first_line(text, url),
            content=text,
            url=url,
            platform="bosszhipin",
        )

    async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
        """Search jobs via MCP, falling back to Exa ``site:zhipin.com``.

        The fallback runs when MCP is not configured, fails, or yields no
        results.
        """
        limit = kwargs.get("limit", 10)

        server = _find_mcp_server()
        if server:
            try:
                results = await self._search_mcp(query, limit, config, server=server)
            except _MCP_ERRORS:
                results = []
            if results:
                return results

        # Imported lazily, as in the other channels' Exa fallbacks.
        from agent_reach.channels.exa_search import ExaSearchChannel
        exa = ExaSearchChannel()
        return await exa.search(f"site:zhipin.com {query}", config=config, limit=limit)

    async def _search_mcp(
        self, query: str, limit: int, config=None, server: Optional[str] = None
    ) -> List[SearchResult]:
        """Fetch jobs from the MCP server.

        NOTE(review): *query* is not sent to the server; only the
        ``get_recommend_jobs_tool`` call is made, so results are
        recommendations, not keyword matches — confirm whether the server
        offers a keyword search tool.

        Raises:
            RuntimeError, OSError, subprocess.SubprocessError: on mcporter
                failure, so that ``search`` can fall back.
        """
        server = server or _get_mcp_name()
        out = _mcporter_call(
            f'{server}.get_recommend_jobs_tool(page: 1)',
            timeout=30,
        )
        return self._parse_jobs(out, limit)

    def _parse_jobs(self, text: str, limit: int) -> List[SearchResult]:
        """Parse MCP job output (JSON list or dict) into SearchResults.

        Returns an empty list for anything that is not JSON or does not
        contain a list of job objects.
        """
        try:
            data = json.loads(text)
        except (TypeError, ValueError):
            return []

        if isinstance(data, dict):
            data = data.get("jobs", data.get("results", []))
        if not isinstance(data, list):
            return []

        results = []
        for job in data[:limit]:
            if not isinstance(job, dict):
                continue
            title = job.get("title") or job.get("jobName", "")
            company = job.get("company") or job.get("brandName", "")
            salary = job.get("salary") or job.get("salaryDesc", "")
            snippet = f"🏢 {company}" if company else ""
            if salary:
                snippet += f" · 💰 {salary}"
            results.append(SearchResult(
                title=title,
                url=job.get("url", ""),
                snippet=snippet,
            ))
        return results
import importlib.util
import logging
import re
import shutil
import subprocess
from itertools import islice
from typing import List
from urllib.parse import urlparse

from .base import Channel, ReadResult, SearchResult

logger = logging.getLogger(__name__)

# Hosts handled by this channel (exact host or any subdomain).
_DOMAINS = ("instagram.com", "instagr.am")
# Matches the shortcode of a post-like URL path (/p/, /reel/, /reels/, /tv/).
_SHORTCODE_RE = re.compile(r"/(?:p|reel|reels|tv)/([A-Za-z0-9_-]+)")
# First path segments that are site sections, never usernames.
_RESERVED_SEGMENTS = ("p", "reel", "reels", "tv", "stories", "explore")
# Number of recent posts listed on a profile.
_RECENT_POSTS = 5


class InstagramChannel(Channel):
    """Channel for Instagram posts and profiles, backed by instaloader."""

    name = "instagram"
    description = "Instagram 帖子和 Profile"
    backends = ["instaloader"]
    tier = 2  # Needs login for full access

    def can_handle(self, url: str) -> bool:
        """Return True for URLs whose host is an Instagram domain.

        Compares the host exactly (or as a subdomain) so look-alike hosts
        such as ``notinstagram.com`` are rejected.
        """
        host = (urlparse(url).hostname or "").lower()
        return any(host == d or host.endswith("." + d) for d in _DOMAINS)

    def check(self, config=None):
        """Report ("ok" | "off", message) depending on instaloader presence."""
        has_cli = shutil.which("instaloader") is not None
        # find_spec avoids importing the package just to probe for it.
        has_module = importlib.util.find_spec("instaloader") is not None

        if not has_cli and not has_module:
            return "off", (
                "需要安装 instaloader:pip install instaloader\n"
                " 安装后可读取 Instagram 帖子和 Profile\n"
                " 登录解锁更多功能:instaloader --login YOUR_USERNAME"
            )
        return "ok", "可读取公开帖子和 Profile。登录后可访问更多内容"

    async def read(self, url: str, config=None) -> ReadResult:
        """Read a post or profile; falls back to Jina Reader on any failure."""
        return await self._read_instaloader(url, config)

    async def _read_instaloader(self, url: str, config=None) -> ReadResult:
        """Read Instagram content using the instaloader Python API.

        Falls back to Jina Reader when instaloader is missing or raises.
        """
        try:
            import instaloader
        except ImportError:
            return await self._read_jina(url)

        try:
            loader = instaloader.Instaloader(
                download_pictures=False,
                download_videos=False,
                download_video_thumbnails=False,
                download_geotags=False,
                download_comments=False,
                save_metadata=False,
                compress_json=False,
            )

            # A saved session is optional; anonymous access is the fallback.
            username = config.get("instagram_username") if config else None
            if username:
                try:
                    loader.load_session_from_file(username)
                except Exception:
                    logger.debug("Instagram session for %s not loaded", username, exc_info=True)

            path = urlparse(url).path.strip("/")
            # Decide on the path only, so query strings cannot mislead us.
            if _SHORTCODE_RE.search(f"/{path}/"):
                return await self._read_post(loader, url, path)
            return await self._read_profile(loader, url, path)
        except Exception:
            # Deliberate best-effort: any instaloader failure degrades to Jina.
            logger.debug("instaloader failed for %s", url, exc_info=True)
            return await self._read_jina(url)

    async def _read_post(self, L, url: str, path: str) -> ReadResult:
        """Read a single Instagram post identified by the shortcode in *path*."""
        import instaloader

        match = _SHORTCODE_RE.search(f"/{path}/")
        if not match:
            return await self._read_jina(url)

        shortcode = match.group(1)
        try:
            post = instaloader.Post.from_shortcode(L.context, shortcode)

            lines = []
            if post.caption:
                lines.append(post.caption)
                lines.append("")
            lines.append(f"👤 @{post.owner_username}")
            lines.append(f"❤️ {post.likes} likes")
            if post.comments:
                lines.append(f"💬 {post.comments} comments")
            lines.append(f"📅 {post.date_utc.strftime('%Y-%m-%d %H:%M')}")
            location = post.location
            if location:
                # PostLocation is a namedtuple; show its name, not its repr.
                lines.append(f"📍 {getattr(location, 'name', None) or location}")
            # instaloader exposes caption hashtags as `caption_hashtags`.
            hashtags = post.caption_hashtags
            if hashtags:
                lines.append(f"#️⃣ {' '.join('#' + h for h in hashtags)}")

            return ReadResult(
                title=f"@{post.owner_username}: {(post.caption or '')[:80]}",
                content="\n".join(lines),
                url=url,
                author=f"@{post.owner_username}",
                date=post.date_utc.strftime("%Y-%m-%d"),
                platform="instagram",
                extra={"likes": post.likes, "comments": post.comments},
            )
        except Exception:
            logger.debug("Reading Instagram post %s failed", shortcode, exc_info=True)
            return await self._read_jina(url)

    async def _read_profile(self, L, url: str, path: str) -> ReadResult:
        """Read an Instagram profile plus up to five recent posts."""
        import instaloader

        # The username is the first path segment, unless it is a site section.
        username = path.split("/")[0] if path else ""
        if not username or username in _RESERVED_SEGMENTS:
            return await self._read_jina(url)

        try:
            profile = instaloader.Profile.from_username(L.context, username)

            lines = [f"👤 {profile.full_name} (@{profile.username})"]
            if profile.biography:
                lines.append(f"📝 {profile.biography}")
            if profile.external_url:
                lines.append(f"🔗 {profile.external_url}")
            lines.append("")
            lines.append(f"📊 {profile.mediacount} posts · "
                         f"{profile.followers} followers · "
                         f"{profile.followees} following")
            if profile.is_verified:
                lines.append("✅ Verified")
            if profile.is_business_account and profile.business_category_name:
                lines.append(f"🏢 {profile.business_category_name}")

            # Listing posts is a separate step so that a failure there does
            # not throw away the profile data gathered above.
            recent = []
            try:
                for post in islice(profile.get_posts(), _RECENT_POSTS):
                    caption = (post.caption or "")[:100].replace("\n", " ")
                    recent.append(
                        f" • ❤️{post.likes} | {post.date_utc.strftime('%m-%d')} | {caption}"
                    )
            except Exception:
                logger.debug("Listing posts of %s failed", username, exc_info=True)
            if recent:
                lines.append("")
                lines.append("📸 Recent posts:")
                lines.extend(recent)

            return ReadResult(
                title=f"{profile.full_name} (@{profile.username}) - Instagram",
                content="\n".join(lines),
                url=url,
                author=f"@{profile.username}",
                platform="instagram",
                extra={
                    "followers": profile.followers,
                    "posts": profile.mediacount,
                },
            )
        except Exception:
            logger.debug("Reading Instagram profile %s failed", username, exc_info=True)
            return await self._read_jina(url)

    async def _read_jina(self, url: str) -> ReadResult:
        """Fallback: fetch the page through Jina Reader.

        Never raises for network/HTTP failures; returns a ReadResult with a
        hint message instead.
        """
        import requests
        try:
            resp = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "text/markdown"},
                timeout=15,
            )
            resp.raise_for_status()
        except requests.RequestException:
            return ReadResult(
                title="Instagram",
                content=(
                    f"⚠️ 无法读取此 Instagram 内容: {url}\n\n"
                    "提示:\n"
                    "- 确保 URL 正确\n"
                    "- 安装 instaloader: pip install instaloader\n"
                    "- 登录以访问更多内容: instaloader --login YOUR_USERNAME"
                ),
                url=url,
                platform="instagram",
            )

        text = resp.text
        return ReadResult(
            title=text[:100] if text else url,
            content=text,
            url=url,
            platform="instagram",
        )

    async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
        """Search Instagram via Exa (``site:instagram.com``)."""
        limit = kwargs.get("limit", 10)
        from agent_reach.channels.exa_search import ExaSearchChannel
        exa = ExaSearchChannel()
        return await exa.search(f"site:instagram.com {query}", config=config, limit=limit)
# -*- coding: utf-8 -*-
"""LinkedIn — via linkedin-scraper-mcp (MCP) or Jina Reader fallback.

Backend: linkedin-scraper-mcp (916 stars, Patchright browser automation)
Swap to: any LinkedIn access tool
"""

import json
import re
import shutil
import subprocess
from typing import List
from urllib.parse import urlparse

import requests

from .base import Channel, ReadResult, SearchResult

# Failures of the mcporter subprocess that should trigger a fallback.
_MCP_ERRORS = (RuntimeError, OSError, subprocess.SubprocessError)
# Job id at the end of /jobs/view/<id> or /jobs/view/<slug>-<id>.
_JOB_ID_RE = re.compile(r"/jobs/view/(?:[^/?#]*-)?(\d+)")


def _mcporter_has_linkedin() -> bool:
    """Check if mcporter has a linkedin MCP server configured."""
    if not shutil.which("mcporter"):
        return False
    try:
        r = subprocess.run(
            ["mcporter", "list"], capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError):
        return False
    return "linkedin" in r.stdout.lower()


def _mcporter_call(expr: str, timeout: int = 30) -> str:
    """Call a LinkedIn MCP tool via mcporter and return its stdout.

    Raises:
        RuntimeError: if mcporter exits with a non-zero status.
        subprocess.TimeoutExpired: if the call exceeds *timeout* seconds.
    """
    r = subprocess.run(
        ["mcporter", "call", expr],
        capture_output=True, text=True, timeout=timeout,
    )
    if r.returncode != 0:
        raise RuntimeError(r.stderr or r.stdout)
    return r.stdout


def _quote_arg(value: str) -> str:
    """Return *value* as a double-quoted literal for a mcporter call.

    json.dumps escapes backslashes and control characters as well as quotes,
    so the value cannot terminate the string early.
    """
    return json.dumps(value, ensure_ascii=False)


class LinkedInChannel(Channel):
    """Channel for LinkedIn profiles, company pages and job postings."""

    name = "linkedin"
    description = "LinkedIn 个人/公司 Profile 和职位"
    backends = ["linkedin-scraper-mcp", "Jina Reader"]
    tier = 2

    def can_handle(self, url: str) -> bool:
        """Return True for URLs hosted on linkedin.com or a subdomain."""
        host = (urlparse(url).hostname or "").lower()
        return host == "linkedin.com" or host.endswith(".linkedin.com")

    def check(self, config=None):
        """Report ("ok" | "warn" | "off", message) for the MCP backend."""
        if _mcporter_has_linkedin():
            return "ok", "完整可用(Profile、公司、职位搜索)"

        # Installed as a CLI but not yet registered with mcporter.
        if shutil.which("linkedin-scraper-mcp"):
            return "warn", (
                "linkedin-scraper-mcp 已安装但未接入 mcporter。运行:\n"
                " 1. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
                " 2. mcporter config add linkedin http://localhost:8001/mcp\n"
                " 或先登录:uvx linkedin-scraper-mcp --login"
            )

        return "off", (
            "可通过 Jina Reader 读取部分内容。完整功能需要:\n"
            " 1. pip install linkedin-scraper-mcp 或 uvx linkedin-scraper-mcp --login\n"
            " 2. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
            " 3. mcporter config add linkedin http://localhost:8001/mcp\n"
            " 详见 https://github.com/stickerdaniel/linkedin-mcp-server"
        )

    async def read(self, url: str, config=None) -> ReadResult:
        """Read a LinkedIn page via MCP when possible, else via Jina Reader."""
        # Route on the path only, so query strings cannot mislead us.
        path = urlparse(url).path

        if _mcporter_has_linkedin():
            try:
                if "/in/" in path:
                    return await self._read_profile_mcp(url)
                if "/company/" in path:
                    return await self._read_company_mcp(url)
                if "/jobs/view/" in path:
                    return await self._read_job_mcp(url)
            except _MCP_ERRORS:
                pass  # Fall through to Jina

        return await self._read_jina(url)

    async def _read_profile_mcp(self, url: str) -> ReadResult:
        """Read a LinkedIn profile via MCP.

        NOTE(review): the tool and parameter names are taken as-is from the
        previous implementation — confirm against the server's tool schema.
        """
        out = _mcporter_call(
            f'linkedin.get_person_profile(url: {_quote_arg(url)})',
            timeout=30,
        )
        return ReadResult(
            title=self._extract_title(out) or "LinkedIn Profile",
            content=out.strip(),
            url=url,
            platform="linkedin",
        )

    async def _read_company_mcp(self, url: str) -> ReadResult:
        """Read a LinkedIn company page via MCP."""
        out = _mcporter_call(
            f'linkedin.get_company_profile(url: {_quote_arg(url)})',
            timeout=30,
        )
        return ReadResult(
            title=self._extract_title(out) or "LinkedIn Company",
            content=out.strip(),
            url=url,
            platform="linkedin",
        )

    async def _read_job_mcp(self, url: str) -> ReadResult:
        """Read a LinkedIn job posting via MCP; Jina if no job id is found."""
        match = _JOB_ID_RE.search(urlparse(url).path)
        if not match:
            return await self._read_jina(url)

        job_id = match.group(1)
        out = _mcporter_call(
            f'linkedin.get_job_details(job_id: "{job_id}")',
            timeout=30,
        )
        return ReadResult(
            title=self._extract_title(out) or f"LinkedIn Job {job_id}",
            content=out.strip(),
            url=url,
            platform="linkedin",
        )

    async def _read_jina(self, url: str) -> ReadResult:
        """Fallback: fetch the page through Jina Reader.

        Never raises for network/HTTP failures; returns a ReadResult with a
        hint message instead.
        """
        try:
            resp = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "text/markdown"},
                timeout=15,
            )
            resp.raise_for_status()
        except requests.RequestException:
            return ReadResult(
                title="LinkedIn",
                content=(
                    f"⚠️ 无法读取此 LinkedIn 页面: {url}\n\n"
                    "提示:\n"
                    "- LinkedIn 需要登录才能查看大部分内容\n"
                    "- 安装 linkedin-scraper-mcp 解锁完整功能\n"
                    "- 详见 https://github.com/stickerdaniel/linkedin-mcp-server"
                ),
                url=url,
                platform="linkedin",
            )

        text = resp.text
        # A tiny body or an early "Sign in" is treated as a login wall.
        if len(text.strip()) < 100 or "Sign in" in text[:200]:
            return ReadResult(
                title="LinkedIn",
                content=(
                    f"⚠️ LinkedIn 页面需要登录才能完整查看。\n\n"
                    f"URL: {url}\n\n"
                    "完整功能需安装 linkedin-scraper-mcp:\n"
                    " pip install linkedin-scraper-mcp\n"
                    " uvx linkedin-scraper-mcp --login\n"
                    " 详见 https://github.com/stickerdaniel/linkedin-mcp-server"
                ),
                url=url,
                platform="linkedin",
            )

        return ReadResult(
            title=text[:100] if text else url,
            content=text,
            url=url,
            platform="linkedin",
        )

    async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
        """Search via MCP, falling back to Exa ``site:linkedin.com``.

        The fallback runs when MCP is not configured, fails, or yields no
        results.
        """
        limit = kwargs.get("limit", 10)

        if _mcporter_has_linkedin():
            try:
                results = await self._search_mcp(query, limit)
            except _MCP_ERRORS:
                results = []
            if results:
                return results

        from agent_reach.channels.exa_search import ExaSearchChannel
        exa = ExaSearchChannel()
        return await exa.search(f"site:linkedin.com {query}", config=config, limit=limit)

    async def _search_mcp(self, query: str, limit: int) -> List[SearchResult]:
        """Search LinkedIn via MCP: jobs first, then people.

        Returns at most *limit* results from the first tool that yields any,
        or an empty list.
        """
        keywords = _quote_arg(query)
        # Job search goes first (most common use case).
        for tool, kind in (("search_jobs", "job"), ("search_people", "people")):
            try:
                out = _mcporter_call(
                    f'linkedin.{tool}(keywords: {keywords}, limit: {int(limit)})',
                    timeout=30,
                )
            except _MCP_ERRORS:
                continue
            results = self._parse_search_results(out, kind)
            if results:
                return results[:limit]
        return []

    def _parse_search_results(self, text: str, result_type: str) -> List[SearchResult]:
        """Parse MCP search output (JSON list or dict) into SearchResults.

        *result_type* is accepted for interface compatibility and not used.
        Returns an empty list for non-JSON or unexpectedly shaped output.
        """
        try:
            data = json.loads(text)
        except (TypeError, ValueError):
            return []

        if isinstance(data, dict):
            data = data.get("results", data.get("jobs", []))
        if not isinstance(data, list):
            return []

        results = []
        for item in data:
            if not isinstance(item, dict):
                continue
            title = item.get("title") or item.get("name") or item.get("headline", "")
            url = item.get("url") or item.get("link", "")
            snippet = item.get("description") or item.get("company", "")
            results.append(SearchResult(
                title=title,
                url=url,
                # str() guards against non-string values (e.g. nested objects).
                snippet=str(snippet)[:200] if snippet else "",
            ))
        return results

    def _extract_title(self, text: str) -> str:
        """Return the first plain-text line of MCP output (max 80 chars).

        Lines that look like JSON, Markdown headings or URLs are skipped;
        returns "" if no line qualifies.
        """
        for line in text.split("\n"):
            line = line.strip()
            if line and not line.startswith(("{", "[", "#", "http")):
                return line[:80]
        return ""
sub.add_parser("search-bosszhipin", help="Search Boss直聘") + p_sbz.add_argument("query", nargs="+", help="Search query") + p_sbz.add_argument("-n", "--num", type=int, default=10, help="Number of results") + # ── setup ── sub.add_parser("setup", help="Interactive configuration wizard") @@ -369,6 +384,23 @@ def _install_system_deps(): else: print(" ⬜ bird CLI requires Node.js (optional — Twitter reading still works via Jina)") + # ── instaloader (for Instagram) ── + if shutil.which("instaloader"): + print(" ✅ instaloader already installed") + else: + print(" 📥 Installing instaloader...") + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "instaloader"], + capture_output=True, text=True, timeout=120, + ) + if shutil.which("instaloader"): + print(" ✅ instaloader installed (Instagram reading)") + else: + print(" ⬜ instaloader install failed (optional — try: pip install instaloader)") + except Exception: + print(" ⬜ instaloader install failed (optional — try: pip install instaloader)") + def _install_mcporter(): """Install mcporter and configure Exa + XiaoHongShu MCP servers.""" @@ -761,6 +793,12 @@ async def _cmd_search(args): results = await eyes.search_bilibili(query, limit=num) elif args.command == "search-xhs": results = await eyes.search_xhs(query, limit=num) + elif args.command == "search-instagram": + results = await eyes.search_instagram(query, limit=num) + elif args.command == "search-linkedin": + results = await eyes.search_linkedin(query, limit=num) + elif args.command == "search-bosszhipin": + results = await eyes.search_bosszhipin(query, limit=num) else: print(f"Unknown command: {args.command}", file=sys.stderr) sys.exit(1) diff --git a/agent_reach/core.py b/agent_reach/core.py index ac0009f..3128677 100644 --- a/agent_reach/core.py +++ b/agent_reach/core.py @@ -101,6 +101,24 @@ class AgentReach: results = await ch.search(query, config=self.config, limit=limit) return [r.to_dict() for r in results] + async def search_instagram(self, query: 
str, limit: int = 10) -> List[Dict[str, Any]]: + """Search Instagram via Exa.""" + ch = get_channel("instagram") + results = await ch.search(query, config=self.config, limit=limit) + return [r.to_dict() for r in results] + + async def search_linkedin(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: + """Search LinkedIn via MCP or Exa.""" + ch = get_channel("linkedin") + results = await ch.search(query, config=self.config, limit=limit) + return [r.to_dict() for r in results] + + async def search_bosszhipin(self, query: str, limit: int = 10) -> List[Dict[str, Any]]: + """Search Boss直聘 via MCP or Exa.""" + ch = get_channel("bosszhipin") + results = await ch.search(query, config=self.config, limit=limit) + return [r.to_dict() for r in results] + # ── Health ────────────────────────────────────────── def doctor(self) -> Dict[str, dict]: diff --git a/docs/README_en.md b/docs/README_en.md index cead304..17eef58 100644 --- a/docs/README_en.md +++ b/docs/README_en.md @@ -58,6 +58,9 @@ Copy that to your Agent. A few minutes later, it can read tweets, search Reddit, | 🌐 **Web** | Read | Zero config | Any URL → clean Markdown ([Jina Reader](https://github.com/jina-ai/reader) ⭐9.8K) | | 🐦 **Twitter/X** | Read · Search | Zero config / Cookie | Single tweets readable out of the box. 
Cookie unlocks search, timeline, posting ([bird](https://github.com/steipete/bird)) | | 📕 **XiaoHongShu** | Read · Search · **Post · Comment · Like** | mcporter | Via [xiaohongshu-mcp](https://github.com/user/xiaohongshu-mcp) internal API, install and go | +| 📷 **Instagram** | Read · Search | instaloader | Posts, profiles, hashtags ([instaloader](https://github.com/instaloader/instaloader) ⭐9.8K) | +| 💼 **LinkedIn** | Read · Search | mcporter / Jina | Profiles, companies, job search ([linkedin-scraper-mcp](https://github.com/stickerdaniel/linkedin-mcp-server) ⭐900+) | +| 🏢 **Boss直聘** | Read · Search | mcporter / Jina | Job search, greet recruiters ([mcp-bosszp](https://github.com/mucsbr/mcp-bosszp)) | | 🔍 **Web Search** | Search | Auto-configured | Auto-configured during install, free, no API key ([Exa](https://exa.ai) via [mcporter](https://github.com/nicepkg/mcporter)) | | 📦 **GitHub** | Read · Search | Zero config | [gh CLI](https://cli.github.com) powered. Public repos work immediately. 
`gh auth login` unlocks Fork, Issue, PR | | 📺 **YouTube** | Read · **Search** | Zero config | Subtitles + search across 1800+ video sites ([yt-dlp](https://github.com/yt-dlp/yt-dlp) ⭐148K) | @@ -182,6 +185,9 @@ channels/ ├── bilibili.py → yt-dlp ← swap to bilibili-api… ├── reddit.py → JSON API + Exa ← swap to PRAW, Pushshift… ├── xiaohongshu.py → mcporter MCP ← swap to other XHS tools… +├── instagram.py → instaloader ← swap to instagrapi, official API… +├── linkedin.py → linkedin-mcp ← swap to LinkedIn API… +├── bosszhipin.py → mcp-bosszp ← swap to other job tools… ├── rss.py → feedparser ← swap to atoma… ├── exa_search.py → mcporter MCP ← swap to Tavily, SerpAPI… └── __init__.py → Channel registry @@ -198,6 +204,9 @@ channels/ | GitHub | [gh CLI](https://cli.github.com) | Official tool, full API after auth | | Read RSS | [feedparser](https://github.com/kurtmckee/feedparser) | Python ecosystem standard, 2.3K stars | | XiaoHongShu | [xiaohongshu-mcp](https://github.com/user/xiaohongshu-mcp) | Internal API, bypasses anti-bot | +| Instagram | [instaloader](https://github.com/instaloader/instaloader) | 9.8K stars, Python CLI, cookie auth, free | +| LinkedIn | [linkedin-scraper-mcp](https://github.com/stickerdaniel/linkedin-mcp-server) | 900+ stars, MCP server, browser automation | +| Boss直聘 | [mcp-bosszp](https://github.com/mucsbr/mcp-bosszp) | MCP server, job search + recruiter greeting | > 📌 These are the *current* choices. Don't like one? Swap out the file. That's the whole point of scaffolding. 
diff --git a/docs/install.md b/docs/install.md index 976583d..51fb4cb 100644 --- a/docs/install.md +++ b/docs/install.md @@ -69,7 +69,44 @@ mcporter config add xiaohongshu http://localhost:18060/mcp > 如果在服务器上,建议加代理避免 IP 风控: > `docker run -d --name xiaohongshu-mcp -p 18060:18060 -e XHS_PROXY=http://user:pass@ip:port xpzouying/xiaohongshu-mcp` > -> 首次使用需要扫码登录,打开 http://localhost:18060 操作。 +> **登录方式:** +> - **本地电脑(有浏览器):** 打开 http://localhost:18060 扫码登录即可。 +> - **服务器(无 UI 界面):** 服务器上通常没有浏览器,无法直接扫码。最方便的方式是在自己的电脑上用浏览器登录小红书,然后用 [Cookie-Editor](https://chromewebstore.google.com/detail/cookie-editor/hlkenndednhfkekhgcdicdfddnkalmdm) 插件导出 Cookie(Header String 格式),发给 Agent 即可完成配置。详见 [Cookie 导出指南](cookie-export.md)。 + +**Instagram (需要 instaloader):** +> "Instagram 需要 instaloader。我来帮你安装。" + +```bash +pip install instaloader +``` + +> 登录后可访问更多内容(私密帖子等):`instaloader --login YOUR_USERNAME` +> 服务器用户同样可以在本地电脑登录后,导出 Cookie 发给 Agent。 + +**LinkedIn (可选 — linkedin-scraper-mcp):** +> "LinkedIn 基本内容可通过 Jina Reader 读取。完整功能(Profile 详情、职位搜索)需要 linkedin-scraper-mcp。" + +```bash +pip install linkedin-scraper-mcp +uvx linkedin-scraper-mcp --login # 首次需在有浏览器的机器上登录 +uvx linkedin-scraper-mcp --transport streamable-http --port 8001 # 启动 MCP 服务 +mcporter config add linkedin http://localhost:8001/mcp +``` + +> 详见 https://github.com/stickerdaniel/linkedin-mcp-server + +**Boss直聘 (可选 — mcp-bosszp):** +> "Boss直聘职位页面可直接读取。完整搜索和打招呼功能需要 mcp-bosszp。" + +```bash +git clone https://github.com/mucsbr/mcp-bosszp.git && cd mcp-bosszp +pip install -r requirements.txt && playwright install chromium +python boss_zhipin_fastmcp_v2.py # 启动 MCP 服务 +mcporter config add bosszhipin http://localhost:8000/mcp +``` + +> 或用 Docker:`docker-compose up -d` +> 详见 https://github.com/mucsbr/mcp-bosszp ### Step 4: Final check @@ -112,3 +149,6 @@ If the user wants a different agent to handle it, let them choose. 
| `agent-reach search-youtube "query"` | Search YouTube | | `agent-reach search-bilibili "query"` | Search Bilibili | | `agent-reach search-xhs "query"` | Search XiaoHongShu | +| `agent-reach search-instagram "query"` | Search Instagram | +| `agent-reach search-linkedin "query"` | Search LinkedIn | +| `agent-reach search-bosszhipin "query"` | Search Boss直聘 |