feat: 新增 Instagram、LinkedIn、Boss直聘 三个渠道
新增渠道: - Instagram: 基于 instaloader (⭐9.8K),读取帖子/Profile,Cookie 登录 - LinkedIn: 基于 linkedin-scraper-mcp (⭐900+) MCP 服务,Jina Reader fallback - Boss直聘: 基于 mcp-bosszp MCP 服务,Jina Reader fallback 代码改动: - 新建 channels/instagram.py, linkedin.py, bosszhipin.py - 注册到 channels/__init__.py - cli.py 添加 search-instagram/linkedin/bosszhipin 子命令 - cli.py 安装逻辑添加 instaloader 自动安装 - core.py 添加 search_instagram/linkedin/bosszhipin 方法 - README.md + docs/README_en.md 更新平台表格和选型表格 - docs/install.md 添加三个新渠道的配置说明和 Quick Reference
This commit is contained in:
parent
c0a0518c01
commit
e3804108fe
9 changed files with 775 additions and 1 deletions
|
|
@ -69,6 +69,9 @@ AI Agent 已经能帮你写代码、改文档、管项目——但你让它去
|
|||
| 📺 **B站** | 本地:字幕提取 + 搜索 | 服务器也能用 | 告诉 Agent「帮我配代理」 |
|
||||
| 📖 **Reddit** | 搜索(通过 Exa 免费) | 读帖子和评论 | 告诉 Agent「帮我配代理」 |
|
||||
| 📕 **小红书** | — | 阅读、搜索、发帖、评论、点赞 | `docker run -d -p 18060:18060 xpzouying/xiaohongshu-mcp` 然后告诉 Agent「帮我配置小红书」 |
|
||||
| 📷 **Instagram** | — | 读取帖子和 Profile | 告诉 Agent「帮我配 Instagram」 |
|
||||
| 💼 **LinkedIn** | — | 读取 Profile、公司、职位搜索 | 告诉 Agent「帮我配 LinkedIn」 |
|
||||
| 🏢 **Boss直聘** | — | 搜索职位、向 HR 打招呼 | 告诉 Agent「帮我配 Boss直聘」 |
|
||||
|
||||
> **不知道怎么配?不用查文档。** 直接告诉 Agent「帮我配 XXX」,它知道需要什么、会一步一步引导你。
|
||||
>
|
||||
|
|
@ -139,6 +142,9 @@ channels/
|
|||
├── bilibili.py → yt-dlp ← 可以换成 bilibili-api……
|
||||
├── reddit.py → JSON API + Exa ← 可以换成 PRAW、Pushshift……
|
||||
├── xiaohongshu.py → mcporter MCP ← 可以换成其他 XHS 工具……
|
||||
├── instagram.py → instaloader ← 可以换成 instagrapi、官方 API……
|
||||
├── linkedin.py → linkedin-mcp ← 可以换成 LinkedIn API……
|
||||
├── bosszhipin.py → mcp-bosszp ← 可以换成其他招聘工具……
|
||||
├── rss.py → feedparser ← 可以换成 atoma……
|
||||
├── exa_search.py → mcporter MCP ← 可以换成 Tavily、SerpAPI……
|
||||
└── __init__.py → 渠道注册
|
||||
|
|
@ -155,6 +161,9 @@ channels/
|
|||
| GitHub | [gh CLI](https://cli.github.com) | 官方工具,认证后完整 API 能力 |
|
||||
| 读 RSS | [feedparser](https://github.com/kurtmckee/feedparser) | Python 生态标准选择,2.3K Star |
|
||||
| 小红书 | [xiaohongshu-mcp](https://github.com/xpzouying/xiaohongshu-mcp) | ⭐9K+,Go 语言,Docker 一键部署 |
|
||||
| Instagram | [instaloader](https://github.com/instaloader/instaloader) | ⭐9.8K,Python CLI,Cookie 登录,免费 |
|
||||
| LinkedIn | [linkedin-scraper-mcp](https://github.com/stickerdaniel/linkedin-mcp-server) | ⭐900+,MCP 服务,浏览器自动化 |
|
||||
| Boss直聘 | [mcp-bosszp](https://github.com/mucsbr/mcp-bosszp) | MCP 服务,支持职位搜索和打招呼 |
|
||||
|
||||
> 📌 这些都是「当前选型」。不满意?换掉对应文件就行。这正是脚手架的意义。
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ from .rss import RSSChannel
|
|||
from .bilibili import BilibiliChannel
|
||||
from .exa_search import ExaSearchChannel
|
||||
from .xiaohongshu import XiaoHongShuChannel
|
||||
from .instagram import InstagramChannel
|
||||
from .linkedin import LinkedInChannel
|
||||
from .bosszhipin import BossZhipinChannel
|
||||
|
||||
|
||||
# Channel registry — order matters (first match wins, web is last as fallback)
|
||||
|
|
@ -30,6 +33,9 @@ ALL_CHANNELS: List[Channel] = [
|
|||
RedditChannel(),
|
||||
BilibiliChannel(),
|
||||
XiaoHongShuChannel(),
|
||||
InstagramChannel(),
|
||||
LinkedInChannel(),
|
||||
BossZhipinChannel(),
|
||||
RSSChannel(),
|
||||
ExaSearchChannel(),
|
||||
WebChannel(), # Fallback — handles any URL
|
||||
|
|
|
|||
183
agent_reach/channels/bosszhipin.py
Normal file
183
agent_reach/channels/bosszhipin.py
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""Boss直聘 (BOSS Zhipin) — via mcp-bosszp (MCP) or Jina Reader fallback.
|
||||
|
||||
Backend: mcp-bosszp (161 stars, FastMCP + Playwright)
|
||||
Swap to: any Boss直聘 access tool
|
||||
"""
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from urllib.parse import urlparse
|
||||
from .base import Channel, ReadResult, SearchResult
|
||||
from typing import List
|
||||
import requests
|
||||
|
||||
|
||||
def _mcporter_has_bosszhipin() -> bool:
    """Tell whether mcporter appears to have a Boss直聘 MCP server configured.

    Returns False when the ``mcporter`` executable is not on PATH or when
    ``mcporter list`` cannot be run. Otherwise returns True if the listing
    (compared case-insensitively) mentions ``boss`` or ``zhipin``.
    """
    if shutil.which("mcporter") is None:
        return False
    try:
        listing = subprocess.run(
            ["mcporter", "list"],
            capture_output=True,
            text=True,
            timeout=10,
        ).stdout.lower()
        # "bosszhipin" already contains both markers, so two checks cover
        # every configuration name the listing is expected to show.
        return any(marker in listing for marker in ("boss", "zhipin"))
    except Exception:
        # Best-effort probe: any failure simply means "not available".
        return False
|
||||
|
||||
|
||||
def _mcporter_call(expr: str, timeout: int = 30) -> str:
    """Run ``mcporter call <expr>`` and return its standard output.

    Args:
        expr: The call expression handed to mcporter as a single argument.
        timeout: Seconds to wait before ``subprocess.run`` gives up.

    Raises:
        RuntimeError: If mcporter exits with a non-zero status; the message
            is its stderr, or stdout when stderr is empty.
    """
    proc = subprocess.run(
        ["mcporter", "call", expr],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if proc.returncode == 0:
        return proc.stdout
    raise RuntimeError(proc.stderr or proc.stdout)
|
||||
|
||||
|
||||
def _get_mcp_name() -> str:
    """Return the name under which the Boss直聘 MCP server is registered in mcporter.

    The output of ``mcporter list`` is scanned token by token and the first
    token that itself contains ``boss`` or ``zhipin`` (case-insensitive) is
    returned. Matching on the token, rather than returning the first word of
    a matching line, keeps the result correct when a line starts with a list
    bullet or status marker.

    Returns:
        The detected server name, or ``"bosszhipin"`` when nothing matches
        or ``mcporter list`` cannot be run.
    """
    default = "bosszhipin"
    try:
        listing = subprocess.run(
            ["mcporter", "list"], capture_output=True, text=True, timeout=10
        ).stdout
    except Exception:
        # Best-effort lookup: fall back to the conventional name.
        return default

    for line in listing.splitlines():
        for token in line.split():
            # Drop decoration such as "-", "•", "name:" or "(name)".
            candidate = token.strip("-*•·:,;()[]\"'`")
            if "/" in candidate:
                # URLs and paths may mention "boss" but are not server names.
                continue
            lowered = candidate.lower()
            # Same markers as _mcporter_has_bosszhipin, so both helpers agree.
            if "boss" in lowered or "zhipin" in lowered:
                return candidate
    return default
|
||||
|
||||
|
||||
class BossZhipinChannel(Channel):
    """Boss直聘 (BOSS Zhipin) channel.

    Pages are read through Jina Reader. Search goes through the mcp-bosszp
    MCP server (via mcporter) when one is configured and falls back to an
    Exa ``site:zhipin.com`` search when MCP is missing, fails, or returns
    nothing.
    """

    name = "bosszhipin"
    description = "Boss直聘职位搜索"
    backends = ["mcp-bosszp", "Jina Reader"]
    tier = 2

    # Domains claimed by this channel; matched exactly or as a parent domain.
    _DOMAINS = ("zhipin.com", "boss.com")

    def can_handle(self, url: str) -> bool:
        """Return True when the URL's host is one of ``_DOMAINS`` or a subdomain.

        The host is matched on a label boundary instead of by substring, so
        look-alike hosts such as ``hugoboss.com`` are not claimed.
        """
        host = (urlparse(url).hostname or "").lower()
        return any(host == d or host.endswith("." + d) for d in self._DOMAINS)

    def check(self, config=None):
        """Return a ``(status, message)`` tuple describing availability.

        ``"ok"`` when mcporter lists a Boss直聘 server, otherwise ``"off"``
        together with setup instructions.
        """
        if _mcporter_has_bosszhipin():
            return "ok", "可搜索职位、向 HR 打招呼"

        return "off", (
            "可通过 Jina Reader 读取职位页面。完整功能需要:\n"
            "  1. git clone https://github.com/mucsbr/mcp-bosszp.git\n"
            "  2. cd mcp-bosszp && pip install -r requirements.txt && playwright install chromium\n"
            "  3. python boss_zhipin_fastmcp_v2.py(启动 MCP 服务)\n"
            "  4. mcporter config add bosszhipin http://localhost:8000/mcp\n"
            "  或用 Docker:docker-compose up -d\n"
            "  详见 https://github.com/mucsbr/mcp-bosszp"
        )

    async def read(self, url: str, config=None) -> ReadResult:
        """Read a Boss直聘 page; always delegates to Jina Reader."""
        return await self._read_jina(url)

    async def _read_jina(self, url: str) -> ReadResult:
        """Fetch the page as markdown through Jina Reader.

        Never raises: a request failure or a near-empty response (fewer than
        50 non-blank characters) yields a ReadResult carrying a hint message
        instead of page content.
        """
        try:
            resp = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "text/markdown"},
                timeout=15,
            )
            resp.raise_for_status()
            text = resp.text

            if len(text.strip()) < 50:
                return ReadResult(
                    title="Boss直聘",
                    content=(
                        f"⚠️ 无法读取此页面内容: {url}\n\n"
                        "提示:\n"
                        "- 安装 mcp-bosszp 可解锁职位搜索和自动打招呼\n"
                        "- 详见 https://github.com/mucsbr/mcp-bosszp"
                    ),
                    url=url,
                    platform="bosszhipin",
                )

            return ReadResult(
                title=text[:100] if text else url,
                content=text,
                url=url,
                platform="bosszhipin",
            )
        except Exception:
            # Best-effort reader: report the failure as content, do not raise.
            return ReadResult(
                title="Boss直聘",
                content=(
                    f"⚠️ 无法读取此 Boss直聘页面: {url}\n\n"
                    "提示:\n"
                    "- Boss直聘部分页面需要登录\n"
                    "- 安装 mcp-bosszp 可解锁完整功能\n"
                    "- 详见 https://github.com/mucsbr/mcp-bosszp"
                ),
                url=url,
                platform="bosszhipin",
            )

    async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
        """Search for jobs, preferring MCP and falling back to Exa.

        Args:
            query: Free-text search query.
            config: Passed through to the Exa channel.
            **kwargs: ``limit`` (default 10) caps the number of results.
        """
        limit = kwargs.get("limit", 10)

        if _mcporter_has_bosszhipin():
            try:
                results = await self._search_mcp(query, limit, config)
            except Exception:
                results = []
            # An empty MCP answer is treated like a failure so the caller
            # still gets results from the Exa fallback below.
            if results:
                return results

        from agent_reach.channels.exa_search import ExaSearchChannel

        exa = ExaSearchChannel()
        return await exa.search(f"site:zhipin.com {query}", config=config, limit=limit)

    async def _search_mcp(self, query: str, limit: int, config=None) -> List[SearchResult]:
        """Fetch jobs from the MCP server and parse them.

        NOTE(review): the tool called here takes only a page number, so
        ``query`` does not influence the MCP results — confirm whether
        mcp-bosszp offers a keyword search tool that should be used instead.

        Errors from mcporter propagate to the caller, which falls back to Exa.
        """
        server = _get_mcp_name()
        out = _mcporter_call(
            f'{server}.get_recommend_jobs_tool(page: 1)',
            timeout=30,
        )
        return self._parse_jobs(out, limit)

    def _parse_jobs(self, text: str, limit: int) -> List[SearchResult]:
        """Convert MCP JSON output into at most ``limit`` SearchResults.

        Accepts either a JSON list of jobs or an object holding the list
        under ``jobs`` or ``results``. Anything else (invalid JSON, scalar
        payloads, non-dict entries) is ignored and yields no results.
        """
        try:
            data = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            return []

        if isinstance(data, dict):
            jobs = data.get("jobs", data.get("results", []))
        else:
            jobs = data
        if not isinstance(jobs, list):
            return []

        results = []
        for job in jobs[:limit]:
            if not isinstance(job, dict):
                continue
            title = job.get("title") or job.get("jobName", "")
            company = job.get("company") or job.get("brandName", "")
            salary = job.get("salary") or job.get("salaryDesc", "")
            url = job.get("url", "")

            # Join only the parts that exist so a missing company does not
            # leave a dangling separator in front of the salary.
            parts = []
            if company:
                parts.append(f"🏢 {company}")
            if salary:
                parts.append(f"💰 {salary}")

            results.append(SearchResult(
                title=title,
                url=url,
                snippet=" · ".join(parts),
            ))
        return results
|
||||
216
agent_reach/channels/instagram.py
Normal file
216
agent_reach/channels/instagram.py
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""Instagram — via instaloader (free, open source).
|
||||
|
||||
Backend: instaloader (9.8K stars, Python CLI + library)
|
||||
Swap to: any Instagram access tool
|
||||
"""
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from urllib.parse import urlparse
|
||||
from .base import Channel, ReadResult, SearchResult
|
||||
from typing import List
|
||||
|
||||
|
||||
class InstagramChannel(Channel):
    """Instagram channel backed by the instaloader Python library.

    Posts and profiles are read through instaloader when the module can be
    imported; every failure path falls back to Jina Reader. Search is
    delegated to Exa with a ``site:instagram.com`` filter.
    """

    name = "instagram"
    description = "Instagram 帖子和 Profile"
    backends = ["instaloader"]
    tier = 2  # Needs login for full access

    # Domains claimed by this channel; matched exactly or as a parent domain.
    _DOMAINS = ("instagram.com", "instagr.am")

    # Path forms that carry a media shortcode: /p/, /reel/, /reels/ and /tv/.
    _POST_RE = re.compile(r"/(?:p|reels?|tv)/([A-Za-z0-9_-]+)")

    # First path segments that are site sections rather than usernames.
    _RESERVED_SEGMENTS = ("p", "reel", "reels", "tv", "stories", "explore")

    def can_handle(self, url: str) -> bool:
        """Return True when the URL's host is one of ``_DOMAINS`` or a subdomain.

        Matching on a label boundary (not by substring) avoids claiming
        look-alike hosts such as ``notinstagram.com``.
        """
        host = (urlparse(url).hostname or "").lower()
        return any(host == d or host.endswith("." + d) for d in self._DOMAINS)

    def check(self, config=None):
        """Return a ``(status, message)`` tuple describing availability.

        ``"ok"`` when either the ``instaloader`` executable is on PATH or the
        module can be imported; otherwise ``"off"`` with install instructions.
        """
        has_cli = shutil.which("instaloader") is not None
        try:
            import instaloader  # noqa: F401 -- imported only to probe availability
            has_module = True
        except ImportError:
            has_module = False

        if not (has_cli or has_module):
            return "off", (
                "需要安装 instaloader:pip install instaloader\n"
                "  安装后可读取 Instagram 帖子和 Profile\n"
                "  登录解锁更多功能:instaloader --login YOUR_USERNAME"
            )
        return "ok", "可读取公开帖子和 Profile。登录后可访问更多内容"

    async def read(self, url: str, config=None) -> ReadResult:
        """Read a post or profile URL.

        Delegates to ``_read_instaloader``, which itself falls back to Jina
        Reader when the instaloader module is missing or raises.
        """
        return await self._read_instaloader(url, config)

    async def _read_instaloader(self, url: str, config=None) -> ReadResult:
        """Read Instagram content using the instaloader Python API.

        A saved session is loaded when ``config`` provides
        ``instagram_username``; failure to load it is ignored and the read
        continues anonymously.
        """
        try:
            import instaloader

            L = instaloader.Instaloader(
                download_pictures=False,
                download_videos=False,
                download_video_thumbnails=False,
                download_geotags=False,
                download_comments=False,
                save_metadata=False,
                compress_json=False,
            )

            if config and config.get("instagram_username"):
                try:
                    L.load_session_from_file(config.get("instagram_username"))
                except Exception:
                    # No usable session file: carry on without login.
                    pass

            raw_path = urlparse(url).path
            path = raw_path.strip("/")

            # Decide on the parsed path so a query string cannot misroute.
            if self._POST_RE.search(raw_path):
                return await self._read_post(L, url, path)
            return await self._read_profile(L, url, path)

        except Exception:
            # Covers a missing module as well as any instaloader error.
            return await self._read_jina(url)

    async def _read_post(self, L, url: str, path: str) -> ReadResult:
        """Read a single post (photo, reel or video) by its shortcode.

        Falls back to Jina Reader when no shortcode can be extracted or when
        instaloader raises.
        """
        import instaloader

        match = self._POST_RE.search(urlparse(url).path)
        if not match:
            return await self._read_jina(url)

        shortcode = match.group(1)
        try:
            post = instaloader.Post.from_shortcode(L.context, shortcode)

            lines = []
            if post.caption:
                lines.append(post.caption)
                lines.append("")
            lines.append(f"👤 @{post.owner_username}")
            lines.append(f"❤️ {post.likes} likes")
            if post.comments:
                lines.append(f"💬 {post.comments} comments")
            lines.append(f"📅 {post.date_utc.strftime('%Y-%m-%d %H:%M')}")
            location = post.location
            if location:
                # PostLocation is a namedtuple; show its name, not its repr.
                lines.append(f"📍 {location.name}")
            # instaloader exposes caption hashtags as ``caption_hashtags``;
            # ``Post`` has no ``hashtags`` attribute.
            hashtags = post.caption_hashtags
            if hashtags:
                lines.append(f"#️⃣ {' '.join('#' + h for h in hashtags)}")

            return ReadResult(
                title=f"@{post.owner_username}: {(post.caption or '')[:80]}",
                content="\n".join(lines),
                url=url,
                author=f"@{post.owner_username}",
                date=post.date_utc.strftime("%Y-%m-%d"),
                platform="instagram",
                extra={"likes": post.likes, "comments": post.comments},
            )
        except Exception:
            return await self._read_jina(url)

    async def _read_profile(self, L, url: str, path: str) -> ReadResult:
        """Read a profile summary plus up to five recent posts.

        ``path`` is the URL path without surrounding slashes; its first
        segment is taken as the username. Falls back to Jina Reader when the
        segment is empty or reserved, or when the profile cannot be loaded.
        """
        import instaloader

        username = path.split("/")[0] if path else ""
        if not username or username in self._RESERVED_SEGMENTS:
            return await self._read_jina(url)

        try:
            profile = instaloader.Profile.from_username(L.context, username)

            lines = [f"👤 {profile.full_name} (@{profile.username})"]
            if profile.biography:
                lines.append(f"📝 {profile.biography}")
            if profile.external_url:
                lines.append(f"🔗 {profile.external_url}")
            lines.append("")
            lines.append(f"📊 {profile.mediacount} posts · "
                         f"{profile.followers} followers · "
                         f"{profile.followees} following")
            if profile.is_verified:
                lines.append("✅ Verified")
            if profile.is_business_account and profile.business_category_name:
                lines.append(f"🏢 {profile.business_category_name}")

            recent = []
            try:
                # zip with range(5) stops before a sixth post is requested.
                for _, post in zip(range(5), profile.get_posts()):
                    caption = (post.caption or "")[:100].replace("\n", " ")
                    recent.append(
                        f"  • ❤️{post.likes} | {post.date_utc.strftime('%m-%d')} | {caption}"
                    )
            except Exception:
                # Listing posts may fail even though the profile loaded;
                # keep the profile data instead of discarding it.
                pass
            if recent:
                lines.append("")
                lines.append("📸 Recent posts:")
                lines.extend(recent)

            return ReadResult(
                title=f"{profile.full_name} (@{profile.username}) - Instagram",
                content="\n".join(lines),
                url=url,
                author=f"@{profile.username}",
                platform="instagram",
                extra={
                    "followers": profile.followers,
                    "posts": profile.mediacount,
                },
            )
        except Exception:
            return await self._read_jina(url)

    async def _read_jina(self, url: str) -> ReadResult:
        """Fallback reader: fetch the page as markdown through Jina Reader.

        Never raises; a failed request yields a ReadResult with a hint
        message as its content.
        """
        import requests

        try:
            resp = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "text/markdown"},
                timeout=15,
            )
            resp.raise_for_status()
            text = resp.text
            return ReadResult(
                title=text[:100] if text else url,
                content=text,
                url=url,
                platform="instagram",
            )
        except Exception:
            return ReadResult(
                title="Instagram",
                content=(
                    f"⚠️ 无法读取此 Instagram 内容: {url}\n\n"
                    "提示:\n"
                    "- 确保 URL 正确\n"
                    "- 安装 instaloader: pip install instaloader\n"
                    "- 登录以访问更多内容: instaloader --login YOUR_USERNAME"
                ),
                url=url,
                platform="instagram",
            )

    async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
        """Search Instagram through Exa with a ``site:instagram.com`` filter.

        ``limit`` (default 10) may be passed as a keyword argument.
        """
        limit = kwargs.get("limit", 10)
        from agent_reach.channels.exa_search import ExaSearchChannel

        exa = ExaSearchChannel()
        return await exa.search(f"site:instagram.com {query}", config=config, limit=limit)
|
||||
255
agent_reach/channels/linkedin.py
Normal file
255
agent_reach/channels/linkedin.py
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""LinkedIn — via linkedin-scraper-mcp (MCP) or Jina Reader fallback.
|
||||
|
||||
Backend: linkedin-scraper-mcp (916 stars, Patchright browser automation)
|
||||
Swap to: any LinkedIn access tool
|
||||
"""
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
from urllib.parse import urlparse
|
||||
from .base import Channel, ReadResult, SearchResult
|
||||
from typing import List
|
||||
import requests
|
||||
|
||||
|
||||
def _mcporter_has_linkedin() -> bool:
    """Tell whether mcporter appears to have a LinkedIn MCP server configured.

    Returns False when the ``mcporter`` executable is not on PATH or when
    ``mcporter list`` cannot be run. Otherwise returns True if the listing
    mentions ``linkedin`` (case-insensitive).
    """
    if shutil.which("mcporter") is None:
        return False
    try:
        listing = subprocess.run(
            ["mcporter", "list"],
            capture_output=True,
            text=True,
            timeout=10,
        ).stdout
        return "linkedin" in listing.lower()
    except Exception:
        # Best-effort probe: any failure simply means "not available".
        return False
|
||||
|
||||
|
||||
def _mcporter_call(expr: str, timeout: int = 30) -> str:
    """Run ``mcporter call <expr>`` and return its standard output.

    Args:
        expr: The call expression handed to mcporter as a single argument.
        timeout: Seconds to wait before ``subprocess.run`` gives up.

    Raises:
        RuntimeError: If mcporter exits with a non-zero status; the message
            is its stderr, or stdout when stderr is empty.
    """
    proc = subprocess.run(
        ["mcporter", "call", expr],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if proc.returncode == 0:
        return proc.stdout
    raise RuntimeError(proc.stderr or proc.stdout)
|
||||
|
||||
|
||||
class LinkedInChannel(Channel):
    """LinkedIn channel.

    Profiles, company pages and job postings are read through the
    linkedin-scraper-mcp server (via mcporter) when it is configured;
    everything else, and every failure, falls back to Jina Reader. Search
    tries MCP first and then Exa with a ``site:linkedin.com`` filter.
    """

    name = "linkedin"
    description = "LinkedIn 个人/公司 Profile 和职位"
    backends = ["linkedin-scraper-mcp", "Jina Reader"]
    tier = 2

    # Domain claimed by this channel; matched exactly or as a parent domain.
    _DOMAIN = "linkedin.com"

    def can_handle(self, url: str) -> bool:
        """Return True when the URL's host is ``linkedin.com`` or a subdomain.

        Matching on a label boundary (not by substring) avoids claiming
        look-alike hosts such as ``notlinkedin.com``.
        """
        host = (urlparse(url).hostname or "").lower()
        return host == self._DOMAIN or host.endswith("." + self._DOMAIN)

    def check(self, config=None):
        """Return a ``(status, message)`` tuple describing availability.

        ``"ok"`` when mcporter lists a LinkedIn server, ``"warn"`` when the
        ``linkedin-scraper-mcp`` executable exists but is not wired into
        mcporter, and ``"off"`` otherwise.
        """
        if _mcporter_has_linkedin():
            return "ok", "完整可用(Profile、公司、职位搜索)"

        if shutil.which("linkedin-scraper-mcp"):
            return "warn", (
                "linkedin-scraper-mcp 已安装但未接入 mcporter。运行:\n"
                "  1. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
                "  2. mcporter config add linkedin http://localhost:8001/mcp\n"
                "  或先登录:uvx linkedin-scraper-mcp --login"
            )

        return "off", (
            "可通过 Jina Reader 读取部分内容。完整功能需要:\n"
            "  1. pip install linkedin-scraper-mcp 或 uvx linkedin-scraper-mcp --login\n"
            "  2. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
            "  3. mcporter config add linkedin http://localhost:8001/mcp\n"
            "  详见 https://github.com/stickerdaniel/linkedin-mcp-server"
        )

    async def read(self, url: str, config=None) -> ReadResult:
        """Read a LinkedIn URL, preferring MCP for profile/company/job pages.

        Routing looks at the URL path only, so a query string containing
        ``/in/`` or similar cannot select the wrong reader.
        """
        if _mcporter_has_linkedin():
            path = urlparse(url).path
            try:
                if "/in/" in path:
                    return await self._read_profile_mcp(url)
                if "/company/" in path:
                    return await self._read_company_mcp(url)
                if "/jobs/view/" in path:
                    return await self._read_job_mcp(url)
            except Exception:
                pass  # Fall through to Jina

        return await self._read_jina(url)

    @staticmethod
    def _quote(value: str) -> str:
        """Escape ``value`` for use inside a double-quoted mcporter argument.

        Backslashes are escaped before quotes so an input ending in a
        backslash cannot swallow the closing quote.
        """
        return value.replace("\\", "\\\\").replace('"', '\\"')

    def _mcp_read(self, expr: str, url: str, fallback_title: str) -> ReadResult:
        """Run one MCP call and wrap its raw output in a ReadResult."""
        out = _mcporter_call(expr, timeout=30)
        return ReadResult(
            title=self._extract_title(out) or fallback_title,
            content=out.strip(),
            url=url,
            platform="linkedin",
        )

    async def _read_profile_mcp(self, url: str) -> ReadResult:
        """Read a personal profile via the ``get_person_profile`` MCP tool.

        NOTE(review): the argument name ``url`` is taken from the original
        code; confirm it against the installed linkedin-scraper-mcp version.
        """
        return self._mcp_read(
            f'linkedin.get_person_profile(url: "{self._quote(url)}")',
            url,
            "LinkedIn Profile",
        )

    async def _read_company_mcp(self, url: str) -> ReadResult:
        """Read a company page via the ``get_company_profile`` MCP tool."""
        return self._mcp_read(
            f'linkedin.get_company_profile(url: "{self._quote(url)}")',
            url,
            "LinkedIn Company",
        )

    async def _read_job_mcp(self, url: str) -> ReadResult:
        """Read a job posting via the ``get_job_details`` MCP tool.

        The numeric job id is taken from ``/jobs/view/<id>``; when it is
        missing the page is read through Jina Reader instead.
        """
        import re

        match = re.search(r"/jobs/view/(\d+)", url)
        if not match:
            return await self._read_jina(url)

        job_id = match.group(1)
        return self._mcp_read(
            f'linkedin.get_job_details(job_id: "{job_id}")',
            url,
            f"LinkedIn Job {job_id}",
        )

    async def _read_jina(self, url: str) -> ReadResult:
        """Fallback reader: fetch the page as markdown through Jina Reader.

        Never raises. A response shorter than 100 non-blank characters, or
        one with "Sign in" in its first 200 characters, is reported as a
        login wall; a failed request is reported with a hint message.
        """
        try:
            resp = requests.get(
                f"https://r.jina.ai/{url}",
                headers={"Accept": "text/markdown"},
                timeout=15,
            )
            resp.raise_for_status()
            text = resp.text

            if len(text.strip()) < 100 or "Sign in" in text[:200]:
                return ReadResult(
                    title="LinkedIn",
                    content=(
                        f"⚠️ LinkedIn 页面需要登录才能完整查看。\n\n"
                        f"URL: {url}\n\n"
                        "完整功能需安装 linkedin-scraper-mcp:\n"
                        "  pip install linkedin-scraper-mcp\n"
                        "  uvx linkedin-scraper-mcp --login\n"
                        "  详见 https://github.com/stickerdaniel/linkedin-mcp-server"
                    ),
                    url=url,
                    platform="linkedin",
                )

            return ReadResult(
                title=text[:100] if text else url,
                content=text,
                url=url,
                platform="linkedin",
            )
        except Exception:
            return ReadResult(
                title="LinkedIn",
                content=(
                    f"⚠️ 无法读取此 LinkedIn 页面: {url}\n\n"
                    "提示:\n"
                    "- LinkedIn 需要登录才能查看大部分内容\n"
                    "- 安装 linkedin-scraper-mcp 解锁完整功能\n"
                    "- 详见 https://github.com/stickerdaniel/linkedin-mcp-server"
                ),
                url=url,
                platform="linkedin",
            )

    async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
        """Search LinkedIn, preferring MCP and falling back to Exa.

        ``limit`` (default 10) may be passed as a keyword argument.
        """
        limit = kwargs.get("limit", 10)

        if _mcporter_has_linkedin():
            try:
                results = await self._search_mcp(query, limit)
            except Exception:
                results = []
            # An empty MCP answer is treated like a failure so the caller
            # still gets results from the Exa fallback below.
            if results:
                return results

        from agent_reach.channels.exa_search import ExaSearchChannel

        exa = ExaSearchChannel()
        return await exa.search(f"site:linkedin.com {query}", config=config, limit=limit)

    async def _search_mcp(self, query: str, limit: int) -> List[SearchResult]:
        """Search via MCP: jobs first, then people; ``[]`` when both are empty.

        Results are truncated to ``limit`` in case the server ignores it.
        """
        keywords = self._quote(query)
        limit = int(limit)  # interpolated into the call expression below

        for tool, kind in (("search_jobs", "job"), ("search_people", "people")):
            try:
                out = _mcporter_call(
                    f'linkedin.{tool}(keywords: "{keywords}", limit: {limit})',
                    timeout=30,
                )
            except Exception:
                continue  # try the next tool
            results = self._parse_search_results(out, kind)
            if results:
                return results[:limit]

        return []

    def _parse_search_results(self, text: str, result_type: str) -> List[SearchResult]:
        """Convert MCP JSON output into SearchResults.

        Accepts either a JSON list or an object holding the list under
        ``results`` or ``jobs``. Invalid JSON, scalar payloads and non-dict
        entries are ignored. ``result_type`` is currently unused.
        """
        import json

        try:
            data = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            return []

        if isinstance(data, dict):
            items = data.get("results", data.get("jobs", []))
        else:
            items = data
        if not isinstance(items, list):
            return []

        results = []
        for item in items:
            if not isinstance(item, dict):
                continue
            title = item.get("title") or item.get("name") or item.get("headline", "")
            url = item.get("url") or item.get("link", "")
            snippet = item.get("description") or item.get("company", "")
            results.append(SearchResult(
                title=title,
                url=url,
                # str() guards against non-string descriptions before slicing.
                snippet=str(snippet)[:200] if snippet else "",
            ))
        return results

    def _extract_title(self, text: str) -> str:
        """Return the first meaningful line of MCP output, cut to 80 chars.

        Blank lines and lines starting with ``{``, ``[``, ``#`` or ``http``
        are skipped; returns ``""`` when no line qualifies.
        """
        for line in text.split("\n"):
            line = line.strip()
            if line and not line.startswith(("{", "[", "#", "http")):
                return line[:80]
        return ""
|
||||
|
|
@ -89,6 +89,21 @@ def main():
|
|||
p_sx.add_argument("query", nargs="+", help="Search query")
|
||||
p_sx.add_argument("-n", "--num", type=int, default=10, help="Number of results")
|
||||
|
||||
# ── search-instagram ──
|
||||
p_si = sub.add_parser("search-instagram", help="Search Instagram")
|
||||
p_si.add_argument("query", nargs="+", help="Search query")
|
||||
p_si.add_argument("-n", "--num", type=int, default=10, help="Number of results")
|
||||
|
||||
# ── search-linkedin ──
|
||||
p_sl = sub.add_parser("search-linkedin", help="Search LinkedIn")
|
||||
p_sl.add_argument("query", nargs="+", help="Search query")
|
||||
p_sl.add_argument("-n", "--num", type=int, default=10, help="Number of results")
|
||||
|
||||
# ── search-bosszhipin ──
|
||||
p_sbz = sub.add_parser("search-bosszhipin", help="Search Boss直聘")
|
||||
p_sbz.add_argument("query", nargs="+", help="Search query")
|
||||
p_sbz.add_argument("-n", "--num", type=int, default=10, help="Number of results")
|
||||
|
||||
# ── setup ──
|
||||
sub.add_parser("setup", help="Interactive configuration wizard")
|
||||
|
||||
|
|
@ -369,6 +384,23 @@ def _install_system_deps():
|
|||
else:
|
||||
print(" ⬜ bird CLI requires Node.js (optional — Twitter reading still works via Jina)")
|
||||
|
||||
# ── instaloader (for Instagram) ──
|
||||
if shutil.which("instaloader"):
|
||||
print(" ✅ instaloader already installed")
|
||||
else:
|
||||
print(" 📥 Installing instaloader...")
|
||||
try:
|
||||
subprocess.run(
|
||||
[sys.executable, "-m", "pip", "install", "instaloader"],
|
||||
capture_output=True, text=True, timeout=120,
|
||||
)
|
||||
if shutil.which("instaloader"):
|
||||
print(" ✅ instaloader installed (Instagram reading)")
|
||||
else:
|
||||
print(" ⬜ instaloader install failed (optional — try: pip install instaloader)")
|
||||
except Exception:
|
||||
print(" ⬜ instaloader install failed (optional — try: pip install instaloader)")
|
||||
|
||||
|
||||
def _install_mcporter():
|
||||
"""Install mcporter and configure Exa + XiaoHongShu MCP servers."""
|
||||
|
|
@ -761,6 +793,12 @@ async def _cmd_search(args):
|
|||
results = await eyes.search_bilibili(query, limit=num)
|
||||
elif args.command == "search-xhs":
|
||||
results = await eyes.search_xhs(query, limit=num)
|
||||
elif args.command == "search-instagram":
|
||||
results = await eyes.search_instagram(query, limit=num)
|
||||
elif args.command == "search-linkedin":
|
||||
results = await eyes.search_linkedin(query, limit=num)
|
||||
elif args.command == "search-bosszhipin":
|
||||
results = await eyes.search_bosszhipin(query, limit=num)
|
||||
else:
|
||||
print(f"Unknown command: {args.command}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -101,6 +101,24 @@ class AgentReach:
|
|||
results = await ch.search(query, config=self.config, limit=limit)
|
||||
return [r.to_dict() for r in results]
|
||||
|
||||
async def search_instagram(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Run a search on the ``instagram`` channel and return plain dicts.

    The query, this instance's config and ``limit`` are forwarded to the
    channel's ``search``; each result is converted with ``to_dict()``.
    """
    channel = get_channel("instagram")
    hits = await channel.search(query, config=self.config, limit=limit)
    return [hit.to_dict() for hit in hits]
|
||||
|
||||
async def search_linkedin(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Run a search on the ``linkedin`` channel and return plain dicts.

    The query, this instance's config and ``limit`` are forwarded to the
    channel's ``search``; each result is converted with ``to_dict()``.
    """
    channel = get_channel("linkedin")
    hits = await channel.search(query, config=self.config, limit=limit)
    return [hit.to_dict() for hit in hits]
|
||||
|
||||
async def search_bosszhipin(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Run a search on the ``bosszhipin`` channel and return plain dicts.

    The query, this instance's config and ``limit`` are forwarded to the
    channel's ``search``; each result is converted with ``to_dict()``.
    """
    channel = get_channel("bosszhipin")
    hits = await channel.search(query, config=self.config, limit=limit)
    return [hit.to_dict() for hit in hits]
|
||||
|
||||
# ── Health ──────────────────────────────────────────
|
||||
|
||||
def doctor(self) -> Dict[str, dict]:
|
||||
|
|
|
|||
|
|
@ -58,6 +58,9 @@ Copy that to your Agent. A few minutes later, it can read tweets, search Reddit,
|
|||
| 🌐 **Web** | Read | Zero config | Any URL → clean Markdown ([Jina Reader](https://github.com/jina-ai/reader) ⭐9.8K) |
|
||||
| 🐦 **Twitter/X** | Read · Search | Zero config / Cookie | Single tweets readable out of the box. Cookie unlocks search, timeline, posting ([bird](https://github.com/steipete/bird)) |
|
||||
| 📕 **XiaoHongShu** | Read · Search · **Post · Comment · Like** | mcporter | Via [xiaohongshu-mcp](https://github.com/xpzouying/xiaohongshu-mcp) internal API, install and go |
|
||||
| 📷 **Instagram** | Read · Search | instaloader | Posts, profiles, hashtags ([instaloader](https://github.com/instaloader/instaloader) ⭐9.8K) |
|
||||
| 💼 **LinkedIn** | Read · Search | mcporter / Jina | Profiles, companies, job search ([linkedin-scraper-mcp](https://github.com/stickerdaniel/linkedin-mcp-server) ⭐900+) |
|
||||
| 🏢 **Boss直聘** | Read · Search | mcporter / Jina | Job search, greet recruiters ([mcp-bosszp](https://github.com/mucsbr/mcp-bosszp)) |
|
||||
| 🔍 **Web Search** | Search | Auto-configured | Auto-configured during install, free, no API key ([Exa](https://exa.ai) via [mcporter](https://github.com/nicepkg/mcporter)) |
|
||||
| 📦 **GitHub** | Read · Search | Zero config | [gh CLI](https://cli.github.com) powered. Public repos work immediately. `gh auth login` unlocks Fork, Issue, PR |
|
||||
| 📺 **YouTube** | Read · **Search** | Zero config | Subtitles + search across 1800+ video sites ([yt-dlp](https://github.com/yt-dlp/yt-dlp) ⭐148K) |
|
||||
|
|
@ -182,6 +185,9 @@ channels/
|
|||
├── bilibili.py → yt-dlp ← swap to bilibili-api…
|
||||
├── reddit.py → JSON API + Exa ← swap to PRAW, Pushshift…
|
||||
├── xiaohongshu.py → mcporter MCP ← swap to other XHS tools…
|
||||
├── instagram.py → instaloader ← swap to instagrapi, official API…
|
||||
├── linkedin.py → linkedin-mcp ← swap to LinkedIn API…
|
||||
├── bosszhipin.py → mcp-bosszp ← swap to other job tools…
|
||||
├── rss.py → feedparser ← swap to atoma…
|
||||
├── exa_search.py → mcporter MCP ← swap to Tavily, SerpAPI…
|
||||
└── __init__.py → Channel registry
|
||||
|
|
@ -198,6 +204,9 @@ channels/
|
|||
| GitHub | [gh CLI](https://cli.github.com) | Official tool, full API after auth |
|
||||
| Read RSS | [feedparser](https://github.com/kurtmckee/feedparser) | Python ecosystem standard, 2.3K stars |
|
||||
| XiaoHongShu | [xiaohongshu-mcp](https://github.com/xpzouying/xiaohongshu-mcp) | Internal API, bypasses anti-bot |
|
||||
| Instagram | [instaloader](https://github.com/instaloader/instaloader) | 9.8K stars, Python CLI, cookie auth, free |
|
||||
| LinkedIn | [linkedin-scraper-mcp](https://github.com/stickerdaniel/linkedin-mcp-server) | 900+ stars, MCP server, browser automation |
|
||||
| Boss直聘 | [mcp-bosszp](https://github.com/mucsbr/mcp-bosszp) | MCP server, job search + recruiter greeting |
|
||||
|
||||
> 📌 These are the *current* choices. Don't like one? Swap out the file. That's the whole point of scaffolding.
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,44 @@ mcporter config add xiaohongshu http://localhost:18060/mcp
|
|||
> 如果在服务器上,建议加代理避免 IP 风控:
|
||||
> `docker run -d --name xiaohongshu-mcp -p 18060:18060 -e XHS_PROXY=http://user:pass@ip:port xpzouying/xiaohongshu-mcp`
|
||||
>
|
||||
> 首次使用需要扫码登录,打开 http://localhost:18060 操作。
|
||||
> **登录方式:**
|
||||
> - **本地电脑(有浏览器):** 打开 http://localhost:18060 扫码登录即可。
|
||||
> - **服务器(无 UI 界面):** 服务器上通常没有浏览器,无法直接扫码。最方便的方式是在自己的电脑上用浏览器登录小红书,然后用 [Cookie-Editor](https://chromewebstore.google.com/detail/cookie-editor/hlkenndednhfkekhgcdicdfddnkalmdm) 插件导出 Cookie(Header String 格式),发给 Agent 即可完成配置。详见 [Cookie 导出指南](cookie-export.md)。
|
||||
|
||||
**Instagram (需要 instaloader):**
|
||||
> "Instagram 需要 instaloader。我来帮你安装。"
|
||||
|
||||
```bash
|
||||
pip install instaloader
|
||||
```
|
||||
|
||||
> 登录后可访问更多内容(私密帖子等):`instaloader --login YOUR_USERNAME`
|
||||
> 服务器用户同样可以在本地电脑登录后,导出 Cookie 发给 Agent。
|
||||
|
||||
**LinkedIn (可选 — linkedin-scraper-mcp):**
|
||||
> "LinkedIn 基本内容可通过 Jina Reader 读取。完整功能(Profile 详情、职位搜索)需要 linkedin-scraper-mcp。"
|
||||
|
||||
```bash
|
||||
pip install linkedin-scraper-mcp
|
||||
uvx linkedin-scraper-mcp --login # 首次需在有浏览器的机器上登录
|
||||
uvx linkedin-scraper-mcp --transport streamable-http --port 8001 # 启动 MCP 服务
|
||||
mcporter config add linkedin http://localhost:8001/mcp
|
||||
```
|
||||
|
||||
> 详见 https://github.com/stickerdaniel/linkedin-mcp-server
|
||||
|
||||
**Boss直聘 (可选 — mcp-bosszp):**
|
||||
> "Boss直聘职位页面可直接读取。完整搜索和打招呼功能需要 mcp-bosszp。"
|
||||
|
||||
```bash
|
||||
git clone https://github.com/mucsbr/mcp-bosszp.git && cd mcp-bosszp
|
||||
pip install -r requirements.txt && playwright install chromium
|
||||
python boss_zhipin_fastmcp_v2.py # 启动 MCP 服务
|
||||
mcporter config add bosszhipin http://localhost:8000/mcp
|
||||
```
|
||||
|
||||
> 或用 Docker:`docker-compose up -d`
|
||||
> 详见 https://github.com/mucsbr/mcp-bosszp
|
||||
|
||||
### Step 4: Final check
|
||||
|
||||
|
|
@ -112,3 +149,6 @@ If the user wants a different agent to handle it, let them choose.
|
|||
| `agent-reach search-youtube "query"` | Search YouTube |
|
||||
| `agent-reach search-bilibili "query"` | Search Bilibili |
|
||||
| `agent-reach search-xhs "query"` | Search XiaoHongShu |
|
||||
| `agent-reach search-instagram "query"` | Search Instagram |
|
||||
| `agent-reach search-linkedin "query"` | Search LinkedIn |
|
||||
| `agent-reach search-bosszhipin "query"` | Search Boss直聘 |
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue