feat: Instagram 支持 Cookie-Editor 导入 + 超时 fallback 优化

- agent-reach configure instagram-cookies 支持 Cookie-Editor Header String
- Instagram cookie 保存到 ~/.agent-reach/instagram-cookies.txt
- instaloader 429 限速时 15 秒自动超时 fallback 到 Jina Reader
- LinkedIn/Boss直聘 保持使用各自 MCP 的原生登录流程(不侵入)
- doctor 提示信息更新
This commit is contained in:
Panniantong 2026-02-25 11:13:52 +01:00
parent aafd6839e9
commit e8fe33c80c
4 changed files with 76 additions and 12 deletions

View file

@ -77,7 +77,7 @@ class BossZhipinChannel(Channel):
"可通过 Jina Reader 读取职位页面。完整功能需要:\n"
" 1. git clone https://github.com/mucsbr/mcp-bosszp.git\n"
" 2. cd mcp-bosszp && pip install -r requirements.txt && playwright install chromium\n"
" 3. python boss_zhipin_fastmcp_v2.py启动 MCP 服务\n"
" 3. python boss_zhipin_fastmcp_v2.py启动后扫码登录\n"
" 4. mcporter config add bosszhipin http://localhost:8000/mcp\n"
" 或用 Dockerdocker-compose up -d\n"
" 详见 https://github.com/mucsbr/mcp-bosszp"

View file

@ -8,6 +8,7 @@ Swap to: any Instagram access tool
import re
import shutil
import subprocess
from pathlib import Path
from urllib.parse import urlparse
from .base import Channel, ReadResult, SearchResult
from typing import List
@ -37,9 +38,14 @@ class InstagramChannel(Channel):
return "off", (
"需要安装 instaloaderpip install instaloader\n"
" 安装后可读取 Instagram 帖子和 Profile\n"
" 登录解锁更多功能instaloader --login YOUR_USERNAME"
" 登录: agent-reach configure instagram-cookies \"sessionid=xxx; csrftoken=yyy; ...\""
)
return "ok", "可读取公开帖子和 Profile。登录后可访问更多内容"
# Check if cookies are configured
cookie_file = Path.home() / ".agent-reach" / "instagram-cookies.txt"
if cookie_file.exists():
return "ok", "已登录,可读取 Instagram 帖子和 Profile"
return "ok", "可读取公开帖子和 Profile。登录可访问更多内容:\n agent-reach configure instagram-cookies \"sessionid=xxx; csrftoken=yyy; ...\""
async def read(self, url: str, config=None) -> ReadResult:
# Try instaloader (module or CLI)
@ -69,8 +75,24 @@ class InstagramChannel(Channel):
max_connection_attempts=1, # Don't retry on rate limit
)
# Try to load session if available
if config and config.get("instagram_username"):
# Try to load session: cookie file > saved session
cookie_file = Path.home() / ".agent-reach" / "instagram-cookies.txt"
if cookie_file.exists():
try:
cookie_str = cookie_file.read_text().strip()
cookies = {}
for part in cookie_str.split(";"):
part = part.strip()
if "=" in part:
k, v = part.split("=", 1)
cookies[k.strip()] = v.strip()
if "sessionid" in cookies and "csrftoken" in cookies:
# Extract username from ds_user_id or use generic
username = cookies.get("ds_user_id", "user")
L.context.load_session(username, cookies)
except Exception:
pass
elif config and config.get("instagram_username"):
try:
L.load_session_from_file(config.get("instagram_username"))
except Exception:

View file

@ -55,16 +55,17 @@ class LinkedInChannel(Channel):
if shutil.which("linkedin-scraper-mcp"):
return "warn", (
"linkedin-scraper-mcp 已安装但未接入 mcporter。运行\n"
" 1. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
" 2. mcporter config add linkedin http://localhost:8001/mcp\n"
" 或先登录uvx linkedin-scraper-mcp --login"
" 1. linkedin-scraper-mcp --login在有浏览器的机器上登录\n"
" 2. linkedin-scraper-mcp --transport streamable-http --port 8001\n"
" 3. mcporter config add linkedin http://localhost:8001/mcp"
)
return "off", (
"可通过 Jina Reader 读取部分内容。完整功能需要:\n"
" 1. pip install linkedin-scraper-mcp 或 uvx linkedin-scraper-mcp --login\n"
" 2. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
" 3. mcporter config add linkedin http://localhost:8001/mcp\n"
" 1. pip install linkedin-scraper-mcp\n"
" 2. linkedin-scraper-mcp --login在有浏览器的机器上登录\n"
" 3. linkedin-scraper-mcp --transport streamable-http --port 8001\n"
" 4. mcporter config add linkedin http://localhost:8001/mcp\n"
" 详见 https://github.com/stickerdaniel/linkedin-mcp-server"
)

View file

@ -118,7 +118,8 @@ def main():
p_conf = sub.add_parser("configure", help="Set a config value or auto-extract from browser")
p_conf.add_argument("key", nargs="?", default=None,
choices=["proxy", "github-token", "groq-key",
"twitter-cookies", "youtube-cookies"],
"twitter-cookies", "youtube-cookies",
"instagram-cookies"],
help="What to configure (omit if using --from-browser)")
p_conf.add_argument("value", nargs="*", help="The value(s) to set")
p_conf.add_argument("--from-browser", metavar="BROWSER",
@ -645,6 +646,9 @@ def _cmd_configure(args):
config.set("groq_api_key", value)
print(f"✅ Groq key configured!")
elif args.key == "instagram-cookies":
_configure_instagram_cookies(value)
def _cmd_doctor():
from agent_reach.config import Config
@ -654,6 +658,43 @@ def _cmd_doctor():
print(format_report(results))
def _parse_cookie_header(cookie_str: str) -> dict:
"""Parse Cookie-Editor 'Header String' format into a dict."""
cookies = {}
for part in cookie_str.split(";"):
part = part.strip()
if "=" in part:
k, v = part.split("=", 1)
cookies[k.strip()] = v.strip()
return cookies
def _configure_instagram_cookies(value: str):
    """Save Instagram cookies from a Cookie-Editor "Header String" export.

    The string is parsed only to validate it and to print a summary; the
    stripped raw string is what gets written to
    ``~/.agent-reach/instagram-cookies.txt``.

    Args:
        value: Cookie header string, e.g. ``"sessionid=xxx; csrftoken=yyy"``.

    Returns:
        None. Prints an error and writes nothing when ``sessionid`` is
        missing or empty; otherwise prints a short confirmation.
    """
    import os
    from pathlib import Path

    cookies = _parse_cookie_header(value)
    # An empty "sessionid=" is as useless as a missing one, so reject both.
    if not cookies.get("sessionid"):
        print("❌ Cookie 里缺少 sessionid。")
        print(" 确保你已登录 Instagram然后用 Cookie-Editor 导出 Header String。")
        print(' 格式: agent-reach configure instagram-cookies "sessionid=xxx; csrftoken=yyy; ..."')
        return

    cookie_dir = Path.home() / ".agent-reach"
    cookie_dir.mkdir(parents=True, exist_ok=True)
    cookie_file = cookie_dir / "instagram-cookies.txt"

    # Create the file owner-only from the start: writing first and chmod-ing
    # afterwards would leave the session secret readable under the default
    # umask for a short window.
    fd = os.open(cookie_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        # The creation mode is ignored when the file already existed, so
        # tighten permissions explicitly before the secret is written.
        cookie_file.chmod(0o600)
        fh.write(value.strip())

    print("✅ Instagram cookies 已保存!")
    # Only a short prefix of the session id is echoed back.
    print(f" sessionid: {cookies['sessionid'][:8]}...")
    if "csrftoken" in cookies:
        print(" csrftoken: ✅")
    if "ds_user_id" in cookies:
        print(f" ds_user_id: {cookies['ds_user_id']}")
    print(f" 文件: {cookie_file}")
def _cmd_setup():
from agent_reach.config import Config