feat: Instagram 支持 Cookie-Editor 导入 + 超时 fallback 优化
- agent-reach configure instagram-cookies 支持 Cookie-Editor Header String
- Instagram cookie 保存到 ~/.agent-reach/instagram-cookies.txt
- instaloader 429 限速时 15 秒自动超时 fallback 到 Jina Reader
- LinkedIn/Boss直聘 保持使用各自 MCP 的原生登录流程(不侵入)
- doctor 提示信息更新
This commit is contained in:
parent
aafd6839e9
commit
e8fe33c80c
4 changed files with 76 additions and 12 deletions
|
|
@ -77,7 +77,7 @@ class BossZhipinChannel(Channel):
|
|||
"可通过 Jina Reader 读取职位页面。完整功能需要:\n"
|
||||
" 1. git clone https://github.com/mucsbr/mcp-bosszp.git\n"
|
||||
" 2. cd mcp-bosszp && pip install -r requirements.txt && playwright install chromium\n"
|
||||
" 3. python boss_zhipin_fastmcp_v2.py(启动 MCP 服务)\n"
|
||||
" 3. python boss_zhipin_fastmcp_v2.py(启动后扫码登录)\n"
|
||||
" 4. mcporter config add bosszhipin http://localhost:8000/mcp\n"
|
||||
" 或用 Docker:docker-compose up -d\n"
|
||||
" 详见 https://github.com/mucsbr/mcp-bosszp"
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ Swap to: any Instagram access tool
|
|||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from .base import Channel, ReadResult, SearchResult
|
||||
from typing import List
|
||||
|
|
@ -37,9 +38,14 @@ class InstagramChannel(Channel):
|
|||
return "off", (
|
||||
"需要安装 instaloader:pip install instaloader\n"
|
||||
" 安装后可读取 Instagram 帖子和 Profile\n"
|
||||
" 登录解锁更多功能:instaloader --login YOUR_USERNAME"
|
||||
" 登录: agent-reach configure instagram-cookies \"sessionid=xxx; csrftoken=yyy; ...\""
|
||||
)
|
||||
return "ok", "可读取公开帖子和 Profile。登录后可访问更多内容"
|
||||
|
||||
# Check if cookies are configured
|
||||
cookie_file = Path.home() / ".agent-reach" / "instagram-cookies.txt"
|
||||
if cookie_file.exists():
|
||||
return "ok", "已登录,可读取 Instagram 帖子和 Profile"
|
||||
return "ok", "可读取公开帖子和 Profile。登录可访问更多内容:\n agent-reach configure instagram-cookies \"sessionid=xxx; csrftoken=yyy; ...\""
|
||||
|
||||
async def read(self, url: str, config=None) -> ReadResult:
|
||||
# Try instaloader (module or CLI)
|
||||
|
|
@ -69,8 +75,24 @@ class InstagramChannel(Channel):
|
|||
max_connection_attempts=1, # Don't retry on rate limit
|
||||
)
|
||||
|
||||
# Try to load session if available
|
||||
if config and config.get("instagram_username"):
|
||||
# Try to load session: cookie file > saved session
|
||||
cookie_file = Path.home() / ".agent-reach" / "instagram-cookies.txt"
|
||||
if cookie_file.exists():
|
||||
try:
|
||||
cookie_str = cookie_file.read_text().strip()
|
||||
cookies = {}
|
||||
for part in cookie_str.split(";"):
|
||||
part = part.strip()
|
||||
if "=" in part:
|
||||
k, v = part.split("=", 1)
|
||||
cookies[k.strip()] = v.strip()
|
||||
if "sessionid" in cookies and "csrftoken" in cookies:
|
||||
# Extract username from ds_user_id or use generic
|
||||
username = cookies.get("ds_user_id", "user")
|
||||
L.context.load_session(username, cookies)
|
||||
except Exception:
|
||||
pass
|
||||
elif config and config.get("instagram_username"):
|
||||
try:
|
||||
L.load_session_from_file(config.get("instagram_username"))
|
||||
except Exception:
|
||||
|
|
|
|||
|
|
@ -55,16 +55,17 @@ class LinkedInChannel(Channel):
|
|||
if shutil.which("linkedin-scraper-mcp"):
|
||||
return "warn", (
|
||||
"linkedin-scraper-mcp 已安装但未接入 mcporter。运行:\n"
|
||||
" 1. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
|
||||
" 2. mcporter config add linkedin http://localhost:8001/mcp\n"
|
||||
" 或先登录:uvx linkedin-scraper-mcp --login"
|
||||
" 1. linkedin-scraper-mcp --login(在有浏览器的机器上登录)\n"
|
||||
" 2. linkedin-scraper-mcp --transport streamable-http --port 8001\n"
|
||||
" 3. mcporter config add linkedin http://localhost:8001/mcp"
|
||||
)
|
||||
|
||||
return "off", (
|
||||
"可通过 Jina Reader 读取部分内容。完整功能需要:\n"
|
||||
" 1. pip install linkedin-scraper-mcp 或 uvx linkedin-scraper-mcp --login\n"
|
||||
" 2. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
|
||||
" 3. mcporter config add linkedin http://localhost:8001/mcp\n"
|
||||
" 1. pip install linkedin-scraper-mcp\n"
|
||||
" 2. linkedin-scraper-mcp --login(在有浏览器的机器上登录)\n"
|
||||
" 3. linkedin-scraper-mcp --transport streamable-http --port 8001\n"
|
||||
" 4. mcporter config add linkedin http://localhost:8001/mcp\n"
|
||||
" 详见 https://github.com/stickerdaniel/linkedin-mcp-server"
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -118,7 +118,8 @@ def main():
|
|||
p_conf = sub.add_parser("configure", help="Set a config value or auto-extract from browser")
|
||||
p_conf.add_argument("key", nargs="?", default=None,
|
||||
choices=["proxy", "github-token", "groq-key",
|
||||
"twitter-cookies", "youtube-cookies"],
|
||||
"twitter-cookies", "youtube-cookies",
|
||||
"instagram-cookies"],
|
||||
help="What to configure (omit if using --from-browser)")
|
||||
p_conf.add_argument("value", nargs="*", help="The value(s) to set")
|
||||
p_conf.add_argument("--from-browser", metavar="BROWSER",
|
||||
|
|
@ -645,6 +646,9 @@ def _cmd_configure(args):
|
|||
config.set("groq_api_key", value)
|
||||
print(f"✅ Groq key configured!")
|
||||
|
||||
elif args.key == "instagram-cookies":
|
||||
_configure_instagram_cookies(value)
|
||||
|
||||
|
||||
def _cmd_doctor():
|
||||
from agent_reach.config import Config
|
||||
|
|
@ -654,6 +658,43 @@ def _cmd_doctor():
|
|||
print(format_report(results))
|
||||
|
||||
|
||||
def _parse_cookie_header(cookie_str: str) -> dict:
|
||||
"""Parse Cookie-Editor 'Header String' format into a dict."""
|
||||
cookies = {}
|
||||
for part in cookie_str.split(";"):
|
||||
part = part.strip()
|
||||
if "=" in part:
|
||||
k, v = part.split("=", 1)
|
||||
cookies[k.strip()] = v.strip()
|
||||
return cookies
|
||||
|
||||
|
||||
def _configure_instagram_cookies(value: str):
    """Validate and save Instagram cookies from a Cookie-Editor Header String.

    The string is parsed with ``_parse_cookie_header``. If it carries no
    usable ``sessionid`` an explanation is printed and nothing is written.
    Otherwise the stripped input is stored verbatim in
    ``~/.agent-reach/instagram-cookies.txt`` and a short summary is printed.

    Args:
        value: Cookie header string such as
            ``"sessionid=xxx; csrftoken=yyy; ..."``.

    Returns:
        None. Results are reported on stdout.
    """
    import os
    from pathlib import Path

    cookies = _parse_cookie_header(value)
    # An empty "sessionid=" is as useless as a missing one.
    if not cookies.get("sessionid"):
        print("❌ Cookie 里缺少 sessionid。")
        print(" 确保你已登录 Instagram,然后用 Cookie-Editor 导出 Header String。")
        print(' 格式: agent-reach configure instagram-cookies "sessionid=xxx; csrftoken=yyy; ..."')
        return

    cookie_dir = Path.home() / ".agent-reach"
    cookie_dir.mkdir(parents=True, exist_ok=True)
    cookie_file = cookie_dir / "instagram-cookies.txt"

    # The file holds a login secret, so create it owner-only from the start
    # rather than writing it with default permissions and chmod-ing afterwards.
    fd = os.open(cookie_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as handle:
        # The mode passed to os.open only applies to newly created files;
        # tighten a pre-existing file before the secret is written into it.
        cookie_file.chmod(0o600)
        handle.write(value.strip())

    print("✅ Instagram cookies 已保存!")
    # Show only a short prefix of the session id so the secret is not echoed.
    print(f" sessionid: {cookies['sessionid'][:8]}...")
    if "csrftoken" in cookies:
        print(" csrftoken: ✅")
    else:
        # The Instagram channel's cookie loader only loads a session when both
        # sessionid and csrftoken are present, so flag the gap here.
        print(" csrftoken: ⚠️ 缺失,登录态可能无法加载,建议重新导出完整 Cookie")
    if "ds_user_id" in cookies:
        print(f" ds_user_id: {cookies['ds_user_id']}")
    print(f" 文件: {cookie_file}")
|
||||
|
||||
|
||||
def _cmd_setup():
|
||||
from agent_reach.config import Config
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue