feat: Instagram 支持 Cookie-Editor 导入 + 超时 fallback 优化

- agent-reach configure instagram-cookies 支持 Cookie-Editor Header String
- Instagram cookie 保存到 ~/.agent-reach/instagram-cookies.txt
- instaloader 429 限速时 15 秒自动超时 fallback 到 Jina Reader
- LinkedIn/Boss直聘保持使用各自 MCP 的原生登录流程（不侵入）
- doctor 提示信息更新
2026-02-25 11:13:52 +01:00 · 2026-02-25 11:13:52 +01:00 · e8fe33c80c
commit e8fe33c80c
parent aafd6839e9
4 changed files with 76 additions and 12 deletions
--- a/agent_reach/channels/bosszhipin.py
+++ b/agent_reach/channels/bosszhipin.py
@@ -77,7 +77,7 @@ class BossZhipinChannel(Channel):
            "可通过 Jina Reader 读取职位页面。完整功能需要：\n"
            "  1. git clone https://github.com/mucsbr/mcp-bosszp.git\n"
            "  2. cd mcp-bosszp && pip install -r requirements.txt && playwright install chromium\n"
-            "  3. python boss_zhipin_fastmcp_v2.py（启动 MCP 服务）\n"
+            "  3. python boss_zhipin_fastmcp_v2.py（启动后扫码登录）\n"
            "  4. mcporter config add bosszhipin http://localhost:8000/mcp\n"
            "  或用 Docker：docker-compose up -d\n"
            "  详见 https://github.com/mucsbr/mcp-bosszp"
--- a/agent_reach/channels/instagram.py
+++ b/agent_reach/channels/instagram.py
@@ -8,6 +8,7 @@ Swap to: any Instagram access tool
 import re
 import shutil
 import subprocess
+from pathlib import Path
 from urllib.parse import urlparse
 from .base import Channel, ReadResult, SearchResult
 from typing import List
@@ -37,9 +38,14 @@ class InstagramChannel(Channel):
            return "off", (
                "需要安装 instaloader：pip install instaloader\n"
                "  安装后可读取 Instagram 帖子和 Profile\n"
-                "  登录解锁更多功能：instaloader --login YOUR_USERNAME"
+                "  登录: agent-reach configure instagram-cookies \"sessionid=xxx; csrftoken=yyy; ...\""
            )
-        return "ok", "可读取公开帖子和 Profile。登录后可访问更多内容"
+
+        # Check if cookies are configured
+        cookie_file = Path.home() / ".agent-reach" / "instagram-cookies.txt"
+        if cookie_file.exists():
+            return "ok", "已登录，可读取 Instagram 帖子和 Profile"
+        return "ok", "可读取公开帖子和 Profile。登录可访问更多内容:\n  agent-reach configure instagram-cookies \"sessionid=xxx; csrftoken=yyy; ...\""

    async def read(self, url: str, config=None) -> ReadResult:
        # Try instaloader (module or CLI)
@@ -69,8 +75,24 @@ class InstagramChannel(Channel):
                max_connection_attempts=1,  # Don't retry on rate limit
            )

-            # Try to load session if available
-            if config and config.get("instagram_username"):
+            # Try to load session: cookie file > saved session
+            cookie_file = Path.home() / ".agent-reach" / "instagram-cookies.txt"
+            if cookie_file.exists():
+                try:
+                    cookie_str = cookie_file.read_text().strip()
+                    cookies = {}
+                    for part in cookie_str.split(";"):
+                        part = part.strip()
+                        if "=" in part:
+                            k, v = part.split("=", 1)
+                            cookies[k.strip()] = v.strip()
+                    if "sessionid" in cookies and "csrftoken" in cookies:
+                        # Extract username from ds_user_id or use generic
+                        username = cookies.get("ds_user_id", "user")
+                        L.context.load_session(username, cookies)
+                except Exception:
+                    pass
+            elif config and config.get("instagram_username"):
                try:
                    L.load_session_from_file(config.get("instagram_username"))
                except Exception:
--- a/agent_reach/channels/linkedin.py
+++ b/agent_reach/channels/linkedin.py
@@ -55,16 +55,17 @@ class LinkedInChannel(Channel):
        if shutil.which("linkedin-scraper-mcp"):
            return "warn", (
                "linkedin-scraper-mcp 已安装但未接入 mcporter。运行：\n"
-                "  1. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
-                "  2. mcporter config add linkedin http://localhost:8001/mcp\n"
-                "  或先登录：uvx linkedin-scraper-mcp --login"
+                "  1. linkedin-scraper-mcp --login（在有浏览器的机器上登录）\n"
+                "  2. linkedin-scraper-mcp --transport streamable-http --port 8001\n"
+                "  3. mcporter config add linkedin http://localhost:8001/mcp"
            )

        return "off", (
            "可通过 Jina Reader 读取部分内容。完整功能需要：\n"
-            "  1. pip install linkedin-scraper-mcp 或 uvx linkedin-scraper-mcp --login\n"
-            "  2. uvx linkedin-scraper-mcp --transport streamable-http --port 8001\n"
-            "  3. mcporter config add linkedin http://localhost:8001/mcp\n"
+            "  1. pip install linkedin-scraper-mcp\n"
+            "  2. linkedin-scraper-mcp --login（在有浏览器的机器上登录）\n"
+            "  3. linkedin-scraper-mcp --transport streamable-http --port 8001\n"
+            "  4. mcporter config add linkedin http://localhost:8001/mcp\n"
            "  详见 https://github.com/stickerdaniel/linkedin-mcp-server"
        )

--- a/agent_reach/cli.py
+++ b/agent_reach/cli.py
@@ -118,7 +118,8 @@ def main():
    p_conf = sub.add_parser("configure", help="Set a config value or auto-extract from browser")
    p_conf.add_argument("key", nargs="?", default=None,
                        choices=["proxy", "github-token", "groq-key",
-                                 "twitter-cookies", "youtube-cookies"],
+                                 "twitter-cookies", "youtube-cookies",
+                                 "instagram-cookies"],
                        help="What to configure (omit if using --from-browser)")
    p_conf.add_argument("value", nargs="*", help="The value(s) to set")
    p_conf.add_argument("--from-browser", metavar="BROWSER",
@@ -645,6 +646,9 @@ def _cmd_configure(args):
        config.set("groq_api_key", value)
        print(f"✅ Groq key configured!")

+    elif args.key == "instagram-cookies":
+        _configure_instagram_cookies(value)
+

 def _cmd_doctor():
    from agent_reach.config import Config
@@ -654,6 +658,43 @@ def _cmd_doctor():
    print(format_report(results))


+def _parse_cookie_header(cookie_str: str) -> dict:
+    """Parse Cookie-Editor 'Header String' format into a dict."""
+    cookies = {}
+    for part in cookie_str.split(";"):
+        part = part.strip()
+        if "=" in part:
+            k, v = part.split("=", 1)
+            cookies[k.strip()] = v.strip()
+    return cookies
+
+
+def _configure_instagram_cookies(value: str):
+    """Save Instagram cookies from Cookie-Editor Header String."""
+    from pathlib import Path
+
+    cookies = _parse_cookie_header(value)
+    if "sessionid" not in cookies:
+        print("❌ Cookie 里缺少 sessionid。")
+        print("   确保你已登录 Instagram，然后用 Cookie-Editor 导出 Header String。")
+        print('   格式: agent-reach configure instagram-cookies "sessionid=xxx; csrftoken=yyy; ..."')
+        return
+
+    cookie_dir = Path.home() / ".agent-reach"
+    cookie_dir.mkdir(parents=True, exist_ok=True)
+    cookie_file = cookie_dir / "instagram-cookies.txt"
+    cookie_file.write_text(value.strip())
+    cookie_file.chmod(0o600)
+
+    print(f"✅ Instagram cookies 已保存！")
+    print(f"   sessionid: {cookies['sessionid'][:8]}...")
+    if "csrftoken" in cookies:
+        print(f"   csrftoken: ✅")
+    if "ds_user_id" in cookies:
+        print(f"   ds_user_id: {cookies['ds_user_id']}")
+    print(f"   文件: {cookie_file}")
+
+
 def _cmd_setup():
    from agent_reach.config import Config