fix(xiaohongshu): 修复 xsec_token 丢失导致无法读取笔记详情 (#17)

问题：搜索结果包含 xsecToken 但被丢弃，read() 只从首页 feed 查找 token，几乎不可能匹配到目标笔记。

修复：
1. search() 返回的 URL 携带 xsec_token 查询参数
2. read() 三级 token 查找：URL 参数 → 首页 feed → 搜索反查
3. 全部失败时回退到 Jina Reader

Fixes #15

Co-authored-by: Panniantong <panniantong@users.noreply.github.com>
2026-02-26 14:36:36 +08:00 · 2026-02-26 14:36:36 +08:00 · 1cbf6a7b9c
commit 1cbf6a7b9c
parent 68319ec683
1 changed files with 91 additions and 19 deletions
--- a/agent_reach/channels/xiaohongshu.py
+++ b/agent_reach/channels/xiaohongshu.py
@@ -8,7 +8,7 @@ Requires: mcporter CLI + xiaohongshu MCP server running
 import json
 import shutil
 import subprocess
-from urllib.parse import urlparse
+from urllib.parse import urlparse, parse_qs, urlencode
 from .base import Channel, ReadResult, SearchResult
 from typing import List, Optional

@@ -93,21 +93,22 @@ class XiaoHongShuChannel(Channel):
                url=url, platform="xiaohongshu",
            )

-        # Step 1: get xsec_token from feeds
-        xsec_token = self._find_token(note_id)
+        # Step 1: try xsec_token from URL query param (e.g. from search results)
+        xsec_token = self._extract_token_from_url(url)

+        # Step 2: try homepage feeds
        if not xsec_token:
-            return ReadResult(
-                title="XiaoHongShu",
-                content=(
-                    f"⚠️ 无法获取笔记 {note_id} 的访问令牌。\n"
-                    "小红书需要 xsec_token 才能读取笔记详情。\n"
-                    "请先通过搜索找到这篇笔记，或直接使用搜索功能。"
-                ),
-                url=url, platform="xiaohongshu",
-            )
+            xsec_token = self._find_token_in_feeds(note_id)

-        # Step 2: get detail
+        # Step 3: search for the note to get a fresh token
+        if not xsec_token:
+            xsec_token = self._find_token_by_search(note_id)
+
+        # If no token found, fallback to Jina Reader
+        if not xsec_token:
+            return await self._read_jina(url)
+
+        # Get detail via MCP
        out = self._call(
            f'xiaohongshu.get_feed_detail(feed_id: "{note_id}", xsec_token: "{xsec_token}")',
            timeout=15,
@@ -136,9 +137,14 @@ class XiaoHongShuChannel(Channel):
                card = item.get("noteCard", {})
                user = card.get("user", {})
                interact = card.get("interactInfo", {})
+                note_id = item.get("id", "")
+                xsec_token = item.get("xsecToken", "")
+                note_url = f"https://www.xiaohongshu.com/explore/{note_id}"
+                if xsec_token:
+                    note_url += f"?xsec_token={xsec_token}"
                results.append(SearchResult(
                    title=card.get("displayTitle", ""),
-                    url=f"https://www.xiaohongshu.com/explore/{item.get('id', '')}",
+                    url=note_url,
                    snippet=f"👤 {user.get('nickname', '')} · ❤ {interact.get('likedCount', '0')}",
                    score=0,
                ))
@@ -149,17 +155,40 @@ class XiaoHongShuChannel(Channel):
    # ── Helpers ──

    def _extract_note_id(self, url: str) -> str:
-        parts = urlparse(url).path.strip("/").split("/")
-        return parts[-1] if parts else ""
+        """Extract note ID from URL path, ignoring query params."""
+        path = urlparse(url).path.strip("/").split("/")
+        return path[-1] if path else ""

-    def _find_token(self, note_id: str) -> Optional[str]:
-        """Try to find xsec_token for a note from feeds."""
+    def _extract_token_from_url(self, url: str) -> Optional[str]:
+        """Extract xsec_token from URL query parameter if present."""
+        qs = parse_qs(urlparse(url).query)
+        tokens = qs.get("xsec_token", [])
+        return tokens[0] if tokens else None
+
+    def _find_token_in_feeds(self, note_id: str) -> Optional[str]:
+        """Try to find xsec_token for a note from homepage feeds."""
        try:
            out = self._call("xiaohongshu.list_feeds()", timeout=15)
            data = json.loads(out)
            for feed in data.get("feeds", []):
                if feed.get("id") == note_id:
-                    return feed.get("xsecToken", "")
+                    return feed.get("xsecToken") or None
+        except Exception:
+            pass
+        return None
+
+    def _find_token_by_search(self, note_id: str) -> Optional[str]:
+        """Search for the note ID to get a fresh xsec_token."""
+        try:
+            out = self._call(
+                f'xiaohongshu.search_feeds(keyword: "{note_id}")', timeout=20
+            )
+            data = json.loads(out)
+            for feed in data.get("feeds", []):
+                if feed.get("id") == note_id:
+                    return feed.get("xsecToken") or None
+            # No feed with a matching id in the search results: fall through
+            # and return None (no fallback to the first result is attempted).
        except Exception:
            pass
        return None
@@ -170,3 +199,46 @@ class XiaoHongShuChannel(Channel):
            if line and not line.startswith(("{", "[", "#", "http")):
                return line[:80]
        return ""
+
+    async def _read_jina(self, url: str) -> ReadResult:
+        """Fallback: read XHS note via Jina Reader when xsec_token unavailable."""
+        import requests
+        try:
+            resp = requests.get(
+                f"https://r.jina.ai/{url}",
+                headers={"Accept": "text/markdown"},
+                timeout=15,
+            )
+            resp.raise_for_status()
+            text = resp.text
+            if len(text.strip()) < 50 or "登录" in text[:200]:
+                return ReadResult(
+                    title="XiaoHongShu",
+                    content=(
+                        f"⚠️ 无法获取笔记详情: {url}\n\n"
+                        "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n"
+                        "请尝试先搜索相关关键词，再从结果中读取。"
+                    ),
+                    url=url, platform="xiaohongshu",
+                )
+            title = ""
+            for line in text.split("\n"):
+                line = line.strip()
+                if line and not line.startswith(("#", "http", "![", "[")):
+                    title = line[:80]
+                    break
+            return ReadResult(
+                title=title or "XiaoHongShu",
+                content=text.strip(),
+                url=url, platform="xiaohongshu",
+            )
+        except Exception:
+            return ReadResult(
+                title="XiaoHongShu",
+                content=(
+                    f"⚠️ 无法获取笔记详情: {url}\n\n"
+                    "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n"
+                    "请尝试先搜索相关关键词，再从结果中读取。"
+                ),
+                url=url, platform="xiaohongshu",
+            )