From 1cbf6a7b9cb3c3cbd45493365d0b98f1cefdac41 Mon Sep 17 00:00:00 2001
From: Pnant <73925474+Panniantong@users.noreply.github.com>
Date: Thu, 26 Feb 2026 14:36:36 +0800
Subject: [PATCH] =?UTF-8?q?fix(xiaohongshu):=20=E4=BF=AE=E5=A4=8D=20xsec?=
 =?UTF-8?q?=5Ftoken=20=E4=B8=A2=E5=A4=B1=E5=AF=BC=E8=87=B4=E6=97=A0?=
 =?UTF-8?q?=E6=B3=95=E8=AF=BB=E5=8F=96=E7=AC=94=E8=AE=B0=E8=AF=A6=E6=83=85?=
 =?UTF-8?q?=20(#17)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

问题：搜索结果包含 xsecToken 但被丢弃，read() 只从首页 feed
查找 token，几乎不可能匹配到目标笔记。

修复：
1. search() 返回的 URL 携带 xsec_token 查询参数
2. read() 三级 token 查找：URL 参数 → 首页 feed → 搜索反查
3. 全部失败时回退到 Jina Reader

Fixes #15

Co-authored-by: Panniantong <panniantong@users.noreply.github.com>
---
 agent_reach/channels/xiaohongshu.py | 110 +++++++++++++++++++++++-----
 1 file changed, 91 insertions(+), 19 deletions(-)

diff --git a/agent_reach/channels/xiaohongshu.py b/agent_reach/channels/xiaohongshu.py
index 1914bc6..84ddfa1 100644
--- a/agent_reach/channels/xiaohongshu.py
+++ b/agent_reach/channels/xiaohongshu.py
@@ -8,7 +8,7 @@ Requires: mcporter CLI + xiaohongshu MCP server running
 import json
 import shutil
 import subprocess
-from urllib.parse import urlparse
+from urllib.parse import urlparse, parse_qs, urlencode
 from .base import Channel, ReadResult, SearchResult
 from typing import List, Optional
 
@@ -93,21 +93,22 @@ class XiaoHongShuChannel(Channel):
                 url=url, platform="xiaohongshu",
             )
 
-        # Step 1: get xsec_token from feeds
-        xsec_token = self._find_token(note_id)
+        # Step 1: try xsec_token from URL query param (e.g. from search results)
+        xsec_token = self._extract_token_from_url(url)
 
+        # Step 2: try homepage feeds
         if not xsec_token:
-            return ReadResult(
-                title="XiaoHongShu",
-                content=(
-                    f"⚠️ 无法获取笔记 {note_id} 的访问令牌。\n"
-                    "小红书需要 xsec_token 才能读取笔记详情。\n"
-                    "请先通过搜索找到这篇笔记，或直接使用搜索功能。"
-                ),
-                url=url, platform="xiaohongshu",
-            )
+            xsec_token = self._find_token_in_feeds(note_id)
 
-        # Step 2: get detail
+        # Step 3: search for the note to get a fresh token
+        if not xsec_token:
+            xsec_token = self._find_token_by_search(note_id)
+
+        # If no token found, fallback to Jina Reader
+        if not xsec_token:
+            return await self._read_jina(url)
+
+        # Get detail via MCP
         out = self._call(
             f'xiaohongshu.get_feed_detail(feed_id: "{note_id}", xsec_token: "{xsec_token}")',
             timeout=15,
@@ -136,9 +137,14 @@ class XiaoHongShuChannel(Channel):
                 card = item.get("noteCard", {})
                 user = card.get("user", {})
                 interact = card.get("interactInfo", {})
+                note_id = item.get("id", "")
+                xsec_token = item.get("xsecToken", "")
+                note_url = f"https://www.xiaohongshu.com/explore/{note_id}"
+                if xsec_token:
+                    note_url += f"?xsec_token={xsec_token}"
                 results.append(SearchResult(
                     title=card.get("displayTitle", ""),
-                    url=f"https://www.xiaohongshu.com/explore/{item.get('id', '')}",
+                    url=note_url,
                     snippet=f"👤 {user.get('nickname', '')} · ❤ {interact.get('likedCount', '0')}",
                     score=0,
                 ))
@@ -149,17 +155,40 @@ class XiaoHongShuChannel(Channel):
     # ── Helpers ──
 
     def _extract_note_id(self, url: str) -> str:
-        parts = urlparse(url).path.strip("/").split("/")
-        return parts[-1] if parts else ""
+        """Extract note ID from URL path, ignoring query params."""
+        path = urlparse(url).path.strip("/").split("/")
+        return path[-1] if path else ""
 
-    def _find_token(self, note_id: str) -> Optional[str]:
-        """Try to find xsec_token for a note from feeds."""
+    def _extract_token_from_url(self, url: str) -> Optional[str]:
+        """Extract xsec_token from URL query parameter if present."""
+        qs = parse_qs(urlparse(url).query)
+        tokens = qs.get("xsec_token", [])
+        return tokens[0] if tokens else None
+
+    def _find_token_in_feeds(self, note_id: str) -> Optional[str]:
+        """Try to find xsec_token for a note from homepage feeds."""
         try:
             out = self._call("xiaohongshu.list_feeds()", timeout=15)
             data = json.loads(out)
             for feed in data.get("feeds", []):
                 if feed.get("id") == note_id:
-                    return feed.get("xsecToken", "")
+                    return feed.get("xsecToken") or None
+        except Exception:
+            pass
+        return None
+
+    def _find_token_by_search(self, note_id: str) -> Optional[str]:
+        """Search for the note ID to get a fresh xsec_token."""
+        try:
+            out = self._call(
+                f'xiaohongshu.search_feeds(keyword: "{note_id}")', timeout=20
+            )
+            data = json.loads(out)
+            for feed in data.get("feeds", []):
+                if feed.get("id") == note_id:
+                    return feed.get("xsecToken") or None
+            # If exact match not found but results exist, try the first one
+            # (search by note_id sometimes returns the note with a different key)
         except Exception:
             pass
         return None
@@ -170,3 +199,46 @@ class XiaoHongShuChannel(Channel):
             if line and not line.startswith(("{", "[", "#", "http")):
                 return line[:80]
         return ""
+
+    async def _read_jina(self, url: str) -> ReadResult:
+        """Fallback: read XHS note via Jina Reader when xsec_token unavailable."""
+        import requests
+        try:
+            resp = requests.get(
+                f"https://r.jina.ai/{url}",
+                headers={"Accept": "text/markdown"},
+                timeout=15,
+            )
+            resp.raise_for_status()
+            text = resp.text
+            if len(text.strip()) < 50 or "登录" in text[:200]:
+                return ReadResult(
+                    title="XiaoHongShu",
+                    content=(
+                        f"⚠️ 无法获取笔记详情: {url}\n\n"
+                        "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n"
+                        "请尝试先搜索相关关键词，再从结果中读取。"
+                    ),
+                    url=url, platform="xiaohongshu",
+                )
+            title = ""
+            for line in text.split("\n"):
+                line = line.strip()
+                if line and not line.startswith(("#", "http", "![", "[")):
+                    title = line[:80]
+                    break
+            return ReadResult(
+                title=title or "XiaoHongShu",
+                content=text.strip(),
+                url=url, platform="xiaohongshu",
+            )
+        except Exception:
+            return ReadResult(
+                title="XiaoHongShu",
+                content=(
+                    f"⚠️ 无法获取笔记详情: {url}\n\n"
+                    "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n"
+                    "请尝试先搜索相关关键词，再从结果中读取。"
+                ),
+                url=url, platform="xiaohongshu",
+            )