From 1cbf6a7b9cb3c3cbd45493365d0b98f1cefdac41 Mon Sep 17 00:00:00 2001
From: Pnant <73925474+Panniantong@users.noreply.github.com>
Date: Thu, 26 Feb 2026 14:36:36 +0800
Subject: [PATCH] =?UTF-8?q?fix(xiaohongshu):=20=E4=BF=AE=E5=A4=8D=20xsec?=
 =?UTF-8?q?=5Ftoken=20=E4=B8=A2=E5=A4=B1=E5=AF=BC=E8=87=B4=E6=97=A0?=
 =?UTF-8?q?=E6=B3=95=E8=AF=BB=E5=8F=96=E7=AC=94=E8=AE=B0=E8=AF=A6=E6=83=85?=
 =?UTF-8?q?=20(#17)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

问题:搜索结果包含 xsecToken 但被丢弃,read() 只从首页 feed 查找 token,几乎不可能匹配到目标笔记。

修复:
1. search() 返回的 URL 携带 xsec_token 查询参数
2. read() 三级 token 查找:URL 参数 → 首页 feed → 搜索反查
3. 全部失败时回退到 Jina Reader

Fixes #15

Co-authored-by: Panniantong
---
Reviewer notes (English; this area is commentary and is not part of the
commit message or of the diff):

Summary of the change, as stated by the commit message and the hunks below:
  * Problem: search results contain an xsecToken that was being discarded,
    and read() only looked for the token in the homepage feed.
  * search() now builds result URLs that carry an xsec_token query parameter.
  * read() resolves the token in three stages: URL query parameter, then
    homepage feeds, then a search for the note id.
  * When no token is found, read() falls back to _read_jina(), which fetches
    the note through https://r.jina.ai/ instead of returning a warning.

Points to confirm before merging:
  1. urlencode is added to the urllib.parse import but is not used in any
     hunk of this patch. search() appends the raw token with
     f"?xsec_token={xsec_token}", while _extract_token_from_url() reads it
     back with parse_qs(), which percent-decodes values and turns "+" into a
     space. A token containing "+", "&", "#" or "%" would not round-trip;
     building the query with urlencode({"xsec_token": xsec_token}) looks like
     the intended form -- TODO confirm the token alphabet.
  2. _find_token_by_search() ends with a comment about trying the first
     search result when there is no exact id match, but no code follows the
     comment; the method returns None in that case.
  3. _read_jina() is declared async but calls the blocking requests.get()
     (timeout=15) directly; presumably this stalls the event loop for the
     duration of the request. requests is imported inside the function --
     verify it is a declared dependency of the package.
  4. note_id and xsec_token are interpolated into the expression strings
     passed to self._call() without escaping; a value containing a double
     quote would change the expression. NOTE(review): consider validating
     the note id taken from the URL path.
  5. _find_token_in_feeds() and _find_token_by_search() use
     "except Exception: pass", so any failure of self._call() or json.loads()
     is reported as "no token" with no diagnostic.

 agent_reach/channels/xiaohongshu.py | 110 +++++++++++++++++++++++-----
 1 file changed, 91 insertions(+), 19 deletions(-)

diff --git a/agent_reach/channels/xiaohongshu.py b/agent_reach/channels/xiaohongshu.py
index 1914bc6..84ddfa1 100644
--- a/agent_reach/channels/xiaohongshu.py
+++ b/agent_reach/channels/xiaohongshu.py
@@ -8,7 +8,7 @@ Requires: mcporter CLI + xiaohongshu MCP server running
 import json
 import shutil
 import subprocess
-from urllib.parse import urlparse
+from urllib.parse import urlparse, parse_qs, urlencode
 from .base import Channel, ReadResult, SearchResult
 from typing import List, Optional
 
@@ -93,21 +93,22 @@ class XiaoHongShuChannel(Channel):
                 url=url, platform="xiaohongshu",
             )
 
-        # Step 1: get xsec_token from feeds
-        xsec_token = self._find_token(note_id)
+        # Step 1: try xsec_token from URL query param (e.g. from search results)
+        xsec_token = self._extract_token_from_url(url)
 
+        # Step 2: try homepage feeds
         if not xsec_token:
-            return ReadResult(
-                title="XiaoHongShu",
-                content=(
-                    f"⚠️ 无法获取笔记 {note_id} 的访问令牌。\n"
-                    "小红书需要 xsec_token 才能读取笔记详情。\n"
-                    "请先通过搜索找到这篇笔记,或直接使用搜索功能。"
-                ),
-                url=url, platform="xiaohongshu",
-            )
+            xsec_token = self._find_token_in_feeds(note_id)
 
-        # Step 2: get detail
+        # Step 3: search for the note to get a fresh token
+        if not xsec_token:
+            xsec_token = self._find_token_by_search(note_id)
+
+        # If no token found, fallback to Jina Reader
+        if not xsec_token:
+            return await self._read_jina(url)
+
+        # Get detail via MCP
         out = self._call(
             f'xiaohongshu.get_feed_detail(feed_id: "{note_id}", xsec_token: "{xsec_token}")',
             timeout=15,
@@ -136,9 +137,14 @@ class XiaoHongShuChannel(Channel):
                 card = item.get("noteCard", {})
                 user = card.get("user", {})
                 interact = card.get("interactInfo", {})
+                note_id = item.get("id", "")
+                xsec_token = item.get("xsecToken", "")
+                note_url = f"https://www.xiaohongshu.com/explore/{note_id}"
+                if xsec_token:
+                    note_url += f"?xsec_token={xsec_token}"
                 results.append(SearchResult(
                     title=card.get("displayTitle", ""),
-                    url=f"https://www.xiaohongshu.com/explore/{item.get('id', '')}",
+                    url=note_url,
                     snippet=f"👤 {user.get('nickname', '')} · ❤ {interact.get('likedCount', '0')}",
                     score=0,
                 ))
@@ -149,17 +155,40 @@ class XiaoHongShuChannel(Channel):
     # ── Helpers ──
 
     def _extract_note_id(self, url: str) -> str:
-        parts = urlparse(url).path.strip("/").split("/")
-        return parts[-1] if parts else ""
+        """Extract note ID from URL path, ignoring query params."""
+        path = urlparse(url).path.strip("/").split("/")
+        return path[-1] if path else ""
 
-    def _find_token(self, note_id: str) -> Optional[str]:
-        """Try to find xsec_token for a note from feeds."""
+    def _extract_token_from_url(self, url: str) -> Optional[str]:
+        """Extract xsec_token from URL query parameter if present."""
+        qs = parse_qs(urlparse(url).query)
+        tokens = qs.get("xsec_token", [])
+        return tokens[0] if tokens else None
+
+    def _find_token_in_feeds(self, note_id: str) -> Optional[str]:
+        """Try to find xsec_token for a note from homepage feeds."""
         try:
             out = self._call("xiaohongshu.list_feeds()", timeout=15)
             data = json.loads(out)
             for feed in data.get("feeds", []):
                 if feed.get("id") == note_id:
-                    return feed.get("xsecToken", "")
+                    return feed.get("xsecToken") or None
+        except Exception:
+            pass
+        return None
+
+    def _find_token_by_search(self, note_id: str) -> Optional[str]:
+        """Search for the note ID to get a fresh xsec_token."""
+        try:
+            out = self._call(
+                f'xiaohongshu.search_feeds(keyword: "{note_id}")', timeout=20
+            )
+            data = json.loads(out)
+            for feed in data.get("feeds", []):
+                if feed.get("id") == note_id:
+                    return feed.get("xsecToken") or None
+            # If exact match not found but results exist, try the first one
+            # (search by note_id sometimes returns the note with a different key)
         except Exception:
             pass
         return None
@@ -170,3 +199,46 @@ class XiaoHongShuChannel(Channel):
             if line and not line.startswith(("{", "[", "#", "http")):
                 return line[:80]
         return ""
+
+    async def _read_jina(self, url: str) -> ReadResult:
+        """Fallback: read XHS note via Jina Reader when xsec_token unavailable."""
+        import requests
+        try:
+            resp = requests.get(
+                f"https://r.jina.ai/{url}",
+                headers={"Accept": "text/markdown"},
+                timeout=15,
+            )
+            resp.raise_for_status()
+            text = resp.text
+            if len(text.strip()) < 50 or "登录" in text[:200]:
+                return ReadResult(
+                    title="XiaoHongShu",
+                    content=(
+                        f"⚠️ 无法获取笔记详情: {url}\n\n"
+                        "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n"
+                        "请尝试先搜索相关关键词,再从结果中读取。"
+                    ),
+                    url=url, platform="xiaohongshu",
+                )
+            title = ""
+            for line in text.split("\n"):
+                line = line.strip()
+                if line and not line.startswith(("#", "http", "![", "[")):
+                    title = line[:80]
+                    break
+            return ReadResult(
+                title=title or "XiaoHongShu",
+                content=text.strip(),
+                url=url, platform="xiaohongshu",
+            )
+        except Exception:
+            return ReadResult(
+                title="XiaoHongShu",
+                content=(
+                    f"⚠️ 无法获取笔记详情: {url}\n\n"
+                    "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n"
+                    "请尝试先搜索相关关键词,再从结果中读取。"
+                ),
+                url=url, platform="xiaohongshu",
+            )