fix(xiaohongshu): 修复 xsec_token 丢失导致无法读取笔记详情 (#17)
问题:搜索结果包含 xsecToken 但被丢弃,read() 只从首页 feed 查找 token,几乎不可能匹配到目标笔记。

修复:
1. search() 返回的 URL 携带 xsec_token 查询参数
2. read() 三级 token 查找:URL 参数 → 首页 feed → 搜索反查
3. 全部失败时回退到 Jina Reader

Fixes #15

Co-authored-by: Panniantong <panniantong@users.noreply.github.com>
This commit is contained in:
parent
68319ec683
commit
1cbf6a7b9c
1 changed files with 91 additions and 19 deletions
|
|
@ -8,7 +8,7 @@ Requires: mcporter CLI + xiaohongshu MCP server running
|
|||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlparse, parse_qs, urlencode
|
||||
from .base import Channel, ReadResult, SearchResult
|
||||
from typing import List, Optional
|
||||
|
||||
|
|
@ -93,21 +93,22 @@ class XiaoHongShuChannel(Channel):
|
|||
url=url, platform="xiaohongshu",
|
||||
)
|
||||
|
||||
# Step 1: get xsec_token from feeds
|
||||
xsec_token = self._find_token(note_id)
|
||||
# Step 1: try xsec_token from URL query param (e.g. from search results)
|
||||
xsec_token = self._extract_token_from_url(url)
|
||||
|
||||
# Step 2: try homepage feeds
|
||||
if not xsec_token:
|
||||
return ReadResult(
|
||||
title="XiaoHongShu",
|
||||
content=(
|
||||
f"⚠️ 无法获取笔记 {note_id} 的访问令牌。\n"
|
||||
"小红书需要 xsec_token 才能读取笔记详情。\n"
|
||||
"请先通过搜索找到这篇笔记,或直接使用搜索功能。"
|
||||
),
|
||||
url=url, platform="xiaohongshu",
|
||||
)
|
||||
xsec_token = self._find_token_in_feeds(note_id)
|
||||
|
||||
# Step 2: get detail
|
||||
# Step 3: search for the note to get a fresh token
|
||||
if not xsec_token:
|
||||
xsec_token = self._find_token_by_search(note_id)
|
||||
|
||||
# If no token found, fallback to Jina Reader
|
||||
if not xsec_token:
|
||||
return await self._read_jina(url)
|
||||
|
||||
# Get detail via MCP
|
||||
out = self._call(
|
||||
f'xiaohongshu.get_feed_detail(feed_id: "{note_id}", xsec_token: "{xsec_token}")',
|
||||
timeout=15,
|
||||
|
|
@ -136,9 +137,14 @@ class XiaoHongShuChannel(Channel):
|
|||
card = item.get("noteCard", {})
|
||||
user = card.get("user", {})
|
||||
interact = card.get("interactInfo", {})
|
||||
note_id = item.get("id", "")
|
||||
xsec_token = item.get("xsecToken", "")
|
||||
note_url = f"https://www.xiaohongshu.com/explore/{note_id}"
|
||||
if xsec_token:
|
||||
note_url += f"?xsec_token={xsec_token}"
|
||||
results.append(SearchResult(
|
||||
title=card.get("displayTitle", ""),
|
||||
url=f"https://www.xiaohongshu.com/explore/{item.get('id', '')}",
|
||||
url=note_url,
|
||||
snippet=f"👤 {user.get('nickname', '')} · ❤ {interact.get('likedCount', '0')}",
|
||||
score=0,
|
||||
))
|
||||
|
|
@ -149,17 +155,40 @@ class XiaoHongShuChannel(Channel):
|
|||
# ── Helpers ──
|
||||
|
||||
def _extract_note_id(self, url: str) -> str:
    """Return the note ID, i.e. the last segment of the URL path.

    Only the path component is inspected, so query parameters (such as
    ``xsec_token``) and fragments never leak into the ID. Leading and
    trailing slashes are stripped first.

    Args:
        url: Note URL to inspect.

    Returns:
        The final path segment, or ``""`` when the URL has no path.
    """
    # rpartition yields the whole string when there is no "/", and "" for an
    # empty path, so no separate empty-list fallback is needed.
    return urlparse(url).path.strip("/").rpartition("/")[2]
|
||||
|
||||
def _find_token(self, note_id: str) -> Optional[str]:
|
||||
"""Try to find xsec_token for a note from feeds."""
|
||||
def _extract_token_from_url(self, url: str) -> Optional[str]:
    """Return the ``xsec_token`` query parameter of ``url``, if present.

    Args:
        url: Note URL, possibly carrying ``?xsec_token=...``.

    Returns:
        The first ``xsec_token`` value, or ``None`` when the parameter is
        absent or blank (``parse_qs`` drops blank values by default).
    """
    tokens = parse_qs(urlparse(url).query).get("xsec_token")
    return tokens[0] if tokens else None
|
||||
|
||||
def _find_token_in_feeds(self, note_id: str) -> Optional[str]:
    """Look up the xsec_token for ``note_id`` in the homepage feed list.

    Calls ``xiaohongshu.list_feeds()`` through ``self._call``, parses the
    output as JSON and scans the ``feeds`` array for an entry whose ``id``
    equals ``note_id``.

    Args:
        note_id: ID of the note whose token is wanted.

    Returns:
        The matching entry's ``xsecToken``, or ``None`` when the note is not
        listed, its token is missing/empty, or the call or parsing fails.
    """
    try:
        data = json.loads(self._call("xiaohongshu.list_feeds()", timeout=15))
        for feed in data.get("feeds", []):
            if feed.get("id") == note_id:
                # Normalise a missing or empty token to None so callers'
                # `if not xsec_token` checks fall through to the next source.
                return feed.get("xsecToken") or None
    except Exception:
        # Deliberately best-effort: any failure here (call error, invalid
        # JSON, unexpected payload shape) simply means "no token found".
        return None
    return None
|
||||
|
||||
def _find_token_by_search(self, note_id: str) -> Optional[str]:
    """Search for ``note_id`` as a keyword to obtain a fresh xsec_token.

    Calls ``xiaohongshu.search_feeds`` through ``self._call`` with the note
    ID as the keyword, parses the output as JSON and scans the ``feeds``
    array for an entry whose ``id`` equals ``note_id``.

    Args:
        note_id: ID of the note whose token is wanted.

    Returns:
        The matching entry's ``xsecToken``, or ``None`` when ``note_id`` is
        empty or unsafe to embed, no exact match is returned, the token is
        missing/empty, or the call or parsing fails.
    """
    # note_id is derived from a caller-supplied URL and is embedded inside a
    # double-quoted literal in the call expression below. Refuse IDs that are
    # empty or could terminate/escape that literal instead of sending a
    # malformed (or attacker-shaped) expression.
    if (
        not note_id
        or '"' in note_id
        or "\\" in note_id
        or not note_id.isprintable()
    ):
        return None

    try:
        out = self._call(
            f'xiaohongshu.search_feeds(keyword: "{note_id}")', timeout=20
        )
        data = json.loads(out)
        for feed in data.get("feeds", []):
            # Only an exact ID match is accepted; no fallback to the first
            # search hit is performed.
            # NOTE(review): presumably a token is only valid for its own
            # note - confirm before ever adding such a fallback.
            if feed.get("id") == note_id:
                return feed.get("xsecToken") or None
    except Exception:
        # Deliberately best-effort: any failure means "no token found".
        return None
    return None
|
||||
|
|
@ -170,3 +199,46 @@ class XiaoHongShuChannel(Channel):
|
|||
if line and not line.startswith(("{", "[", "#", "http")):
|
||||
return line[:80]
|
||||
return ""
|
||||
|
||||
async def _read_jina(self, url: str) -> ReadResult:
    """Fallback: read an XHS note via Jina Reader when no xsec_token is found.

    Requests ``https://r.jina.ai/<url>`` as markdown. The response is treated
    as unusable when the request fails, the body is shorter than 50
    characters after stripping, or "登录" appears in its first 200
    characters; in those cases a result carrying a fixed explanatory message
    is returned instead of raising.

    Args:
        url: Original note URL; it is appended verbatim to the Jina Reader
            endpoint and echoed back in the result.

    Returns:
        A ``ReadResult`` with the fetched markdown as content, titled with
        the first non-empty line (max 80 chars) that does not start with
        ``#``, ``http``, ``![`` or ``[``, falling back to "XiaoHongShu".
    """
    import asyncio
    import functools

    import requests

    def _unavailable() -> ReadResult:
        # Single source for the user-facing failure message.
        return ReadResult(
            title="XiaoHongShu",
            content=(
                f"⚠️ 无法获取笔记详情: {url}\n\n"
                "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n"
                "请尝试先搜索相关关键词,再从结果中读取。"
            ),
            url=url, platform="xiaohongshu",
        )

    try:
        fetch = functools.partial(
            requests.get,
            f"https://r.jina.ai/{url}",
            headers={"Accept": "text/markdown"},
            timeout=15,
        )
        # requests is blocking; run it in the default executor so the event
        # loop is not stalled for up to the full timeout.
        resp = await asyncio.get_running_loop().run_in_executor(None, fetch)
        resp.raise_for_status()
        text = resp.text
    except Exception:
        # Best-effort fallback path: any fetch failure becomes the
        # explanatory result rather than an exception.
        return _unavailable()

    if len(text.strip()) < 50 or "登录" in text[:200]:
        return _unavailable()

    stripped_lines = (line.strip() for line in text.split("\n"))
    title = next(
        (
            line[:80]
            for line in stripped_lines
            if line and not line.startswith(("#", "http", "![", "["))
        ),
        "",
    )
    return ReadResult(
        title=title or "XiaoHongShu",
        content=text.strip(),
        url=url, platform="xiaohongshu",
    )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue