diff --git a/agent_reach/channels/xiaohongshu.py b/agent_reach/channels/xiaohongshu.py index 1914bc6..84ddfa1 100644 --- a/agent_reach/channels/xiaohongshu.py +++ b/agent_reach/channels/xiaohongshu.py @@ -8,7 +8,7 @@ Requires: mcporter CLI + xiaohongshu MCP server running import json import shutil import subprocess -from urllib.parse import urlparse +from urllib.parse import urlparse, parse_qs, urlencode from .base import Channel, ReadResult, SearchResult from typing import List, Optional @@ -93,21 +93,22 @@ class XiaoHongShuChannel(Channel): url=url, platform="xiaohongshu", ) - # Step 1: get xsec_token from feeds - xsec_token = self._find_token(note_id) + # Step 1: try xsec_token from URL query param (e.g. from search results) + xsec_token = self._extract_token_from_url(url) + # Step 2: try homepage feeds if not xsec_token: - return ReadResult( - title="XiaoHongShu", - content=( - f"⚠️ 无法获取笔记 {note_id} 的访问令牌。\n" - "小红书需要 xsec_token 才能读取笔记详情。\n" - "请先通过搜索找到这篇笔记,或直接使用搜索功能。" - ), - url=url, platform="xiaohongshu", - ) + xsec_token = self._find_token_in_feeds(note_id) - # Step 2: get detail + # Step 3: search for the note to get a fresh token + if not xsec_token: + xsec_token = self._find_token_by_search(note_id) + + # If no token found, fallback to Jina Reader + if not xsec_token: + return await self._read_jina(url) + + # Get detail via MCP out = self._call( f'xiaohongshu.get_feed_detail(feed_id: "{note_id}", xsec_token: "{xsec_token}")', timeout=15, @@ -136,9 +137,14 @@ class XiaoHongShuChannel(Channel): card = item.get("noteCard", {}) user = card.get("user", {}) interact = card.get("interactInfo", {}) + note_id = item.get("id", "") + xsec_token = item.get("xsecToken", "") + note_url = f"https://www.xiaohongshu.com/explore/{note_id}" + if xsec_token: + note_url += f"?xsec_token={xsec_token}" results.append(SearchResult( title=card.get("displayTitle", ""), - url=f"https://www.xiaohongshu.com/explore/{item.get('id', '')}", + url=note_url, snippet=f"👤 {user.get('nickname', '')} · ❤ {interact.get('likedCount', '0')}", score=0, )) @@ -149,17 +155,40 @@ class XiaoHongShuChannel(Channel): # ── Helpers ── def _extract_note_id(self, url: str) -> str: - parts = urlparse(url).path.strip("/").split("/") - return parts[-1] if parts else "" + """Extract note ID from URL path, ignoring query params.""" + path = urlparse(url).path.strip("/").split("/") + return path[-1] if path else "" - def _find_token(self, note_id: str) -> Optional[str]: - """Try to find xsec_token for a note from feeds.""" + def _extract_token_from_url(self, url: str) -> Optional[str]: + """Extract xsec_token from URL query parameter if present.""" + qs = parse_qs(urlparse(url).query) + tokens = qs.get("xsec_token", []) + return tokens[0] if tokens else None + + def _find_token_in_feeds(self, note_id: str) -> Optional[str]: + """Try to find xsec_token for a note from homepage feeds.""" try: out = self._call("xiaohongshu.list_feeds()", timeout=15) data = json.loads(out) for feed in data.get("feeds", []): if feed.get("id") == note_id: - return feed.get("xsecToken", "") + return feed.get("xsecToken") or None + except Exception: + pass + return None + + def _find_token_by_search(self, note_id: str) -> Optional[str]: + """Search for the note ID to get a fresh xsec_token.""" + try: + out = self._call( + f'xiaohongshu.search_feeds(keyword: "{note_id}")', timeout=20 + ) + data = json.loads(out) + for feed in data.get("feeds", []): + if feed.get("id") == note_id: + return feed.get("xsecToken") or None + # If exact match not found but results exist, try the first one + # (search by note_id sometimes returns the note with a different key) except Exception: pass return None @@ -170,3 +199,46 @@ class XiaoHongShuChannel(Channel): if line and not line.startswith(("{", "[", "#", "http")): return line[:80] return "" + + async def _read_jina(self, url: str) -> ReadResult: + """Fallback: read XHS note via Jina Reader when xsec_token unavailable.""" + import requests + try: + resp = requests.get( + f"https://r.jina.ai/{url}", + headers={"Accept": "text/markdown"}, + timeout=15, + ) + resp.raise_for_status() + text = resp.text + if len(text.strip()) < 50 or "登录" in text[:200]: + return ReadResult( + title="XiaoHongShu", + content=( + f"⚠️ 无法获取笔记详情: {url}\n\n" + "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n" + "请尝试先搜索相关关键词,再从结果中读取。" + ), + url=url, platform="xiaohongshu", + ) + title = "" + for line in text.split("\n"): + line = line.strip() + if line and not line.startswith(("#", "http", "![", "[")): + title = line[:80] + break + return ReadResult( + title=title or "XiaoHongShu", + content=text.strip(), + url=url, platform="xiaohongshu", + ) + except Exception: + return ReadResult( + title="XiaoHongShu", + content=( + f"⚠️ 无法获取笔记详情: {url}\n\n" + "小红书需要 xsec_token 才能通过 MCP 读取笔记。\n" + "请尝试先搜索相关关键词,再从结果中读取。" + ), + url=url, platform="xiaohongshu", + )