feat: add WeChat search via miku_ai (sogou) (#68)
- wechat channel now supports both search and read
- Search: miku_ai (sogou weixin search) → article list with URLs
- Read: camoufox (stealth Firefox) → full Markdown content
- Doctor shows granular status (search only / read only / both)
- SKILL.md updated with search + read workflow examples

Co-authored-by: Panniantong <panniantong@users.noreply.github.com>
This commit is contained in:
parent
4f4ad99d49
commit
476935be92
2 changed files with 54 additions and 12 deletions
|
|
@ -1,5 +1,9 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""WeChat Official Account articles — check if wechat-article-for-ai is available."""
|
||||
"""WeChat Official Account articles — read and search.
|
||||
|
||||
Read: wechat-article-for-ai (Camoufox stealth browser)
|
||||
Search: miku_ai (Sogou WeChat search)
|
||||
"""
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
|
|
@ -9,7 +13,7 @@ from .base import Channel
|
|||
class WeChatChannel(Channel):
|
||||
name = "wechat"
|
||||
description = "微信公众号文章"
|
||||
backends = ["wechat-article-for-ai (Camoufox)"]
|
||||
backends = ["wechat-article-for-ai (Camoufox)", "miku_ai (搜狗搜索)"]
|
||||
tier = 2
|
||||
|
||||
def can_handle(self, url: str) -> bool:
|
||||
|
|
@ -18,15 +22,36 @@ class WeChatChannel(Channel):
|
|||
return "mp.weixin.qq.com" in d or "weixin.qq.com" in d
|
||||
|
||||
def check(self, config=None):
|
||||
has_read = False
|
||||
has_search = False
|
||||
|
||||
try:
|
||||
import camoufox # noqa: F401
|
||||
has_read = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import miku_ai # noqa: F401
|
||||
has_search = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
if has_read and has_search:
|
||||
return "ok", "完整可用(搜索 + 阅读公众号文章)"
|
||||
elif has_read:
|
||||
return "ok", "可阅读公众号文章(URL → Markdown)。安装 miku_ai 可解锁搜索:pip install miku_ai"
|
||||
elif has_search:
|
||||
return "warn", (
|
||||
"可搜索公众号文章但无法阅读全文。安装阅读工具:\n"
|
||||
" pip install camoufox[geoip] markdownify beautifulsoup4 httpx mcp"
|
||||
)
|
||||
else:
|
||||
return "off", (
|
||||
"需要 wechat-article-for-ai。安装:\n"
|
||||
"需要安装微信公众号工具:\n"
|
||||
" # 阅读(URL → Markdown):\n"
|
||||
" pip install camoufox[geoip] markdownify beautifulsoup4 httpx mcp\n"
|
||||
" # 或完整安装:\n"
|
||||
" git clone https://github.com/bzd6661/wechat-article-for-ai.git\n"
|
||||
" cd wechat-article-for-ai && pip install -r requirements.txt\n"
|
||||
" # 搜索(关键词 → 文章列表):\n"
|
||||
" pip install miku_ai\n"
|
||||
" 详见 https://github.com/bzd6661/wechat-article-for-ai"
|
||||
)
|
||||
return "ok", "可读取微信公众号文章(URL → Markdown)"
|
||||
|
|
|
|||
|
|
@ -253,21 +253,38 @@ mcporter call 'bosszhipin.get_job_detail_tool(job_url: "https://www.zhipin.com/j
|
|||
|
||||
Fallback: `curl -s "https://r.jina.ai/https://www.zhipin.com/job_detail/xxx"`
|
||||
|
||||
### 微信公众号 (wechat-article-for-ai)
|
||||
### 微信公众号 (wechat-article-for-ai + miku_ai)
|
||||
|
||||
Uses Camoufox (stealth Firefox) to bypass WeChat's anti-bot detection and extract full article content.
|
||||
**Search** (miku_ai — Sogou WeChat search):
|
||||
|
||||
```bash
|
||||
# Search WeChat articles by keyword
|
||||
python3 -c "
|
||||
import asyncio
|
||||
from miku_ai import get_wexin_article
|
||||
|
||||
async def search():
|
||||
articles = await get_wexin_article('AI Agent', 5)
|
||||
for a in articles:
|
||||
print(f'{a[\"title\"]} | {a[\"source\"]} | {a[\"date\"]}')
|
||||
print(f' {a[\"url\"]}')
|
||||
|
||||
asyncio.run(search())
|
||||
"
|
||||
```
|
||||
|
||||
**Read** (Camoufox — stealth Firefox, bypasses WeChat anti-bot):
|
||||
|
||||
```bash
|
||||
# Read a WeChat article (returns Markdown with images)
|
||||
cd /path/to/wechat-article-for-ai && python3 main.py "https://mp.weixin.qq.com/s/ARTICLE_ID"
|
||||
|
||||
# Batch convert multiple articles
|
||||
python3 main.py "URL1" "URL2" "URL3" -o ./output
|
||||
|
||||
# Run as MCP server (for AI agent integration)
|
||||
python3 mcp_server.py
|
||||
```
|
||||
|
||||
Typical agent workflow: search → get URLs → immediately read full content.
|
||||
|
||||
Note: WeChat articles require a real browser to render. Jina Reader and curl cannot read them.
|
||||
|
||||
### RSS (feedparser)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue