47 lines
1.3 KiB
Python
47 lines
1.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""RSS feed fetcher — uses feedparser."""
|
|
|
|
import asyncio
from typing import Any, Dict, List

import feedparser
from loguru import logger
|
|
|
|
|
|
def _entry_summary(entry: Any) -> str:
    """Return the entry's summary, falling back to its first content item."""
    if hasattr(entry, "summary"):
        return entry.summary
    # Check the content list is non-empty before indexing: an entry may
    # carry a `content` attribute that holds no items.
    if hasattr(entry, "content") and entry.content:
        return entry.content[0].get("value", "")
    return ""


async def fetch_rss(url: str, limit: int = 20) -> List[Dict[str, Any]]:
    """
    Fetch and parse an RSS/Atom feed.

    Args:
        url: RSS feed URL
        limit: Max number of entries to return

    Returns:
        List of article dicts with the keys: title, summary, url, source,
        published, platform. ``source`` is the feed title (or ``url`` when
        the feed has no title) and ``platform`` is always ``"rss"``.

    Raises:
        ValueError: If the parser flags the feed as malformed (``bozo``)
            and no entries could be extracted from it.
    """
    logger.info(f"Fetching RSS: {url}")

    # feedparser.parse is a synchronous call; run it in the default executor
    # so this coroutine does not stall the event loop while it completes.
    loop = asyncio.get_running_loop()
    feed = await loop.run_in_executor(None, feedparser.parse, url)

    # A bozo feed that still produced entries is accepted on a best-effort
    # basis; only a bozo feed with nothing usable is treated as a failure.
    if feed.bozo and not feed.entries:
        raise ValueError(f"Failed to parse RSS feed: {feed.bozo_exception}")

    source_name = feed.feed.get("title", url)
    articles = [
        {
            "title": entry.get("title", ""),
            "summary": _entry_summary(entry),
            "url": entry.get("link", ""),
            "source": source_name,
            "published": entry.get("published", ""),
            "platform": "rss",
        }
        for entry in feed.entries[:limit]
    ]

    logger.info(f"RSS: {len(articles)} articles from {source_name}")
    return articles
|