# Source: Agent-Reach/x_reader/fetchers/rss.py (47 lines, 1.3 KiB, Python)

# -*- coding: utf-8 -*-
"""RSS feed fetcher — uses feedparser."""
import feedparser
from loguru import logger
from typing import Dict, Any, List
async def fetch_rss(url: str, limit: int = 20) -> List[Dict[str, Any]]:
    """
    Fetch and parse an RSS/Atom feed.

    Args:
        url: RSS feed URL
        limit: Max number of entries to return. Values <= 0 yield an empty
            list without fetching the feed.

    Returns:
        List of article dicts with: title, summary, url, source, published,
        platform (always "rss").

    Raises:
        ValueError: If feedparser flags the feed as malformed (``bozo``) and
            no entries could be recovered from it.
    """
    # Local import: the module header does not import asyncio.
    import asyncio

    # A non-positive cap means "no entries"; without this guard a negative
    # limit would slice entries[:-n] and return all but the last n items.
    if limit <= 0:
        return []

    logger.info(f"Fetching RSS: {url}")

    # feedparser.parse() performs blocking network I/O and parsing; run it in
    # the default executor so this coroutine does not stall the event loop.
    loop = asyncio.get_running_loop()
    feed = await loop.run_in_executor(None, feedparser.parse, url)

    # bozo alone only signals a malformed feed; feedparser often still
    # recovers entries, so fail only when nothing usable came back.
    if feed.bozo and not feed.entries:
        raise ValueError(f"Failed to parse RSS feed: {feed.bozo_exception}")

    # Fall back to the URL when the feed declares no title.
    source_name = feed.feed.get("title", url)

    articles = []
    for entry in feed.entries[:limit]:
        summary = ""
        if hasattr(entry, "summary"):
            summary = entry.summary
        elif hasattr(entry, "content") and entry.content:
            # content is a list of dicts; guard against an empty list before
            # indexing the first element.
            summary = entry.content[0].get("value", "")

        articles.append({
            "title": entry.get("title", ""),
            "summary": summary,
            "url": entry.get("link", ""),
            "source": source_name,
            "published": entry.get("published", ""),
            "platform": "rss",
        })

    logger.info(f"RSS: {len(articles)} articles from {source_name}")
    return articles