fix: improve error handling for Twitter Jina fallback and invalid URL inputs

- Twitter _read_jina now detects unusable X.com responses (JS-required pages)
  and shows a friendly error instead of garbage HTML
- CLI read command now shows user-friendly messages for invalid URLs
  and connection errors instead of raw HTTP exception traces
This commit is contained in:
Panniantong 2026-02-24 12:36:57 +01:00
parent ac64f607e0
commit a5b7b93b1f
2 changed files with 58 additions and 15 deletions

View file

@ -62,21 +62,56 @@ class TwitterChannel(Channel):
)
async def _read_jina(self, url: str) -> ReadResult:
    """Read a tweet through the Jina reader proxy, with a friendly fallback.

    The URL is fetched via ``https://r.jina.ai/<url>`` requesting markdown.
    If the request fails, or the returned text contains one of the known
    X/Twitter placeholder phrases, a ``ReadResult`` carrying a user-facing
    hint is returned instead of the page text.

    Args:
        url: The tweet URL to read.

    Returns:
        A ``ReadResult`` with ``platform="twitter"``. On success the title
        is the first 100 characters of the fetched text (or ``url`` when
        the text is empty) and the content is the full text.
    """

    def _unreadable() -> ReadResult:
        # Single source for the fallback shown on both failure paths.
        return ReadResult(
            title="Twitter/X",
            content="⚠️ Could not read this tweet.\n"
            "The tweet may have been deleted, or the account is private.\n\n"
            "Tips:\n"
            "- Make sure the URL is correct\n"
            "- Try: birdx read <url> (if birdx is installed)\n"
            "- For protected tweets, configure Twitter cookies: "
            "agent-reach configure twitter-cookies AUTH_TOKEN CT0",
            url=url,
            platform="twitter",
        )

    # Phrases that mark a response as an X/Twitter placeholder page
    # rather than tweet content. Matching is case-sensitive.
    unusable_indicators = (
        "this page doesn't exist",
        "Don't miss what's happening",
        "Something went wrong. Try reloading",
        "Log in</a>",
    )

    try:
        # NOTE(review): requests.get is a blocking call inside a coroutine;
        # confirm this is acceptable for the callers' event loop.
        resp = requests.get(
            f"https://r.jina.ai/{url}",
            headers={"Accept": "text/markdown"},
            timeout=15,
        )
        resp.raise_for_status()
        text = resp.text
    except Exception:
        # Deliberate best-effort: any failure while fetching becomes the
        # friendly fallback instead of propagating to the caller.
        return _unreadable()

    # The placeholder check must run before the normal return, otherwise
    # the placeholder page would be handed back as if it were the tweet.
    if any(indicator in text for indicator in unusable_indicators):
        return _unreadable()

    title = text[:100] if text else url
    return ReadResult(
        title=title,
        content=text,
        url=url,
        platform="twitter",
    )
async def search(self, query: str, config=None, **kwargs) -> List[SearchResult]:
limit = kwargs.get("limit", 10)

View file

@ -532,7 +532,15 @@ async def _cmd_read(args):
print(f"👤 {result['author']}")
print(f"\n{result.get('content', '')}")
except Exception as e:
print(f"❌ Error: {e}", file=sys.stderr)
error_str = str(e)
if "400" in error_str and "Bad Request" in error_str:
print(f"❌ Invalid URL: {args.url}", file=sys.stderr)
print(" Please provide a valid URL (e.g., https://example.com)", file=sys.stderr)
elif "ConnectionError" in type(e).__name__ or "Timeout" in type(e).__name__:
print(f"❌ Could not connect to: {args.url}", file=sys.stderr)
print(" Check your internet connection or the URL.", file=sys.stderr)
else:
print(f"❌ Error: {e}", file=sys.stderr)
sys.exit(1)