fix: handle curly quotes in Twitter Jina fallback detection
Jina Reader returns typographic (curly) apostrophes (U+2019) instead of ASCII apostrophes (U+0027), so indicator strings that contain an ASCII apostrophe fail to match. Use apostrophe-free substrings for detection instead.
This commit is contained in:
parent
a5b7b93b1f
commit
2d1b8658b6
1 changed file with 3 additions and 3 deletions
|
|
@@ -73,10 +73,10 @@ class TwitterChannel(Channel):
|
|||
|
||||
# Detect unusable Jina responses for X/Twitter (JS-required pages)
|
||||
unusable_indicators = [
|
||||
"this page doesn't exist",
|
||||
"Don't miss what's happening",
|
||||
"page doesn", # "this page doesn't exist" (handles both ASCII ' and curly ’ U+2019)
|
||||
"miss what", # "Don't miss what's happening"
|
||||
"Something went wrong. Try reloading",
|
||||
"Log in</a>",
|
||||
"Log in](", # Markdown link: [Log in](...)
|
||||
]
|
||||
if any(indicator in text for indicator in unusable_indicators):
|
||||
return ReadResult(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue