Fix unreliable HTML parsing
All checks were successful
Build and Publish / build_and_publish (push) Successful in 5m17s

This commit is contained in:
Nexus 2024-07-08 01:56:20 +01:00
parent d856c10260
commit c618070718
Signed by: nex
GPG key ID: 0FA334385D0B689F

View file

@@ -25,7 +25,7 @@ class RSSItem:
class OnionFeed(commands.Cog):
SOURCE = "https://www.theonion.com/rss"
EPOCH = datetime.datetime(2024, 7, 7, tzinfo=datetime.timezone.utc)
EPOCH = datetime.datetime(2024, 7, 1, tzinfo=datetime.timezone.utc)
def __init__(self, bot):
self.bot: commands.Bot = bot
@@ -38,6 +38,7 @@ class OnionFeed(commands.Cog):
@staticmethod
def parse_item(item: BeautifulSoup) -> RSSItem:
description = BeautifulSoup(item.description.get_text(), "html.parser").p.get_text(strip=True).strip()[:-1]
kwargs = {
"title": item.title.get_text(strip=True).strip(),
"link": item.link.get_text(strip=True).strip(),
@@ -45,7 +46,7 @@ class OnionFeed(commands.Cog):
item.pubDate.get_text(strip=True).strip(), "%a, %d %b %Y %H:%M:%S %Z"
),
"guid": item.guid.get_text(strip=True).strip(),
"description": BeautifulSoup(item.description.get_text()).p.get_text(strip=True).strip()[:-1],
"description": description,
"thumbnail": item.find("media:thumbnail")["url"],
}
return RSSItem(**kwargs)