From c61807071888624db7fa9fd498f4d270140ee879 Mon Sep 17 00:00:00 2001 From: nexy7574 Date: Mon, 8 Jul 2024 01:56:20 +0100 Subject: [PATCH] Fix unreliable HTML parsing --- src/cogs/onion_feed.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cogs/onion_feed.py b/src/cogs/onion_feed.py index 18c6b08..a1e5ca9 100644 --- a/src/cogs/onion_feed.py +++ b/src/cogs/onion_feed.py @@ -25,7 +25,7 @@ class RSSItem: class OnionFeed(commands.Cog): SOURCE = "https://www.theonion.com/rss" - EPOCH = datetime.datetime(2024, 7, 7, tzinfo=datetime.timezone.utc) + EPOCH = datetime.datetime(2024, 7, 1, tzinfo=datetime.timezone.utc) def __init__(self, bot): self.bot: commands.Bot = bot @@ -38,6 +38,7 @@ class OnionFeed(commands.Cog): @staticmethod def parse_item(item: BeautifulSoup) -> RSSItem: + description = BeautifulSoup(item.description.get_text(), "html.parser").p.get_text(strip=True).strip()[:-1] kwargs = { "title": item.title.get_text(strip=True).strip(), "link": item.link.get_text(strip=True).strip(), @@ -45,7 +46,7 @@ class OnionFeed(commands.Cog): item.pubDate.get_text(strip=True).strip(), "%a, %d %b %Y %H:%M:%S %Z" ), "guid": item.guid.get_text(strip=True).strip(), - "description": BeautifulSoup(item.description.get_text()).p.get_text(strip=True).strip()[:-1], + "description": description, "thumbnail": item.find("media:thumbnail")["url"], } return RSSItem(**kwargs)