Fix unreliable HTML parsing
All checks were successful
Build and Publish / build_and_publish (push) Successful in 5m17s
All checks were successful
Build and Publish / build_and_publish (push) Successful in 5m17s
This commit is contained in:
parent
d856c10260
commit
c618070718
1 changed file with 3 additions and 2 deletions
|
@@ -25,7 +25,7 @@ class RSSItem:
|
|||
|
||||
class OnionFeed(commands.Cog):
|
||||
SOURCE = "https://www.theonion.com/rss"
|
||||
EPOCH = datetime.datetime(2024, 7, 7, tzinfo=datetime.timezone.utc)
|
||||
EPOCH = datetime.datetime(2024, 7, 1, tzinfo=datetime.timezone.utc)
|
||||
|
||||
def __init__(self, bot):
|
||||
self.bot: commands.Bot = bot
|
||||
|
@@ -38,6 +38,7 @@ class OnionFeed(commands.Cog):
|
|||
|
||||
@staticmethod
|
||||
def parse_item(item: BeautifulSoup) -> RSSItem:
|
||||
description = BeautifulSoup(item.description.get_text(), "html.parser").p.get_text(strip=True).strip()[:-1]
|
||||
kwargs = {
|
||||
"title": item.title.get_text(strip=True).strip(),
|
||||
"link": item.link.get_text(strip=True).strip(),
|
||||
|
@@ -45,7 +46,7 @@ class OnionFeed(commands.Cog):
|
|||
item.pubDate.get_text(strip=True).strip(), "%a, %d %b %Y %H:%M:%S %Z"
|
||||
),
|
||||
"guid": item.guid.get_text(strip=True).strip(),
|
||||
"description": BeautifulSoup(item.description.get_text()).p.get_text(strip=True).strip()[:-1],
|
||||
"description": description,
|
||||
"thumbnail": item.find("media:thumbnail")["url"],
|
||||
}
|
||||
return RSSItem(**kwargs)
|
||||
|
|
Loading…
Reference in a new issue