Fix unreliable HTML parsing
All checks were successful
Build and Publish / build_and_publish (push) Successful in 5m17s
All checks were successful
Build and Publish / build_and_publish (push) Successful in 5m17s
This commit is contained in:
parent
d856c10260
commit
c618070718
1 changed file with 3 additions and 2 deletions
|
@@ -25,7 +25,7 @@ class RSSItem:
|
||||||
|
|
||||||
class OnionFeed(commands.Cog):
|
class OnionFeed(commands.Cog):
|
||||||
SOURCE = "https://www.theonion.com/rss"
|
SOURCE = "https://www.theonion.com/rss"
|
||||||
EPOCH = datetime.datetime(2024, 7, 7, tzinfo=datetime.timezone.utc)
|
EPOCH = datetime.datetime(2024, 7, 1, tzinfo=datetime.timezone.utc)
|
||||||
|
|
||||||
def __init__(self, bot):
|
def __init__(self, bot):
|
||||||
self.bot: commands.Bot = bot
|
self.bot: commands.Bot = bot
|
||||||
|
@@ -38,6 +38,7 @@ class OnionFeed(commands.Cog):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse_item(item: BeautifulSoup) -> RSSItem:
|
def parse_item(item: BeautifulSoup) -> RSSItem:
|
||||||
|
description = BeautifulSoup(item.description.get_text(), "html.parser").p.get_text(strip=True).strip()[:-1]
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"title": item.title.get_text(strip=True).strip(),
|
"title": item.title.get_text(strip=True).strip(),
|
||||||
"link": item.link.get_text(strip=True).strip(),
|
"link": item.link.get_text(strip=True).strip(),
|
||||||
|
@@ -45,7 +46,7 @@ class OnionFeed(commands.Cog):
|
||||||
item.pubDate.get_text(strip=True).strip(), "%a, %d %b %Y %H:%M:%S %Z"
|
item.pubDate.get_text(strip=True).strip(), "%a, %d %b %Y %H:%M:%S %Z"
|
||||||
),
|
),
|
||||||
"guid": item.guid.get_text(strip=True).strip(),
|
"guid": item.guid.get_text(strip=True).strip(),
|
||||||
"description": BeautifulSoup(item.description.get_text()).p.get_text(strip=True).strip()[:-1],
|
"description": description,
|
||||||
"thumbnail": item.find("media:thumbnail")["url"],
|
"thumbnail": item.find("media:thumbnail")["url"],
|
||||||
}
|
}
|
||||||
return RSSItem(**kwargs)
|
return RSSItem(**kwargs)
|
||||||
|
|
Loading…
Reference in a new issue