Merge branch 'master' of i-am.nexus:nex/college-bot-v2
All checks were successful
Build and Publish / build_and_publish (push) Successful in 5m31s

This commit is contained in:
Nexus 2024-07-10 23:20:26 +01:00
commit d22d439a4f

101
src/cogs/onion_feed.py Normal file
View file

@@ -0,0 +1,101 @@
"""
Scrapes The Onion's RSS feed once every hour and posts any new articles to the target channel.
"""
import asyncio
import dataclasses
import datetime
import email.utils
import logging

import discord
import httpx
import redis
from bs4 import BeautifulSoup
from discord.ext import commands, tasks

from conf import CONFIG
@dataclasses.dataclass
class RSSItem:
    """One article entry taken from the RSS feed.

    Field names follow the child elements of an RSS ``<item>``, which is why
    ``pubDate`` keeps its camelCase spelling.
    """

    # Headline of the article.
    title: str
    # URL of the full article.
    link: str
    # Plain-text summary of the article.
    description: str
    # Publication time of the article.
    pubDate: datetime.datetime
    # Identifier of the entry as given by the feed.
    guid: str
    # URL of the article's thumbnail image.
    thumbnail: str
class OnionFeed(commands.Cog):
    """Cog that polls an RSS feed hourly and posts unseen articles to a channel.

    For every article that is posted, the ID of the resulting Discord message
    is stored in Redis under the key ``onion-<guid>``; articles whose key
    already exists are skipped on later runs.
    """

    SOURCE = "https://www.theonion.com/rss"
    EPOCH = datetime.datetime(2024, 7, 1, tzinfo=datetime.timezone.utc)
    # Guild and text-channel name the articles are posted to.
    GUILD_ID = 994710566612500550
    CHANNEL_NAME = "spam"

    def __init__(self, bot):
        self.bot: commands.Bot = bot
        self.log = logging.getLogger("jimmy.cogs.onion_feed")
        # Build the Redis client before starting the task so the loop body can
        # never run against a partially initialised cog.
        self.redis = redis.Redis(**CONFIG["redis"])
        self.check_onion_feed.start()

    def cog_unload(self) -> None:
        """Stop the background task when the cog is unloaded."""
        self.check_onion_feed.cancel()

    @staticmethod
    def parse_item(item: BeautifulSoup) -> RSSItem:
        """Convert a single ``<item>`` element into an :class:`RSSItem`.

        :param item: the parsed ``<item>`` element.
        :returns: the extracted article data. ``thumbnail`` is ``""`` when the
            item carries no ``media:thumbnail`` URL, and ``pubDate`` is always
            timezone-aware.
        :raises AttributeError: if a required child element is missing.
        :raises ValueError: if the publication date cannot be parsed.
        """
        # The description element holds HTML; use its first paragraph when
        # there is one, otherwise fall back to all of its text.
        description_soup = BeautifulSoup(item.description.get_text(), "html.parser")
        paragraph = description_soup.p or description_soup
        # The final character is dropped because the embed appends its own
        # "..." (presumably the feed text ends in punctuation -- TODO confirm).
        description = paragraph.get_text(strip=True).strip()[:-1]

        # RFC 2822 parsing accepts both zone names ("GMT") and numeric offsets
        # ("+0000") and yields an aware datetime; strptime's %Z does neither.
        published = email.utils.parsedate_to_datetime(item.pubDate.get_text(strip=True).strip())
        if published.tzinfo is None:
            # "-0000" parses as naive; treat it as UTC so the value is never
            # interpreted as local time downstream.
            published = published.replace(tzinfo=datetime.timezone.utc)

        thumbnail_tag = item.find("media:thumbnail")
        thumbnail = thumbnail_tag.get("url", "") if thumbnail_tag is not None else ""

        return RSSItem(
            title=item.title.get_text(strip=True).strip(),
            link=item.link.get_text(strip=True).strip(),
            description=description,
            pubDate=published,
            guid=item.guid.get_text(strip=True).strip(),
            thumbnail=thumbnail,
        )

    def parse_feed(self, text: str) -> list[RSSItem]:
        """Parse the RSS document *text* into a list of :class:`RSSItem`.

        Items that cannot be parsed are logged and skipped so that one
        malformed entry does not discard the rest of the feed.
        """
        soup = BeautifulSoup(text, "xml")
        parsed: list[RSSItem] = []
        for element in soup.find_all("item"):
            try:
                parsed.append(self.parse_item(element))
            except (AttributeError, KeyError, TypeError, ValueError):
                self.log.warning("Skipping malformed onion feed item.", exc_info=True)
        return parsed

    @tasks.loop(hours=1)
    async def check_onion_feed(self):
        """Fetch the feed and post every article that has not been posted yet.

        Expected failures (network, Redis, Discord) are logged and end the
        current run instead of propagating, because an exception escaping the
        loop body would stop the task permanently.
        """
        await self.bot.wait_until_ready()

        guild = self.bot.get_guild(self.GUILD_ID)
        if not guild:
            self.log.error("Nonsense guild not found. Can't do onion feed.")
            return
        channel = discord.utils.get(guild.text_channels, name=self.CHANNEL_NAME)
        if not channel:
            self.log.error("Spam channel not found.")
            return

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(self.SOURCE)
        except httpx.HTTPError:
            self.log.exception("Failed to fetch onion feed.")
            return
        if response.status_code != 200:
            self.log.error(f"Failed to fetch onion feed: {response.status_code}")
            return

        # Parsing is CPU-bound, so it is pushed off the event loop.
        items: list[RSSItem] = await asyncio.to_thread(self.parse_feed, response.text)
        for item in items:
            key = "onion-" + item.guid
            try:
                # The Redis client is synchronous; run it in a worker thread
                # so it cannot block the event loop.
                already_sent = await asyncio.to_thread(self.redis.get, key)
            except redis.RedisError:
                # Without the lookup we cannot tell what was already posted;
                # stop rather than risk re-posting the whole feed.
                self.log.exception("Failed to query redis for onion feed state.")
                return
            if already_sent:
                continue

            embed = discord.Embed(
                title=item.title,
                url=item.link,
                description=item.description + f"... [Read More]({item.link})",
                color=0x00DF78,
                timestamp=item.pubDate,
            )
            if item.thumbnail:
                embed.set_thumbnail(url=item.thumbnail)

            try:
                msg = await channel.send(embed=embed)
            except discord.HTTPException:
                self.log.exception(f"Failed to send onion feed message: {item.title}")
                continue

            try:
                await asyncio.to_thread(self.redis.set, key, str(msg.id))
            except redis.RedisError:
                # The article was posted but could not be recorded; stop so
                # the remaining items are not posted without bookkeeping.
                self.log.exception(f"Failed to record onion feed message: {item.title}")
                return
            self.log.debug(f"Sent onion feed message: {item.title}")

    @check_onion_feed.before_loop
    async def before_check_onion_feed(self):
        """Delay the first iteration until the bot is ready."""
        await self.bot.wait_until_ready()
def setup(bot):
    """Create an :class:`OnionFeed` cog for *bot* and register it on the bot."""
    cog = OnionFeed(bot)
    bot.add_cog(cog)