# college-bot-v2/cogs/screenshot.py
# (web-view metadata: 334 lines, 14 KiB, Python)

import datetime
import io
import logging
import shutil
import tempfile
import time
import zipfile
from urllib.parse import urlparse
from PIL import Image
import discord
from discord.ext import commands
import asyncio
import aiohttp
from pathlib import Path
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.chrome.service import Service as ChromeService
class ScreenshotCog(commands.Cog):
    """Cog that screenshots web pages using a self-managed headless Chrome.

    The cog keeps ``chrome-headless-shell`` and ``chromedriver`` in a ``chrome``
    directory two levels above this file and downloads or updates them from the
    Chrome for Testing "last known good versions" JSON endpoint.
    """

    # JSON document listing, per release channel, the version and download URLs.
    VERSIONS_URL = (
        "https://googlechromelabs.github.io/chrome-for-testing/"
        "last-known-good-versions-with-downloads.json"
    )
    # Screenshots larger than this many bytes are re-encoded as WebP before upload.
    MAX_UPLOAD_BYTES = 24 * 1024 * 1024
    # Largest window size accepted by the ``resolution`` option (8K).
    MAX_WIDTH = 7680
    MAX_HEIGHT = 4320

    def __init__(self, bot: commands.Bot):
        """Prepare Chrome options and the on-disk locations of the binaries."""
        self.bot = bot
        self.chrome_options = ChromeOptions()
        self.chrome_options.add_argument("--headless")
        self.chrome_options.add_argument("--disable-dev-shm-usage")
        # self.chrome_options.add_argument("--disable-gpu")
        self.chrome_options.add_argument("--disable-extensions")
        self.chrome_options.add_argument("--incognito")
        prefs = {
            # "download.open_pdf_in_system_reader": False,
            # "download.prompt_for_download": True,
            # "download.default_directory": "/dev/null",
            # "plugins.always_open_pdf_externally": False,
            # NOTE(review): presumably mirrors Chrome's DownloadRestrictions
            # policy, where 3 blocks all downloads - confirm.
            "download_restrictions": 3,
        }
        self.chrome_options.add_experimental_option("prefs", prefs)

        self.dir = Path(__file__).parent.parent / "chrome"
        self.dir.mkdir(mode=0o775, exist_ok=True)

        self.chrome_dir = self.dir / "chrome-headless-shell-linux64"
        self.chrome_bin = self.chrome_dir / "chrome-headless-shell"
        self.chromedriver_dir = self.dir / "chromedriver-linux64"
        self.chromedriver_bin = self.chromedriver_dir / "chromedriver"
        self.chrome_options.binary_location = str(self.chrome_bin.resolve())

        self.log = logging.getLogger("jimmy.cogs.screenshot")

    def clear_directories(self):
        """Delete the chrome and chromedriver directories and recreate them empty."""
        shutil.rmtree(self.chrome_dir, ignore_errors=True)
        shutil.rmtree(self.chromedriver_dir, ignore_errors=True)
        self.chrome_dir.mkdir(mode=0o775, exist_ok=True)
        self.chromedriver_dir.mkdir(mode=0o775, exist_ok=True)

    async def _fetch_versions(self, session: aiohttp.ClientSession) -> dict:
        """Fetch and return the decoded versions JSON using *session*."""
        async with session.get(self.VERSIONS_URL) as response:
            versions = await response.json()
        self.log.debug("Got chrome versions: %r", versions)
        return versions

    def _linux64_url(self, downloads: dict, key: str, label: str) -> str:
        """Return the linux64 download URL listed under ``downloads[key]``.

        :raises RuntimeError: if no entry for the ``linux64`` platform exists.
        """
        entry = next((x for x in downloads[key] if x["platform"] == "linux64"), None)
        if entry is None:
            self.log.critical("No %s zip url found for linux64 in %r.", label, downloads[key])
            raise RuntimeError(f"No {label} zip url found for linux64.")
        return entry["url"]

    async def _download_file(self, session: aiohttp.ClientSession, url: str, target: Path, label: str) -> None:
        """Stream *url* into *target*, overwriting any existing file."""
        with target.open("wb") as file:
            async with session.get(url) as response:
                async for data in response.content.iter_any():
                    self.log.debug("Read %d bytes from %s zip.", len(data), label)
                    file.write(data)

    @staticmethod
    def _extract_zip(archive: Path, destination: Path) -> None:
        """Extract every member of *archive* into *destination* (blocking)."""
        with zipfile.ZipFile(archive) as zip_file:
            zip_file.extractall(destination)

    async def download_latest_chrome(self, current: str = None, *, channel: str = "Stable"):
        """Download and install the latest Chrome and chromedriver for *channel*.

        :param current: Version considered installed; if it equals the latest
            version for *channel*, nothing is downloaded.
        :param channel: Key into the ``channels`` mapping of the versions JSON.
        :raises RuntimeError: if no linux64 download is listed for a binary.
        :raises KeyError: if *channel* is not present in the versions JSON.
        """
        async with aiohttp.ClientSession(raise_for_status=True) as session:
            versions = await self._fetch_versions(session)
            release = versions["channels"][channel]
            downloads = release["downloads"]
            version = release["version"]
            if version == current:
                self.log.debug("Chrome is up to date (%s == %s)", version, current)
                return

            # Resolve both URLs up front so a missing download is reported
            # before the working installation is touched.
            chrome_zip_url = self._linux64_url(downloads, "chrome-headless-shell", "chrome")
            chromedriver_zip_url = self._linux64_url(downloads, "chromedriver", "chromedriver")

            chrome_target = self.dir / f"chrome-download-{version}.zip"
            chromedriver_target = self.dir / f"chromedriver-download-{version}.zip"
            try:
                self.log.debug("Downloading chrome...")
                self.log.debug("Chrome zip url: %s", chrome_zip_url)
                await self._download_file(session, chrome_zip_url, chrome_target, "chrome")

                self.log.debug("Downloading chromedriver...")
                self.log.debug("Chromedriver zip url: %s", chromedriver_zip_url)
                await self._download_file(session, chromedriver_zip_url, chromedriver_target, "chromedriver")

                # Only wipe the old install once both archives are on disk.
                await asyncio.to_thread(self.clear_directories)

                self.log.debug("Extracting chrome...")
                await asyncio.to_thread(self._extract_zip, chrome_target, self.dir)
                self.log.debug("Finished extracting chrome.")

                self.log.debug("Extracting chromedriver...")
                await asyncio.to_thread(self._extract_zip, chromedriver_target, self.dir)
                self.log.debug("Finished extracting chromedriver.")
            finally:
                # The archives are only needed for extraction; do not let them
                # pile up (one pair per version) next to the binaries.
                chrome_target.unlink(missing_ok=True)
                chromedriver_target.unlink(missing_ok=True)

        self.log.debug("Making binaries executable.")
        await asyncio.to_thread(self.chrome_bin.chmod, 0o775)
        await asyncio.to_thread(self.chromedriver_bin.chmod, 0o775)
        self.log.debug("Finished making binaries executable.")

    async def get_version(self, full: bool = False) -> str:
        """Return the version reported by ``chromedriver --version``.

        :param full: Return the whole stripped output instead of only its
            second whitespace-separated field.
        :raises RuntimeError: if the process exits non-zero or prints output
            without a second field.
        """
        proc = await asyncio.create_subprocess_exec(
            str(self.chromedriver_bin),
            "--version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(f"Error getting chromedriver version: {stderr.decode()}")
        output = stdout.decode().strip()
        if full:
            return output
        fields = output.split()
        if len(fields) < 2:
            raise RuntimeError(f"Unexpected chromedriver version output: {output!r}")
        return fields[1]

    async def is_up_to_date(self, channel: str = "Stable"):
        """Return True if the installed chromedriver matches the latest *channel* version.

        Returns False when the installed version cannot be determined.

        :raises KeyError: if *channel* is not present in the versions JSON.
        """
        try:
            current = await self.get_version(False)
        except (RuntimeError, OSError):
            # Missing or non-executable binary, or a failing chromedriver.
            return False
        async with aiohttp.ClientSession(raise_for_status=True) as session:
            versions = await self._fetch_versions(session)
        version = versions["channels"][channel]["version"]
        if version == current:
            self.log.debug("Chrome is up to date (%s == %s)", version, current)
            return True
        return False

    # Prefix command: force a (re-)download of Chrome for the given channel and
    # report whether the binaries are present afterwards.
    @commands.command(name="update-chrome")
    async def update_chrome(self, ctx: commands.Context, channel: str = "Stable"):
        channel = channel.title()
        try:
            up_to_date = await self.is_up_to_date(channel)
        except KeyError:
            return await ctx.reply(f"\N{cross mark} Unknown chrome channel: {channel}")
        if up_to_date:
            await ctx.reply("Chrome is already up to date. Updating anyway.")

        async with ctx.channel.typing():
            try:
                # ``channel`` is keyword-only; passing it positionally would
                # bind it to ``current`` and silently download "Stable".
                await self.download_latest_chrome(channel=channel)
            except KeyError:
                return await ctx.reply(f"\N{cross mark} Unknown chrome channel: {channel}")
            except (RuntimeError, aiohttp.ClientError) as e:
                return await ctx.reply(f"\N{cross mark} Error downloading chrome: {e}")

            chrome_okay = self.chrome_bin.exists() and self.chrome_bin.is_file()
            chromedriver_okay = self.chromedriver_bin.exists() and self.chromedriver_bin.is_file()
            try:
                chromedriver_version = await self.get_version(True)
            except (RuntimeError, OSError) as e:
                chromedriver_version = str(e)

        return await ctx.reply(
            f"\N{white heavy check mark} Done.\n"
            f"CHANNEL: {channel}\n"
            f"CHROME OKAY: {chrome_okay}\n"
            f"CHROMEDRIVER OKAY: {chromedriver_okay}\n"
            f"CHROMEDRIVER VERSION: {chromedriver_version}"
        )

    def compress_png(self, input_file: io.BytesIO) -> io.BytesIO:
        """Re-encode an image as WebP until it fits under ``MAX_UPLOAD_BYTES``.

        Quality starts at 99 and then steps through 85, 70, ... down to 10.

        :param input_file: Buffer holding the source image.
        :returns: Buffer positioned at 0 containing the WebP data.
        :raises RuntimeError: if even the lowest quality is too large.
        """
        img = Image.open(input_file)
        img = img.convert("RGB")
        for quality in range(100, 0, -15):
            # The first attempt is capped at 99 rather than 100 (kept as-is).
            quality_r = min(quality, 99)
            self.log.debug("Compressing image with quality %d%%", quality_r)
            value = io.BytesIO()
            img.save(value, "webp", quality=quality_r)
            if len(value.getvalue()) <= self.MAX_UPLOAD_BYTES:
                self.log.debug("%d%% was sufficient.", quality_r)
                value.seek(0)
                return value
        raise RuntimeError("Couldn't compress image.")

    @classmethod
    def _parse_resolution(cls, resolution: str) -> "tuple[int, int]":
        """Parse ``"<width>x<height>"`` into a validated ``(width, height)`` pair.

        :raises ValueError: with a user-facing message if the text is malformed,
            non-positive, or larger than ``MAX_WIDTH`` x ``MAX_HEIGHT``.
        """
        malformed = "Invalid resolution. please provide width x height, e.g. 1920x1080"
        try:
            width, height = map(int, resolution.lower().split("x"))
        except ValueError:
            raise ValueError(malformed) from None
        if width < 1 or height < 1:
            raise ValueError(malformed)
        if height > cls.MAX_HEIGHT or width > cls.MAX_WIDTH:
            raise ValueError("Invalid resolution. Max resolution is 7680x4320 (8K).")
        return width, height

    # NOTE: the docstring below is left untouched on purpose; the command
    # framework may use it as the user-visible command description.
    #
    # Flow: validate input -> make sure Chrome is installed/current -> start a
    # driver -> load the page -> wait (implicit wait when ``eager``, otherwise a
    # fixed sleep of ``render_timeout`` seconds) -> screenshot -> quit the driver
    # -> compress if too large -> reply with an embed listing the timings.
    @commands.slash_command()
    async def screenshot(
        self,
        ctx: discord.ApplicationContext,
        url: str,
        load_timeout: int = 10,
        render_timeout: int = None,
        eager: bool = None,
        resolution: str = "1920x1080"
    ):
        """Screenshots a webpage."""
        await ctx.defer()
        # ``eager`` defaults to True unless an explicit render timeout was given.
        if eager is None:
            eager = render_timeout is None
        if render_timeout is None:
            render_timeout = 30 if eager else 10
        # Require a real scheme prefix so hosts such as "httpbin.org" still get one.
        if not url.lower().startswith(("http://", "https://")):
            url = "https://" + url
        parsed = urlparse(url)

        # Validate before launching Chrome so a bad value cannot leak a browser.
        window_size = None
        if resolution:
            try:
                window_size = self._parse_resolution(resolution)
            except ValueError as e:
                return await ctx.respond(str(e))

        await ctx.respond("Initialising...")
        if not all(map(lambda x: x.exists() and x.is_file(), (self.chrome_bin, self.chromedriver_bin))):
            await ctx.edit(content="Chrome is not installed, downloading. This may take a minute.")
            await self.download_latest_chrome()
            await ctx.edit(content="Initialising...")
        elif not await self.is_up_to_date():
            await ctx.edit(content="Updating chrome. This may take a minute.")
            await self.download_latest_chrome()
            await ctx.edit(content="Initialising...")

        driver = None
        # Equal dummy values make a skipped phase report as 0s.
        start_cleanup = end_cleanup = 1
        try:
            start_init = time.time()
            service = await asyncio.to_thread(ChromeService, str(self.chromedriver_bin))
            driver = await asyncio.to_thread(
                webdriver.Chrome,
                service=service,
                options=self.chrome_options
            )
            await asyncio.to_thread(driver.set_page_load_timeout, load_timeout)
            if window_size is not None:
                await asyncio.to_thread(driver.set_window_size, *window_size)
            if eager:
                await asyncio.to_thread(driver.implicitly_wait, render_timeout)
            end_init = time.time()

            await ctx.edit(content=("Loading webpage..." if not eager else "Loading & screenshotting webpage..."))
            start_request = time.time()
            await asyncio.to_thread(driver.get, url)
            end_request = time.time()

            if not eager:
                now = discord.utils.utcnow()
                expires = now + datetime.timedelta(seconds=render_timeout)
                await ctx.edit(content=f"Rendering (expires {discord.utils.format_dt(expires, 'R')})...")
                start_wait = time.time()
                await asyncio.sleep(render_timeout)
                end_wait = time.time()
            else:
                start_wait = end_wait = 1

            await ctx.edit(content="Saving screenshot...")
            start_save = time.time()
            ss = await asyncio.to_thread(driver.get_screenshot_as_png)
            file = io.BytesIO(ss)
            end_save = time.time()
            await ctx.edit(content="Cleaning up...")
        except WebDriverException as e:
            self.log.warning("Failed to screenshot %s", url, exc_info=True)
            # Keep the reply well under the message length limit.
            reason = str(e.msg or e)[:1500]
            return await ctx.edit(content=f"\N{cross mark} Failed to screenshot the page: {reason}")
        finally:
            # Always shut the browser down, including on errors.
            if driver is not None:
                start_cleanup = time.time()
                await asyncio.to_thread(driver.quit)
                end_cleanup = time.time()

        if len(ss) > self.MAX_UPLOAD_BYTES:
            start_compress = time.time()
            try:
                file = await asyncio.to_thread(self.compress_png, file)
            except RuntimeError as e:
                return await ctx.edit(content=f"\N{cross mark} {e}")
            fn = "screenshot.webp"
            end_compress = time.time()
        else:
            fn = "screenshot.png"
            start_compress = end_compress = 1

        screenshot_size_mb = round(len(file.getvalue()) / 1024 / 1024, 2)

        def seconds(start: float, end: float) -> float:
            return round(end - start, 2)

        embed = discord.Embed(
            title=f"Screenshot of {parsed.hostname}",
            description=f"Init time: {seconds(start_init, end_init)}s\n"
                        f"Request time: {seconds(start_request, end_request)}s\n"
                        f"Wait time: {seconds(start_wait, end_wait)}s\n"
                        f"Save time: {seconds(start_save, end_save)}s\n"
                        f"Compress time: {seconds(start_compress, end_compress)}s\n"
                        f"Cleanup time: {seconds(start_cleanup, end_cleanup)}s\n"
                        f"Screenshot size: {screenshot_size_mb}MB\n",
            colour=discord.Colour.dark_theme(),
            timestamp=discord.utils.utcnow()
        )
        embed.set_image(url="attachment://" + fn)
        return await ctx.edit(content=None, embed=embed, file=discord.File(file, filename=fn))
def setup(bot):
    """Extension entry point: create the screenshot cog and add it to *bot*."""
    cog = ScreenshotCog(bot)
    bot.add_cog(cog)