From e0a276c8efa6971bc0aa320a4faac50b09f94c26 Mon Sep 17 00:00:00 2001 From: nex Date: Fri, 28 Apr 2023 21:31:00 +0100 Subject: [PATCH] Improve yt_dl --- cogs/other.py | 250 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 208 insertions(+), 42 deletions(-) diff --git a/cogs/other.py b/cogs/other.py index f9455ad..73f7036 100644 --- a/cogs/other.py +++ b/cogs/other.py @@ -6,6 +6,7 @@ import random import re import tempfile import textwrap +import gzip from datetime import timedelta from io import BytesIO @@ -23,7 +24,7 @@ from urllib.parse import urlparse import aiohttp import discord import psutil -from discord.ext import commands +from discord.ext import commands, pages from rich.tree import Tree from selenium import webdriver from selenium.common.exceptions import WebDriverException @@ -35,18 +36,98 @@ from selenium.webdriver.firefox.service import Service as FirefoxService from utils import console +try: + from config import proxy +except ImportError: + proxy = None +try: + from config import proxies +except ImportError: + if proxy: + proxies = [proxy] * 2 + else: + proxies = [] + _engine = pyttsx3.init() # noinspection PyTypeChecker VOICES = [x.id for x in _engine.getProperty("voices")] del _engine +def format_autocomplete(ctx: discord.AutocompleteContext): + url = ctx.options.get("url", os.urandom(6).hex()) + self: "OtherCog" = ctx.bot.cogs["OtherCog"] # type: ignore + if url in self._fmt_cache: + return [x for x in self._fmt_cache[url].keys() if ctx.value.lower() in x.lower()] + + try: + parsed = urlparse(url, allow_fragments=True) + except ValueError: + pass + else: + if parsed.scheme in ("http", "https") and parsed.netloc: + self._fmt_queue.put_nowait(url) + return ["no formats cached (yet)."] + + # noinspection DuplicatedCode class OtherCog(commands.Cog): def __init__(self, bot): self.bot = bot self.lock = asyncio.Lock() self.http = httpx.AsyncClient() + self._fmt_cache = {} + self._fmt_queue = asyncio.Queue() + self._worker_task = self.bot.loop.create_task(self.cache_population_job()) + + def cog_unload(self): + self._worker_task.cancel() + + async def cache_population_job(self): + while True: + url = await self._fmt_queue.get() + if url not in self._fmt_cache: + await self.list_formats(url, use_proxy=1) + self._fmt_queue.task_done() + + async def list_formats(self, url: str, *, use_proxy: int = 0) -> dict: + if url in self._fmt_cache: + return self._fmt_cache[url] + + args = [ + "yt-dlp", + "-J", + url + ] + if use_proxy == 1 and proxy: + args.append("--proxy") + args.append(proxy) + console.log("list_formats using proxy: %r" % args[-1]) + elif use_proxy == 2 and proxies: + args.append("--proxy") + args.append(random.choice(proxies)) + console.log("list_formats using random proxy: %r" % args[-1]) + process = await asyncio.create_subprocess_exec( + *args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await process.communicate() + data = json.loads(stdout.decode()) + formats = data["formats"] + new = {} + for fmt in formats: + new[fmt["format_id"]] = { + "id": fmt["format_id"], + "ext": fmt["ext"], + "protocol": fmt["protocol"], + "acodec": fmt["acodec"], + "vcodec": fmt["vcodec"], + "resolution": fmt["resolution"], + "filesize": fmt.get("filesize", float('inf')), + } + self._fmt_cache[url] = new + return new class AbortScreenshotTask(discord.ui.View): def __init__(self, task: asyncio.Task): @@ -748,37 +829,116 @@ class OtherCog(commands.Cog): self, ctx: discord.ApplicationContext, url: str, - video_format: str = "", - upload_log: bool = True + video_format: discord.Option( + description="The format to download the video in.", + autocomplete=format_autocomplete, + default="" + ) = "", + upload_log: bool = True, + list_formats: bool = False, + proxy_mode: discord.Option( + str, + choices=[ + "No Proxy", + "Dedicated Proxy", + "Random Public Proxy" + ], + description="Only use if a download was blocked or 403'd.", + default="No Proxy", + ) = "No Proxy", ): """Downloads a video from using youtube-dl""" + use_proxy = ["No Proxy", "Dedicated Proxy", "Random Public Proxy"].index(proxy_mode) + embed = discord.Embed( + description="Loading..." + ) + embed.set_thumbnail(url="https://cdn.discordapp.com/emojis/1101463077586735174.gif?v=1") await ctx.defer() + + await ctx.respond(embed=embed) + if list_formats: + # Nothing actually downloads here + try: + formats = await self.list_formats(url, use_proxy=use_proxy) + except FileNotFoundError: + _embed = embed.copy() + _embed.description = "yt-dlp not found." + _embed.colour = discord.Colour.red() + _embed.set_thumbnail(url=discord.Embed.Empty) + return await ctx.edit(embed=_embed) + except json.JSONDecodeError: + _embed = embed.copy() + _embed.description = "Unable to find formats. You're on your own. Wing it." + _embed.colour = discord.Colour.red() + _embed.set_thumbnail(url=discord.Embed.Empty) + return await ctx.edit(embed=_embed) + else: + embeds = [] + for fmt in formats.keys(): + fs = formats[fmt]["filesize"] or 0.1 + if fs == float("inf"): + fs = 0 + units = ["B"] + else: + units = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"] + while fs > 1024: + fs /= 1024 + units.pop(0) + embeds.append( + discord.Embed( + title=fmt, + description="- Encoding: {0[vcodec]} + {0[acodec]}\n" + "- Extension: `.{0[ext]}`\n" + "- Resolution: {0[resolution]}\n" + "- Filesize: {1}\n" + "- Protocol: {0[protocol]}\n".format(formats[fmt], f"{round(fs, 2)}{units[0]}"), + colour=discord.Colour.blurple() + ).add_field( + name="Download:", + value="{} url:{} video_format:{}".format( + self.bot.get_application_command("yt-dl").mention, + url, + fmt + ) + ) + ) + _paginator = pages.Paginator(embeds, loop_pages=True) + await ctx.delete(delay=0.1) + return await _paginator.respond(ctx.interaction) + with tempfile.TemporaryDirectory(prefix="jimmy-ytdl-") as tempdir: video_format = video_format.lower() - OUTPUT_FILE = str(Path(tempdir) / f"{ctx.user.id}.%(ext)s") MAX_SIZE = round(ctx.guild.filesize_limit / 1024 / 1024) + if MAX_SIZE == 8: + MAX_SIZE = 25 options = [ "--no-colors", "--no-playlist", "--no-check-certificates", - # "--max-filesize", str(MAX_SIZE) + "M", "--no-warnings", - "--output", OUTPUT_FILE, - "--newline" + "--newline", + "--output", + f"{ctx.user.id}.%(title)s.%(ext)s", ] if video_format: options.extend(["--format", f"({video_format})[filesize<={MAX_SIZE}M]"]) else: options.extend(["--format", f"(bv*+ba/b/ba)[filesize<={MAX_SIZE}M]"]) + if use_proxy == 1 and proxy: + options.append("--proxy") + options.append(proxy) + console.log("yt-dlp using proxy: %r", proxy) + elif use_proxy == 2 and proxies: + options.append("--proxy") + options.append(random.choice(proxies)) + console.log("yt-dlp using random proxy: %r", options[-1]) + + _embed = embed.copy() + _embed.description = "Downloading..." + _embed.colour = discord.Colour.blurple() await ctx.edit( - embed=discord.Embed( - description="\u200b" - ).set_author( - name="Downloading...", - icon_url="https://cdn.discordapp.com/emojis/1101463077586735174.gif?v=1", - url=url - ) + embed=_embed, ) try: venv = Path.cwd() / "venv" / ("Scripts" if os.name == "nt" else "bin") @@ -815,51 +975,56 @@ class OtherCog(commands.Cog): stderr_log_file ] if b"format is not available" in stderr: - process = await asyncio.create_subprocess_exec( - "yt-dlp", - "-J", - url, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, stderr = await process.communicate() - data = json.loads(stdout.decode()) - formats = data["formats"] - paginator = commands.Paginator() - for fmt in formats: - fs = round((fmt.get("filesize") or len(fmt.get("fragments", [b'\0'])) * 10) / 1024 / 1024, 1) - paginator.add_line( - "* {0[format_id]}:\n" - "\t- Encoding: {0[vcodec]} + {0[acodec]}\n" - "\t- Extension: {0[ext]}\n" - "\t- Protocol: {0[protocol]}\n" - "\t- Resolution: {0[resolution]}\n" - "\t- Size: {1!s}MB".format(fmt, fs) - ) - await ctx.edit(content="Invalid format. Available formats:", embed=None) - for page in paginator.pages: - await ctx.send(page) - return await ctx.send(files=files) + formats = await self.list_formats(url) return await ctx.edit(content=f"Download failed:\n```\n{stderr.decode()}\n```", files=files) - await ctx.edit(content="Uploading video...") + _embed = embed.copy() + _embed.description = "Download complete." + _embed.colour = discord.Colour.green() + _embed.set_thumbnail(url=discord.Embed.Empty) + await ctx.edit(embed=_embed) files = [ stdout_log_file, stderr_log_file ] if upload_log else [] + cum_size = 0 + for file in files: + n_b = len(file.fp.read()) + file.fp.seek(0) + if n_b == 0: + files.remove(file) + continue + elif n_b >= 1024 * 1024 * 256: + data = file.fp.read() + compressed = await self.bot.loop.run_in_executor( + gzip.compress, data, 9 + ) + file.fp.close() + file.fp = io.BytesIO(compressed) + file.fp.seek(0) + file.filename += ".gz" + cum_size += len(compressed) + else: + cum_size += n_b + for file_name in Path(tempdir).glob(f"{ctx.user.id}.*"): stat = file_name.stat() size_mb = stat.st_size / 1024 / 1024 - if size_mb > MAX_SIZE - 0.5: + if (size_mb * 1024 * 1024 + cum_size) >= (MAX_SIZE - 0.256) * 1024 * 1024: + warning = f"File {file_name.name} was too large ({size_mb:,.1f}MB vs {MAX_SIZE:.1f}MB)".encode() _x = io.BytesIO( - f"File {file_name.name} was too large ({size_mb:,.1f}MB vs {MAX_SIZE:.1f}MB)".encode() + warning ) + _x.seek(0) + cum_size += len(warning) files.append(discord.File(_x, filename=file_name.name + ".txt")) try: video = discord.File(file_name, filename=file_name.name) files.append(video) except FileNotFoundError: continue + else: + cum_size += size_mb * 1024 * 1024 if not files: return await ctx.edit(embed=discord.Embed(description="No files found.", color=discord.Colour.red())) @@ -1009,6 +1174,7 @@ class OtherCog(commands.Cog): async def quote(self, ctx: discord.ApplicationContext): """Generates a random quote""" emoji = discord.PartialEmoji(name='loading', animated=True, id=1101463077586735174) + async def get_quote() -> str | discord.File: try: response = await self.http.get("https://inspirobot.me/api?generate=true")