college-bot-v2/src/cogs/ytdl.py
nexy7574 c41d6c3843
All checks were successful
Build and Publish Jimmy.2 / build_and_publish (push) Successful in 30s
Change yt-dl update frequency
2024-06-02 23:57:43 +01:00

642 lines
28 KiB
Python

import asyncio
import functools
import hashlib
import logging
import math
import time
import datetime
import httpx
import subprocess
import tempfile
import textwrap
import typing
import uuid
from pathlib import Path
from urllib.parse import urlparse
import aiosqlite
import discord
import yt_dlp
from discord.ext import commands
# Absolute path of ``cookies.txt`` inside the working directory the process had
# when this module was imported.
# NOTE(review): nothing in the visible part of this file reads this constant —
# confirm whether it is meant to be handed to yt-dlp.
COOKIES_TXT = Path.cwd().joinpath("cookies.txt")
class YTDLCog(commands.Cog):
    """
    Cog exposing the ``/yt-dl`` slash command.

    The command downloads media with yt-dlp into a temporary directory, optionally
    trims and/or converts it with ffmpeg, and then either attaches the result to the
    interaction response or uploads it to 0x0.st. Discord attachments are remembered
    in a small SQLite table so that an identical request can re-use the old upload.
    """

    # SQLite file holding the "downloads" cache table.
    DB_PATH = "./data/ytdl.db"

    def __init__(self, bot: commands.Bot) -> None:
        """
        :param bot: The bot this cog is attached to.
        """
        self.bot = bot
        self.log = logging.getLogger("jimmy.cogs.ytdl")
        # Friendly names users may pass as ``format``, mapped to yt-dlp format selectors.
        # NOTE(review): the "480p" row filters on width<=500 but falls back to width<=480;
        # left as found - confirm which bound is intended.
        self.common_formats = {
            "144p": "bv[width<=144]+ba[ext=webm]/bv[width<=144]+ba[ext=m4a]/bv[width<=144]+ba/b[width<=144]",
            "240p": "bv[width<=240]+ba[ext=webm]/bv[width<=240]+ba[ext=m4a]/bv[width<=240]+ba/b[width<=240]",
            "360p": "bv[width<=360]+ba[ext=webm]/bv[width<=360]+ba[ext=m4a]/bv[width<=360]+ba/b[width<=360]",
            # Fixed: the final alternative used to read "+bab[width<=480]" (missing "/").
            "480p": "bv[width<=500]+ba[ext=webm]/bv[width<=500]+ba[ext=m4a]/bv[width<=500]+ba/b[width<=480]",
            "720p": "bv[width<=720]+ba[ext=webm]/bv[width<=720]+ba[ext=m4a]/bv[width<=720]+ba/b[width<=720]",
            "1080p": "bv[width<=1080]+ba[ext=webm]/bv[width<=1080]+ba[ext=m4a]/bv[width<=1080]+ba",
            "1440p": "bv[width<=1440]+ba[ext=webm]/bv[width<=1440]+ba[ext=m4a]/bv[width<=1440]+ba",
            "2160p": "bv[width<=2160]+ba[ext=webm]/bv[width<=2160]+ba[ext=m4a]/bv[width<=2160]+ba",
            "mp3": "ba[filesize<500M]",
            "m4a": "ba[ext=m4a][filesize<500M]",
            "opus": "ba[ext=webm][filesize<500M]",
            "vorbis": "ba[ext=webm][filesize<500M]",
            "ogg": "ba[ext=webm][filesize<500M]",
        }
        # Baseline yt-dlp options; the command works on a shallow copy of this dict.
        self.default_options = {
            "noplaylist": True,
            "nocheckcertificate": True,
            "no_color": True,
            "noprogress": True,
            "logger": self.log,
            "format": "((bv+ba/b)[vcodec!=h265][filesize<500M]/b[filesize<=500M]/b)",
            "outtmpl": "%(title).50s.%(ext)s",
            "format_sort": [
                "vcodec:h264",
                "acodec:aac",
                "vcodec:vp9",
                "acodec:opus",
                "acodec:vorbis",
                "vcodec:vp8",
                "ext",
            ],
            "merge_output_format": "webm/mp4/mov/m4a/oga/ogg/mp3/mka/mkv",
            "source_address": "0.0.0.0",
            "concurrent_fragment_downloads": 4,
            # "max_filesize": (25 * 1024 * 1024) - 256
        }
        # Embed accent colour per site, keyed by host name without a leading "www.".
        self.colours = {
            "youtube.com": 0xFF0000,
            "youtu.be": 0xFF0000,
            "tiktok.com": 0x25F5EF,
            "instagram.com": 0xE1306C,
            "shronk.net": 0xFFF952,
        }

    @staticmethod
    def _error_embed(description: str, url: typing.Optional[str] = None) -> discord.Embed:
        """Builds the red "Error" embed used for every failure response of the command."""
        return discord.Embed(
            title="Error",
            description=description,
            colour=discord.Colour.red(),
            url=url,
        )

    async def _init_db(self):
        """Creates the ``downloads`` cache table if it does not exist yet."""
        async with aiosqlite.connect(self.DB_PATH) as db:
            await db.execute(
                """
                CREATE TABLE IF NOT EXISTS downloads (
                    key TEXT PRIMARY KEY,
                    message_id INTEGER NOT NULL UNIQUE,
                    channel_id INTEGER NOT NULL,
                    webpage_url TEXT NOT NULL,
                    format_id TEXT NOT NULL,
                    attachment_index INTEGER NOT NULL DEFAULT 0
                )
                """
            )
            await db.commit()
        return

    async def save_link(
        self,
        message: discord.Message,
        webpage_url: str,
        format_id: str,
        attachment_index: int = 0,
        *,
        snip: typing.Optional[str] = None,
    ):
        """
        Saves a link to discord to prevent having to re-download it.

        :param message: The download message with the attachment.
        :param webpage_url: The "webpage_url" key of the metadata
        :param format_id: The "format_id" key of the metadata
        :param attachment_index: The index of the attachment. Defaults to 0
        :param snip: The start and end time to snip the video. e.g. 00:00:00-00:10:00
        :return: The created hash key, or None if the database could not be initialised.
        """
        snip = snip or "*"
        # md5 is only used to derive a compact lookup key here, not for security.
        _hash = hashlib.md5(f"{webpage_url}:{format_id}:{snip}".encode()).hexdigest()
        try:
            await self._init_db()
        except Exception as e:
            # Caching is best-effort: report through the cog's logger and carry on.
            self.log.error("Failed to initialise ytdl database: %s", e, exc_info=True)
            return
        async with aiosqlite.connect(self.DB_PATH) as db:
            self.log.debug(
                "Saving %r (%r:%r:%r) with message %d>%d, index %d",
                _hash,
                webpage_url,
                format_id,
                snip,
                message.channel.id,
                message.id,
                attachment_index,
            )
            await db.execute(
                """
                INSERT INTO downloads (key, message_id, channel_id, webpage_url, format_id, attachment_index)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT (key) DO UPDATE SET
                    message_id=excluded.message_id,
                    channel_id=excluded.channel_id,
                    attachment_index=excluded.attachment_index
                """,
                (_hash, message.id, message.channel.id, webpage_url, format_id, attachment_index),
            )
            await db.commit()
        return _hash

    async def get_saved(self, webpage_url: str, format_id: str, snip: str) -> typing.Optional[str]:
        """
        Attempts to retrieve the attachment URL of a previously saved download.

        :param webpage_url: The webpage url
        :param format_id: The format ID
        :param snip: The start and end time to snip the video. e.g. 00:00:00-00:10:00
        :return: the URL, if found and valid.
        """
        # Same normalisation as save_link so both sides derive the same key.
        snip = snip or "*"
        try:
            await self._init_db()
        except Exception as e:
            self.log.error("Failed to initialise ytdl database: %s", e, exc_info=True)
            return
        _hash = hashlib.md5(f"{webpage_url}:{format_id}:{snip}".encode()).hexdigest()
        async with aiosqlite.connect(self.DB_PATH) as db:
            self.log.debug(
                "Attempting to find a saved download for '%s:%s:%s' (%r).", webpage_url, format_id, snip, _hash
            )
            cursor = await db.execute(
                "SELECT message_id, channel_id, attachment_index FROM downloads WHERE key=?", (_hash,)
            )
            entry = await cursor.fetchone()
            if not entry:
                self.log.debug("There was no saved download.")
                return
            message_id, channel_id, attachment_index = entry
            channel = self.bot.get_channel(channel_id)
            if not channel:
                self.log.debug("Channel %r was not found.", channel_id)
                return
            try:
                message = await channel.fetch_message(message_id)
            except discord.HTTPException:
                self.log.debug("%r did not contain a message with ID %r", channel, message_id)
                await db.execute("DELETE FROM downloads WHERE key=?", (_hash,))
                # Without a commit the deletion is discarded when the connection closes.
                await db.commit()
                return
            try:
                url = message.attachments[attachment_index].url
            except IndexError:
                self.log.debug("Attachment index %d is out of range (%r)", attachment_index, message.attachments)
                return
            self.log.debug("Found URL %r, returning.", url)
            return url

    def convert_to_m4a(self, file: Path) -> Path:
        """
        Converts a file to m4a format.

        Blocking (runs ffmpeg synchronously); the command calls it via a worker thread.

        :param file: The file to convert
        :return: The converted file
        :raises RuntimeError: if ffmpeg exits with a non-zero status; the message is its stderr.
        """
        new_file = file.with_suffix(".m4a")
        args = [
            "-vn",
            "-sn",
            "-i",
            str(file),
            "-c:a",
            "aac",
            "-b:a",
            "96k",
            "-movflags",
            "faststart",
            "-y",
            str(new_file),
        ]
        self.log.debug("Running command: ffmpeg %s", " ".join(args))
        process = subprocess.run(["ffmpeg", *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if process.returncode != 0:
            # errors="replace": never let undecodable ffmpeg output mask the real failure.
            raise RuntimeError(process.stderr.decode(errors="replace"))
        return new_file

    @staticmethod
    async def upload_to_0x0(name: str, data: typing.IO[bytes], mime_type: str | None = None) -> str:
        """
        Uploads a file to https://0x0.st.

        :param name: The file name to send in the multipart form.
        :param data: A binary file object positioned at the start of the content.
        :param mime_type: The content type. Detected from the first 4096 bytes when omitted.
        :return: The path component of the URL in the response body, without the leading slash.
        :raises httpx.HTTPStatusError: if the server answers with an error status.
        """
        if not mime_type:
            import magic

            mime_type = await asyncio.to_thread(magic.from_buffer, data.read(4096), mime=True)
            # Rewind so the upload below sends the whole file, not just the remainder.
            data.seek(0)
        async with httpx.AsyncClient() as client:
            response = await client.post(
                "https://0x0.st",
                files={"file": (name, data, mime_type)},
                headers={"User-Agent": "CollegeBot (matrix: @nex:nexy7574.co.uk)"},
            )
            # Raise first, so that any response that gets past this point yields a string
            # instead of silently falling through and returning None.
            response.raise_for_status()
            return urlparse(response.text.strip()).path[1:]

    @commands.slash_command(name="yt-dl")
    # @commands.bot_has_permissions(send_messages=True, embed_links=True, attach_files=True)
    async def yt_dl_command(
        self,
        ctx: discord.ApplicationContext,
        url: typing.Annotated[str, discord.Option(str, description="The URL to download from.", required=True)],
        user_format: typing.Annotated[
            typing.Optional[str],
            discord.Option(
                str,
                name="format",
                description="The name of the format to download. Can also specify resolutions for youtube.",
                required=False,
                default=None,
            ),
        ],
        audio_only: typing.Annotated[
            bool,
            discord.Option(
                bool,
                name="audio-only",
                description="Whether to convert result into an m4a file. Overwrites `format` if True.",
                required=False,
                default=False,
            ),
        ],
        snip: typing.Annotated[
            typing.Optional[str],
            discord.Option(description="A start and end position to trim. e.g. 00:00:00-00:10:00.", required=False),
        ],
        subtitles: typing.Annotated[
            typing.Optional[str],
            discord.Option(
                str,
                description="The language code of the subtitles to download. e.g. 'en', 'auto'",
                required=False,
            ),
        ]
    ):
        """Runs yt-dlp and outputs into discord."""
        # NOTE: the docstring above is deliberately left untouched; the command framework
        # may use it as the user-visible command description.
        #
        # Flow: extract metadata -> re-use a cached attachment if there is one -> download
        # (cancellable) -> optional trim -> optional m4a conversion -> upload to Discord or
        # to 0x0.st depending on size/codec.
        await ctx.defer()
        last_edit = time.time()
        options = self.default_options.copy()
        stop = asyncio.Event()
        # Captured here because the progress hook runs outside the event loop thread.
        loop = asyncio.get_running_loop()

        def _download_hook(_data: dict[str, typing.Any]):
            """yt-dlp progress hook: aborts on cancel, otherwise refreshes the progress embed."""
            nonlocal last_edit
            if stop.is_set():
                raise RuntimeError("Download cancelled.")
            n = time.time()
            # Prefer the exact size, then the estimate, then the guild upload limit as a
            # rough stand-in. ctx.guild is None outside of guilds, hence getattr.
            _total = (
                _data.get("total_bytes")
                or _data.get("total_bytes_estimate")
                or getattr(ctx.guild, "filesize_limit", None)
            )
            if _total:
                _percent = round((_data.get("downloaded_bytes") or 0) / _total * 100, 2)
            else:
                _total = max(1, _data.get("fragment_count", 4096) or 4096)
                _percent = round(max(_data.get("fragment_index", 1) or 1, 1) / _total * 100, 2)
            # An under-estimated total would otherwise show more than 100%.
            _percent = min(_percent, 100.0)
            _speed_bytes_per_second = _data.get("speed") or 1
            _speed_megabits_per_second = round((_speed_bytes_per_second * 8) / 1024 / 1024)
            if _data.get("eta"):
                _eta = discord.utils.utcnow() + datetime.timedelta(seconds=_data.get("eta"))
            else:
                _eta = discord.utils.utcnow() + datetime.timedelta(minutes=1)
            blocks = "#" * math.floor(_percent / 10)
            bar = f"{blocks}{'.' * (10 - len(blocks))}"
            line = (f"{_percent}% [{bar}] | {_speed_megabits_per_second}Mbps | "
                    f"ETA {discord.utils.format_dt(_eta, 'R')}")
            # Throttle message edits to roughly one per second.
            if (n - last_edit) >= 1.1:
                embed.clear_fields()
                embed.add_field(name="Progress", value=line)
                # The download runs via asyncio.to_thread, so this hook executes in a
                # worker thread; loop.create_task is not thread-safe from there.
                asyncio.run_coroutine_threadsafe(ctx.edit(embed=embed), loop)
                last_edit = time.time()

        options["progress_hooks"] = [_download_hook]
        description = ""

        with tempfile.TemporaryDirectory(prefix="jimmy-ytdl-") as temp_dir:
            temp_dir = Path(temp_dir)
            # Keep both final and intermediate files inside the temporary directory.
            paths = {target: str(temp_dir) for target in ("home", "temp")}

            chosen_format = self.default_options["format"]
            if user_format:
                # Either one of the friendly names or a raw yt-dlp format selector.
                chosen_format = self.common_formats.get(user_format, user_format)

            options.setdefault("postprocessors", [])
            if audio_only:
                # Overwrite format here to be best audio under 25 megabytes.
                chosen_format = "ba[filesize<20M]"
                # Also force sorting by the best audio bitrate first.
                options["format_sort"] = ["abr", "br"]
                # noinspection PyTypeChecker
                options["postprocessors"].append(
                    {"key": "FFmpegExtractAudio", "preferredquality": "96", "preferredcodec": "best"}
                )
            options["format"] = chosen_format
            options["paths"] = paths

            if subtitles:
                # "<lang>" or "<lang>+<flag>"; a flag starting with y/1/t asks for embedding.
                subtitles, _, burn = subtitles.partition("+")
                # burn[:1] instead of burn[0]: "en+" must not raise IndexError.
                burn = burn[:1].lower() in ("y", "1", "t")
                if subtitles.lower() == "auto":
                    # yt-dlp's option for auto-generated subtitles is "writeautomaticsub".
                    options["writeautomaticsub"] = True
                else:
                    options["writesubtitles"] = True
                    options["subtitleslangs"] = [subtitles]
                if burn:
                    # noinspection PyTypeChecker
                    options["postprocessors"].append(
                        {"key": "FFmpegEmbedSubtitle", "already_have_subtitle": True}
                    )

            with yt_dlp.YoutubeDL(options) as downloader:
                await ctx.respond(embed=discord.Embed().set_footer(text="Downloading (step 1/10)"))
                try:
                    # noinspection PyTypeChecker
                    extracted_info = await asyncio.to_thread(downloader.extract_info, url, download=False)
                except yt_dlp.utils.DownloadError as e:
                    # Placeholder metadata: the flow continues and the download attempt
                    # below gets to report the failure to the user.
                    extracted_info = {
                        "title": "error",
                        "thumbnail_url": None,
                        "webpage_url": url,
                        "format": "error",
                        "format_id": "-1",
                        "ext": "wav",
                        "format_note": str(e),
                        "resolution": "1x1",
                        "fps": "1",
                        "vcodec": "error",
                        "acodec": "error",
                        "filesize": 0,
                    }
                    title = "error"
                    description = str(e)
                    thumbnail_url = webpage_url = None
                    likes = views = 0
                    chosen_format_id = str(uuid.uuid4())
                    # Read again at upload time, so it must be bound on this path as well.
                    vcodec = extracted_info["vcodec"]
                else:
                    title = extracted_info.get("title", url) or url
                    title = textwrap.shorten(title, 100)
                    thumbnail_url = extracted_info.get("thumbnail") or None
                    webpage_url = extracted_info.get("webpage_url", url)

                    chosen_format = extracted_info.get("format") or chosen_format or str(uuid.uuid4())
                    chosen_format_id = extracted_info.get("format_id") or str(uuid.uuid4())
                    final_extension = extracted_info.get("ext") or "mp4"
                    format_note = extracted_info.get("format_note", "%s (%s)" % (chosen_format, chosen_format_id)) or ""
                    resolution = extracted_info.get("resolution") or "1x1"
                    fps = extracted_info.get("fps", 0.0) or 0.0
                    vcodec = extracted_info.get("vcodec") or "h264"
                    acodec = extracted_info.get("acodec") or "aac"
                    filesize = extracted_info.get("filesize", extracted_info.get("filesize_approx", 1))
                    likes = extracted_info.get("like_count", extracted_info.get("average_rating", 0))
                    views = extracted_info.get("view_count", 0)

                    # Bullet list summarising the selected format, shown while downloading.
                    lines = []
                    if chosen_format and chosen_format_id:
                        lines.append(
                            "* Chosen format: `%s` (`%s`)" % (chosen_format, chosen_format_id),
                        )
                    if format_note:
                        lines.append("* Format note: %r" % format_note)
                    if final_extension:
                        lines.append("* File extension: " + final_extension)
                    if resolution:
                        _s = resolution
                        if fps:
                            _s += " @ %s FPS" % fps
                        lines.append("* Resolution: " + _s)
                    if vcodec or acodec:
                        # Given a bullet and label like its siblings (used to be a bare "v+a").
                        lines.append("* Codecs: %s+%s" % (vcodec or "N/A", acodec or "N/A"))
                    if filesize:
                        lines.append("* Filesize: %s" % yt_dlp.utils.format_bytes(filesize))
                    if lines:
                        description += "\n"
                        description += "\n".join(lines)

                # Strip "www." so e.g. www.youtube.com matches the "youtube.com" colour key.
                domain = urlparse(webpage_url or "").netloc.removeprefix("www.")
                embed = discord.Embed(
                    title=title,
                    description=description,
                    url=webpage_url,
                    colour=self.colours.get(domain, discord.Colour.og_blurple()),
                )
                embed.add_field(
                    name="Progress",
                    value="0% [..........]"
                )
                embed.set_footer(text="Downloading (step 2/10)")
                embed.set_thumbnail(url=thumbnail_url)

                class StopView(discord.ui.View):
                    """Single "Cancel download" button; sets ``stop`` for the progress hook."""

                    @discord.ui.button(label="Cancel download", style=discord.ButtonStyle.danger)
                    async def _stop(self, button: discord.ui.Button, interaction: discord.Interaction):
                        stop.set()
                        button.label = "Cancelling..."
                        button.disabled = True
                        await interaction.response.edit_message(view=self)
                        self.stop()

                await ctx.edit(
                    embed=embed,
                    view=StopView(timeout=86400)
                )

                previous = await self.get_saved(webpage_url, chosen_format_id, snip or "*")
                if previous:
                    await ctx.edit(
                        content=previous,
                        embed=discord.Embed(
                            title=f"Downloaded {title}!",
                            description="Used previously downloaded attachment.",
                            colour=discord.Colour.green(),
                            timestamp=discord.utils.utcnow(),
                            url=previous,
                            fields=[discord.EmbedField(name="URL", value=previous, inline=False)],
                        ).set_image(url=previous),
                        # Nothing is being downloaded, so drop the Cancel button too.
                        view=None,
                    )
                    return

                last_edit = time.time()
                try:
                    await asyncio.to_thread(functools.partial(downloader.download, [url]))
                except yt_dlp.utils.DownloadError as e:
                    self.log.error(e, exc_info=True)
                    return await ctx.edit(
                        embed=self._error_embed(f"Download failed:\n```\n{e}\n```", webpage_url),
                        delete_after=120,
                        view=None
                    )
                except RuntimeError:
                    # Raised by the progress hook once the Cancel button was pressed.
                    return await ctx.edit(
                        embed=self._error_embed("Download was cancelled.", webpage_url),
                        delete_after=120,
                        view=None
                    )

                await ctx.edit(view=None)

                if audio_only:
                    # can be .opus, .m4a, .mp3, .ogg, .oga
                    audio_suffixes = (".opus", ".m4a", ".mp3", ".ogg", ".oga", ".aac", ".wav")
                    file = next((_file for _file in temp_dir.iterdir() if _file.suffix in audio_suffixes), None)
                else:
                    file = next(temp_dir.glob("*." + extracted_info["ext"]), None)
                if file is None:
                    ext = extracted_info["ext"]
                    found_files = [str(_file) for _file in temp_dir.iterdir()]
                    self.log.warning(
                        "Failed to locate downloaded file. Was supposed to be looking for a file extension of "
                        "%r amongst files %r, however none were found.",
                        ext,
                        found_files,
                    )
                    return await ctx.edit(
                        embed=self._error_embed(
                            "Failed to locate downloaded video file."
                            f" Was expecting a file with the extension {ext}.\n"
                            f"Files: {', '.join(found_files)}",
                            webpage_url,
                        )
                    )

                if snip:
                    # "<start>-<end>"; either side may be left empty.
                    trim_start, _, trim_end = snip.partition("-")
                    trim_start = trim_start or "00:00:00"
                    trim_end = trim_end or extracted_info.get("duration_string", "00:30:00")
                    new_file = temp_dir / ("output" + file.suffix)
                    args = [
                        "-hwaccel",
                        "auto",
                        "-i",
                        str(file),
                        "-ss",
                        trim_start,
                        "-to",
                        trim_end,
                        "-preset",
                        "fast",
                        "-crf",
                        "24",
                        "-deadline",
                        "realtime",
                        "-cpu-used",
                        "5",
                        "-movflags",
                        "faststart",
                        "-b:a",
                        "96k",
                        "-y",
                        "-strict",
                        "2",
                        str(new_file),
                    ]
                    async with ctx.channel.typing():
                        await ctx.edit(
                            embed=discord.Embed(
                                title=f"Trimming from {trim_start} to {trim_end}.",
                                description="Please wait, this may take a couple of minutes.",
                                colour=discord.Colour.og_blurple(),
                                timestamp=discord.utils.utcnow(),
                            )
                        )
                        self.log.debug("Running command: 'ffmpeg %s'", " ".join(args))
                        process = await asyncio.create_subprocess_exec(
                            "ffmpeg", *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
                        )
                        stdout, stderr = await process.communicate()
                        # errors="replace": ffmpeg output is not guaranteed to be valid UTF-8.
                        stdout_text = stdout.decode(errors="replace")
                        stderr_text = stderr.decode(errors="replace")
                        self.log.debug("STDOUT:\n%r", stdout_text)
                        self.log.debug("STDERR:\n%r", stderr_text)
                        if process.returncode != 0:
                            # Only the tail is shown: the full ffmpeg log can be longer than
                            # an embed description may be, and the error is at the end.
                            return await ctx.edit(
                                embed=self._error_embed(
                                    f"Trimming failed:\n```\n{stderr_text[-3900:]}\n```",
                                    webpage_url,
                                )
                            )
                    file = new_file

                if audio_only and file.suffix != ".m4a":
                    self.log.info("Converting %r to m4a.", file)
                    file: Path = await asyncio.to_thread(self.convert_to_m4a, file)

                stat = file.stat()
                size_bytes = stat.st_size
                if size_bytes >= ((500 * 1024 * 1024) - 256):
                    return await ctx.edit(
                        embed=self._error_embed(
                            f"File is too large to upload ({round(size_bytes / 1024 / 1024)}MB).",
                            webpage_url,
                        )
                    )
                # The upload ETA assumes a throughput of 20 megabits per second.
                size_megabits = (size_bytes * 8) / 1024 / 1024
                eta_seconds = size_megabits / 20
                await ctx.edit(
                    embed=discord.Embed(
                        title="Uploading...",
                        description=f"ETA <t:{int(eta_seconds + discord.utils.utcnow().timestamp()) + 2}:R>",
                        colour=discord.Colour.og_blurple(),
                        timestamp=discord.utils.utcnow(),
                    )
                )
                embed = discord.Embed(
                    title=f"Downloaded {title}!",
                    description="Views: {:,} | Likes: {:,}".format(views or 0, likes or 0),
                    colour=discord.Colour.green(),
                    timestamp=discord.utils.utcnow(),
                    url=webpage_url,
                )
                try:
                    # Files of 20 MiB or more, and HEVC/AV1 video, go to 0x0.st instead of
                    # being attached to the Discord message.
                    if size_bytes >= (20 * 1024 * 1024) or vcodec.lower() in ["hevc", "h265", "av1", "av01"]:
                        with file.open("rb") as fb:
                            part = await self.upload_to_0x0(
                                file.name,
                                fb
                            )
                        embed.add_field(name="URL", value=f"https://0x0.st/{part}", inline=False)
                        await ctx.edit(
                            embed=embed
                        )
                        await ctx.respond("https://embeds.video/0x0/" + part)
                    else:
                        upload_file = await asyncio.to_thread(discord.File, file, filename=file.name)
                        msg = await ctx.edit(
                            file=upload_file,
                            embed=embed
                        )
                        # Only Discord attachments are cached for later re-use.
                        await self.save_link(msg, webpage_url, chosen_format_id, snip=snip or "*")
                except (discord.HTTPException, ConnectionError, httpx.HTTPError) as e:
                    # httpx.HTTPError is the common base of HTTPStatusError and the transport
                    # errors, so connection failures towards 0x0.st are reported as well.
                    self.log.error(e, exc_info=True)
                    return await ctx.edit(
                        embed=self._error_embed(f"Upload failed:\n```\n{e}\n```", webpage_url)
                    )
def setup(bot):
    """Extension entry point: builds a YTDLCog for ``bot`` and adds it to the bot."""
    cog = YTDLCog(bot)
    bot.add_cog(cog)