From 7524e5ecfeaf3c23afb194da02d8b8c3ea81f02e Mon Sep 17 00:00:00 2001 From: nexy7574 Date: Sun, 14 Apr 2024 19:44:30 +0100 Subject: [PATCH 1/5] add Run OCR message command --- cogs/other.py | 137 ++++++++++++++++++++++++++++---------------------- 1 file changed, 77 insertions(+), 60 deletions(-) diff --git a/cogs/other.py b/cogs/other.py index 2ba1f4d..433b958 100644 --- a/cogs/other.py +++ b/cogs/other.py @@ -1,55 +1,42 @@ import asyncio -import fnmatch -import functools import glob import hashlib import io import json import logging -import math import os import pathlib import random import re import shutil -import subprocess -import sys import tempfile import textwrap -import traceback import typing from functools import partial -from io import BytesIO from pathlib import Path -from time import sleep, time, time_ns -from typing import Dict, Literal, Optional, Tuple +from time import time +from typing import Dict, Literal, Tuple from urllib.parse import urlparse -import aiofiles import aiohttp +import config import discord -import dns.resolver import httpx import openai import psutil import pydub import pytesseract import pyttsx3 -from discord import Interaction -from discord.ext import commands -from dns import asyncresolver from PIL import Image -from rich import print -from rich.tree import Tree +from discord.ext import commands from selenium import webdriver -from selenium.common.exceptions import WebDriverException from selenium.webdriver.chrome.options import Options as ChromeOptions from selenium.webdriver.chrome.service import Service as ChromeService from selenium.webdriver.firefox.options import Options as FirefoxOptions from selenium.webdriver.firefox.service import Service as FirefoxService -import config -from utils import Timer, console +from utils import Timer + try: from config import proxy @@ -530,21 +517,8 @@ class OtherCog(commands.Cog): else: return await ctx.respond(result, view=GenerateNewView()) - @commands.slash_command() - @commands.cooldown(1, 30, commands.BucketType.user) - @commands.max_concurrency(1, commands.BucketType.user) - async def ocr( - self, - ctx: discord.ApplicationContext, - attachment: discord.Option( - discord.SlashCommandOptionType.attachment, - description="Image to perform OCR on", - ), - ): - """OCRs an image""" - await ctx.defer() - timings: Dict[str, float] = {} - attachment: discord.Attachment + async def _ocr_core(self, attachment: discord.Attachment) -> tuple[dict[str, float], str]: + timings: dict[str, float] = {} with Timer() as _t: data = await attachment.read() file = io.BytesIO(data) @@ -558,45 +532,88 @@ class OtherCog(commands.Cog): text = await self.bot.loop.run_in_executor(None, pytesseract.image_to_string, img) timings["Perform OCR"] = _t.total except pytesseract.TesseractError as e: - return await ctx.respond(f"Failed to perform OCR: `{e}`") + raise RuntimeError(f"Failed to perform OCR: `{e}`") - if len(text) > 4096: + if len(text) >= 1744: with Timer() as _t: try: - response = await self.http.put( - "https://api.mystb.in/paste", - json={ - "files": [{"filename": "ocr.txt", "content": text}], + file.seek(0) + response = await self.http.post( + "https://paste.nexy7574.co.uk/upload", + data={ + "expiration": "1week", + "burn_after": "0", + "syntax_highlight": "none", + "privacy": "unlisted", }, + files={ + "file": (attachment.filename, file, attachment.content_type) + }, + follow_redirects=False ) response.raise_for_status() - except httpx.HTTPError: - return await ctx.respond("OCR content too large to post.") + except httpx.HTTPError as e: + raise RuntimeError(f"Failed to upload OCR content: `{e}`") else: - data = response.json() - with Timer(timings, "Respond (URL)"): - embed = discord.Embed( - description="View on [mystb.in](%s)" % ("https://mystb.in/" + data["id"]), - colour=discord.Colour.dark_theme(), - ) - await ctx.respond(embed=embed) - timings["Upload text to mystbin"] = _t.total - elif len(text) <= 1500: - with Timer() as _t: - await ctx.respond(embed=discord.Embed(description=text)) - timings["Respond (Text)"] = _t.total - else: - with Timer() as _t: - out_file = io.BytesIO(text.encode("utf-8", "replace")) - await ctx.respond(file=discord.File(out_file, filename="ocr.txt")) - timings["Respond (File)"] = _t.total + text = "View on [paste.nexy7574.co.uk](%s)" % response.next_request.url + timings["Upload text to pastebin"] = _t.total + return timings, text + + @commands.slash_command() + @commands.cooldown(1, 30, commands.BucketType.user) + @commands.max_concurrency(1, commands.BucketType.user) + async def ocr( + self, + ctx: discord.ApplicationContext, + attachment: discord.Option( + discord.SlashCommandOptionType.attachment, + description="Image to perform OCR on", + ), + ): + """OCRs an image""" + await ctx.defer() + attachment: discord.Attachment + + timings, text = await self._ocr_core(attachment) + embed = discord.Embed( + description=text, + colour=discord.Colour.blurple() + ) + embed.set_image(url=attachment.url) + with Timer() as _t: + await ctx.respond( + embed=embed + ) + timings["Respond (Text)"] = _t.total if timings: text = "Timings:\n" + "\n".join("{}: {:.2f}s".format(k.title(), v) for k, v in timings.items()) await ctx.edit( - content=text, + content=text ) + @commands.message_command(name="Run OCR") + async def message_ocr(self, ctx: discord.ApplicationContext, message: discord.Message): + await ctx.defer() + attachment: discord.Attachment | None + for attachment in message.attachments: + if attachment.content_type.startswith("image/"): + break + else: + return await ctx.respond(":x: No images found in message.", delete_after=30) + + timings, text = await self._ocr_core(attachment) + embed = discord.Embed( + title="OCR for " + attachment.filename, + description=text, + colour=discord.Colour.blurple(), + url=message.jump_url + ) + embed.set_image(url=attachment.url) + await ctx.respond( + embed=embed + ) + @commands.message_command(name="Convert Image to GIF") async def convert_image_to_gif(self, ctx: discord.ApplicationContext, message: discord.Message): await ctx.defer() From 03325a266ff9c53ce2c01d72a5c34643969ea939 Mon Sep 17 00:00:00 2001 From: nexy7574 Date: Sun, 14 Apr 2024 19:48:02 +0100 Subject: [PATCH 2/5] Allow OCRing multiple images --- cogs/other.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/cogs/other.py b/cogs/other.py index 433b958..8fd94de 100644 --- a/cogs/other.py +++ b/cogs/other.py @@ -595,24 +595,26 @@ class OtherCog(commands.Cog): @commands.message_command(name="Run OCR") async def message_ocr(self, ctx: discord.ApplicationContext, message: discord.Message): await ctx.defer() - attachment: discord.Attachment | None + + embeds = [] for attachment in message.attachments: if attachment.content_type.startswith("image/"): - break - else: - return await ctx.respond(":x: No images found in message.", delete_after=30) + timings, text = await self._ocr_core(attachment) + embed = discord.Embed( + title="OCR for " + attachment.filename, + description=text, + colour=discord.Colour.blurple(), + url=message.jump_url + ) + embed.set_image(url=attachment.url) + embeds.append(embed) + if len(embeds) == 25: + break - timings, text = await self._ocr_core(attachment) - embed = discord.Embed( - title="OCR for " + attachment.filename, - description=text, - colour=discord.Colour.blurple(), - url=message.jump_url - ) - embed.set_image(url=attachment.url) - await ctx.respond( - embed=embed - ) + if not embeds: + return await ctx.respond(":x: No images found in message.", delete_after=30) + else: + return await ctx.respond(embeds) @commands.message_command(name="Convert Image to GIF") async def convert_image_to_gif(self, ctx: discord.ApplicationContext, message: discord.Message): From 8d5744c23dc3809917e21b4d58cd93761109d0cb Mon Sep 17 00:00:00 2001 From: nexy7574 Date: Sun, 14 Apr 2024 19:48:30 +0100 Subject: [PATCH 3/5] Fix responding with raw embed array --- cogs/other.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cogs/other.py b/cogs/other.py index 8fd94de..d426780 100644 --- a/cogs/other.py +++ b/cogs/other.py @@ -614,7 +614,7 @@ class OtherCog(commands.Cog): if not embeds: return await ctx.respond(":x: No images found in message.", delete_after=30) else: - return await ctx.respond(embeds) + return await ctx.respond(embeds=embeds) @commands.message_command(name="Convert Image to GIF") async def convert_image_to_gif(self, ctx: discord.ApplicationContext, message: discord.Message): From 74b3385051e797f32d8d0db6f0847dbd10691c1f Mon Sep 17 00:00:00 2001 From: nexy7574 Date: Sun, 14 Apr 2024 19:52:18 +0100 Subject: [PATCH 4/5] conditionally raise error --- cogs/other.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cogs/other.py b/cogs/other.py index d426780..bdeb1a1 100644 --- a/cogs/other.py +++ b/cogs/other.py @@ -551,7 +551,8 @@ class OtherCog(commands.Cog): }, follow_redirects=False ) - response.raise_for_status() + if response.status_code not in range(200, 400): + response.raise_for_status() except httpx.HTTPError as e: raise RuntimeError(f"Failed to upload OCR content: `{e}`") else: From c90fc5e7fa126c6c683308df603e8bf29ed7001d Mon Sep 17 00:00:00 2001 From: nexy7574 Date: Sun, 14 Apr 2024 19:54:00 +0100 Subject: [PATCH 5/5] Forgot to include content --- .idea/.gitignore | 2 ++ .idea/dataSources.local.xml | 2 +- .idea/misc.xml | 2 +- .idea/the-hi5-group.iml | 2 ++ cogs/other.py | 1 + 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.idea/.gitignore b/.idea/.gitignore index 26d3352..8f00030 100644 --- a/.idea/.gitignore +++ b/.idea/.gitignore @@ -1,3 +1,5 @@ # Default ignored files /shelf/ /workspace.xml +# GitHub Copilot persisted chat sessions +/copilot/chatSessions diff --git a/.idea/dataSources.local.xml b/.idea/dataSources.local.xml index 1fde411..5627475 100644 --- a/.idea/dataSources.local.xml +++ b/.idea/dataSources.local.xml @@ -1,6 +1,6 @@ - + " diff --git a/.idea/misc.xml b/.idea/misc.xml index c134054..894dfd5 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,5 +3,5 @@ - + \ No newline at end of file diff --git a/.idea/the-hi5-group.iml b/.idea/the-hi5-group.iml index 74d515a..83318fc 100644 --- a/.idea/the-hi5-group.iml +++ b/.idea/the-hi5-group.iml @@ -2,6 +2,8 @@ + + diff --git a/cogs/other.py b/cogs/other.py index bdeb1a1..209c310 100644 --- a/cogs/other.py +++ b/cogs/other.py @@ -545,6 +545,7 @@ class OtherCog(commands.Cog): "burn_after": "0", "syntax_highlight": "none", "privacy": "unlisted", + "content": text }, files={ "file": (attachment.filename, file, attachment.content_type)