add Run OCR message command

This commit is contained in:
Nexus 2024-04-14 19:44:30 +01:00
parent ce1f4e92b7
commit 7524e5ecfe
Signed by: nex
GPG key ID: 0FA334385D0B689F

View file

@ -1,55 +1,42 @@
import asyncio import asyncio
import fnmatch
import functools
import glob import glob
import hashlib import hashlib
import io import io
import json import json
import logging import logging
import math
import os import os
import pathlib import pathlib
import random import random
import re import re
import shutil import shutil
import subprocess
import sys
import tempfile import tempfile
import textwrap import textwrap
import traceback
import typing import typing
from functools import partial from functools import partial
from io import BytesIO
from pathlib import Path from pathlib import Path
from time import sleep, time, time_ns from time import time
from typing import Dict, Literal, Optional, Tuple from typing import Dict, Literal, Tuple
from urllib.parse import urlparse from urllib.parse import urlparse
import aiofiles
import aiohttp import aiohttp
import config
import discord import discord
import dns.resolver
import httpx import httpx
import openai import openai
import psutil import psutil
import pydub import pydub
import pytesseract import pytesseract
import pyttsx3 import pyttsx3
from discord import Interaction
from discord.ext import commands
from dns import asyncresolver
from PIL import Image from PIL import Image
from rich import print from discord.ext import commands
from rich.tree import Tree
from selenium import webdriver from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options as ChromeOptions from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.chrome.service import Service as ChromeService from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.firefox.options import Options as FirefoxOptions from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.firefox.service import Service as FirefoxService from selenium.webdriver.firefox.service import Service as FirefoxService
import config from utils import Timer
from utils import Timer, console
try: try:
from config import proxy from config import proxy
@ -530,21 +517,8 @@ class OtherCog(commands.Cog):
else: else:
return await ctx.respond(result, view=GenerateNewView()) return await ctx.respond(result, view=GenerateNewView())
@commands.slash_command() async def _ocr_core(self, attachment: discord.Attachment) -> tuple[dict[str, float], str]:
@commands.cooldown(1, 30, commands.BucketType.user) timings: dict[str, float] = {}
@commands.max_concurrency(1, commands.BucketType.user)
async def ocr(
self,
ctx: discord.ApplicationContext,
attachment: discord.Option(
discord.SlashCommandOptionType.attachment,
description="Image to perform OCR on",
),
):
"""OCRs an image"""
await ctx.defer()
timings: Dict[str, float] = {}
attachment: discord.Attachment
with Timer() as _t: with Timer() as _t:
data = await attachment.read() data = await attachment.read()
file = io.BytesIO(data) file = io.BytesIO(data)
@ -558,43 +532,86 @@ class OtherCog(commands.Cog):
text = await self.bot.loop.run_in_executor(None, pytesseract.image_to_string, img) text = await self.bot.loop.run_in_executor(None, pytesseract.image_to_string, img)
timings["Perform OCR"] = _t.total timings["Perform OCR"] = _t.total
except pytesseract.TesseractError as e: except pytesseract.TesseractError as e:
return await ctx.respond(f"Failed to perform OCR: `{e}`") raise RuntimeError(f"Failed to perform OCR: `{e}`")
if len(text) > 4096: if len(text) >= 1744:
with Timer() as _t: with Timer() as _t:
try: try:
response = await self.http.put( file.seek(0)
"https://api.mystb.in/paste", response = await self.http.post(
json={ "https://paste.nexy7574.co.uk/upload",
"files": [{"filename": "ocr.txt", "content": text}], data={
"expiration": "1week",
"burn_after": "0",
"syntax_highlight": "none",
"privacy": "unlisted",
}, },
files={
"file": (attachment.filename, file, attachment.content_type)
},
follow_redirects=False
) )
response.raise_for_status() response.raise_for_status()
except httpx.HTTPError: except httpx.HTTPError as e:
return await ctx.respond("OCR content too large to post.") raise RuntimeError(f"Failed to upload OCR content: `{e}`")
else: else:
data = response.json() text = "View on [paste.nexy7574.co.uk](%s)" % response.next_request.url
with Timer(timings, "Respond (URL)"): timings["Upload text to pastebin"] = _t.total
return timings, text
@commands.slash_command()
@commands.cooldown(1, 30, commands.BucketType.user)
@commands.max_concurrency(1, commands.BucketType.user)
async def ocr(
    self,
    ctx: discord.ApplicationContext,
    attachment: discord.Option(
        discord.SlashCommandOptionType.attachment,
        description="Image to perform OCR on",
    ),
):
    """OCRs an image"""
    # NOTE: the docstring is kept as a single short line on purpose; the
    # command framework may use it as the user-visible command description.
    #
    # Flow: defer the interaction, run the shared OCR helper on the uploaded
    # attachment, reply with an embed holding the text (or the paste link the
    # helper produced), then edit the reply to append per-step timings.
    await ctx.defer()
    attachment: discord.Attachment

    # `_ocr_core` signals user-facing failures (OCR error, paste upload error)
    # by raising RuntimeError with a ready-to-display message. Without this
    # handler the deferred interaction would never receive a response.
    try:
        timings, text = await self._ocr_core(attachment)
    except RuntimeError as e:
        return await ctx.respond(str(e))

    embed = discord.Embed(
        description=text,
        colour=discord.Colour.blurple(),
    )
    # Show the source image alongside the recognised text.
    embed.set_image(url=attachment.url)

    with Timer() as _t:
        await ctx.respond(embed=embed)
    timings["Respond (Text)"] = _t.total

    if timings:
        # Append a timing breakdown as the message content of the reply.
        text = "Timings:\n" + "\n".join("{}: {:.2f}s".format(k.title(), v) for k, v in timings.items())
        await ctx.edit(content=text)
@commands.message_command(name="Run OCR")
async def message_ocr(self, ctx: discord.ApplicationContext, message: discord.Message):
    """Run OCR on the first image attachment of the selected message."""
    await ctx.defer()

    # Pick the first attachment whose content type marks it as an image.
    # `content_type` may be None (it is optional on attachments), so fall back
    # to an empty string instead of calling `.startswith` on None.
    attachment: discord.Attachment | None = next(
        (
            candidate
            for candidate in message.attachments
            if (candidate.content_type or "").startswith("image/")
        ),
        None,
    )
    if attachment is None:
        return await ctx.respond(":x: No images found in message.", delete_after=30)

    # `_ocr_core` raises RuntimeError with a user-facing message when OCR or
    # the paste upload fails; relay it so the deferred interaction is answered.
    try:
        _, text = await self._ocr_core(attachment)
    except RuntimeError as e:
        return await ctx.respond(str(e))

    embed = discord.Embed(
        title="OCR for " + attachment.filename,
        description=text,
        colour=discord.Colour.blurple(),
        url=message.jump_url,  # title links back to the source message
    )
    embed.set_image(url=attachment.url)
    await ctx.respond(embed=embed)
@commands.message_command(name="Convert Image to GIF") @commands.message_command(name="Convert Image to GIF")