mirror of
https://github.com/nexy7574/LCC-bot.git
synced 2024-09-19 18:16:34 +01:00
Make jimmy look nice
This commit is contained in:
parent
d12ec1b31a
commit
9438d9a159
1 changed file with 106 additions and 41 deletions
145
cogs/other.py
145
cogs/other.py
|
@ -1837,6 +1837,12 @@ class OtherCog(commands.Cog):
|
||||||
@commands.max_concurrency(1, commands.BucketType.user, wait=True)
|
@commands.max_concurrency(1, commands.BucketType.user, wait=True)
|
||||||
async def ollama(self, ctx: commands.Context, *, query: str):
|
async def ollama(self, ctx: commands.Context, *, query: str):
|
||||||
""":3"""
|
""":3"""
|
||||||
|
try_hosts = {
|
||||||
|
"127.0.0.1:11434": "localhost",
|
||||||
|
"100.106.34.86:11434": "Nex Laptop",
|
||||||
|
"100.66.187.46:11434": "Nexbox",
|
||||||
|
"100.116.242.161:11434": "PortaPi"
|
||||||
|
}
|
||||||
if query.startswith("model:"):
|
if query.startswith("model:"):
|
||||||
model, query = query.split(" ", 1)
|
model, query = query.split(" ", 1)
|
||||||
model = model[6:].casefold()
|
model = model[6:].casefold()
|
||||||
|
@ -1863,14 +1869,14 @@ class OtherCog(commands.Cog):
|
||||||
except ValueError:
|
except ValueError:
|
||||||
host += ":11434"
|
host += ":11434"
|
||||||
else:
|
else:
|
||||||
try_hosts = [
|
# try_hosts = [
|
||||||
"127.0.0.1:11434", # Localhost
|
# "127.0.0.1:11434", # Localhost
|
||||||
"100.106.34.86:11434", # Laptop
|
# "100.106.34.86:11434", # Laptop
|
||||||
"100.66.187.46:11434", # optiplex
|
# "100.66.187.46:11434", # optiplex
|
||||||
"100.116.242.161:11434" # Raspberry Pi
|
# "100.116.242.161:11434" # Raspberry Pi
|
||||||
]
|
# ]
|
||||||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||||
for host in try_hosts:
|
for host in try_hosts.keys():
|
||||||
try:
|
try:
|
||||||
response = await client.get(
|
response = await client.get(
|
||||||
f"http://{host}/api/tags",
|
f"http://{host}/api/tags",
|
||||||
|
@ -1883,15 +1889,32 @@ class OtherCog(commands.Cog):
|
||||||
else:
|
else:
|
||||||
return await ctx.reply(":x: No servers available.")
|
return await ctx.reply(":x: No servers available.")
|
||||||
|
|
||||||
msg = await ctx.reply(f"Preparing [{model!r}](http://{host}) <a:loading:1101463077586735174>")
|
embed = discord.Embed(
|
||||||
|
colour=discord.Colour.greyple()
|
||||||
|
)
|
||||||
|
embed.set_author(
|
||||||
|
name=f"Loading {model}",
|
||||||
|
url=f"http://{host}",
|
||||||
|
icon_url="https://cdn.discordapp.com/emojis/1101463077586735174.gif"
|
||||||
|
)
|
||||||
|
embed.set_footer(text="Using server {} ({})".format(host, try_hosts.get(host, "Other")))
|
||||||
|
|
||||||
|
msg = await ctx.reply(embed=embed)
|
||||||
async with httpx.AsyncClient(base_url=f"http://{host}/api", follow_redirects=True) as client:
|
async with httpx.AsyncClient(base_url=f"http://{host}/api", follow_redirects=True) as client:
|
||||||
# get models
|
# get models
|
||||||
try:
|
try:
|
||||||
response = await client.post("/show", json={"name": model})
|
response = await client.post("/show", json={"name": model})
|
||||||
except httpx.TransportError as e:
|
except httpx.TransportError as e:
|
||||||
return await msg.edit(content="Failed to connect to Ollama: `%s`" % e)
|
embed = discord.Embed(
|
||||||
|
title="Failed to connect to Ollama.",
|
||||||
|
description=str(e),
|
||||||
|
colour=discord.Colour.red()
|
||||||
|
)
|
||||||
|
embed.set_footer(text="Using server {} ({})".format(host, try_hosts.get(host, "Other")))
|
||||||
|
return await msg.edit(embed=embed)
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
await msg.edit(content=f"Downloading model {model}, please wait.")
|
embed.title = f"Downloading {model}"
|
||||||
|
await msg.edit(embed=embed)
|
||||||
async with ctx.channel.typing():
|
async with ctx.channel.typing():
|
||||||
async with client.stream(
|
async with client.stream(
|
||||||
"POST",
|
"POST",
|
||||||
|
@ -1901,10 +1924,15 @@ class OtherCog(commands.Cog):
|
||||||
) as response:
|
) as response:
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
error = await response.aread()
|
error = await response.aread()
|
||||||
return await msg.edit(content="Failed to download model: `%s`" % error.decode())
|
embed = discord.Embed(
|
||||||
progresses = [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99, 100]
|
title=f"Failed to download model {model}:",
|
||||||
|
description=f"HTTP {response.status_code}:\n```{error or '<no body>'}\n```",
|
||||||
|
colour=discord.Colour.red()
|
||||||
|
)
|
||||||
|
embed.set_footer(text="Using server {} ({})".format(host, try_hosts.get(host, "Other")))
|
||||||
|
return await msg.edit(embed=embed)
|
||||||
|
lines: dict[str, str] = {}
|
||||||
async for chunk in ollama_stream_reader(response):
|
async for chunk in ollama_stream_reader(response):
|
||||||
print(chunk)
|
|
||||||
if "total" in chunk and "completed" in chunk:
|
if "total" in chunk and "completed" in chunk:
|
||||||
completed = chunk["completed"] or 1 # avoid division by zero
|
completed = chunk["completed"] or 1 # avoid division by zero
|
||||||
total = chunk["total"] or 1
|
total = chunk["total"] or 1
|
||||||
|
@ -1913,29 +1941,37 @@ class OtherCog(commands.Cog):
|
||||||
percent = round(completed / total * 100, 2)
|
percent = round(completed / total * 100, 2)
|
||||||
total_gigabytes = total / 1024 / 1024 / 1024
|
total_gigabytes = total / 1024 / 1024 / 1024
|
||||||
completed_gigabytes = completed / 1024 / 1024 / 1024
|
completed_gigabytes = completed / 1024 / 1024 / 1024
|
||||||
if percent in progresses:
|
lines[chunk["status"]] = (f"{percent}% "
|
||||||
await msg.edit(
|
f"({completed_gigabytes:.2f}GB/{total_gigabytes:.2f}GB)")
|
||||||
content=f"`{chunk['status']}` - {percent}% "
|
|
||||||
f"({completed_gigabytes:,.2f}GB/{total_gigabytes:,.2f}GB)"
|
|
||||||
)
|
|
||||||
progresses.pop()
|
|
||||||
else:
|
else:
|
||||||
await msg.edit(content=f"`{chunk['status']}`")
|
lines[chunk["status"]] = chunk["status"]
|
||||||
await msg.edit(content=f"Downloaded model {model}.")
|
|
||||||
|
embed.description = "\n".join(f"`{k}`: {v}" for k, v in lines.items())
|
||||||
|
if (time() - msg.created_at.timestamp()) >= 5:
|
||||||
|
await msg.edit(embed=embed)
|
||||||
|
embed.title = f"Downloaded {model}!"
|
||||||
|
embed.colour = discord.Colour.green()
|
||||||
|
await msg.edit(embed=embed)
|
||||||
while (await client.post("/show", json={"name": model})).status_code != 200:
|
while (await client.post("/show", json={"name": model})).status_code != 200:
|
||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
elif response.status_code != 200:
|
elif response.status_code != 200:
|
||||||
error = await response.aread()
|
error = await response.aread()
|
||||||
return await msg.edit(content="Failed to get model: `%s`" % error.decode())
|
embed = discord.Embed(
|
||||||
|
title=f"Failed to download model {model}:",
|
||||||
|
description=f"HTTP {response.status_code}:\n```{error or '<no body>'}\n```",
|
||||||
|
colour=discord.Colour.red()
|
||||||
|
)
|
||||||
|
embed.set_footer(text="Using server {} ({})".format(host, try_hosts.get(host, "Other")))
|
||||||
|
return await msg.edit(embed=embed)
|
||||||
|
|
||||||
output = discord.Embed(
|
embed = discord.Embed(
|
||||||
title=f"{model} says:",
|
title=f"{model} says:",
|
||||||
description="",
|
description="",
|
||||||
colour=discord.Colour.blurple(),
|
colour=discord.Colour.blurple(),
|
||||||
timestamp=discord.utils.utcnow()
|
timestamp=discord.utils.utcnow()
|
||||||
)
|
)
|
||||||
output.set_footer(text=f"Powered by Ollama @ {host}")
|
embed.set_footer(text=f"Powered by Ollama • {host} ({try_hosts.get(host, 'Other')})")
|
||||||
await msg.edit(embed=output)
|
await msg.edit(embed=embed)
|
||||||
async with ctx.channel.typing():
|
async with ctx.channel.typing():
|
||||||
with open("./assets/ollama-prompt.txt") as file:
|
with open("./assets/ollama-prompt.txt") as file:
|
||||||
system_prompt = file.read().replace("\n", " ").strip()
|
system_prompt = file.read().replace("\n", " ").strip()
|
||||||
|
@ -1953,7 +1989,13 @@ class OtherCog(commands.Cog):
|
||||||
) as response:
|
) as response:
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
error = await response.aread()
|
error = await response.aread()
|
||||||
return await msg.edit(content="Failed to generate text: `%s`" % error.decode())
|
embed = discord.Embed(
|
||||||
|
title=f"Failed to generate response from {model}:",
|
||||||
|
description=f"HTTP {response.status_code}:\n```{error or '<no body>'}\n```",
|
||||||
|
colour=discord.Colour.red()
|
||||||
|
)
|
||||||
|
embed.set_footer(text="Using server {} ({})".format(host, try_hosts.get(host, "Other")))
|
||||||
|
return await msg.edit(embed=embed)
|
||||||
self.ollama_locks[msg] = asyncio.Event()
|
self.ollama_locks[msg] = asyncio.Event()
|
||||||
view = self.OllamaKillSwitchView(ctx, msg)
|
view = self.OllamaKillSwitchView(ctx, msg)
|
||||||
await msg.edit(view=view)
|
await msg.edit(view=view)
|
||||||
|
@ -1961,21 +2003,35 @@ class OtherCog(commands.Cog):
|
||||||
if "done" not in chunk.keys() or "response" not in chunk.keys():
|
if "done" not in chunk.keys() or "response" not in chunk.keys():
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
content = "Response is still being generated..."
|
|
||||||
if chunk["done"] is True:
|
if chunk["done"] is True:
|
||||||
content = None
|
content = None
|
||||||
output.description += chunk["response"]
|
embed.remove_author()
|
||||||
|
else:
|
||||||
|
embed.set_author(
|
||||||
|
name=f"Generating response with {model}",
|
||||||
|
url=f"http://{host}",
|
||||||
|
icon_url="https://cdn.discordapp.com/emojis/1101463077586735174.gif"
|
||||||
|
)
|
||||||
|
embed.description += chunk["response"]
|
||||||
last_edit = msg.edited_at.timestamp() if msg.edited_at else msg.created_at.timestamp()
|
last_edit = msg.edited_at.timestamp() if msg.edited_at else msg.created_at.timestamp()
|
||||||
if (time() - last_edit) >= 5 or chunk["done"] is True:
|
if (time() - last_edit) >= 5 or chunk["done"] is True:
|
||||||
await msg.edit(content=content, embed=output, view=view)
|
await msg.edit(content=content, embed=embed, view=view)
|
||||||
if self.ollama_locks[msg].is_set():
|
if self.ollama_locks[msg].is_set():
|
||||||
return await msg.edit(content="Aborted.", embed=output, view=None)
|
embed.title = embed.title[:-1] + " (Aborted)"
|
||||||
if len(output.description) >= 4000:
|
embed.colour = discord.Colour.red()
|
||||||
output.add_field(
|
return await msg.edit(embed=embed, view=None)
|
||||||
|
if len(embed.description) >= 4000:
|
||||||
|
embed.add_field(
|
||||||
name="Aborting early",
|
name="Aborting early",
|
||||||
value="Output exceeded 4000 characters."
|
value="Output exceeded 4000 characters."
|
||||||
)
|
)
|
||||||
|
embed.title = embed.title[:-1] + " (Aborted)"
|
||||||
|
embed.colour = discord.Colour.red()
|
||||||
|
embed.description = embed.description[:4096]
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
embed.colour = discord.Colour.green()
|
||||||
|
embed.remove_author()
|
||||||
|
|
||||||
def get_time_spent(nanoseconds: int) -> str:
|
def get_time_spent(nanoseconds: int) -> str:
|
||||||
hours, minutes, seconds = 0, 0, 0
|
hours, minutes, seconds = 0, 0, 0
|
||||||
|
@ -2008,16 +2064,25 @@ class OtherCog(commands.Cog):
|
||||||
|
|
||||||
total_time_spent = get_time_spent(chunk["total_duration"])
|
total_time_spent = get_time_spent(chunk["total_duration"])
|
||||||
eval_time_spent = get_time_spent(chunk["eval_duration"])
|
eval_time_spent = get_time_spent(chunk["eval_duration"])
|
||||||
tokens_per_second = chunk["eval_count"] / chunk["eval_duration"]
|
load_time_spent = get_time_spent(chunk["load_duration"])
|
||||||
output.add_field(
|
sample_time_sent = get_time_spent(chunk["sample_duration"])
|
||||||
name="Timings",
|
prompt_eval_time_spent = get_time_spent(chunk["prompt_eval_duration"])
|
||||||
value="Total: {}\nEval: {} ({:,.2f}/s)".format(
|
value = ("* Total: {}\n"
|
||||||
|
"* Model load: {}\n"
|
||||||
|
"* Sample generation: {}\n"
|
||||||
|
"* Prompt eval: {}\n"
|
||||||
|
"* Response generation: {}").format(
|
||||||
total_time_spent,
|
total_time_spent,
|
||||||
eval_time_spent,
|
load_time_spent,
|
||||||
tokens_per_second
|
sample_time_sent,
|
||||||
),
|
prompt_eval_time_spent,
|
||||||
|
eval_time_spent
|
||||||
)
|
)
|
||||||
await msg.edit(content=None, embed=output, view=None)
|
embed.add_field(
|
||||||
|
name="Timings",
|
||||||
|
value=value
|
||||||
|
)
|
||||||
|
await msg.edit(content=None, embed=embed, view=None)
|
||||||
self.ollama_locks.pop(msg, None)
|
self.ollama_locks.pop(msg, None)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue