add PS command
All checks were successful
Build and Publish / build_and_publish (push) Successful in 48s

This commit is contained in:
Nexus 2024-06-11 01:37:20 +01:00
parent 3c61504cb3
commit e32d866ad4
Signed by: nex
GPG key ID: 0FA334385D0B689F

View file

@@ -1,4 +1,5 @@
import asyncio import asyncio
import datetime
import io import io
import logging import logging
import time import time
@@ -66,9 +67,7 @@ class Chat(commands.Cog):
# Top-level slash-command group for all /ollama subcommands.
# (Per-user concurrency and cooldown limits were removed in this commit.)
ollama_group = discord.SlashCommandGroup(
    name="ollama",
    description="Commands related to ollama.",
    guild_only=True,
)
@ollama_group.command() @ollama_group.command()
@@ -495,6 +494,67 @@ class Chat(commands.Cog):
embed.description = f"Downloaded {model} on {server}." embed.description = f"Downloaded {model} on {server}."
await ctx.edit(embed=embed, delete_after=30, view=None) await ctx.edit(embed=embed, delete_after=30, view=None)
@ollama_group.command(name="ps")
async def ollama_proc_list(
    self,
    ctx: discord.ApplicationContext,
    server: typing.Annotated[
        str,
        discord.Option(
            discord.SlashCommandOptionType.string,
            description="The server to use.",
            autocomplete=_ServerOptionAutocomplete,
            default=get_servers()[0].name
        )
    ]
):
    """Checks the loaded models on the target server.

    Mirrors `ollama ps`: for each loaded model, shows its total size, when
    it will be unloaded, and how much of it sits in VRAM vs. system RAM.
    """
    await ctx.defer()
    server = get_server(server)
    if not server:
        return await ctx.respond("\N{cross mark} Unknown server.")
    elif not await server.is_online():
        return await ctx.respond(f"\N{cross mark} Server {server.name!r} is not responding")
    async with ollama_client(str(server.base_url)) as client:
        response = (await client.ps())["models"]
    if not response:
        embed = discord.Embed(
            title=f"No models loaded on {server}.",
            color=discord.Color.blurple()
        )
        # FIX: return here — previously this fell through and sent a second,
        # empty "Models loaded" embed after the "No models loaded" one.
        return await ctx.respond(embed=embed)
    embed = discord.Embed(
        title=f"Models loaded on {server}",
        color=discord.Color.blurple()
    )
    # Discord embeds cap out at 25 fields, so only list the first 25 models.
    for model in response[:25]:
        size = naturalsize(model["size"], binary=True)
        size_vram = naturalsize(model["size_vram"], binary=True)
        size_ram = naturalsize(model["size"] - model["size_vram"], binary=True)
        percent_in_vram = round(model["size_vram"] / model["size"] * 100)
        percent_in_ram = 100 - percent_in_vram
        # expires_at is an ISO-8601 timestamp from the ollama API.
        expires = datetime.datetime.fromisoformat(model["expires_at"])
        lines = [
            f"* Size: {size}",
            f"* Unloaded: {discord.utils.format_dt(expires, style='R')}",
        ]
        if percent_in_ram > 0:
            # Model is split between VRAM and system RAM.
            lines.extend(
                [
                    f"* VRAM/RAM: {percent_in_vram}%/{percent_in_ram}%",
                    f"* VRAM Size: {size_vram}",
                    f"* RAM Size: {size_ram}"
                ]
            )
        else:
            lines.append(f"* VRAM Size: {size_vram} (100%)")
        embed.add_field(
            name=model["model"],
            value="\n".join(lines),
            inline=False
        )
    await ctx.respond(embed=embed)
def setup(bot):
    """Extension entry point: register the Chat cog with the bot."""
    bot.add_cog(Chat(bot))