diff --git a/jimmy/cogs/chat.py b/jimmy/cogs/chat.py
index c345dca..77adc4a 100644
--- a/jimmy/cogs/chat.py
+++ b/jimmy/cogs/chat.py
@@ -1,4 +1,5 @@
 import asyncio
+import datetime
 import io
 import logging
 import time
@@ -66,9 +67,7 @@ class Chat(commands.Cog):
     ollama_group = discord.SlashCommandGroup(
         name="ollama",
         description="Commands related to ollama.",
-        guild_only=True,
-        max_concurrency=commands.MaxConcurrency(1, per=commands.BucketType.user, wait=False),
-        cooldown=commands.CooldownMapping(commands.Cooldown(1, 10), commands.BucketType.user)
+        guild_only=True
     )
 
     @ollama_group.command()
@@ -495,6 +494,67 @@ class Chat(commands.Cog):
         embed.description = f"Downloaded {model} on {server}."
         await ctx.edit(embed=embed, delete_after=30, view=None)
 
+    @ollama_group.command(name="ps")
+    async def ollama_proc_list(
+        self,
+        ctx: discord.ApplicationContext,
+        server: typing.Annotated[
+            str,
+            discord.Option(
+                discord.SlashCommandOptionType.string,
+                description="The server to use.",
+                autocomplete=_ServerOptionAutocomplete,
+                default=get_servers()[0].name
+            )
+        ]
+    ):
+        """Checks the loaded models on the target server"""
+        await ctx.defer()
+        server = get_server(server)
+        if not server:
+            return await ctx.respond("\N{cross mark} Unknown server.")
+        elif not await server.is_online():
+            return await ctx.respond(f"\N{cross mark} Server {server.name!r} is not responding")
+        async with ollama_client(str(server.base_url)) as client:
+            response = (await client.ps())["models"]
+        if not response:
+            embed = discord.Embed(
+                title=f"No models loaded on {server}.",
+                color=discord.Color.blurple()
+            )
+            return await ctx.respond(embed=embed)
+        embed = discord.Embed(
+            title=f"Models loaded on {server}",
+            color=discord.Color.blurple()
+        )
+        for model in response[:25]:
+            size = naturalsize(model["size"], binary=True)
+            size_vram = naturalsize(model["size_vram"], binary=True)
+            size_ram = naturalsize(model["size"] - model["size_vram"], binary=True)
+            percent_in_vram = round(model["size_vram"] / model["size"] * 100)
+            percent_in_ram = 100 - percent_in_vram
+            expires = datetime.datetime.fromisoformat(model["expires_at"])
+            lines = [
+                f"* Size: {size}",
+                f"* Unloaded: {discord.utils.format_dt(expires, style='R')}",
+            ]
+            if percent_in_ram > 0:
+                lines.extend(
+                    [
+                        f"* VRAM/RAM: {percent_in_vram}%/{percent_in_ram}%",
+                        f"* VRAM Size: {size_vram}",
+                        f"* RAM Size: {size_ram}"
+                    ]
+                )
+            else:
+                lines.append(f"* VRAM Size: {size_vram} (100%)")
+            embed.add_field(
+                name=model["model"],
+                value="\n".join(lines),
+                inline=False
+            )
+        await ctx.respond(embed=embed)
 
 def setup(bot):
     bot.add_cog(Chat(bot))