add PS command
All checks were successful
Build and Publish / build_and_publish (push) Successful in 48s

This commit is contained in:
Nexus 2024-06-11 01:37:20 +01:00
parent 3c61504cb3
commit e32d866ad4
Signed by: nex
GPG key ID: 0FA334385D0B689F

View file

@@ -1,4 +1,5 @@
import asyncio import asyncio
import datetime
import io import io
import logging import logging
import time import time
@@ -66,9 +67,7 @@ class Chat(commands.Cog):
# Top-level slash-command group for all /ollama subcommands.
# (Per-user concurrency and cooldown limits were removed in this commit.)
ollama_group = discord.SlashCommandGroup(
    name="ollama",
    description="Commands related to ollama.",
    guild_only=True,
)
@ollama_group.command() @ollama_group.command()
@@ -495,6 +494,67 @@ class Chat(commands.Cog):
embed.description = f"Downloaded {model} on {server}." embed.description = f"Downloaded {model} on {server}."
await ctx.edit(embed=embed, delete_after=30, view=None) await ctx.edit(embed=embed, delete_after=30, view=None)
@ollama_group.command(name="ps")
async def ollama_proc_list(
    self,
    ctx: discord.ApplicationContext,
    server: typing.Annotated[
        str,
        discord.Option(
            discord.SlashCommandOptionType.string,
            description="The server to use.",
            autocomplete=_ServerOptionAutocomplete,
            default=get_servers()[0].name
        )
    ]
):
    """Checks the loaded models on the target server.

    Mirrors `ollama ps`: for each loaded model, shows its total size, when
    it will be unloaded, and how much of it sits in VRAM vs. system RAM.
    """
    await ctx.defer()
    server = get_server(server)
    if not server:
        return await ctx.respond("\N{cross mark} Unknown server.")
    elif not await server.is_online():
        return await ctx.respond(f"\N{cross mark} Server {server.name!r} is not responding")
    async with ollama_client(str(server.base_url)) as client:
        response = (await client.ps())["models"]
    if not response:
        embed = discord.Embed(
            title=f"No models loaded on {server}.",
            color=discord.Color.blurple()
        )
        # FIX: return here — previously this fell through and sent a second,
        # empty "Models loaded" embed after the "No models loaded" one.
        return await ctx.respond(embed=embed)
    embed = discord.Embed(
        title=f"Models loaded on {server}",
        color=discord.Color.blurple()
    )
    # Discord embeds cap out at 25 fields, so only list the first 25 models.
    for model in response[:25]:
        size = naturalsize(model["size"], binary=True)
        size_vram = naturalsize(model["size_vram"], binary=True)
        size_ram = naturalsize(model["size"] - model["size_vram"], binary=True)
        percent_in_vram = round(model["size_vram"] / model["size"] * 100)
        percent_in_ram = 100 - percent_in_vram
        # expires_at is an ISO-8601 timestamp from the ollama API.
        expires = datetime.datetime.fromisoformat(model["expires_at"])
        lines = [
            f"* Size: {size}",
            f"* Unloaded: {discord.utils.format_dt(expires, style='R')}",
        ]
        if percent_in_ram > 0:
            # Model is split between VRAM and system RAM.
            lines.extend(
                [
                    f"* VRAM/RAM: {percent_in_vram}%/{percent_in_ram}%",
                    f"* VRAM Size: {size_vram}",
                    f"* RAM Size: {size_ram}"
                ]
            )
        else:
            lines.append(f"* VRAM Size: {size_vram} (100%)")
        embed.add_field(
            name=model["model"],
            value="\n".join(lines),
            inline=False
        )
    await ctx.respond(embed=embed)
def setup(bot):
    """Extension entry point: register the Chat cog with the bot."""
    bot.add_cog(Chat(bot))