add PS command
All checks were successful
Build and Publish / build_and_publish (push) Successful in 48s
All checks were successful
Build and Publish / build_and_publish (push) Successful in 48s
This commit is contained in:
parent
3c61504cb3
commit
e32d866ad4
1 changed files with 63 additions and 3 deletions
|
@ -1,4 +1,5 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import datetime
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
@ -66,9 +67,7 @@ class Chat(commands.Cog):
|
||||||
# Parent slash-command group: every /ollama subcommand in this cog hangs off this.
ollama_group = discord.SlashCommandGroup(
    name="ollama",
    description="Commands related to ollama.",
    guild_only=True,
    # At most one in-flight invocation per user; wait=False rejects extras
    # instead of queueing them.
    max_concurrency=commands.MaxConcurrency(1, per=commands.BucketType.user, wait=False),
    # Rate limit: 1 use per 10 seconds, tracked per user.
    cooldown=commands.CooldownMapping(commands.Cooldown(1, 10), commands.BucketType.user)
)
|
||||||
|
|
||||||
@ollama_group.command()
|
@ollama_group.command()
|
||||||
|
@ -495,6 +494,67 @@ class Chat(commands.Cog):
|
||||||
embed.description = f"Downloaded {model} on {server}."
|
embed.description = f"Downloaded {model} on {server}."
|
||||||
await ctx.edit(embed=embed, delete_after=30, view=None)
|
await ctx.edit(embed=embed, delete_after=30, view=None)
|
||||||
|
|
||||||
|
@ollama_group.command(name="ps")
async def ollama_proc_list(
    self,
    ctx: discord.ApplicationContext,
    server: typing.Annotated[
        str,
        discord.Option(
            discord.SlashCommandOptionType.string,
            description="The server to use.",
            autocomplete=_ServerOptionAutocomplete,
            # NOTE(review): evaluated once at class-definition time, so the
            # default is pinned to whichever server was first when the cog
            # module loaded — confirm that is intended.
            default=get_servers()[0].name
        )
    ]
):
    """Checks the loaded models on the target server.

    Responds with one embed field per loaded model, showing total size,
    VRAM/RAM split, and when ollama will unload it.
    """
    await ctx.defer()
    # Resolve the server name into a server object (rebinds the parameter).
    server = get_server(server)
    if not server:
        return await ctx.respond("\N{cross mark} Unknown server.")
    elif not await server.is_online():
        return await ctx.respond(f"\N{cross mark} Server {server.name!r} is not responding")

    async with ollama_client(str(server.base_url)) as client:
        response = (await client.ps())["models"]

    if not response:
        embed = discord.Embed(
            title=f"No models loaded on {server}.",
            color=discord.Color.blurple()
        )
        # BUG FIX: the original fell through here and sent a second, empty
        # "Models loaded" embed; return after reporting the empty state.
        return await ctx.respond(embed=embed)

    embed = discord.Embed(
        title=f"Models loaded on {server}",
        color=discord.Color.blurple()
    )
    # Discord embeds allow at most 25 fields, so truncate the model list.
    for model in response[:25]:
        size = naturalsize(model["size"], binary=True)
        size_vram = naturalsize(model["size_vram"], binary=True)
        # Whatever isn't resident in VRAM is assumed to sit in system RAM.
        size_ram = naturalsize(model["size"] - model["size_vram"], binary=True)
        percent_in_vram = round(model["size_vram"] / model["size"] * 100)
        percent_in_ram = 100 - percent_in_vram
        expires = datetime.datetime.fromisoformat(model["expires_at"])
        lines = [
            f"* Size: {size}",
            f"* Unloaded: {discord.utils.format_dt(expires, style='R')}",
        ]
        if percent_in_ram > 0:
            # Model is split across VRAM and RAM — show the breakdown.
            lines.extend(
                [
                    f"* VRAM/RAM: {percent_in_vram}%/{percent_in_ram}%",
                    f"* VRAM Size: {size_vram}",
                    f"* RAM Size: {size_ram}"
                ]
            )
        else:
            # Entirely resident in VRAM.
            lines.append(f"* VRAM Size: {size_vram} (100%)")
        embed.add_field(
            name=model["model"],
            value="\n".join(lines),
            inline=False
        )
    await ctx.respond(embed=embed)
|
||||||
|
|
||||||
|
|
||||||
def setup(bot):
    """Extension entry point: called by the bot's extension loader to register the Chat cog."""
    bot.add_cog(Chat(bot))
|
||||||
|
|
Loading…
Reference in a new issue