Add docker file

I **__HATE__** relative imports
Nexus 2024-06-10 16:56:12 +01:00
parent df41b2594f
commit 90c7293d14
Signed by: nex
GPG key ID: 0FA334385D0B689F
8 changed files with 114 additions and 29 deletions

.dockerignore Normal file (+4)

@@ -0,0 +1,4 @@
config.toml
**/config.toml
*.db
*.db-*

.gitignore vendored (3 changes)

@@ -282,5 +282,6 @@ pyrightconfig.json
# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,pycharm+all
.venv/
default.db
*.db
*.db-*
config.toml

Dockerfile Normal file (+8)

@@ -0,0 +1,8 @@
FROM python:3.12-alpine
WORKDIR /jimmy
RUN apk add --update --no-cache py3-pip py3-setuptools py3-wheel
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY ./jimmy/ /jimmy/
CMD ["python3", "main.py"]

README.md Normal file (+30)

@@ -0,0 +1,30 @@
# Sentient Jimmy
Another Ollama bot for Discord, designed to be self-hosted across a mesh of Ollama servers.
## Example config.toml
```toml
[bot]
token = "your-bot-token"
debug_guilds = [123456789] # omit for global commands
[ollama]
order = ["server1", "server2", "fallback"]
# ^ order of preference for Ollama servers. If server1 is offline, server2 will be tried, and so on
[ollama.server1]
base_url = "https://hosted.ollama.internal" # default port is 443, because HTTPS
gpu = true
vram_gb = 8
[ollama.server2]
base_url = "http://192.168.1.2:11434"
gpu = true
vram_gb = 4 # <8GB will enable "low VRAM mode" in ollama
[ollama.fallback]
base_url = "http://192.168.1.250:11434"
gpu = false
vram_gb = 32 # in the case of CPU Ollama, "vram" is actually just regular RAM.
```
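For illustration, the `order` list is what drives server preference. Below is a minimal sketch of reading this file with Python's standard `tomllib`; the file path and the printout are assumptions for this example only, since the bot loads its configuration through its own `config` module:

```python
import tomllib  # standard library in Python 3.11+; the Dockerfile image uses 3.12

# Path assumed for this sketch; the bot reads its own config.toml.
with open("config.toml", "rb") as fh:
    config = tomllib.load(fh)

# Servers are considered in the order given by [ollama].order.
for name in config["ollama"]["order"]:
    server = config["ollama"][name]
    print(f"{name}: {server['base_url']} (gpu={server['gpu']}, vram_gb={server['vram_gb']})")
```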

jimmy/cogs/chat.py

@@ -4,16 +4,15 @@ import logging
import time
import typing
import contextlib
from fnmatch import fnmatch
import discord
from discord import Interaction
from ollama import AsyncClient, ResponseError, Options
from discord.ext import commands
from jimmy.utils import async_ratio, create_ollama_message
from jimmy.config import get_servers, ServerConfig, get_server
from jimmy.db import OllamaThread
from humanize import naturalsize
from jimmy.utils import create_ollama_message, find_suitable_server, decorate_server_name as decorate_name
from jimmy.config import get_servers, get_server
from jimmy.db import OllamaThread
from humanize import naturalsize, naturaldelta
@contextlib.asynccontextmanager
@@ -66,11 +65,6 @@ class Chat(commands.Cog):
"""Checks the status on all servers."""
await ctx.defer()
def decorate_name(_s: ServerConfig):
if _s.gpu:
return f"{_s.name} (\u26A1)"
return _s.name
embed = discord.Embed(
title="Ollama Statuses:",
color=discord.Color.blurple()
@@ -164,18 +158,31 @@ class Chat(commands.Cog):
description="The thread ID to continue.",
default=None
)
],
temperature: typing.Annotated[
float,
discord.Option(
discord.SlashCommandOptionType.number,
description="The temperature to use.",
default=1.5,
min_value=0.0,
max_value=2.0
)
]
):
"""Have a chat with ollama"""
await ctx.defer()
server = get_server(server)
if not await server.is_online():
await ctx.respond(
content=f"{server} is offline. Finding a suitable server...",
)
try:
server = await find_suitable_server()
except ValueError as err:
return await ctx.edit(content=str(err), delete_after=30)
await ctx.delete(delay=5)
async with self.server_locks[server.name]:
if not await server.is_online():
await ctx.respond(
content=f"{server} is offline.",
delete_after=60
)
return
async with ollama_client(str(server.base_url)) as client:
client: AsyncClient
self.log.info("Checking if %r has the model %r", server, model)
@@ -203,7 +210,7 @@ class Chat(commands.Cog):
embed.colour = discord.Colour.red()
await ctx.edit(embed=embed)
return
self.log.info("Response from %r: %r", server, line)
self.log.debug("Response from %r: %r", server, line)
if line["status"] in {
"pulling manifest",
"verifying sha256 digest",
@@ -223,12 +230,14 @@ class Chat(commands.Cog):
last_completed = completed
last_completed_ts = time.time()
mbps = round((bytes_per_second * 8) / 1024 / 1024)
eta = (total - completed) / max(1, bytes_per_second)
progress_bar = f"[{pb_fill}{pb_empty}]"
ns_total = naturalsize(total, binary=True)
ns_completed = naturalsize(completed, binary=True)
embed.description = (
f"{line['status'].capitalize()} {percent}% {progress_bar} "
f"({ns_completed}/{ns_total} @ {mbps} Mb/s)"
f"({ns_completed}/{ns_total} @ {mbps} Mb/s) "
f"[ETA: {naturaldelta(eta)}]"
)
if time.time() - last_edit >= 2.5:
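The new ETA readout reuses the throughput figure behind the Mb/s number. A standalone restatement of that arithmetic with invented sample values, purely for illustration and not code from this commit:

```python
from humanize import naturaldelta, naturalsize

# Sample values, invented for illustration.
total = 4 * 1024**3              # 4 GiB model blob
completed = 1 * 1024**3          # 1 GiB pulled so far
bytes_per_second = 30 * 1024**2  # ~30 MiB/s measured between progress lines

mbps = round((bytes_per_second * 8) / 1024 / 1024)    # the Mb/s figure shown in the embed
eta = (total - completed) / max(1, bytes_per_second)  # seconds remaining; max() avoids dividing by zero

print(f"{naturalsize(completed, binary=True)}/{naturalsize(total, binary=True)} "
      f"@ {mbps} Mb/s [ETA: {naturaldelta(eta)}]")
```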
@@ -268,7 +277,11 @@ class Chat(commands.Cog):
if system_prompt:
messages.append(await create_ollama_message(system_prompt, role="system"))
messages.append(await create_ollama_message(prompt, images=[await image.read()] if image else None))
embed = discord.Embed(title=f"{model}:", description="")
embed = discord.Embed(description="")
embed.set_author(
name=f"{model} @ {decorate_name(server)!r}" if server.gpu else model,
icon_url="https://ollama.com/public/icon-64x64.png"
)
view = StopDownloadView(ctx)
msg = await ctx.respond(
embed=embed,
@@ -283,10 +296,11 @@ class Chat(commands.Cog):
options=Options(
num_ctx=4096,
low_vram=server.vram_gb < 8,
temperature=1.5
temperature=temperature
)
):
self.log.info("Response from %r: %r", server, response)
response: dict
self.log.debug("Response from %r: %r", server, response)
buffer.write(response["message"]["content"])
if len(buffer.getvalue()) > 4096:
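The streaming loop above buffers each chunk and checks Discord's 4096-character embed description limit; the hunk is cut off at that check, so the truncation below is only one possible handling, sketched for illustration rather than taken from this commit:

```python
import io

# Illustrative only: keep the visible description within Discord's
# 4096-character embed description limit while the stream continues.
buffer = io.StringIO()
for chunk in ["first streamed token ", "second ", "..."]:  # stands in for response["message"]["content"]
    buffer.write(chunk)
    text = buffer.getvalue()
    description = text if len(text) <= 4096 else text[:4093] + "..."
```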

jimmy/main.py

@@ -1,12 +1,11 @@
import os
import sys
import logging
import discord
import sys
from discord.ext import commands
from tortoise import Tortoise
sys.path.extend("..") # noqa: E402
from .config import get_config
from config import get_config
sys.path.extend([".", ".."])
log = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
@@ -24,7 +23,7 @@ class SentientJimmy(commands.Bot):
strip_after_prefix=True,
debug_guilds=get_config()["bot"].get("debug_guilds"),
)
self.load_extension("jimmy.cogs.chat")
self.load_extension("cogs.chat")
self.load_extension("jishaku")
async def start(self, token: str, *, reconnect: bool = True) -> None:
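With the Dockerfile's `WORKDIR /jimmy` and `CMD ["python3", "main.py"]`, the entrypoint's directory is already first on `sys.path`, so `config` and `cogs.chat` resolve as top-level modules, while `sys.path.extend([".", ".."])` keeps the `jimmy.*`-prefixed imports in the cogs working. A sketch of the layout this assumes, inferred from the imports in this diff rather than the repository tree:

```python
# Inferred layout inside the image (a sketch, not authoritative):
#
#   /jimmy/main.py       # entrypoint: python3 main.py
#   /jimmy/config.py     # imported as `config` (and as `jimmy.config` via "..")
#   /jimmy/db.py         # imported as `jimmy.db`
#   /jimmy/utils.py      # imported as `jimmy.utils`
#   /jimmy/cogs/chat.py  # loaded with self.load_extension("cogs.chat")
```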

jimmy/utils.py

@@ -4,13 +4,24 @@ from functools import partial
from fuzzywuzzy.fuzz import ratio
from ollama import Message
if typing.TYPE_CHECKING:
from .config import ServerConfig
__all__ = (
'async_ratio',
'create_ollama_message',
'find_suitable_server',
'decorate_server_name'
)
def decorate_server_name(_s: "ServerConfig") -> str:
if _s.gpu:
return f"{_s.name} (\u26A1)"
return _s.name
async def async_ratio(a: str, b: str) -> int:
"""
Wraps fuzzywuzzy ratio in an async function
@@ -45,3 +56,23 @@ async def create_ollama_message(
images=images
)
)
async def find_suitable_server(cpu_fallback: bool = True) -> "ServerConfig":
"""
Finds a suitable server to use for Ollama.
:param cpu_fallback: bool - whether to fall back to CPU servers if GPU servers are unavailable.
:return: ServerConfig - the server to use
"""
from .config import get_servers
servers = get_servers()
if not servers:
raise ValueError("No servers configured.")
for server in servers:
if cpu_fallback is False and server.gpu is False:
continue
if not await server.is_online():
continue
return server
raise ValueError("No servers available.")
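For reference, a small usage sketch of this helper; the `cpu_fallback=False` call only illustrates the flag, and the import path is an assumption that the package is importable the same way the cogs import it:

```python
import asyncio

from jimmy.utils import find_suitable_server  # import path assumed, as in the cogs above

async def pick() -> None:
    try:
        any_server = await find_suitable_server()                  # first online server, CPU allowed
        gpu_only = await find_suitable_server(cpu_fallback=False)  # skips servers with gpu = false
        print(any_server.name, gpu_only.name)
    except ValueError as err:
        print(err)  # "No servers configured." or "No servers available."

asyncio.run(pick())
```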

requirements.txt

@@ -1,8 +1,6 @@
py-cord~=2.5
ollama~=0.2
tortoise-orm[asyncpg]~=0.21
uvicorn[standard]~=0.30
fastapi~=0.111
jishaku~=2.5
fuzzywuzzy~=0.18
humanize~=4.9