Add docker file
I **__HATE__** relative imports
This commit is contained in:
parent
df41b2594f
commit
90c7293d14
8 changed files with 114 additions and 29 deletions
4
.dockerignore
Normal file
4
.dockerignore
Normal file
|
@ -0,0 +1,4 @@
|
|||
config.toml
|
||||
**/config.toml
|
||||
*.db
|
||||
*.db-*
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -282,5 +282,6 @@ pyrightconfig.json
|
|||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,pycharm+all
|
||||
.venv/
|
||||
default.db
|
||||
*.db
|
||||
*.db-*
|
||||
config.toml
|
||||
|
|
8
Dockerfile
Normal file
8
Dockerfile
Normal file
|
@ -0,0 +1,8 @@
|
|||
# Container image for the bot: installs the Python requirements, then copies the package in.
FROM python:3.12-alpine

WORKDIR /jimmy
# NOTE(review): the base image already ships its own pip; these Alpine packages
# presumably target the distro's Python instead - confirm they are still needed.
RUN apk add --update --no-cache py3-pip py3-setuptools py3-wheel
# Copy the requirements on their own first so the dependency layer is only
# rebuilt when requirements.txt changes, not on every source edit.
COPY requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
COPY ./jimmy/ /jimmy/
CMD ["python3", "main.py"]
|
30
README.md
Normal file
30
README.md
Normal file
|
@ -0,0 +1,30 @@
|
|||
# Sentient Jimmy
|
||||
|
||||
Another Ollama bot for Discord, this one designed for mesh self-hosting.
|
||||
|
||||
## Example config.toml
|
||||
|
||||
```toml
|
||||
[bot]
|
||||
token = "your-bot-token"
|
||||
debug_guilds = [123456789]  # omit for global commands
|
||||
|
||||
[ollama]
|
||||
order = ["server1", "server2", "fallback"]
|
||||
# ^ order of preference for Ollama servers. If server1 is offline, server2 will be tried, and so on
|
||||
|
||||
[ollama.server1]
|
||||
base_url = "https://hosted.ollama.internal" # default port is 443, because HTTPS
|
||||
gpu = true
|
||||
vram_gb = 8
|
||||
|
||||
[ollama.server2]
|
||||
base_url = "http://192.168.1.2:11434"
|
||||
gpu = true
|
||||
vram_gb = 4 # <8GB will enable "low VRAM mode" in ollama
|
||||
|
||||
[ollama.fallback]
|
||||
base_url = "http://192.168.1.250:11434"
|
||||
gpu = false
|
||||
vram_gb = 32 # in the case of CPU Ollama, "vram" is actually just regular RAM.
|
||||
```
|
|
@ -4,16 +4,15 @@ import logging
|
|||
import time
|
||||
import typing
|
||||
import contextlib
|
||||
from fnmatch import fnmatch
|
||||
|
||||
import discord
|
||||
from discord import Interaction
|
||||
from ollama import AsyncClient, ResponseError, Options
|
||||
from discord.ext import commands
|
||||
from jimmy.utils import async_ratio, create_ollama_message
|
||||
from jimmy.config import get_servers, ServerConfig, get_server
|
||||
from jimmy .utils import create_ollama_message, find_suitable_server, decorate_server_name as decorate_name
|
||||
from jimmy .config import get_servers, get_server
|
||||
from jimmy .db import OllamaThread
|
||||
from humanize import naturalsize
|
||||
from humanize import naturalsize, naturaldelta
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
|
||||
|
@ -66,11 +65,6 @@ class Chat(commands.Cog):
|
|||
"""Checks the status on all servers."""
|
||||
await ctx.defer()
|
||||
|
||||
def decorate_name(_s: ServerConfig):
|
||||
if _s.gpu:
|
||||
return f"{_s.name} (\u26A1)"
|
||||
return _s.name
|
||||
|
||||
embed = discord.Embed(
|
||||
title="Ollama Statuses:",
|
||||
color=discord.Color.blurple()
|
||||
|
@ -164,18 +158,31 @@ class Chat(commands.Cog):
|
|||
description="The thread ID to continue.",
|
||||
default=None
|
||||
)
|
||||
],
|
||||
temperature: typing.Annotated[
|
||||
float,
|
||||
discord.Option(
|
||||
discord.SlashCommandOptionType.number,
|
||||
description="The temperature to use.",
|
||||
default=1.5,
|
||||
min_value=0.0,
|
||||
max_value=2.0
|
||||
)
|
||||
]
|
||||
):
|
||||
"""Have a chat with ollama"""
|
||||
await ctx.defer()
|
||||
server = get_server(server)
|
||||
async with self.server_locks[server.name]:
|
||||
if not await server.is_online():
|
||||
await ctx.respond(
|
||||
content=f"{server} is offline.",
|
||||
delete_after=60
|
||||
content=f"{server} is offline. Finding a suitable server...",
|
||||
)
|
||||
return
|
||||
try:
|
||||
server = await find_suitable_server()
|
||||
except ValueError as err:
|
||||
return await ctx.edit(content=str(err), delete_after=30)
|
||||
await ctx.delete(delay=5)
|
||||
async with self.server_locks[server.name]:
|
||||
async with ollama_client(str(server.base_url)) as client:
|
||||
client: AsyncClient
|
||||
self.log.info("Checking if %r has the model %r", server, model)
|
||||
|
@ -203,7 +210,7 @@ class Chat(commands.Cog):
|
|||
embed.colour = discord.Colour.red()
|
||||
await ctx.edit(embed=embed)
|
||||
return
|
||||
self.log.info("Response from %r: %r", server, line)
|
||||
self.log.debug("Response from %r: %r", server, line)
|
||||
if line["status"] in {
|
||||
"pulling manifest",
|
||||
"verifying sha256 digest",
|
||||
|
@ -223,12 +230,14 @@ class Chat(commands.Cog):
|
|||
last_completed = completed
|
||||
last_completed_ts = time.time()
|
||||
mbps = round((bytes_per_second * 8) / 1024 / 1024)
|
||||
eta = (total - completed) / max(1, bytes_per_second)
|
||||
progress_bar = f"[{pb_fill}{pb_empty}]"
|
||||
ns_total = naturalsize(total, binary=True)
|
||||
ns_completed = naturalsize(completed, binary=True)
|
||||
embed.description = (
|
||||
f"{line['status'].capitalize()} {percent}% {progress_bar} "
|
||||
f"({ns_completed}/{ns_total} @ {mbps} Mb/s) "
|
||||
f"[ETA: {naturaldelta(eta)}]"
|
||||
)
|
||||
|
||||
if time.time() - last_edit >= 2.5:
|
||||
|
@ -268,7 +277,11 @@ class Chat(commands.Cog):
|
|||
if system_prompt:
|
||||
messages.append(await create_ollama_message(system_prompt, role="system"))
|
||||
messages.append(await create_ollama_message(prompt, images=[await image.read()] if image else None))
|
||||
embed = discord.Embed(title=f"{model}:", description="")
|
||||
embed = discord.Embed(description="")
|
||||
embed.set_author(
|
||||
name=f"{model} @ {decorate_name(server)!r}" if server.gpu else model,
|
||||
icon_url="https://ollama.com/public/icon-64x64.png"
|
||||
)
|
||||
view = StopDownloadView(ctx)
|
||||
msg = await ctx.respond(
|
||||
embed=embed,
|
||||
|
@ -283,10 +296,11 @@ class Chat(commands.Cog):
|
|||
options=Options(
|
||||
num_ctx=4096,
|
||||
low_vram=server.vram_gb < 8,
|
||||
temperature=1.5
|
||||
temperature=temperature
|
||||
)
|
||||
):
|
||||
self.log.info("Response from %r: %r", server, response)
|
||||
response: dict
|
||||
self.log.debug("Response from %r: %r", server, response)
|
||||
buffer.write(response["message"]["content"])
|
||||
|
||||
if len(buffer.getvalue()) > 4096:
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
import os
|
||||
import sys
|
||||
import logging
|
||||
import discord
|
||||
import sys
|
||||
from discord.ext import commands
|
||||
from tortoise import Tortoise
|
||||
sys.path.extend("..") # noqa: E402
|
||||
from .config import get_config
|
||||
|
||||
from config import get_config
|
||||
sys.path.extend([".", ".."])
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
@ -24,7 +23,7 @@ class SentientJimmy(commands.Bot):
|
|||
strip_after_prefix=True,
|
||||
debug_guilds=get_config()["bot"].get("debug_guilds"),
|
||||
)
|
||||
self.load_extension("jimmy.cogs.chat")
|
||||
self.load_extension("cogs.chat")
|
||||
self.load_extension("jishaku")
|
||||
|
||||
async def start(self, token: str, *, reconnect: bool = True) -> None:
|
||||
|
|
|
@ -4,13 +4,24 @@ from functools import partial
|
|||
from fuzzywuzzy.fuzz import ratio
|
||||
from ollama import Message
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from .config import ServerConfig
|
||||
|
||||
|
||||
__all__ = (
|
||||
'async_ratio',
|
||||
'create_ollama_message',
|
||||
'find_suitable_server',
|
||||
'decorate_server_name'
|
||||
)
|
||||
|
||||
|
||||
def decorate_server_name(_s: "ServerConfig") -> str:
    """
    Builds the display name for a server.

    :param _s: ServerConfig - the server whose name should be rendered.
    :return: str - the server's name, followed by " (\u26A1)" when ``_s.gpu`` is truthy.
    """
    return f"{_s.name} (\u26A1)" if _s.gpu else _s.name
|
||||
|
||||
|
||||
async def async_ratio(a: str, b: str) -> int:
|
||||
"""
|
||||
Wraps fuzzywuzzy ratio in an async function
|
||||
|
@ -45,3 +56,23 @@ async def create_ollama_message(
|
|||
images=images
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
async def find_suitable_server(cpu_fallback: bool = True) -> "ServerConfig":
    """
    Picks the first usable Ollama server.

    Servers are tried in the order ``get_servers()`` returns them, and the first
    one that reports itself online is returned.

    :param cpu_fallback: bool - when False, servers whose ``gpu`` is False are skipped.
    :return: ServerConfig - the first eligible server that is online
    :raises ValueError: if no servers are configured, or no eligible server is online.
    """
    # Imported at call time rather than module level
    # (presumably to avoid a circular import with .config - confirm).
    from .config import get_servers

    candidates = get_servers()
    if not candidates:
        raise ValueError("No servers configured.")

    gpu_only = cpu_fallback is False
    for candidate in candidates:
        if gpu_only and candidate.gpu is False:
            # CPU-only server, and the caller asked for GPU servers only.
            continue
        if await candidate.is_online():
            return candidate
    raise ValueError("No servers available.")
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
py-cord~=2.5
|
||||
ollama~=0.2
|
||||
tortoise-orm[asyncpg]~=0.21
|
||||
uvicorn[standard]~=0.30
|
||||
fastapi~=0.111
|
||||
jishaku~=2.5
|
||||
fuzzywuzzy~=0.18
|
||||
humanize~=4.9
|
||||
|
|
Loading…
Reference in a new issue