Add docker file
I **__HATE__** relative imports
parent df41b2594f
commit 90c7293d14
8 changed files with 114 additions and 29 deletions
4  .dockerignore  Normal file

@@ -0,0 +1,4 @@
+config.toml
+**/config.toml
+*.db
+*.db-*
3  .gitignore  vendored

@@ -282,5 +282,6 @@ pyrightconfig.json
 
 # End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,pycharm+all
 .venv/
-default.db
+*.db
+*.db-*
 config.toml
8  Dockerfile  Normal file

@@ -0,0 +1,8 @@
+FROM python:3.12-alpine
+
+WORKDIR /jimmy
+RUN apk add --update --no-cache py3-pip py3-setuptools py3-wheel
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+COPY ./jimmy/ /jimmy/
+CMD ["python3", "main.py"]
30  README.md  Normal file

@@ -0,0 +1,30 @@
+# Sentient Jimmy
+
+Another Ollama bot for Discord, but designed for mesh self-hosting.
+
+## Example config.toml
+
+```toml
+[bot]
+token = "your-bot-token"
+debug_guilds = [0123456789] # omit for global commands
+
+[ollama]
+order = ["server1", "server2", "fallback"]
+# ^ order of preference for Ollama servers. If server1 is offline, server2 will be tried, and so on
+
+[ollama.server1]
+base_url = "https://hosted.ollama.internal" # default port is 443, because HTTPS
+gpu = true
+vram_gb = 8
+
+[ollama.server2]
+base_url = "http://192.168.1.2:11434"
+gpu = true
+vram_gb = 4 # <8GB will enable "low VRAM mode" in ollama
+
+[ollama.fallback]
+base_url = "http://192.168.1.250:11434"
+gpu = false
+vram_gb = 32 # in the case of CPU Ollama, "vram" is actually just regular RAM.
+```
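The `order` list above is what decides which Ollama server gets tried first. A rough sketch of how that preference list could be resolved from `config.toml` (the project's real `get_config`/`get_servers` helpers are not shown in this diff, so the function below is illustrative only):

```python
# Illustrative only: resolves the [ollama] "order" preference list from config.toml.
# The project's actual config module is not part of this diff; tomllib (Python 3.11+) is assumed.
import tomllib


def ordered_servers(path: str = "config.toml") -> list[dict]:
    with open(path, "rb") as fh:
        config = tomllib.load(fh)
    ollama = config["ollama"]
    # "order" names the server tables by preference; look each one up in turn.
    return [{"name": name, **ollama[name]} for name in ollama["order"]]
```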
@@ -4,16 +4,15 @@ import logging
 import time
 import typing
 import contextlib
-from fnmatch import fnmatch
 
 import discord
 from discord import Interaction
 from ollama import AsyncClient, ResponseError, Options
 from discord.ext import commands
-from jimmy.utils import async_ratio, create_ollama_message
-from jimmy.config import get_servers, ServerConfig, get_server
+from jimmy.utils import create_ollama_message, find_suitable_server, decorate_server_name as decorate_name
+from jimmy.config import get_servers, get_server
 from jimmy.db import OllamaThread
-from humanize import naturalsize
+from humanize import naturalsize, naturaldelta
 
 
 @contextlib.asynccontextmanager
@@ -66,11 +65,6 @@ class Chat(commands.Cog):
         """Checks the status on all servers."""
         await ctx.defer()
 
-        def decorate_name(_s: ServerConfig):
-            if _s.gpu:
-                return f"{_s.name} (\u26A1)"
-            return _s.name
-
         embed = discord.Embed(
             title="Ollama Statuses:",
             color=discord.Color.blurple()
@@ -164,18 +158,31 @@ class Chat(commands.Cog):
                 description="The thread ID to continue.",
                 default=None
             )
+        ],
+        temperature: typing.Annotated[
+            float,
+            discord.Option(
+                discord.SlashCommandOptionType.number,
+                description="The temperature to use.",
+                default=1.5,
+                min_value=0.0,
+                max_value=2.0
+            )
         ]
     ):
         """Have a chat with ollama"""
         await ctx.defer()
         server = get_server(server)
-        async with self.server_locks[server.name]:
         if not await server.is_online():
             await ctx.respond(
-                content=f"{server} is offline.",
-                delete_after=60
+                content=f"{server} is offline. Finding a suitable server...",
             )
-            return
+            try:
+                server = await find_suitable_server()
+            except ValueError as err:
+                return await ctx.edit(content=str(err), delete_after=30)
+            await ctx.delete(delay=5)
+        async with self.server_locks[server.name]:
             async with ollama_client(str(server.base_url)) as client:
                 client: AsyncClient
                 self.log.info("Checking if %r has the model %r", server, model)
@@ -203,7 +210,7 @@ class Chat(commands.Cog):
                         embed.colour = discord.Colour.red()
                         await ctx.edit(embed=embed)
                         return
-                    self.log.info("Response from %r: %r", server, line)
+                    self.log.debug("Response from %r: %r", server, line)
                     if line["status"] in {
                         "pulling manifest",
                         "verifying sha256 digest",
@@ -223,12 +230,14 @@ class Chat(commands.Cog):
                         last_completed = completed
                         last_completed_ts = time.time()
                         mbps = round((bytes_per_second * 8) / 1024 / 1024)
+                        eta = (total - completed) / max(1, bytes_per_second)
                         progress_bar = f"[{pb_fill}{pb_empty}]"
                         ns_total = naturalsize(total, binary=True)
                         ns_completed = naturalsize(completed, binary=True)
                         embed.description = (
                             f"{line['status'].capitalize()} {percent}% {progress_bar} "
                             f"({ns_completed}/{ns_total} @ {mbps} Mb/s) "
+                            f"[ETA: {naturaldelta(eta)}]"
                         )
 
                         if time.time() - last_edit >= 2.5:
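For reference, the throughput and ETA arithmetic added in the hunk above, pulled out into a self-contained sketch; the real loop also throttles edits and renders a progress bar, and the values below are examples rather than project code:

```python
# Self-contained sketch of the download-progress math from the hunk above.
# Not part of the commit; the real numbers come from Ollama's streaming pull response.
from humanize import naturalsize, naturaldelta


def progress_line(status: str, completed: int, total: int, bytes_per_second: float) -> str:
    percent = round(completed / total * 100, 1)
    mbps = round((bytes_per_second * 8) / 1024 / 1024)    # bytes/s -> megabits/s
    eta = (total - completed) / max(1, bytes_per_second)  # seconds remaining, guarded against /0
    return (
        f"{status.capitalize()} {percent}% "
        f"({naturalsize(completed, binary=True)}/{naturalsize(total, binary=True)} @ {mbps} Mb/s) "
        f"[ETA: {naturaldelta(eta)}]"
    )


# progress_line("pulling fs layer", 512 * 2**20, 4 * 2**30, 12 * 2**20)
# -> something like "Pulling fs layer 12.5% (512.0 MiB/4.0 GiB @ 96 Mb/s) [ETA: 4 minutes]"
```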
@@ -268,7 +277,11 @@ class Chat(commands.Cog):
                 if system_prompt:
                     messages.append(await create_ollama_message(system_prompt, role="system"))
                 messages.append(await create_ollama_message(prompt, images=[await image.read()] if image else None))
-                embed = discord.Embed(title=f"{model}:", description="")
+                embed = discord.Embed(description="")
+                embed.set_author(
+                    name=f"{model} @ {decorate_name(server)!r}" if server.gpu else model,
+                    icon_url="https://ollama.com/public/icon-64x64.png"
+                )
                 view = StopDownloadView(ctx)
                 msg = await ctx.respond(
                     embed=embed,
@@ -283,10 +296,11 @@ class Chat(commands.Cog):
                     options=Options(
                         num_ctx=4096,
                         low_vram=server.vram_gb < 8,
-                        temperature=1.5
+                        temperature=temperature
                     )
                 ):
-                    self.log.info("Response from %r: %r", server, response)
+                    response: dict
+                    self.log.debug("Response from %r: %r", server, response)
                     buffer.write(response["message"]["content"])
 
                     if len(buffer.getvalue()) > 4096:
@@ -1,12 +1,11 @@
 import os
-import sys
 import logging
 import discord
+import sys
 from discord.ext import commands
 from tortoise import Tortoise
-sys.path.extend("..") # noqa: E402
-from .config import get_config
+from config import get_config
+sys.path.extend([".", ".."])
 
-
 log = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -24,7 +23,7 @@ class SentientJimmy(commands.Bot):
             strip_after_prefix=True,
             debug_guilds=get_config()["bot"].get("debug_guilds"),
         )
-        self.load_extension("jimmy.cogs.chat")
+        self.load_extension("cogs.chat")
         self.load_extension("jishaku")
 
     async def start(self, token: str, *, reconnect: bool = True) -> None:
@@ -4,13 +4,24 @@ from functools import partial
 from fuzzywuzzy.fuzz import ratio
 from ollama import Message
 
+if typing.TYPE_CHECKING:
+    from .config import ServerConfig
+
 
 __all__ = (
     'async_ratio',
     'create_ollama_message',
+    'find_suitable_server',
+    'decorate_server_name'
 )
 
 
+def decorate_server_name(_s: "ServerConfig") -> str:
+    if _s.gpu:
+        return f"{_s.name} (\u26A1)"
+    return _s.name
+
+
 async def async_ratio(a: str, b: str) -> int:
     """
     Wraps fuzzywuzzy ratio in an async function
@@ -45,3 +56,23 @@ async def create_ollama_message(
             images=images
         )
     )
+
+
+async def find_suitable_server(cpu_fallback: bool = True) -> "ServerConfig":
+    """
+    Finds a suitable server to use for Ollama.
+
+    :param cpu_fallback: bool - whether to fall back to CPU servers if GPU servers are unavailable.
+    :return: ServerConfig - the server to use
+    """
+    from .config import get_servers
+    servers = get_servers()
+    if not servers:
+        raise ValueError("No servers configured.")
+    for server in servers:
+        if cpu_fallback is False and server.gpu is False:
+            continue
+        if not await server.is_online():
+            continue
+        return server
+    raise ValueError("No servers available.")
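A short usage sketch for the two helpers added above; the two-pass "prefer GPU, then allow CPU" policy shown here is illustrative and not something this commit does itself:

```python
# Illustrative usage of find_suitable_server / decorate_server_name from jimmy.utils.
# The GPU-first, CPU-fallback policy below is an example, not part of the commit.
from jimmy.utils import find_suitable_server, decorate_server_name


async def pick_server_label() -> str:
    try:
        server = await find_suitable_server(cpu_fallback=False)  # only GPU-backed servers
    except ValueError:
        server = await find_suitable_server()  # any online server, CPU included
    return decorate_server_name(server)  # appends a lightning bolt (\u26a1) for GPU servers
```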
@@ -1,8 +1,6 @@
 py-cord~=2.5
 ollama~=0.2
 tortoise-orm[asyncpg]~=0.21
-uvicorn[standard]~=0.30
-fastapi~=0.111
 jishaku~=2.5
 fuzzywuzzy~=0.18
 humanize~=4.9