Compare commits


15 commits

All 15 commits are authored by nex. The CI column is the result of the "Build and Publish / build_and_publish (push)" workflow.

9a42ba815a  Act cache should be properly configured now  Failing after 2m39s  2024-06-17 00:53:14 +01:00
597ffd386c  Fix TypeError                                Successful in 49s    2024-06-16 16:15:01 +01:00
290d5c9ccb  Fix ollama API endpoint                      Successful in 51s    2024-06-16 16:10:50 +01:00
448a23affa  Allow importing legacy threads               Successful in 58s    2024-06-16 15:53:43 +01:00
d203376850  Update the README                            Successful in 1m49s  2024-06-11 01:56:03 +01:00
76d3684449  Remove reference to throttle                 Successful in 45s    2024-06-11 01:44:34 +01:00
d4d550d7ba  Add a proper timeout to is_online            Cancelled            2024-06-11 01:44:09 +01:00
b6d747a63b  Don't follow up with an empty embed          Successful in 44s    2024-06-11 01:41:20 +01:00
e32d866ad4  add PS command                               Successful in 48s    2024-06-11 01:37:20 +01:00
3c61504cb3  Fix /ollama pull                             (no status shown)    2024-06-11 01:21:34 +01:00
af11baeeaa  Clarify on-the-fly server names              Successful in 45s    2024-06-11 01:15:25 +01:00
954d01bca5  Add server info command                      Successful in 50s    2024-06-11 01:09:59 +01:00
c04e73dff9  Properly build master                        Successful in 44s    2024-06-11 01:03:51 +01:00
28908f217c  Enable ollama pull                           Failing after 1m53s  2024-06-11 00:58:17 +01:00
99001a60ba  Enable on-the-fly server construction        (no status shown)    2024-06-11 00:53:48 +01:00
8 changed files with 324 additions and 122 deletions

File: Build and Publish CI workflow

@@ -16,10 +16,7 @@ jobs:
         id: meta
         uses: docker/metadata-action@v5
         with:
-          images: |
-            git.i-am.nexus/nex/sentient-jimmy
-          tags: |
-            type=sha
+          images: git.i-am.nexus/nex/sentient-jimmy
       - name: Log into forgejo CR
         uses: docker/login-action@v3
@@ -36,4 +33,4 @@ jobs:
       tags: ${{ steps.meta.outputs.tags }}
       labels: ${{ steps.meta.outputs.labels }}
       cache-from: type=gha
-      cache-to: type=gha,mode=max
+      cache-to: type=gha,mode=max

File: README.md

@@ -8,6 +8,14 @@ Another Ollama bot for discord, however designed for mesh self-hosting.
 [bot]
 token = "your-bot-token"
 debug_guilds = [0123456789] # omit for global commands
+db_url = "sqlite://:memory:"
+# ^ The database URL. Overridden by $DATABASE_URL.
+# The default in a docker environment is IN MEMORY, i.e. `sqlite://:memory:`.
+# The default in a non-docker environment is sqlite://default.db, i.e. sqlite at ./default.db.
+# If $DATABASE_URL is set, it will override this setting.
+# You can use SQLite or PostgreSQL.
+# You can build jimmy with mysql/mssql/oracle support by changing the `asyncpg` extra to `asyncmy`/`asyncodbc`
+# in the tortoise-orm requirement in requirements.txt.
 
 [ollama]
 order = ["server1", "server2", "fallback"]
@@ -17,14 +25,23 @@ order = ["server1", "server2", "fallback"]
 base_url = "https://hosted.ollama.internal" # default port is 443, because HTTPS
 gpu = true
 vram_gb = 8
+default_model = "llama3:latest" # sets the default model for /ollama chat
 
 [ollama.server2]
 base_url = "http://192.168.1.2:11434"
 gpu = true
 vram_gb = 4 # <8GB will enable "low VRAM mode" in ollama
+default_model = "llama2:latest"
 
 [ollama.fallback]
 base_url = "http://192.168.1.250:11434"
 gpu = false
 vram_gb = 32 # in the case of CPU Ollama, "vram" is actually just regular RAM
+default_model = "orca-mini:3b"
 ```
+
+## Running
+
+See [the example docker-compose.yml](/docker-compose.yml) for an example of how to run this bot with docker-compose.
+Alternatively, you can just run the docker image: `git.i-am.nexus/nex/sentient-jimmy:master`.
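
The database-URL precedence described in the config comments above is worth pinning down. A minimal sketch of the resolution order (this mirrors the `get_config()` change in the config module diff further down; the standalone helper here is illustrative, not part of the bot):

```python
import os

# Illustrative helper, not part of the bot: $DATABASE_URL, when set,
# takes precedence over [bot] db_url from the config file.
def resolve_db_url(config_value: str = "sqlite://:memory:") -> str:
    return os.getenv("DATABASE_URL") or config_value

print(resolve_db_url("sqlite://default.db"))
# -> "sqlite://default.db", unless DATABASE_URL is set in the environment
```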

File: Trump persona system prompt (deleted)

@@ -1,13 +0,0 @@
-You are the former United States president, Donald Trump, convicted of 34 felonies.
-You have been posting approximately 50 times a day on your platform 'Truth Social' spreading your
-words of truth to the american people in order to make america great again.
-Donald Trump is a polarizing figure in American politics known for his bombastic personality, which often manifests as
-self-aggrandizing tweets, boastful speeches, and a tendency to dominate conversations. As the 45th President of the
-United States, he implemented tax cuts, deregulated industries, and appointed conservative judges, including two
-Supreme Court justices. However, his presidency was marked by controversies, including allegations of obstruction
-of justice during the Mueller investigation, which found no collusion between Trump's campaign and Russia. Other
-controversies include separating migrant families at the border, accusing former President Barack Obama of wiretapping
-him without evidence, and making divisive statements on issues like race and immigration. Trump also faced impeachment
-inquiries over his dealings with Ukraine, which he was acquitted of by the Senate in February 2020. Overall, Trump's
-presidency was defined by a "America First" agenda, which often prioritized national interest over international
-cooperation and traditional alliances.

File: impersonation profiles JSON (deleted)

@@ -1,38 +0,0 @@
-{
-  "trump": {
-    "name": "trump",
-    "type": "truth"
-  },
-  "tate": {
-    "name": "tate",
-    "type": "truth"
-  },
-  "nigel": {
-    "name": "Nigel Farage",
-    "type": "tweet"
-  },
-  "rishi": {
-    "name": "Rishi Sunak",
-    "type": "tweet"
-  },
-  "laurence": {
-    "name": "Laurence Fox",
-    "type": "tweet"
-  },
-  "tommy": {
-    "name": "Tommy Robinson \uD83C\uDDEC\uD83C\uDDE7",
-    "type": "tweet"
-  },
-  "kier": {
-    "name": "Kier Starmer",
-    "type": "tweet"
-  },
-  "boris": {
-    "name": "Boris Johnson",
-    "type": "tweet"
-  },
-  "ron": {
-    "name": "Ron DeSantis",
-    "type": "tweet"
-  }
-}

File: Ollama chat cog (Python)

@@ -1,4 +1,5 @@
 import asyncio
+import datetime
 import io
 import logging
 import time
@@ -6,11 +7,12 @@ import typing
 import contextlib
 
 import discord
 import httpx
 from discord import Interaction
 from ollama import AsyncClient, ResponseError, Options
 from discord.ext import commands
 
 from jimmy.utils import create_ollama_message, find_suitable_server, decorate_server_name as decorate_name
-from jimmy.config import get_servers, get_server
+from jimmy.config import get_servers, get_server, get_config
 from jimmy.db import OllamaThread
 from humanize import naturalsize, naturaldelta
@@ -46,10 +48,13 @@ async def get_available_tags_autocomplete(ctx: discord.AutocompleteContext):
     chosen_server = get_server(ctx.options.get("server") or get_servers()[0].name)
     async with ollama_client(str(chosen_server.base_url), timeout=2) as client:
         tags = (await client.list())["models"]
-    return [tag["model"] for tag in tags if ctx.value.casefold() in tag["model"].casefold()]
+    v = [tag["model"] for tag in tags if ctx.value.casefold() in tag["model"].casefold()]
+    return [ctx.value, *v][:25]
 
-_ServerOptionChoices = [discord.OptionChoice(server.name, server.name) for server in get_servers()]
+_ServerOptionAutocomplete = discord.utils.basic_autocomplete(
+    [x.name for x in get_servers()]
+)
 
 
 class Chat(commands.Cog):
@@ -60,7 +65,13 @@ class Chat(commands.Cog):
             self.server_locks[server.name] = asyncio.Lock()
         self.log = logging.getLogger(__name__)
 
-    @commands.slash_command()
+    ollama_group = discord.SlashCommandGroup(
+        name="ollama",
+        description="Commands related to ollama.",
+        guild_only=True
+    )
+
+    @ollama_group.command()
     async def status(self, ctx: discord.ApplicationContext):
         """Checks the status on all servers."""
         await ctx.defer()
@@ -71,10 +82,10 @@
         )
         fields = {}
         for server in get_servers():
-            if server.throttle and self.server_locks[server.name].locked():
+            if self.server_locks[server.name].locked():
                 embed.add_field(
                     name=decorate_name(server),
-                    value=f"\N{closed lock with key} In use.",
+                    value="\N{closed lock with key} In use.",
                     inline=False
                 )
                 fields[server] = len(embed.fields) - 1
@@ -82,7 +93,7 @@
             else:
                 embed.add_field(
                     name=decorate_name(server),
-                    value=f"\N{hourglass with flowing sand} Waiting...",
+                    value="\N{hourglass with flowing sand} Waiting...",
                     inline=False
                 )
                 fields[server] = len(embed.fields) - 1
@@ -90,7 +101,7 @@
         await ctx.respond(embed=embed)
         tasks = {}
         for server in get_servers():
-            if server.throttle and self.server_locks[server.name].locked():
+            if self.server_locks[server.name].locked():
                 continue
             tasks[server] = asyncio.create_task(server.is_online())
@@ -100,19 +111,52 @@
                 embed.set_field_at(
                     fields[server],
                     name=decorate_name(server),
-                    value=f"\N{white heavy check mark} Online.",
+                    value="\N{white heavy check mark} Online.",
                     inline=False
                 )
             else:
                 embed.set_field_at(
                     fields[server],
                     name=decorate_name(server),
-                    value=f"\N{cross mark} Offline.",
+                    value="\N{cross mark} Offline.",
                     inline=False
                 )
         await ctx.edit(embed=embed)
 
-    @commands.slash_command(name="ollama")
+    @ollama_group.command(name="server-info")
+    async def get_server_info(
+            self,
+            ctx: discord.ApplicationContext,
+            server: typing.Annotated[
+                str,
+                discord.Option(
+                    discord.SlashCommandOptionType.string,
+                    description="The server to use.",
+                    autocomplete=_ServerOptionAutocomplete,
+                    default=get_servers()[0].name
+                )
+            ]
+    ):
+        """Gets information on a given server"""
+        await ctx.defer()
+        server = get_server(server)
+        is_online = await server.is_online()
+        y = "\N{white heavy check mark}"
+        x = "\N{cross mark}"
+        t = {True: y, False: x}
+        rt = "VRAM" if server.gpu else "RAM"
+        lines = [
+            f"Name: {server.name!r}",
+            f"Base URL: {server.base_url!r}",
+            f"GPU Enabled: {t[server.gpu]}",
+            f"{rt}: {server.vram_gb:,} GB",
+            f"Default Model: {server.default_model!r}",
+            f"Is Online: {t[is_online]}"
+        ]
+        p = "```md\n" + "\n".join(lines) + "```"
+        return await ctx.respond(p)
+
+    @ollama_group.command(name="chat")
     async def start_ollama_chat(
             self,
             ctx: discord.ApplicationContext,
@@ -130,7 +174,7 @@
             discord.Option(
                 discord.SlashCommandOptionType.string,
                 description="The server to use.",
-                choices=_ServerOptionChoices,
+                autocomplete=_ServerOptionAutocomplete,
                 default=get_servers()[0].name
             )
         ],
@@ -140,7 +184,7 @@
                 discord.SlashCommandOptionType.string,
                 description="The model to use.",
                 autocomplete=get_available_tags_autocomplete,
-                default="llama3:latest"
+                default="default"
             )
         ],
         image: typing.Annotated[
@@ -173,7 +217,9 @@
         """Have a chat with ollama"""
         await ctx.defer()
         server = get_server(server)
-        if not await server.is_online():
+        if not server:
+            return await ctx.respond("\N{cross mark} Unknown Server.")
+        elif not await server.is_online():
             await ctx.respond(
                 content=f"{server} is offline. Finding a suitable server...",
             )
@@ -183,14 +229,17 @@
                 return await ctx.edit(content=str(err), delete_after=30)
             await ctx.delete(delay=5)
 
         async with self.server_locks[server.name]:
+            if model == "default":
+                model = server.default_model
             async with ollama_client(str(server.base_url)) as client:
                 client: AsyncClient
                 self.log.info("Checking if %r has the model %r", server, model)
                 tags = (await client.list())["models"]
+                # Download code. It's recommended to collapse this in the editor.
                 if model not in [x["model"] for x in tags]:
                     embed = discord.Embed(
                         title=f"Downloading {model} on {server}.",
-                        description=f"Initiating download...",
+                        description="Initiating download...",
                         color=discord.Color.blurple()
                     )
                     view = StopDownloadView(ctx)
@@ -265,6 +314,7 @@
                     await ctx.edit(embed=embed, delete_after=30, view=None)
 
         messages = []
+        thread = None
         if thread_id:
             thread = await OllamaThread.get_or_none(thread_id=thread_id)
             if thread:
@@ -272,8 +322,29 @@
                 messages.append(
                     await create_ollama_message(msg["content"], role=msg["role"])
                 )
+            elif len(thread_id) == 6:
+                # Is a legacy thread
+                _cfg = get_config()["truth_api"]
+                async with httpx.AsyncClient(
+                    base_url=_cfg["url"],
+                    auth=(_cfg["username"], _cfg["password"])
+                ) as http_client:
+                    response = await http_client.get(f"/ollama/thread/threads:{thread_id}")
+                    if response.status_code == 200:
+                        thread = response.json()
+                        messages = thread["messages"]
+                        thread = OllamaThread(
+                            messages=[{"role": m["role"], "content": m["content"]} for m in messages],
+                        )
+                        await thread.save()
+                    else:
+                        return await ctx.respond(
+                            content="Failed to fetch legacy ollama thread from jimmy v2: HTTP %d (`%r`)" % (
+                                response.status_code, response.text
+                            ),
+                        )
             else:
-                await ctx.respond(content="No thread with that ID exists.", delete_after=30)
+                return await ctx.respond(content="No thread with that ID exists.", delete_after=30)
 
         if system_prompt:
             messages.append(await create_ollama_message(system_prompt, role="system"))
         messages.append(await create_ollama_message(prompt, images=[await image.read()] if image else None))
@@ -325,18 +396,187 @@
                 embed.add_field(
                     name="Full chat",
                     value="The chat was too long to fit in this message. "
-                          f"You can download the `full-chat.txt` file to see the full message."
+                          "You can download the `full-chat.txt` file to see the full message."
                 )
             else:
                 file = discord.utils.MISSING
-        thread = OllamaThread(
-            messages=[{"role": m["role"], "content": m["content"]} for m in messages],
-        )
-        await thread.save()
+        if not thread:
+            thread = OllamaThread(
+                messages=[{"role": m["role"], "content": m["content"]} for m in messages],
+            )
+            await thread.save()
         embed.set_footer(text=f"Chat ID: {thread.thread_id}")
         await msg.edit(embed=embed, view=None, file=file)
 
+    @ollama_group.command(name="pull")
+    async def pull_ollama_model(
+            self,
+            ctx: discord.ApplicationContext,
+            server: typing.Annotated[
+                str,
+                discord.Option(
+                    discord.SlashCommandOptionType.string,
+                    description="The server to use.",
+                    autocomplete=_ServerOptionAutocomplete,
+                    default=get_servers()[0].name
+                )
+            ],
+            model: typing.Annotated[
+                str,
+                discord.Option(
+                    discord.SlashCommandOptionType.string,
+                    description="The model to use.",
+                    autocomplete=get_available_tags_autocomplete,
+                    default="llama3:latest"
+                )
+            ],
+    ):
+        """Downloads a tag on the target server"""
+        await ctx.defer()
+        server = get_server(server)
+        if not server:
+            return await ctx.respond("\N{cross mark} Unknown server.")
+        elif not await server.is_online():
+            return await ctx.respond(f"\N{cross mark} Server {server.name!r} is not responding")
+
+        embed = discord.Embed(
+            title=f"Downloading {model} on {server}.",
+            description="Initiating download...",
+            color=discord.Color.blurple()
+        )
+        view = StopDownloadView(ctx)
+        await ctx.respond(
+            embed=embed,
+            view=view
+        )
+        last_edit = 0
+        async with ctx.typing():
+            try:
+                last_completed = 0
+                last_completed_ts = time.time()
+                async with ollama_client(str(server.base_url)) as client:
+                    async for line in await client.pull(model, stream=True):
+                        if view.event.is_set():
+                            embed.add_field(name="Error!", value="Download cancelled.")
+                            embed.colour = discord.Colour.red()
+                            await ctx.edit(embed=embed)
+                            return
+                        self.log.debug("Response from %r: %r", server, line)
+                        if line["status"] in {
+                            "pulling manifest",
+                            "verifying sha256 digest",
+                            "writing manifest",
+                            "removing any unused layers",
+                            "success"
+                        }:
+                            embed.description = line["status"].capitalize()
+                        else:
+                            total = line["total"]
+                            completed = line.get("completed", 0)
+                            percent = round(completed / total * 100, 1)
+                            pb_fill = "█" * int(percent / 10)
+                            pb_empty = "░" * (10 - int(percent / 10))
+                            bytes_per_second = completed - last_completed
+                            bytes_per_second /= (time.time() - last_completed_ts)
+                            last_completed = completed
+                            last_completed_ts = time.time()
+                            mbps = round((bytes_per_second * 8) / 1024 / 1024)
+                            eta = (total - completed) / max(1, bytes_per_second)
+                            progress_bar = f"[{pb_fill}{pb_empty}]"
+                            ns_total = naturalsize(total, binary=True)
+                            ns_completed = naturalsize(completed, binary=True)
+                            embed.description = (
+                                f"{line['status'].capitalize()} {percent}% {progress_bar} "
+                                f"({ns_completed}/{ns_total} @ {mbps} Mb/s) "
+                                f"[ETA: {naturaldelta(eta)}]"
+                            )
+                        if time.time() - last_edit >= 2.5:
+                            await ctx.edit(embed=embed)
+                            last_edit = time.time()
+            except ResponseError as err:
+                if err.error.endswith("file does not exist"):
+                    await ctx.edit(
+                        embed=None,
+                        content="The model %r does not exist." % model,
+                        delete_after=60,
+                        view=None
+                    )
+                else:
+                    embed.add_field(
+                        name="Error!",
+                        value=err.error
+                    )
+                    embed.colour = discord.Colour.red()
+                    await ctx.edit(embed=embed, view=None)
+                return
+            else:
+                embed.colour = discord.Colour.green()
+                embed.description = f"Downloaded {model} on {server}."
+                await ctx.edit(embed=embed, delete_after=30, view=None)
+
+    @ollama_group.command(name="ps")
+    async def ollama_proc_list(
+            self,
+            ctx: discord.ApplicationContext,
+            server: typing.Annotated[
+                str,
+                discord.Option(
+                    discord.SlashCommandOptionType.string,
+                    description="The server to use.",
+                    autocomplete=_ServerOptionAutocomplete,
+                    default=get_servers()[0].name
+                )
+            ]
+    ):
+        """Checks the loaded models on the target server"""
+        await ctx.defer()
+        server = get_server(server)
+        if not server:
+            return await ctx.respond("\N{cross mark} Unknown server.")
+        elif not await server.is_online():
+            return await ctx.respond(f"\N{cross mark} Server {server.name!r} is not responding")
+
+        async with ollama_client(str(server.base_url)) as client:
+            response = (await client.ps())["models"]
+
+        if not response:
+            embed = discord.Embed(
+                title=f"No models loaded on {server}.",
+                color=discord.Color.blurple()
+            )
+            return await ctx.respond(embed=embed)
+
+        embed = discord.Embed(
+            title=f"Models loaded on {server}",
+            color=discord.Color.blurple()
+        )
+        for model in response[:25]:
+            size = naturalsize(model["size"], binary=True)
+            size_vram = naturalsize(model["size_vram"], binary=True)
+            size_ram = naturalsize(model["size"] - model["size_vram"], binary=True)
+            percent_in_vram = round(model["size_vram"] / model["size"] * 100)
+            percent_in_ram = 100 - percent_in_vram
+            expires = datetime.datetime.fromisoformat(model["expires_at"])
+            lines = [
+                f"* Size: {size}",
+                f"* Unloaded: {discord.utils.format_dt(expires, style='R')}",
+            ]
+            if percent_in_ram > 0:
+                lines.extend(
+                    [
+                        f"* VRAM/RAM: {percent_in_vram}%/{percent_in_ram}%",
+                        f"* VRAM Size: {size_vram}",
+                        f"* RAM Size: {size_ram}"
+                    ]
+                )
+            else:
+                lines.append(f"* VRAM Size: {size_vram} (100%)")
+            embed.add_field(
+                name=model["model"],
+                value="\n".join(lines),
+                inline=False
+            )
+        await ctx.respond(embed=embed)
+
 
 def setup(bot):
     bot.add_cog(Chat(bot))
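
The throughput and ETA arithmetic in the new pull command is compact, so here is the same math run on assumed numbers as a sanity check (a standalone sketch; the `total`, `completed`, and timing values are made up, only the formulas come from the diff above):

```python
total = 4 * 1024**3             # assumed: a 4 GiB model
completed = 1 * 1024**3         # assumed: 1 GiB fetched so far
last_completed = 896 * 1024**2  # assumed: progress at the previous tick
elapsed = 2.5                   # assumed: seconds since the previous tick

bytes_per_second = (completed - last_completed) / elapsed  # ~51.2 MiB/s
mbps = round((bytes_per_second * 8) / 1024 / 1024)         # megabits/s: 410
eta = (total - completed) / max(1, bytes_per_second)       # ~60 s for the remaining 3 GiB
print(mbps, round(eta))
```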

File: impersonate cog (Python, deleted)

@@ -1,35 +0,0 @@
-import discord
-from discord.ext import commands
-from jimmy.config import *
-from jimmy.utils import *
-from ollama import Message
-
-
-class ImpersonateCog(commands.Cog):
-    def __init__(self, bot):
-        self.bot = bot
-
-    impersonate = discord.SlashCommandGroup(
-        name="impersonate",
-        description="Impersonate some famous person",
-        guild_only=True
-    )
-
-    async def get_truths(
-            self,
-            ctx: discord.ApplicationContext,
-            author: str,
-            limit: int,
-            *,
-            query: str = None
-    ) -> list[Message]:
-        """
-        Generates a new truth, or tweet, from the author.
-        """
-        if query is None:
-            query = "Generate a new tweet, in reply to"
-
-    @impersonate.command()
-    async def trump(self, ctx: discord.ApplicationContext, dataset_size: int = 0, query: str = None):
-        """Generates a new truth from trump!"""
-        return await ctx.respond(":x: not done yet.")

File: config module (jimmy/config.py)

@@ -1,6 +1,7 @@
 import os
 import tomllib
 import logging
+import urllib.parse
 from typing import Callable
 
 import httpx
@@ -8,20 +9,13 @@ from pydantic import BaseModel, Field, AnyHttpUrl
 log = logging.getLogger(__name__)
 
 __all__ = (
     "ServerConfig",
     "get_servers",
     "get_server",
     "get_config",
 )
 
 
 class ServerConfig(BaseModel):
-    name: str = Field(min_length=1, max_length=32)
+    name: str = Field(min_length=1, max_length=4096)
     base_url: AnyHttpUrl
     gpu: bool = False
     vram_gb: int = 4
-    throttle: bool = False
+    default_model: str = "llama3:latest"
 
     def __repr__(self):
         return "<ServerConfig name={0.name} base_url={0.base_url} gpu={0.gpu!s} vram_gb={0.vram_gb}>".format(self)
@@ -33,7 +27,7 @@
         """
         Checks that the current server is online and responding to requests.
         """
-        async with httpx.AsyncClient(base_url=str(self.base_url)) as client:
+        async with httpx.AsyncClient(base_url=str(self.base_url), timeout=httpx.Timeout(2.25)) as client:
             try:
                 response = await client.get("/api/tags")
                 return response.status_code == 200
@@ -64,6 +58,40 @@ def get_server(name_or_base_url: str) -> ServerConfig | None:
     for server in servers:
         if server.name == name_or_base_url or server.base_url == name_or_base_url:
             return server
+    try:
+        parsed = urllib.parse.urlparse(name_or_base_url)
+    except ValueError:
+        pass
+    else:
+        if parsed.netloc and parsed.scheme in ["http", "https"]:
+            defaults = {
+                "name": ":temporary:-:%s:" % parsed.hostname,
+                "base_url": "{0.scheme}://{0.netloc}".format(parsed),
+                "gpu": False,
+                "vram_gb": 2,
+                "default_model": "orca-mini:3b"
+            }
+            if parsed.path and parsed.path.endswith(("/api", "/api/")):
+                defaults["base_url"] += parsed.path
+            parsed_qs = urllib.parse.parse_qs(parsed.query)
+            for key, values in parsed_qs.items():
+                if not values:
+                    continue
+                if key == "gpu":
+                    values = [
+                        values[0][0].lower() in ("t", "1", "y")
+                    ]
+                elif key == "vram_gb":
+                    try:
+                        values = [
+                            int(values[0])
+                        ]
+                    except ValueError:
+                        values = []
+                if values:
+                    defaults[key] = values[0]
+            return ServerConfig(**defaults)
     return None
@@ -74,6 +102,10 @@ def get_config():
     _loaded.setdefault("servers", {})
     _loaded["servers"].setdefault("order", [])
     _loaded.setdefault("bot", {})
+    _loaded.setdefault("truth_api", {})
+    _loaded["truth_api"].setdefault("url", "https://bots.nexy7574.co.uk/jimmy/v2/api")
+    _loaded["truth_api"].setdefault("username", "invalid")
+    _loaded["truth_api"].setdefault("password", "invalid")
     if database_url := os.getenv("DATABASE_URL"):
         _loaded["bot"]["db_url"] = database_url
     return _loaded
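
The new fallthrough in `get_server()` builds a temporary `ServerConfig` from any http(s) URL, with `gpu` and `vram_gb` overridable via the query string. A usage sketch (the URL and values below are assumptions for illustration, not a real server):

```python
from jimmy.config import get_server

# Assumed example URL; query parameters override the temporary defaults.
server = get_server("http://10.0.0.5:11434?gpu=y&vram_gb=8")
print(server.name)           # ':temporary:-:10.0.0.5:'
print(str(server.base_url))  # 'http://10.0.0.5:11434' (pydantic may normalize the URL)
print(server.gpu)            # True, since 'y' is in ("t", "1", "y")
print(server.vram_gb)        # 8
```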

File: tox.ini (new file, +2 lines)

@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 120