Start working on impersonation

2024-06-10 17:44:18 +01:00
8 changed files with 122 additions and 324 deletions
--- a/.gitea/workflows/docker.yml
+++ b/.gitea/workflows/docker.yml
@ -16,7 +16,10 @@ jobs:
        id: meta
        uses: docker/metadata-action@v5
        with:
-          images: git.i-am.nexus/nex/sentient-jimmy
+          images: |
            git.i-am.nexus/nex/sentient-jimmy
          tags: |
            type=sha
      - name: Log into forgejo CR
        uses: docker/login-action@v3
--- a/README.md
+++ b/README.md
@ -8,14 +8,6 @@ Another Ollama bot for discord, however designed for mesh self-hosting.
 [bot]
 token = "your-bot-token"
 debug_guilds = [0123456789]  # omit for global commands
 db_url = "sqlite://:memory:"
 # ^ The database URL. Overridden by $DATABASE_URL.
 # The default in a docker environment is IN MEMORY, i.e. `sqlite://:memory:`.
 # The default in a non-docker environment is sqlite://default.db, aka sqlite @ ./default.db
 # If $DATABASE_URL is set, it will override this setting.
 # You can use SQLite, or PostgreSQL.
 # You can choose to build jimmy with mysql/mssql&oracle support by changing extra `asyncpg` to `asyncmy`/`asyncodbc`
 # in the tortoise-orm requirement in requirements.txt
 [ollama]
 order = ["server1", "server2", "fallback"]
@ -25,23 +17,14 @@ order = ["server1", "server2", "fallback"]
 base_url = "https://hosted.ollama.internal"  # default port is 443, because HTTPS
 gpu = true
 vram_gb = 8
 default_model="llama3:latest"  # sets the default model for /ollama chat
 [ollama.server2]
 base_url = "http://192.168.1.2:11434"
 gpu = true
 vram_gb = 4  # <8GB will enable "low VRAM mode" in ollama
 default_model = "llama2:latest"
 [ollama.fallback]
 base_url = "http://192.168.1.250:11434"
 gpu = false
 vram_gb = 32  # in the case of CPU Ollama, "vram" is actually just regular RAM.
 default_model = "orca-mini:3b"
 ```
 ## Running
 See [the example docker-compose.yml](/docker-compose.yml) for an example of how to run this bot with docker-compose.
 Alternatively, you can just run the docker image: `git.i-am.nexus/nex/sentient-jimmy:master`.
--- a/jimmy/assets/prompts/trump.txt
+++ b/jimmy/assets/prompts/trump.txt
@ -0,0 +1,13 @@
 You are the former United States president, Donald Trump, convicted of 34 felonies.
 You have been posting approximately 50 times a day on your platform 'Truth Social' spreading your
 words of truth to the American people in order to make America great again.
 Donald Trump is a polarizing figure in American politics known for his bombastic personality, which often manifests as
 self-aggrandizing tweets, boastful speeches, and a tendency to dominate conversations. As the 45th President of the
 United States, he implemented tax cuts, deregulated industries, and appointed conservative judges, including two
 Supreme Court justices. However, his presidency was marked by controversies, including allegations of obstruction
 of justice during the Mueller investigation, which found no collusion between Trump's campaign and Russia. Other
 controversies include separating migrant families at the border, accusing former President Barack Obama of wiretapping
 him without evidence, and making divisive statements on issues like race and immigration. Trump was also impeached
 over his dealings with Ukraine, and was acquitted by the Senate in February 2020. Overall, Trump's
 presidency was defined by an "America First" agenda, which often prioritized national interest over international
 cooperation and traditional alliances.
--- a/jimmy/assets/truthers.json
+++ b/jimmy/assets/truthers.json
@ -0,0 +1,38 @@
 {
    "trump": {
        "name": "trump",
        "type": "truth"
    },
    "tate": {
        "name": "tate",
        "type": "truth"
    },
    "nigel": {
        "name": "Nigel Farage",
        "type": "tweet"
    },
    "rishi": {
        "name": "Rishi Sunak",
        "type": "tweet"
    },
    "laurence": {
        "name": "Laurence Fox",
        "type": "tweet"
    },
    "tommy": {
        "name": "Tommy Robinson \uD83C\uDDEC\uD83C\uDDE7",
        "type": "tweet"
    },
    "kier": {
        "name": "Kier Starmer",
        "type": "tweet"
    },
    "boris": {
        "name": "Boris Johnson",
        "type": "tweet"
    },
    "ron": {
        "name": "Ron DeSantis",
        "type": "tweet"
    }
 }
--- a/jimmy/cogs/chat.py
+++ b/jimmy/cogs/chat.py
@ -1,5 +1,4 @@
 import asyncio
 import datetime
 import io
 import logging
 import time
@ -7,12 +6,11 @@ import typing
 import contextlib
 import discord
 import httpx
 from discord import Interaction
 from ollama import AsyncClient, ResponseError, Options
 from discord.ext import commands
 from jimmy.utils import create_ollama_message, find_suitable_server, decorate_server_name as decorate_name
-from jimmy.config import get_servers, get_server, get_config
+from jimmy.config import get_servers, get_server
 from jimmy.db import OllamaThread
 from humanize import naturalsize, naturaldelta
@ -48,13 +46,10 @@ async def get_available_tags_autocomplete(ctx: discord.AutocompleteContext):
    chosen_server = get_server(ctx.options.get("server") or get_servers()[0].name)
    async with ollama_client(str(chosen_server.base_url), timeout=2) as client:
        tags = (await client.list())["models"]
    return [tag["model"] for tag in tags if ctx.value.casefold() in tag["model"].casefold()]
    v = [tag["model"] for tag in tags if ctx.value.casefold() in tag["model"].casefold()]
    return [ctx.value, *v][:25]
-_ServerOptionAutocomplete = discord.utils.basic_autocomplete(
+_ServerOptionChoices = [discord.OptionChoice(server.name, server.name) for server in get_servers()]
    [x.name for x in get_servers()]
 )
 class Chat(commands.Cog):
@ -65,13 +60,7 @@ class Chat(commands.Cog):
            self.server_locks[server.name] = asyncio.Lock()
        self.log = logging.getLogger(__name__)
-    ollama_group = discord.SlashCommandGroup(
+    @commands.slash_command()
        name="ollama",
        description="Commands related to ollama.",
        guild_only=True
    )
    @ollama_group.command()
    async def status(self, ctx: discord.ApplicationContext):
        """Checks the status on all servers."""
        await ctx.defer()
@ -82,10 +71,10 @@ class Chat(commands.Cog):
        )
        fields = {}
        for server in get_servers():
-            if self.server_locks[server.name].locked():
+            if server.throttle and self.server_locks[server.name].locked():
                embed.add_field(
                    name=decorate_name(server),
-                    value="\N{closed lock with key} In use.",
+                    value=f"\N{closed lock with key} In use.",
                    inline=False
                )
                fields[server] = len(embed.fields) - 1
@ -93,7 +82,7 @@ class Chat(commands.Cog):
            else:
                embed.add_field(
                    name=decorate_name(server),
-                    value="\N{hourglass with flowing sand} Waiting...",
+                    value=f"\N{hourglass with flowing sand} Waiting...",
                    inline=False
                )
                fields[server] = len(embed.fields) - 1
@ -101,7 +90,7 @@ class Chat(commands.Cog):
        await ctx.respond(embed=embed)
        tasks = {}
        for server in get_servers():
-            if self.server_locks[server.name].locked():
+            if server.throttle and self.server_locks[server.name].locked():
                continue
            tasks[server] = asyncio.create_task(server.is_online())
@ -111,52 +100,19 @@ class Chat(commands.Cog):
                embed.set_field_at(
                    fields[server],
                    name=decorate_name(server),
-                    value="\N{white heavy check mark} Online.",
+                    value=f"\N{white heavy check mark} Online.",
                    inline=False
                )
            else:
                embed.set_field_at(
                    fields[server],
                    name=decorate_name(server),
-                    value="\N{cross mark} Offline.",
+                    value=f"\N{cross mark} Offline.",
                    inline=False
                )
        await ctx.edit(embed=embed)
-    @ollama_group.command(name="server-info")
+    @commands.slash_command(name="ollama")
    async def get_server_info(
            self,
            ctx: discord.ApplicationContext,
            server: typing.Annotated[
                str,
                discord.Option(
                    discord.SlashCommandOptionType.string,
                    description="The server to use.",
                    autocomplete=_ServerOptionAutocomplete,
                    default=get_servers()[0].name
                )
            ]
    ):
        """Gets information on a given server"""
        await ctx.defer()
        server = get_server(server)
        is_online = await server.is_online()
        y = "\N{white heavy check mark}"
        x = "\N{cross mark}"
        t = {True: y, False: x}
        rt = "VRAM" if server.gpu else "RAM"
        lines = [
            f"Name: {server.name!r}",
            f"Base URL: {server.base_url!r}",
            f"GPU Enabled: {t[server.gpu]}",
            f"{rt}: {server.vram_gb:,} GB",
            f"Default Model: {server.default_model!r}",
            f"Is Online: {t[is_online]}"
        ]
        p = "```md\n" + "\n".join(lines) + "```"
        return await ctx.respond(p)
    @ollama_group.command(name="chat")
    async def start_ollama_chat(
            self,
            ctx: discord.ApplicationContext,
@ -174,7 +130,7 @@ class Chat(commands.Cog):
                discord.Option(
                    discord.SlashCommandOptionType.string,
                    description="The server to use.",
-                    autocomplete=_ServerOptionAutocomplete,
+                    choices=_ServerOptionChoices,
                    default=get_servers()[0].name
                )
            ],
@ -184,7 +140,7 @@ class Chat(commands.Cog):
                    discord.SlashCommandOptionType.string,
                    description="The model to use.",
                    autocomplete=get_available_tags_autocomplete,
-                    default="default"
+                    default="llama3:latest"
                )
            ],
            image: typing.Annotated[
@ -217,9 +173,7 @@ class Chat(commands.Cog):
        """Have a chat with ollama"""
        await ctx.defer()
        server = get_server(server)
-        if not server:
+        if not await server.is_online():
            return await ctx.respond("\N{cross mark} Unknown Server.")
        elif not await server.is_online():
            await ctx.respond(
                content=f"{server} is offline. Finding a suitable server...",
            )
@ -229,17 +183,14 @@ class Chat(commands.Cog):
                return await ctx.edit(content=str(err), delete_after=30)
            await ctx.delete(delay=5)
        async with self.server_locks[server.name]:
            if model == "default":
                model = server.default_model
            async with ollama_client(str(server.base_url)) as client:
                client: AsyncClient
                self.log.info("Checking if %r has the model %r", server, model)
                tags = (await client.list())["models"]
                # Download code. It's recommended to collapse this in the editor.
                if model not in [x["model"] for x in tags]:
                    embed = discord.Embed(
                        title=f"Downloading {model} on {server}.",
-                        description="Initiating download...",
+                        description=f"Initiating download...",
                        color=discord.Color.blurple()
                    )
                    view = StopDownloadView(ctx)
@ -314,7 +265,6 @@ class Chat(commands.Cog):
                            await ctx.edit(embed=embed, delete_after=30, view=None)
                messages = []
                thread = None
                if thread_id:
                    thread = await OllamaThread.get_or_none(thread_id=thread_id)
                    if thread:
@ -322,29 +272,8 @@ class Chat(commands.Cog):
                            messages.append(
                                await create_ollama_message(msg["content"], role=msg["role"])
                            )
                    elif len(thread_id) == 6:
                        # Is a legacy thread
                        _cfg = get_config()["truth_api"]
                        async with httpx.AsyncClient(
                            base_url=_cfg["url"],
                            auth=(_cfg["username"], _cfg["password"])
                        ) as http_client:
                            response = await http_client.get(f"/ollama/thread/threads:{thread_id}")
                            if response.status_code == 200:
                                thread = response.json()
                                messages = thread["messages"]
                                thread = OllamaThread(
                                    messages=[{"role": m["role"], "content": m["content"]} for m in messages],
                                )
                                await thread.save()
                    else:
-                                return await ctx.respond(
+                        await ctx.respond(content="No thread with that ID exists.", delete_after=30)
                                    content="Failed to fetch legacy ollama thread from jimmy v2: HTTP %d (`%r`)" % (
                                        response.status_code, response.text
                                    ),
                                )
                    else:
                        return await ctx.respond(content="No thread with that ID exists.", delete_after=30)
                if system_prompt:
                    messages.append(await create_ollama_message(system_prompt, role="system"))
                messages.append(await create_ollama_message(prompt, images=[await image.read()] if image else None))
@ -396,12 +325,11 @@ class Chat(commands.Cog):
                    embed.add_field(
                        name="Full chat",
                        value="The chat was too long to fit in this message. "
-                              "You can download the `full-chat.txt` file to see the full message."
+                              f"You can download the `full-chat.txt` file to see the full message."
                    )
                else:
                    file = discord.utils.MISSING
                if not thread:
                thread = OllamaThread(
                    messages=[{"role": m["role"], "content": m["content"]} for m in messages],
                )
@ -409,174 +337,6 @@ class Chat(commands.Cog):
                embed.set_footer(text=f"Chat ID: {thread.thread_id}")
                await msg.edit(embed=embed, view=None, file=file)
    @ollama_group.command(name="pull")
    async def pull_ollama_model(
            self,
            ctx: discord.ApplicationContext,
            server: typing.Annotated[
                str,
                discord.Option(
                    discord.SlashCommandOptionType.string,
                    description="The server to use.",
                    autocomplete=_ServerOptionAutocomplete,
                    default=get_servers()[0].name
                )
            ],
            model: typing.Annotated[
                str,
                discord.Option(
                    discord.SlashCommandOptionType.string,
                    description="The model to use.",
                    autocomplete=get_available_tags_autocomplete,
                    default="llama3:latest"
                )
            ],
    ):
        """Downloads a tag on the target server"""
        # Flow: resolve the server, verify it responds, then stream the pull and
        # mirror its progress into a single embed that is edited in place.
        await ctx.defer()
        server = get_server(server)
        if not server:
            return await ctx.respond("\N{cross mark} Unknown server.")
        elif not await server.is_online():
            return await ctx.respond(f"\N{cross mark} Server {server.name!r} is not responding")
        embed = discord.Embed(
            title=f"Downloading {model} on {server}.",
            description="Initiating download...",
            color=discord.Color.blurple()
        )
        # The view's `event` is polled on every streamed line to detect cancellation.
        view = StopDownloadView(ctx)
        await ctx.respond(
            embed=embed,
            view=view
        )
        last_edit = 0
        async with ctx.typing():
            try:
                last_completed = 0
                last_completed_ts = time.time()
                async with ollama_client(str(server.base_url)) as client:
                    async for line in await client.pull(model, stream=True):
                        if view.event.is_set():
                            embed.add_field(name="Error!", value="Download cancelled.")
                            embed.colour = discord.Colour.red()
                            await ctx.edit(embed=embed)
                            return
                        self.log.debug("Response from %r: %r", server, line)
                        status = line["status"]
                        total = line.get("total")
                        # Lines without a usable byte total (the known bookkeeping
                        # statuses, plus any status this code does not know about)
                        # are shown verbatim instead of crashing on a missing or
                        # zero "total".
                        if not total or status in {
                            "pulling manifest",
                            "verifying sha256 digest",
                            "writing manifest",
                            "removing any unused layers",
                            "success"
                        }:
                            embed.description = status.capitalize()
                        else:
                            completed = line.get("completed", 0)
                            percent = round(completed / total * 100, 1)
                            pb_fill = "▰" * int(percent / 10)
                            pb_empty = "▱" * (10 - int(percent / 10))
                            now = time.time()
                            elapsed = now - last_completed_ts
                            # Two lines can arrive within one clock tick; treat that
                            # as "rate unknown" rather than dividing by zero.
                            if elapsed > 0:
                                bytes_per_second = (completed - last_completed) / elapsed
                            else:
                                bytes_per_second = 0
                            last_completed = completed
                            last_completed_ts = now
                            mbps = round((bytes_per_second * 8) / 1024 / 1024)
                            eta = (total - completed) / max(1, bytes_per_second)
                            progress_bar = f"[{pb_fill}{pb_empty}]"
                            ns_total = naturalsize(total, binary=True)
                            ns_completed = naturalsize(completed, binary=True)
                            embed.description = (
                                f"{status.capitalize()} {percent}% {progress_bar} "
                                f"({ns_completed}/{ns_total} @ {mbps} Mb/s) "
                                f"[ETA: {naturaldelta(eta)}]"
                            )
                        # Throttle message edits to at most one every 2.5 seconds.
                        if time.time() - last_edit >= 2.5:
                            await ctx.edit(embed=embed)
                            last_edit = time.time()
            except ResponseError as err:
                if err.error.endswith("file does not exist"):
                    await ctx.edit(
                        embed=None,
                        content="The model %r does not exist." % model,
                        delete_after=60,
                        view=None
                    )
                else:
                    embed.add_field(
                        name="Error!",
                        value=err.error
                    )
                    embed.colour = discord.Colour.red()
                    await ctx.edit(embed=embed, view=None)
                return
            else:
                embed.colour = discord.Colour.green()
                embed.description = f"Downloaded {model} on {server}."
                await ctx.edit(embed=embed, delete_after=30, view=None)
    @ollama_group.command(name="ps")
    async def ollama_proc_list(
            self,
            ctx: discord.ApplicationContext,
            server: typing.Annotated[
                str,
                discord.Option(
                    discord.SlashCommandOptionType.string,
                    description="The server to use.",
                    autocomplete=_ServerOptionAutocomplete,
                    default=get_servers()[0].name
                )
            ]
    ):
        """Checks the loaded models on the target server"""
        await ctx.defer()
        server = get_server(server)
        # Guard clauses: bail out early for an unknown or unresponsive server.
        if not server:
            return await ctx.respond("\N{cross mark} Unknown server.")
        if not await server.is_online():
            return await ctx.respond(f"\N{cross mark} Server {server.name!r} is not responding")
        async with ollama_client(str(server.base_url)) as client:
            loaded = (await client.ps())["models"]
        if not loaded:
            return await ctx.respond(
                embed=discord.Embed(
                    title=f"No models loaded on {server}.",
                    color=discord.Color.blurple()
                )
            )
        embed = discord.Embed(
            title=f"Models loaded on {server}",
            color=discord.Color.blurple()
        )
        # Only the first 25 models are rendered, one embed field each.
        for entry in loaded[:25]:
            total_bytes = entry["size"]
            vram_bytes = entry["size_vram"]
            total_text = naturalsize(total_bytes, binary=True)
            vram_text = naturalsize(vram_bytes, binary=True)
            ram_text = naturalsize(total_bytes - vram_bytes, binary=True)
            vram_pct = round(vram_bytes / total_bytes * 100)
            ram_pct = 100 - vram_pct
            expires_at = datetime.datetime.fromisoformat(entry["expires_at"])
            details = [
                f"* Size: {total_text}",
                f"* Unloaded: {discord.utils.format_dt(expires_at, style='R')}",
            ]
            if ram_pct > 0:
                # Part of the model sits outside VRAM: show the split.
                details.append(f"* VRAM/RAM: {vram_pct}%/{ram_pct}%")
                details.append(f"* VRAM Size: {vram_text}")
                details.append(f"* RAM Size: {ram_text}")
            else:
                details.append(f"* VRAM Size: {vram_text} (100%)")
            embed.add_field(
                name=entry["model"],
                value="\n".join(details),
                inline=False
            )
        await ctx.respond(embed=embed)
 def setup(bot):
    """Add a ``Chat`` cog instance to ``bot``."""
    bot.add_cog(Chat(bot))
--- a/jimmy/cogs/impersonation.py
+++ b/jimmy/cogs/impersonation.py
@ -0,0 +1,35 @@
 import discord
 from discord.ext import commands
 from jimmy.config import *
 from jimmy.utils import *
 from ollama import Message
 class ImpersonateCog(commands.Cog):
    """Cog holding the guild-only ``impersonate`` slash-command group.

    Only scaffolding at present: the ``trump`` sub-command replies with a
    placeholder message and ``get_truths`` has no real implementation yet.
    """
    def __init__(self, bot):
        # Keep a reference to the bot this cog is constructed with.
        self.bot = bot
    # Parent group that the sub-commands below are registered on.
    impersonate = discord.SlashCommandGroup(
        name="impersonate",
        description="Impersonate some famous person",
        guild_only=True
    )
    async def get_truths(
            self,
            ctx: discord.ApplicationContext,
            author: str,
            limit: int,
            *,
            query: str | None = None
    ) -> list[Message]:
        """
        Generates a new truth, or tweet, from the author.

        NOTE(review): unfinished. The body currently only substitutes a default
        ``query`` when none is given; ``ctx``, ``author`` and ``limit`` are unused
        and nothing is returned, so callers receive ``None`` rather than the
        annotated ``list[Message]``.
        """
        if query is None:
            # Fallback prompt used when the caller supplies no query.
            query = "Generate a new tweet, in reply to"
    @impersonate.command()
    async def trump(self, ctx: discord.ApplicationContext, dataset_size: int = 0, query: str = None):
        """Generates a new truth from trump!"""
        # Placeholder: ``dataset_size`` and ``query`` are accepted but not used yet.
        return await ctx.respond(":x: not done yet.")
--- a/jimmy/config.py
+++ b/jimmy/config.py
@ -1,7 +1,6 @@
 import os
 import tomllib
 import logging
 import urllib.parse
 from typing import Callable
 import httpx
@ -9,13 +8,20 @@ from pydantic import BaseModel, Field, AnyHttpUrl
 log = logging.getLogger(__name__)
 __all__ = (
    "ServerConfig",
    "get_servers",
    "get_server",
    "get_config",
 )
 class ServerConfig(BaseModel):
-    name: str = Field(min_length=1, max_length=4096)
+    name: str = Field(min_length=1, max_length=32)
    base_url: AnyHttpUrl
    gpu: bool = False
    vram_gb: int = 4
-    default_model: str = "llama3:latest"
+    throttle: bool = False
    def __repr__(self):
        return "<ServerConfig name={0.name} base_url={0.base_url} gpu={0.gpu!s} vram_gb={0.vram_gb}>".format(self)
@ -27,7 +33,7 @@ class ServerConfig(BaseModel):
        """
        Checks that the current server is online and responding to requests.
        """
-        async with httpx.AsyncClient(base_url=str(self.base_url), timeout=httpx.Timeout(2.25)) as client:
+        async with httpx.AsyncClient(base_url=str(self.base_url)) as client:
            try:
                response = await client.get("/api/tags")
                return response.status_code == 200
@ -58,40 +64,6 @@ def get_server(name_or_base_url: str) -> ServerConfig | None:
    for server in servers:
        if server.name == name_or_base_url or server.base_url == name_or_base_url:
            return server
    try:
        parsed = urllib.parse.urlparse(name_or_base_url)
    except ValueError:
        pass
    else:
        if parsed.netloc and parsed.scheme in ["http", "https"]:
            defaults = {
                "name": ":temporary:-:%s:" % parsed.hostname,
                "base_url": "{0.scheme}://{0.netloc}".format(parsed),
                "gpu": False,
                "vram_gb": 2,
                "default_model": "orca-mini:3b"
            }
            if parsed.path and parsed.path.endswith(("/api", "/api/")):
                defaults["base_url"] += parsed.path
            parsed_qs = urllib.parse.parse_qs(parsed.query)
            for key, values in parsed_qs.items():
                if not values:
                    continue
                if key == "gpu":
                    values = [
                        values[0][0].lower() in ("t", "1", "y")
                    ]
                elif key == "vram_gb":
                    try:
                        values = [
                            int(values[0])
                        ]
                    except ValueError:
                        values = []
                if values:
                    defaults[key] = values[0]
            return ServerConfig(**defaults)
    return None
@ -102,10 +74,6 @@ def get_config():
    _loaded.setdefault("servers", {})
    _loaded["servers"].setdefault("order", [])
    _loaded.setdefault("bot", {})
    _loaded.setdefault("truth_api", {})
    _loaded["truth_api"].setdefault("url", "https://bots.nexy7574.co.uk/jimmy/v2/api")
    _loaded["truth_api"].setdefault("username", "invalid")
    _loaded["truth_api"].setdefault("password", "invalid")
    if database_url := os.getenv("DATABASE_URL"):
        _loaded["bot"]["db_url"] = database_url
    return _loaded
--- a/tox.ini
+++ b/tox.ini
@ -1,2 +0,0 @@
 [flake8]
 max-line-length = 120