Add image support to ollama

2024-01-10 16:10:45 +00:00 · 2024-01-10 16:10:45 +00:00 · 8ccc2a8a1b
commit 8ccc2a8a1b
parent a091104a9b
1 changed file with 29 additions and 0 deletions
--- a/src/cogs/ollama.py
+++ b/src/cogs/ollama.py
@@ -5,6 +5,7 @@ import os
 import textwrap
 import time
 import typing
+import base64
 import io

 from discord.ui import View, button
@@ -104,6 +105,14 @@ class Ollama(commands.Cog):
                    "Whether to give the AI acid, LSD, and other hallucinogens before responding.",
                    default=False
                )
+            ],
+            image: typing.Annotated[
+                discord.Attachment,
+                discord.Option(
+                    discord.Attachment,
+                    "An image to feed into ollama. Only works with llava.",
+                    default=None
+                )
            ]
    ):
        if context is not None:
@@ -123,6 +132,24 @@ class Ollama(commands.Cog):
            model = model + ":latest"
            self.log.debug("Resolved model to %r" % model)

+        if image:
+            if fnmatch(model, "llava:*") is False:
+                await ctx.respond("You can only use images with llava.")
+                return
+            elif image.size > 1024 * 1024 * 25:
+                await ctx.respond("Attachment is too large. Maximum size is 25 MB, for sanity. Try compressing it.")
+                return
+            elif not fnmatch(image.content_type, "image/*"):
+                await ctx.respond("Attachment is not an image. Try using a different file.")
+                return
+            else:
+                data = io.BytesIO()
+                await image.save(data)
+                data.seek(0)
+                image_data = base64.b64encode(data.read()).decode("utf-8")
+        else:
+            image_data = None
+
        if server == "next":
            server = self.next_server()
        elif server not in CONFIG["ollama"]:
@@ -261,6 +288,8 @@ class Ollama(commands.Cog):
            }
            if context is not None:
                payload["context"] = self.contexts[context]
+            if image_data:
+                payload["images"] = [image_data]
            async with session.post(
                "/api/generate",
                json=payload,