From 8ccc2a8a1bd6083582fbed634252084c89b037e8 Mon Sep 17 00:00:00 2001
From: nexy7574 <me@nexy7574.co.uk>
Date: Wed, 10 Jan 2024 16:10:45 +0000
Subject: [PATCH] Add image support to ollama

---
 src/cogs/ollama.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/src/cogs/ollama.py b/src/cogs/ollama.py
index 46dec10..f89a313 100644
--- a/src/cogs/ollama.py
+++ b/src/cogs/ollama.py
@@ -5,6 +5,7 @@ import os
 import textwrap
 import time
 import typing
+import base64
 import io
 
 from discord.ui import View, button
@@ -104,6 +105,14 @@ class Ollama(commands.Cog):
                     "Whether to give the AI acid, LSD, and other hallucinogens before responding.",
                     default=False
                 )
+            ],
+            image: typing.Annotated[
+                discord.Attachment,
+                discord.Option(
+                    discord.Attachment,
+                    "An image to feed into ollama. Only works with llava.",
+                    default=None
+                )
             ]
     ):
         if context is not None:
@@ -123,6 +132,24 @@ class Ollama(commands.Cog):
             model = model + ":latest"
             self.log.debug("Resolved model to %r" % model)
 
+        # Validate the optional attachment and base64-encode it for the payload.
+        image_data = None
+        if image:
+            if not fnmatch(model, "llava:*"):
+                await ctx.respond("You can only use images with llava.")
+                return
+            if image.size > 1024 * 1024 * 25:
+                await ctx.respond("Attachment is too large. Maximum size is 25 MB, for sanity. Try compressing it.")
+                return
+            # content_type is optional on attachments; fnmatch(None, ...) raises
+            # TypeError, so treat a missing type as "not an image".
+            if not fnmatch(image.content_type or "", "image/*"):
+                await ctx.respond("Attachment is not an image. Try using a different file.")
+                return
+            # Attachment.read() returns the raw bytes directly, so no BytesIO
+            # round-trip is needed before encoding.
+            image_data = base64.b64encode(await image.read()).decode("utf-8")
+
         if server == "next":
             server = self.next_server()
         elif server not in CONFIG["ollama"]:
@@ -261,6 +288,8 @@ class Ollama(commands.Cog):
             }
             if context is not None:
                 payload["context"] = self.contexts[context]
+            if image_data:
+                payload["images"] = [image_data]
             async with session.post(
                 "/api/generate",
                 json=payload,