From 8ccc2a8a1bd6083582fbed634252084c89b037e8 Mon Sep 17 00:00:00 2001 From: nexy7574 Date: Wed, 10 Jan 2024 16:10:45 +0000 Subject: [PATCH] Add image support to ollama --- src/cogs/ollama.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/cogs/ollama.py b/src/cogs/ollama.py index 46dec10..f89a313 100644 --- a/src/cogs/ollama.py +++ b/src/cogs/ollama.py @@ -5,6 +5,7 @@ import os import textwrap import time import typing +import base64 import io from discord.ui import View, button @@ -104,6 +105,14 @@ class Ollama(commands.Cog): "Whether to give the AI acid, LSD, and other hallucinogens before responding.", default=False ) + ], + image: typing.Annotated[ + discord.Attachment, + discord.Option( + discord.Attachment, + "An image to feed into ollama. Only works with llava.", + default=None + ) ] ): if context is not None: @@ -123,6 +132,24 @@ class Ollama(commands.Cog): model = model + ":latest" self.log.debug("Resolved model to %r" % model) + if image: + if fnmatch(model, "llava:*") is False: + await ctx.respond("You can only use images with llava.") + return + elif image.size > 1024 * 1024 * 25: + await ctx.respond("Attachment is too large. Maximum size is 25 MB, for sanity. Try compressing it.") + return + elif not fnmatch(image.content_type, "image/*"): + await ctx.respond("Attachment is not an image. Try using a different file.") + return + else: + data = io.BytesIO() + await image.save(data) + data.seek(0) + image_data = base64.b64encode(data.read()).decode("utf-8") + else: + image_data = None + if server == "next": server = self.next_server() elif server not in CONFIG["ollama"]: @@ -261,6 +288,8 @@ class Ollama(commands.Cog): } if context is not None: payload["context"] = self.contexts[context] + if image_data: + payload["images"] = [image_data] async with session.post( "/api/generate", json=payload,