diff --git a/requirements.txt b/requirements.txt
index 383448c..916e3c1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ uvicorn~=0.27
 fastapi~=0.109
 httpx~=0.26
 beautifulsoup4~=4.12
+appdirs~=1.4
diff --git a/server.py b/server.py
index 74ca6f1..017127a 100644
--- a/server.py
+++ b/server.py
@@ -1,9 +1,14 @@
 import io
 import os
+import json
+import time
+import uuid
 import fastapi
 import httpx
 import logging
+import sqlite3
+import appdirs
 
 from typing import Annotated
 from fastapi import Query, Header, HTTPException, Request
 from fastapi.responses import JSONResponse
@@ -13,7 +18,12 @@ from rich.logging import RichHandler
 from fastapi.middleware.cors import CORSMiddleware
 
 
-logging.basicConfig(level=logging.INFO, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(markup=True)])
+logging.basicConfig(
+    level=logging.getLevelName(os.environ.get("LOG_LEVEL", "INFO").upper()),
+    format="%(message)s",
+    datefmt="[%X]",
+    handlers=[RichHandler(markup=True)]
+)
 app = fastapi.FastAPI(
     root_path=os.environ.get("PREVIEW_ROOT_PATH", ""),
 )
@@ -105,6 +115,31 @@ URL_OG_TAGS = [
     "audio:secure_url"
 ]
 
+if Path.cwd() == Path("/app"):
+    logging.info("Looks like we're running in a docker container. Cache will be stored in /app/cache.")
+    CACHE_DIR = Path("/app/cache")
+else:
+    CACHE_DIR = Path(appdirs.user_cache_dir("matrix-url-preview"))
+CACHE_FILE = CACHE_DIR / "db.sqlite3"
+logging.debug("Cache file: %r", CACHE_FILE)
+
+
+@app.on_event("startup")
+async def startup():
+    if not CACHE_DIR.exists():
+        CACHE_DIR.mkdir(parents=True)
+    with sqlite3.connect(CACHE_FILE) as conn:
+        conn.execute(
+            """
+            CREATE TABLE IF NOT EXISTS cache (
+                uuid TEXT PRIMARY KEY,
+                url TEXT NOT NULL,
+                ts INTEGER NOT NULL,
+                metadata TEXT NOT NULL
+            )
+            """
+        )
+
 
 def upload_media(domain: str, access_token: str, file: io.BytesIO, filename: str, content_type: str):
     file.seek(0)
@@ -142,8 +177,13 @@ def preview_url(
     req: Request,
     url: Annotated[str, Query(..., description="URL to preview")],
+    ts: int | None = Query(None, description="The preferred point in time to return a preview for."),
     access_token_qs: str | None = Query(None, alias="access_token", description="Access token to use for the request."),
-    access_token_header: str | None = Header(None, alias="Authorization", description="Access token to use for the request."),
+    access_token_header: str | None = Header(
+        None,
+        alias="Authorization",
+        description="Access token to use for the request."
+    ),
 ):
     if access_token_qs is not None:
         access_token = access_token_qs
 
@@ -152,6 +192,27 @@ def preview_url(
     else:
         return MISSING_TOKEN
 
+    with sqlite3.connect(CACHE_FILE) as conn:
+        cursor = conn.cursor()
+        cursor.execute(
+            "SELECT metadata, ts FROM cache WHERE url = ? ORDER BY ts",
+            (url,)
+        )
+        results = cursor.fetchall()
+        if results:
+            for result in results:
+                # find the one with the closest timestamp
+                metadata, _ts = result
+                if ts is None or abs(ts - _ts) < 3600:
+                    logging.debug("Optimal cache hit for %r", url)
+                    return json.loads(metadata)
+            # No close matches, get the latest one
+            metadata, _ts = results[-1]
+            # If the latest one is more than 3 hours old, re-fetch. Otherwise, return.
+            if ts is None or abs(ts - _ts) < 10800:
+                logging.debug("Cache hit for %r", url)
+                return json.loads(metadata)
+
     domain = os.environ.get("PREVIEW_HOMESERVER", "https://" + req.url.hostname)
 
     try:
@@ -245,6 +306,12 @@ def preview_url(
 
         if not key.startswith(("original:", "og:", "matrix:")):
            value = og_tags.pop(key, None)
            og_tags["og:" + key] = value
+
+    with sqlite3.connect(CACHE_FILE) as conn:
+        conn.execute(
+            "INSERT INTO cache (uuid, url, ts, metadata) VALUES (?, ?, ?, ?)",
+            (str(uuid.uuid4()), url, int(time.time()), json.dumps(og_tags))
+        )
 
     return og_tags