Use an sqlite cache

This commit is contained in:
Nexus 2024-02-10 01:52:20 +00:00
parent 984bc2ef8f
commit fb0a57b7d0
Signed by: nex
GPG key ID: 0FA334385D0B689F
2 changed files with 68 additions and 2 deletions

View file

@ -3,3 +3,4 @@ uvicorn~=0.27
fastapi~=0.109
httpx~=0.26
beautifulsoup4~=4.12
appdirs~=1.4

View file

@ -1,9 +1,12 @@
import ast
import email.utils
import io
import logging
import os
import sqlite3
import time
import uuid
from typing import Annotated

import appdirs
import fastapi
import httpx
from fastapi import Query, Header, HTTPException, Request
from fastapi.responses import JSONResponse
@ -13,7 +16,12 @@ from rich.logging import RichHandler
from fastapi.middleware.cors import CORSMiddleware
logging.basicConfig(level=logging.INFO, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(markup=True)])
# Configure root logging. LOG_LEVEL env var (default "INFO") selects the
# verbosity; RichHandler renders markup-enabled log messages.
logging.basicConfig(
    level=logging.getLevelName(os.environ.get("LOG_LEVEL", "INFO").upper()),
    format="%(message)s",
    datefmt="[%X]",
    handlers=[RichHandler(markup=True)]
)
# PREVIEW_ROOT_PATH lets the app run behind a path-prefixing reverse proxy.
app = fastapi.FastAPI(
    root_path=os.environ.get("PREVIEW_ROOT_PATH", ""),
)
@ -105,6 +113,31 @@ URL_OG_TAGS = [
"audio:secure_url"
]
# Choose where the sqlite cache lives: a fixed path inside the container
# image when cwd is /app (heuristic docker detection — NOTE(review): a cwd
# of /app outside docker would also match; confirm acceptable), otherwise
# the per-user cache dir (e.g. ~/.cache/matrix-url-preview on Linux).
if Path.cwd() == Path("/app"):
    logging.info("Look to be running in a docker container. Cache will be stored in /app/cache.")
    CACHE_DIR = Path("/app/cache")
else:
    CACHE_DIR = Path(appdirs.user_cache_dir("matrix-url-preview"))
# Single sqlite database file holding all cached preview metadata.
CACHE_FILE = CACHE_DIR / "db.sqlite3"
logging.debug("Cache file: %r", CACHE_FILE)
@app.on_event("startup")
async def startup():
    """Initialise the on-disk preview cache on application boot.

    Creates CACHE_DIR (race-safe) and the ``cache`` table in the sqlite
    database at CACHE_FILE if they do not already exist.

    NOTE(review): ``@app.on_event`` is deprecated in newer FastAPI in
    favour of lifespan handlers; kept here for interface compatibility.
    """
    # exist_ok avoids a crash (and a TOCTOU race) when the directory was
    # created by a previous run or a concurrent worker.
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(CACHE_FILE)
    try:
        # sqlite3's context manager only wraps a *transaction* (commit /
        # rollback) — it does NOT close the connection, so close explicitly.
        with conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS cache (
                    uuid TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    ts INTEGER NOT NULL,
                    metadata TEXT NOT NULL
                )
                """
            )
    finally:
        conn.close()
def upload_media(domain: str, access_token: str, file: io.BytesIO, filename: str, content_type: str):
file.seek(0)
@ -142,8 +175,13 @@ def upload_media(domain: str, access_token: str, file: io.BytesIO, filename: str
def preview_url(
req: Request,
url: Annotated[str, Query(..., description="URL to preview")],
ts: int = Query(None, description="The preferred point in time to return a preview for."),
access_token_qs: str | None = Query(None, alias="access_token", description="Access token to use for the request."),
access_token_header: str | None = Header(None, alias="Authorization", description="Access token to use for the request."),
access_token_header: str | None = Header(
None,
alias="Authorization",
description="Access token to use for the request."
),
):
if access_token_qs is not None:
access_token = access_token_qs
@ -152,6 +190,27 @@ def preview_url(
else:
return MISSING_TOKEN
with sqlite3.connect(CACHE_FILE) as conn:
cursor = conn.cursor()
cursor.execute(
"SELECT (metadata,ts) FROM cache WHERE url = ?",
(url, ts)
)
results = cursor.fetchall()
if results:
for result in results:
# find the one with the closest timestamp
metadata, _ts = result
if ts is None or abs(ts - _ts) < 3600:
logging.debug("Optimal cache hit for %r", url)
return metadata
# No close matches, get the latest one
metadata, _ts = results[-1]
# If the latest one is more than 3 hours old, re-fetch. Otherwise, return.
if ts is None or abs(ts - _ts) < 10800:
logging.debug("Cache hit for %r", url)
return metadata
domain = os.environ.get("PREVIEW_HOMESERVER", "https://" + req.url.hostname)
try:
@ -245,6 +304,12 @@ def preview_url(
if not key.startswith(("original:", "og:", "matrix:")):
value = og_tags.pop(key, None)
og_tags["og:" + key] = value
with sqlite3.connect(CACHE_FILE) as conn:
conn.execute(
"INSERT INTO cache (uuid, url, ts, metadata) VALUES (?, ?, ?, ?)",
(str(uuid.uuid4()), url, int(response.headers["date"]), str(og_tags))
)
return og_tags