Use an sqlite cache
This commit is contained in:
parent
984bc2ef8f
commit
fb0a57b7d0
2 changed files with 68 additions and 2 deletions
|
@ -3,3 +3,4 @@ uvicorn~=0.27
|
||||||
fastapi~=0.109
|
fastapi~=0.109
|
||||||
httpx~=0.26
|
httpx~=0.26
|
||||||
beautifulsoup4~=4.12
|
beautifulsoup4~=4.12
|
||||||
|
appdirs~=1.4
|
||||||
|
|
69
server.py
69
server.py
|
@ -1,9 +1,12 @@
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
|
import uuid
|
||||||
|
|
||||||
import fastapi
|
import fastapi
|
||||||
import httpx
|
import httpx
|
||||||
import logging
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
import appdirs
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
from fastapi import Query, Header, HTTPException, Request
|
from fastapi import Query, Header, HTTPException, Request
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
|
@ -13,7 +16,12 @@ from rich.logging import RichHandler
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(markup=True)])
|
# Configure root logging through Rich. The level is read from the LOG_LEVEL
# environment variable (upper-cased before lookup) and defaults to INFO.
logging.basicConfig(
    level=logging.getLevelName(str.upper(os.environ.get("LOG_LEVEL", "INFO"))),
    format="%(message)s",
    datefmt="[%X]",
    handlers=[RichHandler(markup=True)],
)
|
||||||
app = fastapi.FastAPI(
|
app = fastapi.FastAPI(
|
||||||
root_path=os.environ.get("PREVIEW_ROOT_PATH", ""),
|
root_path=os.environ.get("PREVIEW_ROOT_PATH", ""),
|
||||||
)
|
)
|
||||||
|
@ -105,6 +113,31 @@ URL_OG_TAGS = [
|
||||||
"audio:secure_url"
|
"audio:secure_url"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Choose where the SQLite cache lives. A working directory of /app is treated
# as a sign of running inside the container image, in which case the cache is
# kept under /app/cache; otherwise the per-user cache directory reported by
# appdirs is used.
if Path.cwd() != Path("/app"):
    CACHE_DIR = Path(appdirs.user_cache_dir("matrix-url-preview"))
else:
    logging.info("Look to be running in a docker container. Cache will be stored in /app/cache.")
    CACHE_DIR = Path("/app/cache")
CACHE_FILE = CACHE_DIR.joinpath("db.sqlite3")
logging.debug("Cache file: %r", CACHE_FILE)
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("startup")
async def startup():
    """Prepare the on-disk cache when the application starts.

    Ensures ``CACHE_DIR`` exists and that the SQLite database at
    ``CACHE_FILE`` contains the ``cache`` table (``uuid``, ``url``, ``ts``,
    ``metadata``). Safe to run repeatedly: both steps are no-ops when the
    directory and table are already present.
    """
    # Imported locally so this block does not depend on a file-level import.
    from contextlib import closing

    # exist_ok=True removes the check-then-create race of a separate
    # exists() test (e.g. several workers starting at the same time).
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # A sqlite3 connection used as a context manager only commits or rolls
    # back the transaction; it does not close the connection. closing()
    # releases the handle, and the inner `with conn` keeps the commit.
    with closing(sqlite3.connect(CACHE_FILE)) as conn:
        with conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS cache (
                    uuid TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    ts INTEGER NOT NULL,
                    metadata TEXT NOT NULL
                )
                """
            )
|
||||||
|
|
||||||
|
|
||||||
def upload_media(domain: str, access_token: str, file: io.BytesIO, filename: str, content_type: str):
|
def upload_media(domain: str, access_token: str, file: io.BytesIO, filename: str, content_type: str):
|
||||||
file.seek(0)
|
file.seek(0)
|
||||||
|
@ -142,8 +175,13 @@ def upload_media(domain: str, access_token: str, file: io.BytesIO, filename: str
|
||||||
def preview_url(
|
def preview_url(
|
||||||
req: Request,
|
req: Request,
|
||||||
url: Annotated[str, Query(..., description="URL to preview")],
|
url: Annotated[str, Query(..., description="URL to preview")],
|
||||||
|
ts: int = Query(None, description="The preferred point in time to return a preview for."),
|
||||||
access_token_qs: str | None = Query(None, alias="access_token", description="Access token to use for the request."),
|
access_token_qs: str | None = Query(None, alias="access_token", description="Access token to use for the request."),
|
||||||
access_token_header: str | None = Header(None, alias="Authorization", description="Access token to use for the request."),
|
access_token_header: str | None = Header(
|
||||||
|
None,
|
||||||
|
alias="Authorization",
|
||||||
|
description="Access token to use for the request."
|
||||||
|
),
|
||||||
):
|
):
|
||||||
if access_token_qs is not None:
|
if access_token_qs is not None:
|
||||||
access_token = access_token_qs
|
access_token = access_token_qs
|
||||||
|
@ -152,6 +190,27 @@ def preview_url(
|
||||||
else:
|
else:
|
||||||
return MISSING_TOKEN
|
return MISSING_TOKEN
|
||||||
|
|
||||||
|
with sqlite3.connect(CACHE_FILE) as conn:
|
||||||
|
cursor = conn.cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"SELECT (metadata,ts) FROM cache WHERE url = ?",
|
||||||
|
(url, ts)
|
||||||
|
)
|
||||||
|
results = cursor.fetchall()
|
||||||
|
if results:
|
||||||
|
for result in results:
|
||||||
|
# find the one with the closest timestamp
|
||||||
|
metadata, _ts = result
|
||||||
|
if ts is None or abs(ts - _ts) < 3600:
|
||||||
|
logging.debug("Optimal cache hit for %r", url)
|
||||||
|
return metadata
|
||||||
|
# No close matches, get the latest one
|
||||||
|
metadata, _ts = results[-1]
|
||||||
|
# If the latest one is more than 3 hours old, re-fetch. Otherwise, return.
|
||||||
|
if ts is None or abs(ts - _ts) < 10800:
|
||||||
|
logging.debug("Cache hit for %r", url)
|
||||||
|
return metadata
|
||||||
|
|
||||||
domain = os.environ.get("PREVIEW_HOMESERVER", "https://" + req.url.hostname)
|
domain = os.environ.get("PREVIEW_HOMESERVER", "https://" + req.url.hostname)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -245,6 +304,12 @@ def preview_url(
|
||||||
if not key.startswith(("original:", "og:", "matrix:")):
|
if not key.startswith(("original:", "og:", "matrix:")):
|
||||||
value = og_tags.pop(key, None)
|
value = og_tags.pop(key, None)
|
||||||
og_tags["og:" + key] = value
|
og_tags["og:" + key] = value
|
||||||
|
|
||||||
|
with sqlite3.connect(CACHE_FILE) as conn:
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO cache (uuid, url, ts, metadata) VALUES (?, ?, ?, ?)",
|
||||||
|
(str(uuid.uuid4()), url, int(response.headers["date"]), str(og_tags))
|
||||||
|
)
|
||||||
return og_tags
|
return og_tags
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue