diff --git a/server.py b/server.py
index f7f5e7d..de87f75 100644
--- a/server.py
+++ b/server.py
@@ -2,6 +2,7 @@ import contextlib
 import io
 import json
 import os
+import textwrap
 import time
 import uuid
 
@@ -64,76 +65,86 @@ INVALID_TOKEN = JSONResponse(
     401
 )
 VALID_OG_TAGS = [
-    "title",
-    "type",
-    "image",
-    "url",
-    "audio",
-    "description",
-    "determiner",
-    "locale",
-    "locale:alternative",
-    "site_name",
-    "image:url",
-    "image:secure_url",
-    "image:type",
-    "image:width",
-    "image:height",
-    "image:alt",
-    "video",
-    "video:url",
-    "video:secure_url",
-    "video:type",
-    "video:width",
-    "video:height",
-    "video:alt",
-    "video:actor",
-    "video:actor:role",
-    "video:director",
-    "video:writer",
-    "video:duration",
-    "video:release_date",
-    "video:tag",
-    "video:series"
-    "audio:url",
-    "audio:secure_url",
-    "audio:type",
-    "music:duration",
-    "music:album",
-    "music:album:disc",
-    "music:album:track",
-    "music:musician",
-    "music:song",
-    "music:song:disc",
-    "music:song:track",
-    "music:release_date",
-    "music:creator",
-    "article:published_time",
-    "article:modified_time",
-    "article:expiration_time",
-    "article:author",
-    "article:section",
-    "article:tag",
-    "book:author",
-    "book:tag",
-    "book:isbn",
-    "book:release_date",
-    "profile:first_name",
-    "profile:last_name",
-    "profile:username",
-    "profile:gender"
+    "og:title",
+    "og:type",
+    "og:image",
+    "og:url",
+    "og:audio",
+    "og:description",
+    "og:determiner",
+    "og:locale",
+    "og:locale:alternate",
+    "og:site_name",
+    "og:image:url",
+    "og:image:secure_url",
+    "og:image:type",
+    "og:image:width",
+    "og:image:height",
+    "og:image:alt",
+    "og:video",
+    "og:video:url",
+    "og:video:secure_url",
+    "og:video:type",
+    "og:video:width",
+    "og:video:height",
+    "og:video:alt",
+    "og:video:actor",
+    "og:video:actor:role",
+    "og:video:director",
+    "og:video:writer",
+    "og:video:duration",
+    "og:video:release_date",
+    "og:video:tag",
+    "og:video:series",
+    "og:audio:url",
+    "og:audio:secure_url",
+    "og:audio:type",
+    "og:music:duration",
+    "og:music:album",
+    "og:music:album:disc",
+    "og:music:album:track",
+    "og:music:musician",
+    "og:music:song",
+    "og:music:song:disc",
+    "og:music:song:track",
+    "og:music:release_date",
+    "og:music:creator",
+    "og:article:published_time",
+    "og:article:modified_time",
+    "og:article:expiration_time",
+    "og:article:author",
+    "og:article:section",
+    "og:article:tag",
+    "og:book:author",
+    "og:book:tag",
+    "og:book:isbn",
+    "og:book:release_date",
+    "og:profile:first_name",
+    "og:profile:last_name",
+    "og:profile:username",
+    "og:profile:gender"
 ]
 URL_OG_TAGS = [
-    "video",
-    "video:url",
-    "video:secure_url",
-    "image",
-    "image:url",
-    "image:secure_url",
-    "audio",
-    "audio:url",
-    "audio:secure_url"
+    "og:video",
+    "og:video:url",
+    "og:video:secure_url",
+    "og:image",
+    "og:image:url",
+    "og:image:secure_url",
+    "og:audio",
+    "og:audio:url",
+    "og:audio:secure_url"
 ]
+# Twitter card properties and the Open Graph property each one is folded into
+# when building the preview.
+TWITTER_MAPPING = {
+    "twitter:site": "og:site_name",
+    "twitter:creator": "og:site_name",
+    "twitter:image": "og:image",
+    "twitter:title": "og:title",
+    "twitter:image:width": "og:image:width",
+    "twitter:image:height": "og:image:height",
+}
 
 if Path.cwd() == Path("/app"):
     logging.info("Look to be running in a docker container. Cache will be stored in /app/cache.")
@@ -240,14 +251,31 @@ def preview_url(
 
     og_tags = {}
     for tag in soup.find_all("meta"):
-        if tag.get("property", "").startswith("og:"):
-            tag_name = tag.get("property")[3:]
-            if tag_name in VALID_OG_TAGS:
+        logging.debug("Found meta tag: %r", tag)
+        if tag.get("property", "").startswith(("og:", "twitter:")):
+            tag_name = tag.get("property")
+            # One argument per %r placeholder: the property name, then its content.
+            logging.debug(
+                "Tag %r is an OG/Twitter tag, with content: %r",
+                tag_name,
+                textwrap.shorten(tag.get("content", "N/A"), 100)
+            )
+            if tag_name in VALID_OG_TAGS or tag_name in TWITTER_MAPPING:
                 og_tags[tag_name] = tag.get("content")
 
+    # Fold Twitter card tags into their Open Graph equivalents. Walking the
+    # static mapping (not og_tags) avoids mutating the dict while iterating
+    # it, and setdefault() keeps an explicit og:* value over the fallback.
+    for twitter_tag, og_tag in TWITTER_MAPPING.items():
+        if twitter_tag in og_tags:
+            og_tags.setdefault(og_tag, og_tags.pop(twitter_tag))
+            logging.debug("Mapped twitter tag %r to og tag %r", twitter_tag, og_tag)
+
     for tag_name in URL_OG_TAGS:
         if tag_name in og_tags:
+            logging.debug("Retrieving tag %r to see if it needs uploading to Matrix", tag_name)
             _url = og_tags[tag_name]
+            logging.debug("%r = %r", tag_name, _url)
             try:
                 # noinspection PyArgumentList
                 with httpx.stream(
@@ -295,9 +323,9 @@ def preview_url(
                         response_media.headers.get("content-type", "")
                     )
                     if upload_response:
-                        og_tags["original:" + tag_name] = og_tags[tag_name]
+                        og_tags["original:" + tag_name.replace("og:", "")] = og_tags[tag_name]
                         og_tags[tag_name] = upload_response
-                        if tag_name in ["image", "image:url", "image:secure_url"]:
+                        if tag_name in ["og:image", "og:image:url", "og:image:secure_url"]:
                             _file.seek(0)
                             og_tags["matrix:image:size"] = len(_file.getvalue())
                         logging.info("Uploaded media: %r" % _url)