rsshubtrans/translator/app.py
2026-04-13 18:08:43 +02:00

174 lines
5.4 KiB
Python

import hashlib
import json
import os
import time
from email.utils import formatdate
from urllib.parse import urlparse
import requests
from flask import Flask, Response, redirect, render_template, request
from db import (
close_db,
feed_cache_key,
get_db,
get_feed_cache,
init_db,
set_feed_cache,
)
from translate import translate_feed
# WSGI application object; the route decorators further down register on it.
app = Flask(__name__)
# ── Configuration (read once from the environment at import time) ────────────
# Base URL of the upstream RSSHub instance that feeds are fetched from.
RSSHUB = os.getenv("RSSHUB_URL", "http://rsshub:1200")
FEED_TTL = int(os.getenv("FEED_TTL_SECONDS", "300"))  # 5 min feed cache
# Values passed straight through to the reader template.
RSSHUB_DEFAULT_PATH = os.getenv("RSSHUB_DEFAULT_PATH", "")
SITE_NAME = os.getenv("SITE_NAME", "Feed Reader")
CHANNEL_URL = os.getenv("CHANNEL_URL", "")
CHANNEL_LABEL = os.getenv("CHANNEL_LABEL", "")

# Language list used when LANGUAGES is unset or contains no usable code.
_DEFAULT_LANGUAGES = "sk,cs,de,zh-CN"


def _parse_languages(raw: str) -> list:
    """Split a comma-separated code list into ``(code, label)`` pairs.

    Surrounding whitespace is stripped and empty entries are dropped; the
    label is the code itself.
    """
    codes = (part.strip() for part in raw.split(","))
    return [(code, code) for code in codes if code]


_lang_env = os.getenv("LANGUAGES", _DEFAULT_LANGUAGES)
# An empty or separator-only LANGUAGES value would leave the list empty and
# make every ``LANGUAGES[0][0]`` lookup in the routes raise IndexError, so
# fall back to the built-in default in that case.
LANGUAGES = _parse_languages(_lang_env) or _parse_languages(_DEFAULT_LANGUAGES)
# Set of accepted language codes for O(1) membership checks.
LANG_CODES = {code for code, _ in LANGUAGES}
# Directory holding one ``<code>.json`` UI string catalog per language.
_TRANSLATIONS_DIR = os.path.join(os.path.dirname(__file__), "translations")
def _load_translations() -> dict:
    """Read the UI string catalog of every configured language.

    For each code in ``LANGUAGES`` the file ``<code>.json`` inside
    ``_TRANSLATIONS_DIR`` is parsed as JSON. A language whose file does not
    exist is mapped to an empty dict.

    Returns:
        Mapping of language code to its parsed catalog.
    """
    catalogs = {}
    for lang_code, _label in LANGUAGES:
        catalog_path = os.path.join(_TRANSLATIONS_DIR, f"{lang_code}.json")
        catalog = {}
        try:
            handle = open(catalog_path, encoding="utf-8")
        except FileNotFoundError:
            # No catalog shipped for this language: keep the empty mapping.
            pass
        else:
            with handle:
                catalog = json.load(handle)
        catalogs[lang_code] = catalog
    return catalogs
# UI string catalogs, loaded once at import time and handed to the templates.
TRANSLATIONS = _load_translations()
# Import-time call into the db module (presumably prepares the cache
# storage; see db.init_db to confirm).
init_db()
# Ask Flask to call close_db every time an application context is torn down.
app.teardown_appcontext(close_db)
# ── Helpers ───────────────────────────────────────────────────────────────────
def _etag(content: bytes) -> str:
return hashlib.sha256(content).hexdigest()[:32]
def _http_date(ts: float) -> str:
return formatdate(ts, usegmt=True)
def _feed_response(content: bytes, fetched_at: float, cache_status: str) -> Response:
    """Build the HTTP response for a feed, honouring conditional requests.

    Args:
        content: Serialized feed XML to send.
        fetched_at: POSIX timestamp of when the feed was fetched; used as
            the ``Last-Modified`` value.
        cache_status: Value for the ``X-Cache`` header (``"HIT"``/``"MISS"``).

    Returns:
        A 200 response carrying the feed, or a bodiless 304 when the
        request's ``If-None-Match`` / ``If-Modified-Since`` validators show
        that the client copy is still current.
    """
    resp = Response(content, content_type="application/xml; charset=utf-8")
    # set_etag() emits the entity tag as a quoted string, the form HTTP
    # requires; a bare token is not a valid ETag header value.
    resp.set_etag(_etag(content))
    resp.headers["Last-Modified"] = _http_date(fetched_at)
    resp.headers["Cache-Control"] = f"max-age={FEED_TTL}, must-revalidate"
    resp.headers["X-Cache"] = cache_status
    # Let Werkzeug evaluate the validators: it understands quoted, weak and
    # comma-separated entity tags as well as "*", compares
    # If-Modified-Since as a date instead of a string, and gives
    # If-None-Match precedence when both headers are present. On a match it
    # turns this response into a 304 that keeps the validator headers.
    return resp.make_conditional(request.environ)
def detect_lang_from_host():
    """Extract language from CNAME subdomain (e.g. sk.domain.com → sk).

    Only hosts with at least three dot-separated labels are considered, and
    only the left-most label is inspected. Host names are case-insensitive
    and normally arrive lowercased, so the label is matched against the
    configured codes ignoring case (``zh-cn.domain.com`` → ``zh-CN``).

    Returns:
        The matching code exactly as spelled in ``LANG_CODES``, or ``None``
        when the host carries no known language label.
    """
    hostname = request.host.split(":")[0]  # drop an optional ":port" suffix
    labels = hostname.split(".")
    if len(labels) < 3:
        return None
    candidate = labels[0]
    if candidate in LANG_CODES:
        return candidate
    # Case-insensitive fallback; sorted() keeps the result deterministic
    # should two configured codes differ only by case.
    folded = candidate.lower()
    for code in sorted(LANG_CODES):
        if code.lower() == folded:
            return code
    return None
# ── Frontend routes ───────────────────────────────────────────────────────────
def _template_vars(lang: str) -> dict:
    """Assemble the context passed to ``reader.html`` for language *lang*.

    ``ui`` is the string catalog for *lang*, or an empty dict when none was
    loaded for it.
    """
    ui_strings = TRANSLATIONS.get(lang, {})
    return {
        "lang": lang,
        "languages": LANGUAGES,
        "default_feed_path": RSSHUB_DEFAULT_PATH,
        "site_name": SITE_NAME,
        "channel_url": CHANNEL_URL,
        "channel_label": CHANNEL_LABEL,
        "ui": ui_strings,
    }
@app.route("/")
def index():
    """Render the reader at the site root.

    The language comes from the request's host name when it carries a known
    language label; otherwise the first configured language is used.
    """
    lang = detect_lang_from_host()
    if not lang:
        lang = LANGUAGES[0][0]
    context = _template_vars(lang)
    return render_template("reader.html", **context)
@app.route("/<lang>/")
def reader(lang):
    """Render the reader for *lang*, redirecting unknown codes to the default."""
    if lang in LANG_CODES:
        context = _template_vars(lang)
        return render_template("reader.html", **context)
    # Unknown language code: send the client to the first configured one.
    fallback_lang = LANGUAGES[0][0]
    return redirect(f"/{fallback_lang}/")
# ── Media proxy (Telegram CDN blocks cross-origin) ───────────────────────────
# Upper bound on redirect hops followed while fetching one media URL.
_MEDIA_MAX_REDIRECTS = 5


def _is_allowed_media_url(url: str) -> bool:
    """Return True when *url* is an http(s) URL on telesco.pe or a subdomain."""
    try:
        parsed = urlparse(url)
        host = parsed.hostname or ""
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal): treat as not allowed.
        return False
    if parsed.scheme not in ("http", "https"):
        return False
    return host == "telesco.pe" or host.endswith(".telesco.pe")


@app.route("/media")
def media_proxy():
    """Fetch a media file from the telesco.pe CDN and relay it to the client.

    The target is taken from the ``url`` query parameter, which is
    untrusted input. Redirects are followed by hand so that every hop is
    checked against the same allow-list as the initial URL; letting
    ``requests`` follow them automatically would allow an allowed host to
    bounce the request to an arbitrary (possibly internal) address.

    Returns:
        The upstream body with its ``Content-Type`` (falling back to
        ``application/octet-stream``); ``403`` when the URL or a redirect
        target is not an allowed http(s) telesco.pe address; ``502`` when
        the fetch fails or redirects too many times.
    """
    from urllib.parse import urljoin

    url = request.args.get("url", "")
    if not _is_allowed_media_url(url):
        return "Forbidden", 403
    try:
        for _ in range(_MEDIA_MAX_REDIRECTS + 1):
            r = requests.get(
                url,
                timeout=15,
                headers={"User-Agent": "Mozilla/5.0"},
                allow_redirects=False,
            )
            if not r.is_redirect:
                break
            # Location may be relative; resolve it against the current URL.
            url = urljoin(url, r.headers["Location"])
            if not _is_allowed_media_url(url):
                return "Forbidden", 403
        else:
            # Every permitted hop was itself a redirect.
            return "Failed to fetch media", 502
        r.raise_for_status()
    except Exception:
        return "Failed to fetch media", 502
    return Response(
        r.content,
        content_type=r.headers.get("Content-Type", "application/octet-stream"),
    )
# ── Proxy route ───────────────────────────────────────────────────────────────
@app.route("/<lang>/<path:rsshub_path>")
def proxy(lang, rsshub_path):
    """Serve the RSSHub feed at *rsshub_path* translated into *lang*.

    A cached copy no older than ``FEED_TTL`` is served when available.
    Otherwise the feed is fetched from RSSHub with the request's query
    parameters, translated, stored in the cache and returned.

    Returns:
        The feed response (``X-Cache`` set to ``HIT`` or ``MISS``); ``400``
        for an unknown language; ``502`` when the upstream fetch fails or
        the upstream body is not valid feed XML.
    """
    from urllib.parse import urlencode

    if lang not in LANG_CODES:
        return f"Unknown language: {lang!r}", 400

    # Every query parameter, including repeated ones, in request order.
    query_pairs = list(request.args.items(multi=True))

    # The query string changes what RSSHub returns, so it must be part of
    # the cache identity; otherwise requests that differ only in their
    # parameters would be served each other's cached feed. Sorting makes
    # the key independent of parameter order.
    cache_path = rsshub_path
    if query_pairs:
        cache_path = f"{rsshub_path}?{urlencode(sorted(query_pairs))}"

    db = get_db()
    key = feed_cache_key(lang, cache_path)
    cached = get_feed_cache(db, key, FEED_TTL)
    if cached is not None:
        content, fetched_at = cached
        return _feed_response(content, fetched_at, "HIT")

    url = f"{RSSHUB}/{rsshub_path}"
    try:
        upstream = requests.get(url, params=query_pairs, timeout=15)
        upstream.raise_for_status()
    except Exception:
        return "Failed to fetch upstream feed", 502

    try:
        content = translate_feed(upstream.content, lang, db)
    except ValueError:
        return "Invalid feed XML from upstream", 502

    fetched_at = time.time()
    set_feed_cache(db, key, content, fetched_at)
    return _feed_response(content, fetched_at, "MISS")
if __name__ == "__main__":
    # Direct execution: start Flask's built-in server on all interfaces.
    app.run(port=5000, host="0.0.0.0")