132 lines
4.4 KiB
Python
Executable file
132 lines
4.4 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Fetch RSS/Atom feeds and write a merged cache file for AGS widgets.
|
|
|
|
Dependencies: python-feedparser (Arch: pacman -S python-feedparser)
|
|
|
|
Output: $XDG_CACHE_HOME/ags/rss-feeds.json, defaulting to ~/.cache/ags/rss-feeds.json (flat array, sorted by date desc, max 30 items)
|
|
|
|
Supported sources:
|
|
- Any RSS / Atom feed (feedparser handles both transparently)
|
|
- YouTube channels (Atom) via ?channel_id=UC...
|
|
- Reddit (.rss suffix on any subreddit or user URL)
|
|
- Telegram / other services via RSSHub
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import socket
|
|
import sys
|
|
import time
|
|
from html import unescape
|
|
from pathlib import Path
|
|
|
|
import feedparser
|
|
|
|
# ── Configuration ────────────────────────────────────────────────────────────
|
|
|
|
# Resolve the cache root. Per the XDG Base Directory spec, an unset, empty,
# or relative XDG_CACHE_HOME is invalid and the default (~/.cache) applies;
# without this check an empty value would yield a relative "ags" directory.
_xdg_cache_home = os.environ.get("XDG_CACHE_HOME", "")
if not os.path.isabs(_xdg_cache_home):
    _xdg_cache_home = os.path.expanduser("~/.cache")

# Directory and file the merged feed cache is written to.
CACHE_DIR = os.path.join(_xdg_cache_home, "ags")
CACHE_FILE = os.path.join(CACHE_DIR, "rss-feeds.json")

# Maximum number of items kept in the cache after merging all feeds.
ITEM_LIMIT = 30

# Network timeout in seconds.
REQUEST_TIMEOUT = 15

# Apply socket-level timeout for feedparser HTTP requests
socket.setdefaulttimeout(REQUEST_TIMEOUT)

# Each entry: {"url": "...", "tag": "..."}
# tag is used by the GJS widget to style / filter items per source type
FEEDS = [
    # Arch Linux news
    {"url": "https://archlinux.org/feeds/news/", "tag": "arch"},
    # YouTube — replace CHANNEL_ID with the actual channel ID
    # {"url": "https://www.youtube.com/feeds/videos.xml?channel_id=CHANNEL_ID", "tag": "youtube"},
    # Reddit — append .rss to any subreddit or user URL
    # {"url": "https://www.reddit.com/r/archlinux/.rss", "tag": "reddit"},
    # RSSHub Telegram channel bridge
    # {"url": "https://your-rsshub/telegram/channel/ChannelName", "tag": "telegram"},
]
|
|
|
|
|
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
|
|
|
_TAG_RE = re.compile(r"<[^>]+>")
|
|
|
|
|
|
def strip_html(raw: str | None) -> str:
|
|
"""Strip HTML tags and decode named/numeric entities."""
|
|
if not raw:
|
|
return ""
|
|
text = _TAG_RE.sub("", raw)
|
|
text = unescape(text)
|
|
text = re.sub(r"\s+", " ", text).strip()
|
|
return text
|
|
|
|
|
|
def parse_date(entry) -> str:
    """Normalize a feedparser entry date to ISO-8601 (UTC).

    Reads ``published_parsed`` and falls back to ``updated_parsed``; both
    are looked up with ``entry.get``.

    Args:
        entry: Mapping-like feed entry supporting ``.get``.

    Returns:
        The timestamp as ``YYYY-MM-DDTHH:MM:SS`` (UTC, no offset suffix),
        or ``""`` when no date is present or it cannot be converted.
    """
    # Local import keeps this function self-contained; the module's import
    # block does not provide ``calendar``.
    import calendar

    parsed = entry.get("published_parsed") or entry.get("updated_parsed")
    if not parsed:
        return ""
    try:
        # feedparser's *_parsed values are already UTC. time.mktime() would
        # interpret them as local time and shift the result by the machine's
        # UTC offset; calendar.timegm() is the UTC inverse of time.gmtime().
        timestamp = calendar.timegm(parsed)
        return time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(timestamp))
    except (OverflowError, ValueError, OSError):
        return ""
|
|
|
|
|
|
# ── Main ─────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _entry_to_item(entry, feed_title: str, tag: str) -> dict:
    """Convert one feedparser entry into the flat dict stored in the cache."""
    return {
        "title": strip_html(entry.get("title", "(untitled)")),
        "link": entry.get("link", ""),
        "date": parse_date(entry),
        "summary": strip_html(entry.get("summary") or entry.get("description", "")),
        "author": strip_html(entry.get("author", "")),
        "feed_title": feed_title,
        "tag": tag,
    }


def _fetch_items(url: str, tag: str) -> list[dict]:
    """Fetch one feed and return its items; warn on stderr and return [] on failure."""
    try:
        parsed = feedparser.parse(url, agent="ags-rss-fetch/1.0")
    except Exception as exc:
        print(f"Warning: failed to fetch {url}: {exc}", file=sys.stderr)
        return []

    # status is only present when an HTTP response was actually received.
    status = getattr(parsed, "status", None)
    if status is not None and status >= 400:
        print(f"Warning: HTTP {status} for {url}", file=sys.stderr)
        return []

    # feedparser reports network and parse failures through the bozo flag
    # instead of raising, so surface them rather than silently yielding
    # nothing. A bozo feed that still produced entries is used as-is.
    if getattr(parsed, "bozo", False) and not parsed.entries:
        reason = getattr(parsed, "bozo_exception", "unknown error")
        print(f"Warning: could not fetch or parse {url}: {reason}", file=sys.stderr)
        return []

    feed_title = parsed.feed.get("title", url)
    return [_entry_to_item(entry, feed_title, tag) for entry in parsed.entries]


def _write_cache(items: list[dict]) -> None:
    """Write *items* to CACHE_FILE as UTF-8 JSON, replacing the file atomically."""
    cache_path = Path(CACHE_FILE)
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(items, indent=2, ensure_ascii=False) + "\n"

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be explicit instead of depending on the process locale.
    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    # Rename over the target so a reader never observes a half-written file.
    os.replace(tmp_path, cache_path)


def main() -> None:
    """Fetch every feed in FEEDS and write the merged, truncated cache file.

    Feeds that fail are skipped with a warning on stderr. The remaining
    items are sorted by date (newest first, undated items last), cut to
    ITEM_LIMIT entries, and written to CACHE_FILE.
    """
    all_items: list[dict] = []
    for feed_conf in FEEDS:
        all_items.extend(_fetch_items(feed_conf["url"], feed_conf["tag"]))

    # Dates are fixed-width ISO-8601 strings, so lexicographic order is
    # chronological; the empty string of undated items sorts last.
    all_items.sort(key=lambda item: item["date"] or "", reverse=True)
    all_items = all_items[:ITEM_LIMIT]

    _write_cache(all_items)
    print(f"Wrote {len(all_items)} items to {CACHE_FILE}")


if __name__ == "__main__":
    main()
|