#!/usr/bin/env python3 """ Fetch RSS/Atom feeds and write a merged cache file for AGS widgets. Dependencies: python-feedparser (Arch: pacman -S python-feedparser) Output: ~/.cache/ags/rss-feeds.json (flat array, sorted by date desc, max 30 items) Supported sources: - Any RSS / Atom feed (feedparser handles both transparently) - YouTube channels (Atom) via ?channel_id=UC... - Reddit (.rss suffix on any subreddit or user URL) - Telegram / other services via RSSHub """ import json import os import re import socket import sys import time from html import unescape from pathlib import Path import feedparser # ── Configuration ──────────────────────────────────────────────────────────── CACHE_DIR = os.path.join( os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "ags", ) CACHE_FILE = os.path.join(CACHE_DIR, "rss-feeds.json") ITEM_LIMIT = 30 REQUEST_TIMEOUT = 15 # Apply socket-level timeout for feedparser HTTP requests socket.setdefaulttimeout(REQUEST_TIMEOUT) # Each entry: {"url": "...", "tag": "..."} # tag is used by the GJS widget to style / filter items per source type FEEDS = [ # Arch Linux news {"url": "https://archlinux.org/feeds/news/", "tag": "arch"}, # YouTube — replace CHANNEL_ID with the actual channel ID # {"url": "https://www.youtube.com/feeds/videos.xml?channel_id=CHANNEL_ID", "tag": "youtube"}, # Reddit — append .rss to any subreddit or user URL # {"url": "https://www.reddit.com/r/archlinux/.rss", "tag": "reddit"}, # RSSHub Telegram channel bridge # {"url": "https://your-rsshub/telegram/channel/ChannelName", "tag": "telegram"}, ] # ── Helpers ────────────────────────────────────────────────────────────────── _TAG_RE = re.compile(r"<[^>]+>") def strip_html(raw: str | None) -> str: """Strip HTML tags and decode named/numeric entities.""" if not raw: return "" text = _TAG_RE.sub("", raw) text = unescape(text) text = re.sub(r"\s+", " ", text).strip() return text def parse_date(entry) -> str: """Normalize a feedparser entry date to ISO-8601 (UTC).""" parsed = entry.get("published_parsed") or entry.get("updated_parsed") if not parsed: return "" try: ts = time.mktime(parsed) return time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(ts)) except (OverflowError, ValueError): return "" # ── Main ───────────────────────────────────────────────────────────────────── def main() -> None: all_items: list[dict] = [] for feed_conf in FEEDS: url = feed_conf["url"] tag = feed_conf["tag"] try: parsed = feedparser.parse(url, agent="ags-rss-fetch/1.0") except Exception as exc: print(f"Warning: failed to fetch {url}: {exc}", file=sys.stderr) continue # feedparser sets status attribute on HTTP-level errors status = getattr(parsed, "status", None) if status is not None and status >= 400: print(f"Warning: HTTP {status} for {url}", file=sys.stderr) continue feed_title = parsed.feed.get("title", url) for entry in parsed.entries: item = { "title": strip_html(entry.get("title", "(untitled)")), "link": entry.get("link", ""), "date": parse_date(entry), "summary": strip_html(entry.get("summary") or entry.get("description", "")), "author": strip_html(entry.get("author", "")), "feed_title": feed_title, "tag": tag, } all_items.append(item) # Sort by date descending; items without dates land at the bottom all_items.sort(key=lambda i: i["date"] or "", reverse=True) # Truncate all_items = all_items[:ITEM_LIMIT] # Write cache cache_path = Path(CACHE_FILE) cache_path.parent.mkdir(parents=True, exist_ok=True) cache_path.write_text(json.dumps(all_items, indent=2, ensure_ascii=False) + "\n") print(f"Wrote {len(all_items)} items to {CACHE_FILE}") if __name__ == "__main__": main()