From 6ad78e9ed79ba12b07583e16490112c8eb7ffa3a Mon Sep 17 00:00:00 2001 From: Matthew Grotke Date: Tue, 9 Jun 2026 01:25:02 -0400 Subject: [PATCH] Development --- docker/routlin-dash/app/config_utils.py | 38 ++-- .../app/pages/dnsblocking/content.json | 4 +- .../app/pages/dnsblocking/view.py | 37 +-- routlin/dns-blocklists.py | 212 ++++++++++++------ 4 files changed, 187 insertions(+), 104 deletions(-) diff --git a/docker/routlin-dash/app/config_utils.py b/docker/routlin-dash/app/config_utils.py index 3247111..6cfbfc4 100644 --- a/docker/routlin-dash/app/config_utils.py +++ b/docker/routlin-dash/app/config_utils.py @@ -678,6 +678,23 @@ def resolve_iface(vlan, cfg): # Config datasources ================================================ +def _bl_db_rows(): + """Return {blocklist_name: {domain_count, fetched_at}} from domains.db, or {} if unavailable.""" + db_path = os.path.join(BLOCKLISTS_DIR, 'domains.db') + try: + db = _sqlite3.connect(f'file:{db_path}?mode=ro', uri=True) + rows = db.execute('SELECT name, domain_count, fetched_at FROM blocklists').fetchall() + db.close() + return {name: {'domain_count': count, 'fetched_at': fetched_at} + for name, count, fetched_at in rows} + except Exception: + return {} + + +def _bl_db_counts(): + return {name: v['domain_count'] for name, v in _bl_db_rows().items()} + + def config_datasource(name): cfg = load_config() vlans = cfg.get('vlans', []) @@ -689,30 +706,25 @@ def config_datasource(name): return cfg.get('host_overrides', []) if name == 'blocklists': + db_counts = _bl_db_counts() rows = [] for bl in cfg.get('dns_blocking', {}).get('blocklists', []): row = dict(bl) bl_type = bl.get('bl_type', 'community') row['bl_type_label'] = 'Local' if bl_type == 'local' else 'Community' - bl_path = os.path.join(BLOCKLISTS_DIR, bl.get('save_as', '')) + count = db_counts.get(bl.get('name', '')) + row['domain_count'] = f'{count:,}' if count is not None else '-' if bl_type == 'local': + bl_path = os.path.join(BLOCKLISTS_DIR, bl.get('save_as', '')) try: with open(bl_path) as f: - content = f.read() - row['local_entries'] = content.strip() - row['domain_count'] = str(sum(1 for ln in content.splitlines() if ln.strip() and not ln.startswith('#'))) + row['local_entries'] = f.read().strip() except Exception: row['local_entries'] = '' - row['domain_count'] = '-' - row['last_updated'] = '-' + row['source_display'] = bl.get('save_as', '') else: - try: - with open(bl_path) as f: - row['domain_count'] = str(sum(1 for _ in f)) - row['last_updated'] = fmt_timestamp(int(os.path.getmtime(bl_path))) - except Exception: - row['domain_count'] = '-' - row['last_updated'] = '-' + row['local_entries'] = '' + row['source_display'] = row.get('url', '') rows.append(row) return rows diff --git a/docker/routlin-dash/app/pages/dnsblocking/content.json b/docker/routlin-dash/app/pages/dnsblocking/content.json index 335c0dc..e5b6724 100644 --- a/docker/routlin-dash/app/pages/dnsblocking/content.json +++ b/docker/routlin-dash/app/pages/dnsblocking/content.json @@ -33,8 +33,8 @@ "class": "col-narrow" }, { - "label": "Source URL", - "field": "url", + "label": "Source", + "field": "source_display", "class": "col-mono" } ], diff --git a/docker/routlin-dash/app/pages/dnsblocking/view.py b/docker/routlin-dash/app/pages/dnsblocking/view.py index 666d19f..d5351fa 100644 --- a/docker/routlin-dash/app/pages/dnsblocking/view.py +++ b/docker/routlin-dash/app/pages/dnsblocking/view.py @@ -37,35 +37,40 @@ def _dnsblocking_log_tail(cfg): def blocklist_stats_html(cfg): + db_rows = config_utils._bl_db_rows() rows = '' for bl in cfg.get('dns_blocking', {}).get('blocklists', []): - name = factory.e(bl.get('name', '')) + name = bl.get('name', '') is_local = bl.get('bl_type') == 'local' - save_as = bl.get('save_as', '') - bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else '' + db = db_rows.get(name, {}) + count = db.get('domain_count') + entries = f'{count:,}' if count is not None else '-' if is_local: + save_as = bl.get('save_as', '') + bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else '' try: - with open(bl_path) as f: - entries = sum(1 for ln in f if ln.strip() and not ln.startswith('#')) size_str = config_utils.fmt_bytes(os.path.getsize(bl_path)) - last_refreshed = 'Local' except Exception: - entries, size_str, last_refreshed = '-', '-', 'Local' + size_str = '-' + last_refreshed = 'Local' else: - try: - with open(bl_path) as f: - entries = sum(1 for _ in f) - mtime = int(os.path.getmtime(bl_path)) - size_str = config_utils.fmt_bytes(os.path.getsize(bl_path)) + fetched_at = db.get('fetched_at') + if fetched_at: last_refreshed = ( - f'{datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M")}' - f' ({config_utils.relative_time(mtime, datetime.now(tz=timezone.utc).timestamp())} ago)' + f'{datetime.fromtimestamp(fetched_at).strftime("%Y-%m-%d %H:%M")}' + f' ({config_utils.relative_time(fetched_at, datetime.now(tz=timezone.utc).timestamp())} ago)' ) + else: + last_refreshed = 'Never' + save_as = bl.get('save_as', '') + bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else '' + try: + size_str = config_utils.fmt_bytes(os.path.getsize(bl_path)) except Exception: - entries, size_str, last_refreshed = '-', '-', 'Never' + size_str = '-' rows += ( '' - f'{name}' + f'{factory.e(name)}' f'{entries}' f'{size_str}' f'{factory.e(last_refreshed)}' diff --git a/routlin/dns-blocklists.py b/routlin/dns-blocklists.py index 9957404..5c42545 100644 --- a/routlin/dns-blocklists.py +++ b/routlin/dns-blocklists.py @@ -3,9 +3,10 @@ dns-blocklists.py -- Download and merge DNS blocklists defined in config.json. Reads the blocklists library from config.json, downloads every blocklist referenced -by at least one VLAN, merges them into per-combo conf files (one per unique -combination of blocklist names), then sends SIGHUP to each running dnsmasq -instance so it reloads its config without restarting. +by at least one VLAN, and upserts normalized domains into a SQLite database +(blocklists/domains.db). Downloads are skipped when the content hash is unchanged. +Merged per-combo conf files are only rewritten when a constituent blocklist changed. +Sends SIGHUP to each running dnsmasq instance so it reloads without restarting. Usage: sudo python3 dns-blocklists.py @@ -15,8 +16,10 @@ import hashlib import json import logging import os +import sqlite3 import subprocess import sys +import time import urllib.request import urllib.error from pathlib import Path @@ -25,6 +28,7 @@ PRODUCT_NAME = "routlin" SCRIPT_DIR = Path(__file__).parent CONFIG_FILE = SCRIPT_DIR / "config.json" BLOCKLIST_DIR = SCRIPT_DIR / "blocklists" +DB_FILE = BLOCKLIST_DIR / "domains.db" LOG_FILE = SCRIPT_DIR / "dns-blocklists.log" log = None @@ -93,6 +97,8 @@ def merged_path(h): return BLOCKLIST_DIR / f"merged-{h}.conf" +# Parse / detect ====================================================== + def parse_dnsmasq_format(content): domains = set() for ln in content.splitlines(): @@ -122,6 +128,15 @@ def parse_hosts_format(content): return domains +def parse_local_format(content): + domains = set() + for ln in content.splitlines(): + ln = ln.strip() + if ln and not ln.startswith("#"): + domains.add(ln) + return domains + + def detect_format(content): for ln in content.splitlines(): ln = ln.strip() @@ -134,14 +149,80 @@ def detect_format(content): return "dnsmasq" -def parse_blocklist(content, fmt=None): - if fmt is None: - fmt = detect_format(content) +def parse_blocklist(content, is_local=False): + if is_local: + return parse_local_format(content) + fmt = detect_format(content) if fmt == "dnsmasq": return parse_dnsmasq_format(content) return parse_hosts_format(content) +def content_hash(content): + return hashlib.sha256(content.encode()).hexdigest() + + +# SQLite ============================================================== + +def open_db(): + db = sqlite3.connect(DB_FILE) + db.execute("PRAGMA journal_mode=WAL") + db.execute("PRAGMA foreign_keys=ON") + db.executescript(""" + CREATE TABLE IF NOT EXISTS blocklists ( + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL, + content_hash TEXT, + fetched_at INTEGER, + domain_count INTEGER + ); + CREATE TABLE IF NOT EXISTS domains ( + domain TEXT NOT NULL, + blocklist_id INTEGER NOT NULL REFERENCES blocklists(id) ON DELETE CASCADE, + PRIMARY KEY (domain, blocklist_id) + ); + CREATE INDEX IF NOT EXISTS idx_domains_domain ON domains(domain); + """) + db.commit() + return db + + +def get_stored_hash(db, name): + row = db.execute("SELECT content_hash FROM blocklists WHERE name = ?", (name,)).fetchone() + return row[0] if row else None + + +def upsert_blocklist(db, name, domains, raw_hash): + now = int(time.time()) + db.execute(""" + INSERT INTO blocklists (name, content_hash, fetched_at, domain_count) + VALUES (?, ?, ?, ?) + ON CONFLICT(name) DO UPDATE SET + content_hash = excluded.content_hash, + fetched_at = excluded.fetched_at, + domain_count = excluded.domain_count + """, (name, raw_hash, now, len(domains))) + bl_id = db.execute("SELECT id FROM blocklists WHERE name = ?", (name,)).fetchone()[0] + db.execute("DELETE FROM domains WHERE blocklist_id = ?", (bl_id,)) + db.executemany("INSERT INTO domains (domain, blocklist_id) VALUES (?, ?)", + ((d, bl_id) for d in domains)) + db.commit() + + +def query_merged_domains(db, names): + placeholders = ",".join("?" * len(names)) + rows = db.execute(f""" + SELECT DISTINCT d.domain + FROM domains d + JOIN blocklists b ON d.blocklist_id = b.id + WHERE b.name IN ({placeholders}) + ORDER BY d.domain + """, list(names)).fetchall() + return [r[0] for r in rows] + + +# Conf file output ==================================================== + def build_merged_conf(domains, bl_names): lines = [ "# Generated by dns-blocklists.py -- do not edit manually.", @@ -151,72 +232,64 @@ def build_merged_conf(domains, bl_names): "# Blocks domain and all subdomains via local=/domain/ syntax.", "", ] - for domain in sorted(domains): + for domain in domains: lines.append(f"local=/{domain}/") return "\n".join(lines) -def download_all_blocklists(data): +# Fetch =============================================================== + +def fetch_community(entry): + url = entry["url"] + req = urllib.request.Request(url, headers={"User-Agent": "dns-blocklists.py/1.0"}) + with urllib.request.urlopen(req, timeout=30) as r: + return r.read().decode("utf-8", errors="ignore") + + +def read_local(entry): + save_as = entry.get("save_as", "") + path = BLOCKLIST_DIR / save_as if save_as else None + if not path: + return "" + return path.read_text() + + +# Main update ========================================================= + +def update_blocklists(data): + BLOCKLIST_DIR.mkdir(exist_ok=True) + _chown_to_script_dir_owner(BLOCKLIST_DIR) + + db = open_db() + bl_library = {bl["name"]: bl for bl in data.get("dns_blocking", {}).get("blocklists", [])} needed = set() for vlan in data["vlans"]: needed.update(vlan.get("use_blocklists", [])) - results = {} + changed = set() + any_fail = False + for name in needed: - entry = bl_library[name] - if entry.get("bl_type") == "local": - results[name] = (None, entry) - continue - url = entry["url"] + entry = bl_library[name] + is_local = entry.get("bl_type") == "local" + try: - req = urllib.request.Request(url, headers={"User-Agent": "dns-blocklists.py/1.0"}) - with urllib.request.urlopen(req, timeout=30) as r: - content = r.read().decode("utf-8", errors="ignore") - log.info(f"Downloaded: {entry['description']} ({len(content):,} bytes)") - results[name] = (content, entry) + raw = read_local(entry) if is_local else fetch_community(entry) except Exception as e: - log.error(f"Failed to download '{entry['description']}' from {url}: {e}") - results[name] = (None, entry) - return results + log.error(f"Failed to fetch '{name}': {e}") + any_fail = True + continue + h = content_hash(raw) + if h == get_stored_hash(db, name): + log.info(f"Unchanged: '{name}' -- skipping") + continue -def _parse_local_domains(content): - domains = set() - for ln in content.splitlines(): - ln = ln.strip() - if ln and not ln.startswith("#"): - domains.add(ln) - return domains - - -def update_blocklists(data): - BLOCKLIST_DIR.mkdir(exist_ok=True) - - log.info("Downloading blocklists...") - downloaded = download_all_blocklists(data) - - domains_by_name = {} - for name, (content, entry) in downloaded.items(): - if entry.get("bl_type") == "local": - save_as = entry.get("save_as", "") - local_file = BLOCKLIST_DIR / save_as if save_as else None - try: - local_content = local_file.read_text() if local_file else "" - domains = _parse_local_domains(local_content) - log.info(f"Local blocklist '{name}': {len(domains):,} domains") - except Exception as e: - log.error(f"Local blocklist '{name}' could not be read: {e}") - domains = set() - domains_by_name[name] = domains - elif content is None: - log.error(f"Blocklist '{name}' failed to download -- it will be skipped.") - domains_by_name[name] = set() - else: - (BLOCKLIST_DIR / entry["save_as"]).write_text(content) - domains = parse_blocklist(content) - log.info(f"Parsed {len(domains):,} domains from '{name}'") - domains_by_name[name] = domains + domains = parse_blocklist(raw, is_local=is_local) + upsert_blocklist(db, name, domains, h) + log.info(f"Updated '{name}': {len(domains):,} domains") + changed.add(name) active_hashes = set() combos = {} @@ -227,17 +300,13 @@ def update_blocklists(data): combos[h] = names for h, names in combos.items(): - combo_domains = set() - for name in names: - combo_domains.update(domains_by_name.get(name, set())) - - merged = build_merged_conf(combo_domains, names) - merged_path(h).write_text(merged) active_hashes.add(h) - log.info( - f"Merged [{h}] ({', '.join(sorted(names))}): " - f"{len(combo_domains):,} unique domains." - ) + if not changed.intersection(names) and merged_path(h).exists(): + log.info(f"Combo [{h}] unchanged -- skipping rewrite") + continue + domains = query_merged_domains(db, names) + merged_path(h).write_text(build_merged_conf(domains, names)) + log.info(f"Merged [{h}] ({', '.join(sorted(names))}): {len(domains):,} unique domains") for f in BLOCKLIST_DIR.glob("merged-*.conf"): h = f.stem.removeprefix("merged-") @@ -245,11 +314,8 @@ def update_blocklists(data): f.unlink() log.info(f"Removed stale merged file: {f.name}") - any_failed = any( - content is None and entry.get("bl_type") != "local" - for content, entry in downloaded.values() - ) - return not any_failed + db.close() + return not any_fail def reload_dnsmasq_instances():