diff --git a/docker/routlin-dash/app/config_utils.py b/docker/routlin-dash/app/config_utils.py
index 3247111..6cfbfc4 100644
--- a/docker/routlin-dash/app/config_utils.py
+++ b/docker/routlin-dash/app/config_utils.py
@@ -678,6 +678,23 @@ def resolve_iface(vlan, cfg):
# Config datasources ================================================
+def _bl_db_rows():
+    """Return per-blocklist metadata read from domains.db.
+
+    The database under BLOCKLISTS_DIR is opened read-only through a
+    ``file:`` URI (``mode=ro``).
+
+    Returns:
+        A dict mapping each blocklist name to
+        ``{'domain_count': ..., 'fetched_at': ...}`` as stored in the
+        ``blocklists`` table, or ``{}`` when the database cannot be
+        opened or queried.
+    """
+    db_path = os.path.join(BLOCKLISTS_DIR, 'domains.db')
+    try:
+        db = _sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
+        try:
+            rows = db.execute(
+                'SELECT name, domain_count, fetched_at FROM blocklists'
+            ).fetchall()
+        finally:
+            # Close even when the query raises (e.g. the table is missing),
+            # so a failed lookup does not leave the connection open.
+            db.close()
+    except Exception:
+        # Best-effort datasource: callers treat {} as "no data available".
+        return {}
+    return {name: {'domain_count': count, 'fetched_at': fetched_at}
+            for name, count, fetched_at in rows}
+
+
+def _bl_db_counts():
+    """Return {blocklist_name: domain_count} built from _bl_db_rows()."""
+    counts = {}
+    for bl_name, meta in _bl_db_rows().items():
+        counts[bl_name] = meta['domain_count']
+    return counts
+
+
def config_datasource(name):
cfg = load_config()
vlans = cfg.get('vlans', [])
@@ -689,30 +706,25 @@ def config_datasource(name):
return cfg.get('host_overrides', [])
if name == 'blocklists':
+ db_counts = _bl_db_counts()
rows = []
for bl in cfg.get('dns_blocking', {}).get('blocklists', []):
row = dict(bl)
bl_type = bl.get('bl_type', 'community')
row['bl_type_label'] = 'Local' if bl_type == 'local' else 'Community'
- bl_path = os.path.join(BLOCKLISTS_DIR, bl.get('save_as', ''))
+ count = db_counts.get(bl.get('name', ''))
+ row['domain_count'] = f'{count:,}' if count is not None else '-'
if bl_type == 'local':
+ bl_path = os.path.join(BLOCKLISTS_DIR, bl.get('save_as', ''))
try:
with open(bl_path) as f:
- content = f.read()
- row['local_entries'] = content.strip()
- row['domain_count'] = str(sum(1 for ln in content.splitlines() if ln.strip() and not ln.startswith('#')))
+ row['local_entries'] = f.read().strip()
except Exception:
row['local_entries'] = ''
- row['domain_count'] = '-'
- row['last_updated'] = '-'
+ row['source_display'] = bl.get('save_as', '')
else:
- try:
- with open(bl_path) as f:
- row['domain_count'] = str(sum(1 for _ in f))
- row['last_updated'] = fmt_timestamp(int(os.path.getmtime(bl_path)))
- except Exception:
- row['domain_count'] = '-'
- row['last_updated'] = '-'
+ row['local_entries'] = ''
+ row['source_display'] = row.get('url', '')
rows.append(row)
return rows
diff --git a/docker/routlin-dash/app/pages/dnsblocking/content.json b/docker/routlin-dash/app/pages/dnsblocking/content.json
index 335c0dc..e5b6724 100644
--- a/docker/routlin-dash/app/pages/dnsblocking/content.json
+++ b/docker/routlin-dash/app/pages/dnsblocking/content.json
@@ -33,8 +33,8 @@
"class": "col-narrow"
},
{
- "label": "Source URL",
- "field": "url",
+ "label": "Source",
+ "field": "source_display",
"class": "col-mono"
}
],
diff --git a/docker/routlin-dash/app/pages/dnsblocking/view.py b/docker/routlin-dash/app/pages/dnsblocking/view.py
index 666d19f..d5351fa 100644
--- a/docker/routlin-dash/app/pages/dnsblocking/view.py
+++ b/docker/routlin-dash/app/pages/dnsblocking/view.py
@@ -37,35 +37,40 @@ def _dnsblocking_log_tail(cfg):
def blocklist_stats_html(cfg):
+ db_rows = config_utils._bl_db_rows()
rows = ''
for bl in cfg.get('dns_blocking', {}).get('blocklists', []):
- name = factory.e(bl.get('name', ''))
+ name = bl.get('name', '')
is_local = bl.get('bl_type') == 'local'
- save_as = bl.get('save_as', '')
- bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else ''
+ db = db_rows.get(name, {})
+ count = db.get('domain_count')
+ entries = f'{count:,}' if count is not None else '-'
if is_local:
+ save_as = bl.get('save_as', '')
+ bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else ''
try:
- with open(bl_path) as f:
- entries = sum(1 for ln in f if ln.strip() and not ln.startswith('#'))
size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
- last_refreshed = 'Local'
except Exception:
- entries, size_str, last_refreshed = '-', '-', 'Local'
+ size_str = '-'
+ last_refreshed = 'Local'
else:
- try:
- with open(bl_path) as f:
- entries = sum(1 for _ in f)
- mtime = int(os.path.getmtime(bl_path))
- size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
+ fetched_at = db.get('fetched_at')
+ if fetched_at:
last_refreshed = (
- f'{datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M")}'
- f' ({config_utils.relative_time(mtime, datetime.now(tz=timezone.utc).timestamp())} ago)'
+ f'{datetime.fromtimestamp(fetched_at).strftime("%Y-%m-%d %H:%M")}'
+ f' ({config_utils.relative_time(fetched_at, datetime.now(tz=timezone.utc).timestamp())} ago)'
)
+ else:
+ last_refreshed = 'Never'
+ save_as = bl.get('save_as', '')
+ bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else ''
+ try:
+ size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
except Exception:
- entries, size_str, last_refreshed = '-', '-', 'Never'
+ size_str = '-'
rows += (
'
'
- f'| {name} | '
+ f'{factory.e(name)} | '
f'{entries} | '
f'{size_str} | '
f'{factory.e(last_refreshed)} | '
diff --git a/routlin/dns-blocklists.py b/routlin/dns-blocklists.py
index 9957404..5c42545 100644
--- a/routlin/dns-blocklists.py
+++ b/routlin/dns-blocklists.py
@@ -3,9 +3,10 @@
dns-blocklists.py -- Download and merge DNS blocklists defined in config.json.
Reads the blocklists library from config.json, downloads every blocklist referenced
-by at least one VLAN, merges them into per-combo conf files (one per unique
-combination of blocklist names), then sends SIGHUP to each running dnsmasq
-instance so it reloads its config without restarting.
+by at least one VLAN, and upserts normalized domains into a SQLite database
+(blocklists/domains.db). Every list is still fetched on each run; parsing and the
+database update are skipped when the fetched content's hash is unchanged.
+Merged per-combo conf files are only rewritten when a constituent blocklist changed.
+Sends SIGHUP to each running dnsmasq instance so it reloads without restarting.
Usage:
sudo python3 dns-blocklists.py
@@ -15,8 +16,10 @@ import hashlib
import json
import logging
import os
+import sqlite3
import subprocess
import sys
+import time
import urllib.request
import urllib.error
from pathlib import Path
@@ -25,6 +28,7 @@ PRODUCT_NAME = "routlin"
SCRIPT_DIR = Path(__file__).parent
CONFIG_FILE = SCRIPT_DIR / "config.json"
BLOCKLIST_DIR = SCRIPT_DIR / "blocklists"
+DB_FILE = BLOCKLIST_DIR / "domains.db"
LOG_FILE = SCRIPT_DIR / "dns-blocklists.log"
log = None
@@ -93,6 +97,8 @@ def merged_path(h):
return BLOCKLIST_DIR / f"merged-{h}.conf"
+# Parse / detect ======================================================
+
def parse_dnsmasq_format(content):
domains = set()
for ln in content.splitlines():
@@ -122,6 +128,15 @@ def parse_hosts_format(content):
return domains
+def parse_local_format(content):
+    """Collect the entries of a local blocklist.
+
+    Every line is stripped of surrounding whitespace; blank lines and lines
+    that start with "#" are dropped. The remaining lines are returned as a
+    set, so duplicates collapse.
+    """
+    stripped = (raw.strip() for raw in content.splitlines())
+    return {entry for entry in stripped if entry and not entry.startswith("#")}
+
+
def detect_format(content):
for ln in content.splitlines():
ln = ln.strip()
@@ -134,14 +149,80 @@ def detect_format(content):
return "dnsmasq"
-def parse_blocklist(content, fmt=None):
- if fmt is None:
- fmt = detect_format(content)
+def parse_blocklist(content, is_local=False):
+ if is_local:
+ return parse_local_format(content)
+ fmt = detect_format(content)
if fmt == "dnsmasq":
return parse_dnsmasq_format(content)
return parse_hosts_format(content)
+def content_hash(content):
+    """Return the hex SHA-256 digest of *content* after str.encode()."""
+    digest = hashlib.sha256()
+    digest.update(content.encode())
+    return digest.hexdigest()
+
+
+# SQLite ==============================================================
+
+def open_db():
+    """Open DB_FILE, configure the connection, and make sure the schema exists.
+
+    Sets ``journal_mode=WAL`` and ``foreign_keys=ON``, then creates the
+    ``blocklists`` and ``domains`` tables and the ``idx_domains_domain``
+    index if they are not already present.
+
+    Returns:
+        The open sqlite3 connection; the caller is responsible for closing it.
+    """
+    schema = """
+        CREATE TABLE IF NOT EXISTS blocklists (
+            id           INTEGER PRIMARY KEY,
+            name         TEXT UNIQUE NOT NULL,
+            content_hash TEXT,
+            fetched_at   INTEGER,
+            domain_count INTEGER
+        );
+        CREATE TABLE IF NOT EXISTS domains (
+            domain       TEXT NOT NULL,
+            blocklist_id INTEGER NOT NULL REFERENCES blocklists(id) ON DELETE CASCADE,
+            PRIMARY KEY (domain, blocklist_id)
+        );
+        CREATE INDEX IF NOT EXISTS idx_domains_domain ON domains(domain);
+    """
+    conn = sqlite3.connect(DB_FILE)
+    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
+        conn.execute(pragma)
+    conn.executescript(schema)
+    conn.commit()
+    return conn
+
+
+def get_stored_hash(db, name):
+    """Return the content_hash stored for blocklist *name*, or None if no row exists."""
+    cursor = db.execute("SELECT content_hash FROM blocklists WHERE name = ?", (name,))
+    match = cursor.fetchone()
+    if not match:
+        return None
+    return match[0]
+
+
+def upsert_blocklist(db, name, domains, raw_hash):
+    """Replace the stored metadata and domain set of blocklist *name*.
+
+    Args:
+        db: open sqlite3 connection holding the blocklists/domains tables.
+        name: blocklist name (unique key of the ``blocklists`` table).
+        domains: sized collection of domain strings; its length is stored
+            as ``domain_count`` and its items replace the list's rows in
+            ``domains``.
+        raw_hash: value stored as ``content_hash``.
+
+    Raises:
+        sqlite3.Error: if any statement fails. The whole replacement is
+            rolled back, so the previously stored list stays intact.
+    """
+    now = int(time.time())
+    # The connection context manager commits on success and rolls back on
+    # any exception. Without it, a failure after the metadata upsert or the
+    # DELETE would leave an open transaction that the next commit() on this
+    # connection would persist as a half-replaced list.
+    with db:
+        db.execute("""
+            INSERT INTO blocklists (name, content_hash, fetched_at, domain_count)
+            VALUES (?, ?, ?, ?)
+            ON CONFLICT(name) DO UPDATE SET
+                content_hash = excluded.content_hash,
+                fetched_at   = excluded.fetched_at,
+                domain_count = excluded.domain_count
+        """, (name, raw_hash, now, len(domains)))
+        bl_id = db.execute("SELECT id FROM blocklists WHERE name = ?", (name,)).fetchone()[0]
+        db.execute("DELETE FROM domains WHERE blocklist_id = ?", (bl_id,))
+        db.executemany("INSERT INTO domains (domain, blocklist_id) VALUES (?, ?)",
+                       ((d, bl_id) for d in domains))
+
+
+def query_merged_domains(db, names):
+    """Return the distinct domains of the blocklists in *names*, sorted by domain.
+
+    One ``?`` placeholder is generated per name, so the names themselves are
+    always passed as bound parameters.
+    """
+    wanted = list(names)
+    marks = ",".join("?" for _ in wanted)
+    sql = f"""
+        SELECT DISTINCT d.domain
+        FROM domains d
+        JOIN blocklists b ON d.blocklist_id = b.id
+        WHERE b.name IN ({marks})
+        ORDER BY d.domain
+    """
+    return [domain for (domain,) in db.execute(sql, wanted)]
+
+
+# Conf file output ====================================================
+
def build_merged_conf(domains, bl_names):
lines = [
"# Generated by dns-blocklists.py -- do not edit manually.",
@@ -151,72 +232,64 @@ def build_merged_conf(domains, bl_names):
"# Blocks domain and all subdomains via local=/domain/ syntax.",
"",
]
- for domain in sorted(domains):
+ for domain in domains:
lines.append(f"local=/{domain}/")
return "\n".join(lines)
-def download_all_blocklists(data):
+# Fetch ===============================================================
+
+def fetch_community(entry):
+    """Fetch entry["url"] and return the response body as text.
+
+    The request carries a fixed User-Agent and a 30-second timeout. The body
+    is decoded as UTF-8, silently dropping bytes that do not decode.
+    """
+    headers = {"User-Agent": "dns-blocklists.py/1.0"}
+    request = urllib.request.Request(entry["url"], headers=headers)
+    with urllib.request.urlopen(request, timeout=30) as response:
+        payload = response.read()
+    return payload.decode("utf-8", errors="ignore")
+
+
+def read_local(entry):
+    """Return the text of the local blocklist file named by entry["save_as"].
+
+    The file is looked up under BLOCKLIST_DIR. A missing or empty
+    "save_as" yields an empty string without touching the filesystem.
+    """
+    filename = entry.get("save_as", "")
+    if not filename:
+        return ""
+    return (BLOCKLIST_DIR / filename).read_text()
+
+
+# Main update =========================================================
+
+def update_blocklists(data):
+ BLOCKLIST_DIR.mkdir(exist_ok=True)
+ _chown_to_script_dir_owner(BLOCKLIST_DIR)
+
+ db = open_db()
+
bl_library = {bl["name"]: bl for bl in data.get("dns_blocking", {}).get("blocklists", [])}
needed = set()
for vlan in data["vlans"]:
needed.update(vlan.get("use_blocklists", []))
- results = {}
+ changed = set()
+ any_fail = False
+
for name in needed:
- entry = bl_library[name]
- if entry.get("bl_type") == "local":
- results[name] = (None, entry)
- continue
- url = entry["url"]
+ entry = bl_library[name]
+ is_local = entry.get("bl_type") == "local"
+
try:
- req = urllib.request.Request(url, headers={"User-Agent": "dns-blocklists.py/1.0"})
- with urllib.request.urlopen(req, timeout=30) as r:
- content = r.read().decode("utf-8", errors="ignore")
- log.info(f"Downloaded: {entry['description']} ({len(content):,} bytes)")
- results[name] = (content, entry)
+ raw = read_local(entry) if is_local else fetch_community(entry)
except Exception as e:
- log.error(f"Failed to download '{entry['description']}' from {url}: {e}")
- results[name] = (None, entry)
- return results
+ log.error(f"Failed to fetch '{name}': {e}")
+ any_fail = True
+ continue
+ h = content_hash(raw)
+ if h == get_stored_hash(db, name):
+ log.info(f"Unchanged: '{name}' -- skipping")
+ continue
-def _parse_local_domains(content):
- domains = set()
- for ln in content.splitlines():
- ln = ln.strip()
- if ln and not ln.startswith("#"):
- domains.add(ln)
- return domains
-
-
-def update_blocklists(data):
- BLOCKLIST_DIR.mkdir(exist_ok=True)
-
- log.info("Downloading blocklists...")
- downloaded = download_all_blocklists(data)
-
- domains_by_name = {}
- for name, (content, entry) in downloaded.items():
- if entry.get("bl_type") == "local":
- save_as = entry.get("save_as", "")
- local_file = BLOCKLIST_DIR / save_as if save_as else None
- try:
- local_content = local_file.read_text() if local_file else ""
- domains = _parse_local_domains(local_content)
- log.info(f"Local blocklist '{name}': {len(domains):,} domains")
- except Exception as e:
- log.error(f"Local blocklist '{name}' could not be read: {e}")
- domains = set()
- domains_by_name[name] = domains
- elif content is None:
- log.error(f"Blocklist '{name}' failed to download -- it will be skipped.")
- domains_by_name[name] = set()
- else:
- (BLOCKLIST_DIR / entry["save_as"]).write_text(content)
- domains = parse_blocklist(content)
- log.info(f"Parsed {len(domains):,} domains from '{name}'")
- domains_by_name[name] = domains
+ domains = parse_blocklist(raw, is_local=is_local)
+ upsert_blocklist(db, name, domains, h)
+ log.info(f"Updated '{name}': {len(domains):,} domains")
+ changed.add(name)
active_hashes = set()
combos = {}
@@ -227,17 +300,13 @@ def update_blocklists(data):
combos[h] = names
for h, names in combos.items():
- combo_domains = set()
- for name in names:
- combo_domains.update(domains_by_name.get(name, set()))
-
- merged = build_merged_conf(combo_domains, names)
- merged_path(h).write_text(merged)
active_hashes.add(h)
- log.info(
- f"Merged [{h}] ({', '.join(sorted(names))}): "
- f"{len(combo_domains):,} unique domains."
- )
+ if not changed.intersection(names) and merged_path(h).exists():
+ log.info(f"Combo [{h}] unchanged -- skipping rewrite")
+ continue
+ domains = query_merged_domains(db, names)
+ merged_path(h).write_text(build_merged_conf(domains, names))
+ log.info(f"Merged [{h}] ({', '.join(sorted(names))}): {len(domains):,} unique domains")
for f in BLOCKLIST_DIR.glob("merged-*.conf"):
h = f.stem.removeprefix("merged-")
@@ -245,11 +314,8 @@ def update_blocklists(data):
f.unlink()
log.info(f"Removed stale merged file: {f.name}")
- any_failed = any(
- content is None and entry.get("bl_type") != "local"
- for content, entry in downloaded.values()
- )
- return not any_failed
+ db.close()
+ return not any_fail
def reload_dnsmasq_instances():