Development

This commit is contained in:
Matthew Grotke 2026-06-09 01:25:02 -04:00
parent 89306b132d
commit 6ad78e9ed7
4 changed files with 187 additions and 104 deletions

View file

@ -678,6 +678,23 @@ def resolve_iface(vlan, cfg):
# Config datasources ================================================
def _bl_db_rows():
    """Return {blocklist_name: {domain_count, fetched_at}} from domains.db, or {} if unavailable.

    The database is opened read-only through a ``file:`` URI, so this function
    never creates or modifies domains.db. Any failure (missing file, missing
    table, locked database, ...) is treated as "no data" and yields ``{}``.
    """
    db_path = os.path.join(BLOCKLISTS_DIR, 'domains.db')
    try:
        db = _sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
        try:
            rows = db.execute('SELECT name, domain_count, fetched_at FROM blocklists').fetchall()
        finally:
            # Close even when the query raises, so a failed read does not leak the handle
            db.close()
    except Exception:
        # Best-effort datasource: callers render a placeholder when no stats exist
        return {}
    return {name: {'domain_count': count, 'fetched_at': fetched_at}
            for name, count, fetched_at in rows}
def _bl_db_counts():
    """Return {blocklist_name: domain_count} derived from _bl_db_rows()."""
    counts = {}
    for bl_name, info in _bl_db_rows().items():
        counts[bl_name] = info['domain_count']
    return counts
def config_datasource(name):
cfg = load_config()
vlans = cfg.get('vlans', [])
@ -689,30 +706,25 @@ def config_datasource(name):
return cfg.get('host_overrides', [])
if name == 'blocklists':
db_counts = _bl_db_counts()
rows = []
for bl in cfg.get('dns_blocking', {}).get('blocklists', []):
row = dict(bl)
bl_type = bl.get('bl_type', 'community')
row['bl_type_label'] = 'Local' if bl_type == 'local' else 'Community'
bl_path = os.path.join(BLOCKLISTS_DIR, bl.get('save_as', ''))
count = db_counts.get(bl.get('name', ''))
row['domain_count'] = f'{count:,}' if count is not None else '-'
if bl_type == 'local':
bl_path = os.path.join(BLOCKLISTS_DIR, bl.get('save_as', ''))
try:
with open(bl_path) as f:
content = f.read()
row['local_entries'] = content.strip()
row['domain_count'] = str(sum(1 for ln in content.splitlines() if ln.strip() and not ln.startswith('#')))
row['local_entries'] = f.read().strip()
except Exception:
row['local_entries'] = ''
row['domain_count'] = '-'
row['last_updated'] = '-'
row['source_display'] = bl.get('save_as', '')
else:
try:
with open(bl_path) as f:
row['domain_count'] = str(sum(1 for _ in f))
row['last_updated'] = fmt_timestamp(int(os.path.getmtime(bl_path)))
except Exception:
row['domain_count'] = '-'
row['last_updated'] = '-'
row['local_entries'] = ''
row['source_display'] = row.get('url', '')
rows.append(row)
return rows

View file

@ -33,8 +33,8 @@
"class": "col-narrow"
},
{
"label": "Source URL",
"field": "url",
"label": "Source",
"field": "source_display",
"class": "col-mono"
}
],

View file

@ -37,35 +37,40 @@ def _dnsblocking_log_tail(cfg):
def blocklist_stats_html(cfg):
db_rows = config_utils._bl_db_rows()
rows = ''
for bl in cfg.get('dns_blocking', {}).get('blocklists', []):
name = factory.e(bl.get('name', ''))
name = bl.get('name', '')
is_local = bl.get('bl_type') == 'local'
db = db_rows.get(name, {})
count = db.get('domain_count')
entries = f'{count:,}' if count is not None else '-'
if is_local:
save_as = bl.get('save_as', '')
bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else ''
if is_local:
try:
with open(bl_path) as f:
entries = sum(1 for ln in f if ln.strip() and not ln.startswith('#'))
size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
except Exception:
size_str = '-'
last_refreshed = 'Local'
except Exception:
entries, size_str, last_refreshed = '-', '-', 'Local'
else:
try:
with open(bl_path) as f:
entries = sum(1 for _ in f)
mtime = int(os.path.getmtime(bl_path))
size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
fetched_at = db.get('fetched_at')
if fetched_at:
last_refreshed = (
f'{datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M")}'
f' ({config_utils.relative_time(mtime, datetime.now(tz=timezone.utc).timestamp())} ago)'
f'{datetime.fromtimestamp(fetched_at).strftime("%Y-%m-%d %H:%M")}'
f' ({config_utils.relative_time(fetched_at, datetime.now(tz=timezone.utc).timestamp())} ago)'
)
else:
last_refreshed = 'Never'
save_as = bl.get('save_as', '')
bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else ''
try:
size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
except Exception:
entries, size_str, last_refreshed = '-', '-', 'Never'
size_str = '-'
rows += (
'<tr>'
f'<td class="table-cell">{name}</td>'
f'<td class="table-cell">{factory.e(name)}</td>'
f'<td class="table-cell">{entries}</td>'
f'<td class="table-cell">{size_str}</td>'
f'<td class="table-cell">{factory.e(last_refreshed)}</td>'

View file

@ -3,9 +3,10 @@
dns-blocklists.py -- Download and merge DNS blocklists defined in config.json.
Reads the blocklists library from config.json, downloads every blocklist referenced
by at least one VLAN, merges them into per-combo conf files (one per unique
combination of blocklist names), then sends SIGHUP to each running dnsmasq
instance so it reloads its config without restarting.
by at least one VLAN, and upserts normalized domains into a SQLite database
(blocklists/domains.db). Downloads are skipped when the content hash is unchanged.
Merged per-combo conf files are only rewritten when a constituent blocklist changed.
Sends SIGHUP to each running dnsmasq instance so it reloads without restarting.
Usage:
sudo python3 dns-blocklists.py
@ -15,8 +16,10 @@ import hashlib
import json
import logging
import os
import sqlite3
import subprocess
import sys
import time
import urllib.request
import urllib.error
from pathlib import Path
@ -25,6 +28,7 @@ PRODUCT_NAME = "routlin"
SCRIPT_DIR = Path(__file__).parent
CONFIG_FILE = SCRIPT_DIR / "config.json"
BLOCKLIST_DIR = SCRIPT_DIR / "blocklists"
DB_FILE = BLOCKLIST_DIR / "domains.db"
LOG_FILE = SCRIPT_DIR / "dns-blocklists.log"
log = None
@ -93,6 +97,8 @@ def merged_path(h):
return BLOCKLIST_DIR / f"merged-{h}.conf"
# Parse / detect ======================================================
def parse_dnsmasq_format(content):
domains = set()
for ln in content.splitlines():
@ -122,6 +128,15 @@ def parse_hosts_format(content):
return domains
def parse_local_format(content):
    """Collect the entries of a local blocklist.

    Each line of *content* is stripped of surrounding whitespace; empty lines
    and lines starting with ``#`` are dropped. The remaining lines are
    returned as a set.
    """
    stripped = (raw.strip() for raw in content.splitlines())
    return {entry for entry in stripped if entry and not entry.startswith("#")}
def detect_format(content):
for ln in content.splitlines():
ln = ln.strip()
@ -134,14 +149,80 @@ def detect_format(content):
return "dnsmasq"
def parse_blocklist(content, fmt=None):
if fmt is None:
def parse_blocklist(content, is_local=False):
    """Parse blocklist text into a set of domains.

    Local lists go straight to parse_local_format(). Anything else is run
    through detect_format(): a "dnsmasq" result is handled by
    parse_dnsmasq_format(), every other result by parse_hosts_format().
    """
    if is_local:
        return parse_local_format(content)
    if detect_format(content) == "dnsmasq":
        parser = parse_dnsmasq_format
    else:
        parser = parse_hosts_format
    return parser(content)
def content_hash(content):
    """Return the SHA-256 hex digest of the text *content* (encoded with str.encode() defaults)."""
    digest = hashlib.sha256()
    digest.update(content.encode())
    return digest.hexdigest()
# SQLite ==============================================================
def open_db():
    """Open the blocklist database at DB_FILE and return the connection.

    Switches the journal to WAL, turns on foreign-key enforcement for this
    connection, and creates the ``blocklists`` and ``domains`` tables and the
    ``idx_domains_domain`` index if they do not exist yet.

    Raises whatever sqlite3 raises when the file cannot be opened or the
    schema cannot be set up; the connection is closed before re-raising.
    """
    db = sqlite3.connect(DB_FILE)
    try:
        # NOTE(review): SQLite documents that a WAL-mode database can be opened
        # read-only only if its -wal/-shm files exist or can be created --
        # confirm any read-only consumer of this file has that access.
        db.execute("PRAGMA journal_mode=WAL")
        # SQLite leaves foreign keys off by default; ON DELETE CASCADE needs this
        db.execute("PRAGMA foreign_keys=ON")
        db.executescript("""
            CREATE TABLE IF NOT EXISTS blocklists (
                id INTEGER PRIMARY KEY,
                name TEXT UNIQUE NOT NULL,
                content_hash TEXT,
                fetched_at INTEGER,
                domain_count INTEGER
            );
            CREATE TABLE IF NOT EXISTS domains (
                domain TEXT NOT NULL,
                blocklist_id INTEGER NOT NULL REFERENCES blocklists(id) ON DELETE CASCADE,
                PRIMARY KEY (domain, blocklist_id)
            );
            CREATE INDEX IF NOT EXISTS idx_domains_domain ON domains(domain);
        """)
        db.commit()
    except Exception:
        # Do not leak the handle when setup fails part-way
        db.close()
        raise
    return db
def get_stored_hash(db, name):
    """Look up the content hash recorded for blocklist *name*; None when it has no row."""
    cursor = db.execute("SELECT content_hash FROM blocklists WHERE name = ?", (name,))
    record = cursor.fetchone()
    if not record:
        return None
    return record[0]
def upsert_blocklist(db, name, domains, raw_hash):
    """Replace the stored contents of blocklist *name* in one transaction.

    Inserts or updates the ``blocklists`` row (content hash, current Unix time
    as ``fetched_at``, and ``len(domains)`` as ``domain_count``), deletes the
    list's existing ``domains`` rows and inserts one row per entry of
    *domains*.

    The work is committed on success. If any statement raises, everything done
    by this call is rolled back before the exception propagates, so a new hash
    is never stored alongside missing or partial domain rows.
    """
    now = int(time.time())
    # The connection context manager commits on success and rolls back on error
    with db:
        db.execute("""
            INSERT INTO blocklists (name, content_hash, fetched_at, domain_count)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(name) DO UPDATE SET
                content_hash = excluded.content_hash,
                fetched_at = excluded.fetched_at,
                domain_count = excluded.domain_count
        """, (name, raw_hash, now, len(domains)))
        # Re-read the id: on the update path the row keeps its original id
        bl_id = db.execute("SELECT id FROM blocklists WHERE name = ?", (name,)).fetchone()[0]
        db.execute("DELETE FROM domains WHERE blocklist_id = ?", (bl_id,))
        db.executemany("INSERT INTO domains (domain, blocklist_id) VALUES (?, ?)",
                       ((d, bl_id) for d in domains))
def query_merged_domains(db, names):
    """Return the distinct domains belonging to any blocklist in *names*, sorted by domain.

    One ``?`` placeholder is generated per name, so the names themselves are
    always passed as bound parameters rather than spliced into the SQL text.
    """
    placeholders = ",".join(["?"] * len(names))
    cursor = db.execute(f"""
        SELECT DISTINCT d.domain
        FROM domains d
        JOIN blocklists b ON d.blocklist_id = b.id
        WHERE b.name IN ({placeholders})
        ORDER BY d.domain
    """, list(names))
    return [domain for (domain,) in cursor.fetchall()]
# Conf file output ====================================================
def build_merged_conf(domains, bl_names):
lines = [
"# Generated by dns-blocklists.py -- do not edit manually.",
@ -151,72 +232,64 @@ def build_merged_conf(domains, bl_names):
"# Blocks domain and all subdomains via local=/domain/ syntax.",
"",
]
for domain in sorted(domains):
for domain in domains:
lines.append(f"local=/{domain}/")
return "\n".join(lines)
def download_all_blocklists(data):
# Fetch ===============================================================
def fetch_community(entry):
    """Download the blocklist at ``entry["url"]`` and return its body as text.

    The request carries a ``dns-blocklists.py/1.0`` User-Agent and uses a
    30-second timeout. The body is decoded as UTF-8 with undecodable bytes
    dropped. Errors from urllib (and a missing "url" key) propagate to the caller.
    """
    headers = {"User-Agent": "dns-blocklists.py/1.0"}
    request = urllib.request.Request(entry["url"], headers=headers)
    with urllib.request.urlopen(request, timeout=30) as response:
        body = response.read()
    return body.decode("utf-8", errors="ignore")
def read_local(entry):
    """Return the text of the local blocklist file named by ``entry["save_as"]``.

    The file is looked up inside BLOCKLIST_DIR. An entry without a (non-empty)
    "save_as" yields an empty string; read errors propagate to the caller.
    """
    filename = entry.get("save_as", "")
    if not filename:
        return ""
    return (BLOCKLIST_DIR / filename).read_text()
# Main update =========================================================
def update_blocklists(data):
BLOCKLIST_DIR.mkdir(exist_ok=True)
_chown_to_script_dir_owner(BLOCKLIST_DIR)
db = open_db()
bl_library = {bl["name"]: bl for bl in data.get("dns_blocking", {}).get("blocklists", [])}
needed = set()
for vlan in data["vlans"]:
needed.update(vlan.get("use_blocklists", []))
results = {}
changed = set()
any_fail = False
for name in needed:
entry = bl_library[name]
if entry.get("bl_type") == "local":
results[name] = (None, entry)
is_local = entry.get("bl_type") == "local"
try:
raw = read_local(entry) if is_local else fetch_community(entry)
except Exception as e:
log.error(f"Failed to fetch '{name}': {e}")
any_fail = True
continue
url = entry["url"]
try:
req = urllib.request.Request(url, headers={"User-Agent": "dns-blocklists.py/1.0"})
with urllib.request.urlopen(req, timeout=30) as r:
content = r.read().decode("utf-8", errors="ignore")
log.info(f"Downloaded: {entry['description']} ({len(content):,} bytes)")
results[name] = (content, entry)
except Exception as e:
log.error(f"Failed to download '{entry['description']}' from {url}: {e}")
results[name] = (None, entry)
return results
h = content_hash(raw)
if h == get_stored_hash(db, name):
log.info(f"Unchanged: '{name}' -- skipping")
continue
def _parse_local_domains(content):
domains = set()
for ln in content.splitlines():
ln = ln.strip()
if ln and not ln.startswith("#"):
domains.add(ln)
return domains
def update_blocklists(data):
BLOCKLIST_DIR.mkdir(exist_ok=True)
log.info("Downloading blocklists...")
downloaded = download_all_blocklists(data)
domains_by_name = {}
for name, (content, entry) in downloaded.items():
if entry.get("bl_type") == "local":
save_as = entry.get("save_as", "")
local_file = BLOCKLIST_DIR / save_as if save_as else None
try:
local_content = local_file.read_text() if local_file else ""
domains = _parse_local_domains(local_content)
log.info(f"Local blocklist '{name}': {len(domains):,} domains")
except Exception as e:
log.error(f"Local blocklist '{name}' could not be read: {e}")
domains = set()
domains_by_name[name] = domains
elif content is None:
log.error(f"Blocklist '{name}' failed to download -- it will be skipped.")
domains_by_name[name] = set()
else:
(BLOCKLIST_DIR / entry["save_as"]).write_text(content)
domains = parse_blocklist(content)
log.info(f"Parsed {len(domains):,} domains from '{name}'")
domains_by_name[name] = domains
domains = parse_blocklist(raw, is_local=is_local)
upsert_blocklist(db, name, domains, h)
log.info(f"Updated '{name}': {len(domains):,} domains")
changed.add(name)
active_hashes = set()
combos = {}
@ -227,17 +300,13 @@ def update_blocklists(data):
combos[h] = names
for h, names in combos.items():
combo_domains = set()
for name in names:
combo_domains.update(domains_by_name.get(name, set()))
merged = build_merged_conf(combo_domains, names)
merged_path(h).write_text(merged)
active_hashes.add(h)
log.info(
f"Merged [{h}] ({', '.join(sorted(names))}): "
f"{len(combo_domains):,} unique domains."
)
if not changed.intersection(names) and merged_path(h).exists():
log.info(f"Combo [{h}] unchanged -- skipping rewrite")
continue
domains = query_merged_domains(db, names)
merged_path(h).write_text(build_merged_conf(domains, names))
log.info(f"Merged [{h}] ({', '.join(sorted(names))}): {len(domains):,} unique domains")
for f in BLOCKLIST_DIR.glob("merged-*.conf"):
h = f.stem.removeprefix("merged-")
@ -245,11 +314,8 @@ def update_blocklists(data):
f.unlink()
log.info(f"Removed stale merged file: {f.name}")
any_failed = any(
content is None and entry.get("bl_type") != "local"
for content, entry in downloaded.values()
)
return not any_failed
db.close()
return not any_fail
def reload_dnsmasq_instances():