Development
This commit is contained in:
parent
89306b132d
commit
6ad78e9ed7
4 changed files with 187 additions and 104 deletions
|
|
@ -678,6 +678,23 @@ def resolve_iface(vlan, cfg):
|
|||
|
||||
# Config datasources ================================================
|
||||
|
||||
def _bl_db_rows():
    """Return {blocklist_name: {domain_count, fetched_at}} from domains.db, or {} if unavailable.

    The database is opened read-only (``mode=ro`` URI). Any failure while
    opening or querying it is treated as "no data" and yields an empty dict
    rather than an exception.
    """
    db_path = os.path.join(BLOCKLISTS_DIR, 'domains.db')
    try:
        db = _sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
        try:
            rows = db.execute('SELECT name, domain_count, fetched_at FROM blocklists').fetchall()
        finally:
            # Close even when the query fails (e.g. the table does not exist
            # yet) so the connection handle is never leaked.
            db.close()
    except Exception:
        # Deliberate best-effort: callers fall back to placeholder values.
        return {}
    return {name: {'domain_count': count, 'fetched_at': fetched_at}
            for name, count, fetched_at in rows}
|
||||
|
||||
|
||||
def _bl_db_counts():
    """Return {blocklist_name: domain_count}, derived from _bl_db_rows()."""
    counts = {}
    for bl_name, info in _bl_db_rows().items():
        counts[bl_name] = info['domain_count']
    return counts
|
||||
|
||||
|
||||
def config_datasource(name):
|
||||
cfg = load_config()
|
||||
vlans = cfg.get('vlans', [])
|
||||
|
|
@ -689,30 +706,25 @@ def config_datasource(name):
|
|||
return cfg.get('host_overrides', [])
|
||||
|
||||
if name == 'blocklists':
|
||||
db_counts = _bl_db_counts()
|
||||
rows = []
|
||||
for bl in cfg.get('dns_blocking', {}).get('blocklists', []):
|
||||
row = dict(bl)
|
||||
bl_type = bl.get('bl_type', 'community')
|
||||
row['bl_type_label'] = 'Local' if bl_type == 'local' else 'Community'
|
||||
bl_path = os.path.join(BLOCKLISTS_DIR, bl.get('save_as', ''))
|
||||
count = db_counts.get(bl.get('name', ''))
|
||||
row['domain_count'] = f'{count:,}' if count is not None else '-'
|
||||
if bl_type == 'local':
|
||||
bl_path = os.path.join(BLOCKLISTS_DIR, bl.get('save_as', ''))
|
||||
try:
|
||||
with open(bl_path) as f:
|
||||
content = f.read()
|
||||
row['local_entries'] = content.strip()
|
||||
row['domain_count'] = str(sum(1 for ln in content.splitlines() if ln.strip() and not ln.startswith('#')))
|
||||
row['local_entries'] = f.read().strip()
|
||||
except Exception:
|
||||
row['local_entries'] = ''
|
||||
row['domain_count'] = '-'
|
||||
row['last_updated'] = '-'
|
||||
row['source_display'] = bl.get('save_as', '')
|
||||
else:
|
||||
try:
|
||||
with open(bl_path) as f:
|
||||
row['domain_count'] = str(sum(1 for _ in f))
|
||||
row['last_updated'] = fmt_timestamp(int(os.path.getmtime(bl_path)))
|
||||
except Exception:
|
||||
row['domain_count'] = '-'
|
||||
row['last_updated'] = '-'
|
||||
row['local_entries'] = ''
|
||||
row['source_display'] = row.get('url', '')
|
||||
rows.append(row)
|
||||
return rows
|
||||
|
||||
|
|
|
|||
|
|
@ -33,8 +33,8 @@
|
|||
"class": "col-narrow"
|
||||
},
|
||||
{
|
||||
"label": "Source URL",
|
||||
"field": "url",
|
||||
"label": "Source",
|
||||
"field": "source_display",
|
||||
"class": "col-mono"
|
||||
}
|
||||
],
|
||||
|
|
|
|||
|
|
@ -37,35 +37,40 @@ def _dnsblocking_log_tail(cfg):
|
|||
|
||||
|
||||
def blocklist_stats_html(cfg):
|
||||
db_rows = config_utils._bl_db_rows()
|
||||
rows = ''
|
||||
for bl in cfg.get('dns_blocking', {}).get('blocklists', []):
|
||||
name = factory.e(bl.get('name', ''))
|
||||
name = bl.get('name', '')
|
||||
is_local = bl.get('bl_type') == 'local'
|
||||
save_as = bl.get('save_as', '')
|
||||
bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else ''
|
||||
db = db_rows.get(name, {})
|
||||
count = db.get('domain_count')
|
||||
entries = f'{count:,}' if count is not None else '-'
|
||||
if is_local:
|
||||
save_as = bl.get('save_as', '')
|
||||
bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else ''
|
||||
try:
|
||||
with open(bl_path) as f:
|
||||
entries = sum(1 for ln in f if ln.strip() and not ln.startswith('#'))
|
||||
size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
|
||||
last_refreshed = 'Local'
|
||||
except Exception:
|
||||
entries, size_str, last_refreshed = '-', '-', 'Local'
|
||||
size_str = '-'
|
||||
last_refreshed = 'Local'
|
||||
else:
|
||||
try:
|
||||
with open(bl_path) as f:
|
||||
entries = sum(1 for _ in f)
|
||||
mtime = int(os.path.getmtime(bl_path))
|
||||
size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
|
||||
fetched_at = db.get('fetched_at')
|
||||
if fetched_at:
|
||||
last_refreshed = (
|
||||
f'{datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M")}'
|
||||
f' ({config_utils.relative_time(mtime, datetime.now(tz=timezone.utc).timestamp())} ago)'
|
||||
f'{datetime.fromtimestamp(fetched_at).strftime("%Y-%m-%d %H:%M")}'
|
||||
f' ({config_utils.relative_time(fetched_at, datetime.now(tz=timezone.utc).timestamp())} ago)'
|
||||
)
|
||||
else:
|
||||
last_refreshed = 'Never'
|
||||
save_as = bl.get('save_as', '')
|
||||
bl_path = f'{config_utils.BLOCKLISTS_DIR}/{save_as}' if save_as else ''
|
||||
try:
|
||||
size_str = config_utils.fmt_bytes(os.path.getsize(bl_path))
|
||||
except Exception:
|
||||
entries, size_str, last_refreshed = '-', '-', 'Never'
|
||||
size_str = '-'
|
||||
rows += (
|
||||
'<tr>'
|
||||
f'<td class="table-cell">{name}</td>'
|
||||
f'<td class="table-cell">{factory.e(name)}</td>'
|
||||
f'<td class="table-cell">{entries}</td>'
|
||||
f'<td class="table-cell">{size_str}</td>'
|
||||
f'<td class="table-cell">{factory.e(last_refreshed)}</td>'
|
||||
|
|
|
|||
|
|
@ -3,9 +3,10 @@
|
|||
dns-blocklists.py -- Download and merge DNS blocklists defined in config.json.
|
||||
|
||||
Reads the blocklists library from config.json, downloads every blocklist referenced
|
||||
by at least one VLAN, merges them into per-combo conf files (one per unique
|
||||
combination of blocklist names), then sends SIGHUP to each running dnsmasq
|
||||
instance so it reloads its config without restarting.
|
||||
by at least one VLAN, and upserts normalized domains into a SQLite database
|
||||
(blocklists/domains.db). Downloads are skipped when the content hash is unchanged.
|
||||
Merged per-combo conf files are only rewritten when a constituent blocklist changed.
|
||||
Sends SIGHUP to each running dnsmasq instance so it reloads without restarting.
|
||||
|
||||
Usage:
|
||||
sudo python3 dns-blocklists.py
|
||||
|
|
@ -15,8 +16,10 @@ import hashlib
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from pathlib import Path
|
||||
|
|
@ -25,6 +28,7 @@ PRODUCT_NAME = "routlin"
|
|||
SCRIPT_DIR = Path(__file__).parent
|
||||
CONFIG_FILE = SCRIPT_DIR / "config.json"
|
||||
BLOCKLIST_DIR = SCRIPT_DIR / "blocklists"
|
||||
DB_FILE = BLOCKLIST_DIR / "domains.db"
|
||||
LOG_FILE = SCRIPT_DIR / "dns-blocklists.log"
|
||||
|
||||
log = None
|
||||
|
|
@ -93,6 +97,8 @@ def merged_path(h):
|
|||
return BLOCKLIST_DIR / f"merged-{h}.conf"
|
||||
|
||||
|
||||
# Parse / detect ======================================================
|
||||
|
||||
def parse_dnsmasq_format(content):
|
||||
domains = set()
|
||||
for ln in content.splitlines():
|
||||
|
|
@ -122,6 +128,15 @@ def parse_hosts_format(content):
|
|||
return domains
|
||||
|
||||
|
||||
def parse_local_format(content):
    """Parse a local blocklist (one domain per line) into a set of domains.

    Blank lines and ``#`` comments are ignored, whether the comment fills the
    whole line or trails a domain. Domains are lower-cased so that case
    variants of the same name collapse into one entry.

    content: the raw text of the local blocklist file.
    Returns a set of domain strings.
    """
    domains = set()
    for ln in content.splitlines():
        # Keep only the text before the first '#': a domain name can never
        # contain one, so anything after it is a comment.
        domain = ln.partition("#")[0].strip().lower()
        if domain:
            domains.add(domain)
    return domains
|
||||
|
||||
|
||||
def detect_format(content):
|
||||
for ln in content.splitlines():
|
||||
ln = ln.strip()
|
||||
|
|
@ -134,14 +149,80 @@ def detect_format(content):
|
|||
return "dnsmasq"
|
||||
|
||||
|
||||
def parse_blocklist(content, fmt=None):
|
||||
if fmt is None:
|
||||
fmt = detect_format(content)
|
||||
def parse_blocklist(content, is_local=False):
|
||||
if is_local:
|
||||
return parse_local_format(content)
|
||||
fmt = detect_format(content)
|
||||
if fmt == "dnsmasq":
|
||||
return parse_dnsmasq_format(content)
|
||||
return parse_hosts_format(content)
|
||||
|
||||
|
||||
def content_hash(content):
    """Return the hex SHA-256 digest of *content* (a str, encoded with str.encode())."""
    digest = hashlib.sha256()
    digest.update(content.encode())
    return digest.hexdigest()
|
||||
|
||||
|
||||
# SQLite ==============================================================
|
||||
|
||||
def open_db():
    """Open the blocklist database at DB_FILE and make sure its schema exists.

    Switches the connection to WAL journaling, turns on foreign-key
    enforcement, creates the ``blocklists`` and ``domains`` tables and the
    domain index when they are missing, commits, and returns the connection.
    """
    schema = """
        CREATE TABLE IF NOT EXISTS blocklists (
            id           INTEGER PRIMARY KEY,
            name         TEXT UNIQUE NOT NULL,
            content_hash TEXT,
            fetched_at   INTEGER,
            domain_count INTEGER
        );
        CREATE TABLE IF NOT EXISTS domains (
            domain       TEXT NOT NULL,
            blocklist_id INTEGER NOT NULL REFERENCES blocklists(id) ON DELETE CASCADE,
            PRIMARY KEY (domain, blocklist_id)
        );
        CREATE INDEX IF NOT EXISTS idx_domains_domain ON domains(domain);
    """
    conn = sqlite3.connect(DB_FILE)
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        conn.execute(pragma)
    conn.executescript(schema)
    conn.commit()
    return conn
|
||||
|
||||
|
||||
def get_stored_hash(db, name):
    """Return the content hash recorded for blocklist *name*, or None if it has no row."""
    cursor = db.execute("SELECT content_hash FROM blocklists WHERE name = ?", (name,))
    record = cursor.fetchone()
    if not record:
        return None
    return record[0]
|
||||
|
||||
|
||||
def upsert_blocklist(db, name, domains, raw_hash):
    """Replace the stored state of blocklist *name* with *domains*.

    Inserts or updates the ``blocklists`` row (content hash, fetch time set to
    the current time, domain count), then swaps that blocklist's rows in
    ``domains`` for the new set.

    All statements run in a single transaction that is committed on success
    and rolled back if any statement raises, so a failed update can never
    leave a new hash recorded next to missing or partial domain rows.

    db:       open sqlite3 connection whose schema has both tables.
    name:     blocklist name (unique key of the ``blocklists`` table).
    domains:  sized iterable of domain strings.
    raw_hash: content hash to store for this blocklist.
    """
    now = int(time.time())
    # The connection context manager commits on success and rolls back on
    # exception; it does not close the connection.
    with db:
        db.execute("""
            INSERT INTO blocklists (name, content_hash, fetched_at, domain_count)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(name) DO UPDATE SET
                content_hash = excluded.content_hash,
                fetched_at   = excluded.fetched_at,
                domain_count = excluded.domain_count
        """, (name, raw_hash, now, len(domains)))
        bl_id = db.execute("SELECT id FROM blocklists WHERE name = ?", (name,)).fetchone()[0]
        db.execute("DELETE FROM domains WHERE blocklist_id = ?", (bl_id,))
        db.executemany("INSERT INTO domains (domain, blocklist_id) VALUES (?, ?)",
                       ((d, bl_id) for d in domains))
|
||||
|
||||
|
||||
def query_merged_domains(db, names):
    """Return the de-duplicated domains of every blocklist in *names*, ordered by domain.

    db:    open sqlite3 connection containing the ``blocklists`` and ``domains`` tables.
    names: sized collection of blocklist names; one SQL placeholder is bound per name.
    """
    marks = ",".join(["?"] * len(names))
    sql = f"""
        SELECT DISTINCT d.domain
        FROM domains d
        JOIN blocklists b ON d.blocklist_id = b.id
        WHERE b.name IN ({marks})
        ORDER BY d.domain
    """
    cursor = db.execute(sql, list(names))
    return [record[0] for record in cursor.fetchall()]
|
||||
|
||||
|
||||
# Conf file output ====================================================
|
||||
|
||||
def build_merged_conf(domains, bl_names):
|
||||
lines = [
|
||||
"# Generated by dns-blocklists.py -- do not edit manually.",
|
||||
|
|
@ -151,72 +232,64 @@ def build_merged_conf(domains, bl_names):
|
|||
"# Blocks domain and all subdomains via local=/domain/ syntax.",
|
||||
"",
|
||||
]
|
||||
for domain in sorted(domains):
|
||||
for domain in domains:
|
||||
lines.append(f"local=/{domain}/")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def download_all_blocklists(data):
|
||||
# Fetch ===============================================================
|
||||
|
||||
def fetch_community(entry):
    """Download the blocklist at entry["url"] and return its body as text.

    The request carries a fixed User-Agent header and uses a 30 second
    timeout. The body is decoded as UTF-8 with undecodable bytes dropped.
    Nothing is caught here: a missing "url" key or a urllib error propagates
    to the caller.
    """
    request = urllib.request.Request(
        entry["url"],
        headers={"User-Agent": "dns-blocklists.py/1.0"},
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        payload = response.read()
    return payload.decode("utf-8", errors="ignore")
|
||||
|
||||
|
||||
def read_local(entry):
    """Return the text of the local blocklist file named by entry["save_as"].

    The file is looked up inside BLOCKLIST_DIR. An entry with no (or an empty)
    "save_as" yields an empty string. Errors from reading the file (for
    example a missing file) propagate to the caller.

    The file is decoded as UTF-8 with undecodable bytes dropped, matching how
    fetch_community() decodes downloads, so the result does not depend on the
    locale the script happens to run under.
    """
    save_as = entry.get("save_as", "")
    if not save_as:
        return ""
    return (BLOCKLIST_DIR / save_as).read_text(encoding="utf-8", errors="ignore")
|
||||
|
||||
|
||||
# Main update =========================================================
|
||||
|
||||
def update_blocklists(data):
|
||||
BLOCKLIST_DIR.mkdir(exist_ok=True)
|
||||
_chown_to_script_dir_owner(BLOCKLIST_DIR)
|
||||
|
||||
db = open_db()
|
||||
|
||||
bl_library = {bl["name"]: bl for bl in data.get("dns_blocking", {}).get("blocklists", [])}
|
||||
needed = set()
|
||||
for vlan in data["vlans"]:
|
||||
needed.update(vlan.get("use_blocklists", []))
|
||||
|
||||
results = {}
|
||||
changed = set()
|
||||
any_fail = False
|
||||
|
||||
for name in needed:
|
||||
entry = bl_library[name]
|
||||
if entry.get("bl_type") == "local":
|
||||
results[name] = (None, entry)
|
||||
continue
|
||||
url = entry["url"]
|
||||
entry = bl_library[name]
|
||||
is_local = entry.get("bl_type") == "local"
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "dns-blocklists.py/1.0"})
|
||||
with urllib.request.urlopen(req, timeout=30) as r:
|
||||
content = r.read().decode("utf-8", errors="ignore")
|
||||
log.info(f"Downloaded: {entry['description']} ({len(content):,} bytes)")
|
||||
results[name] = (content, entry)
|
||||
raw = read_local(entry) if is_local else fetch_community(entry)
|
||||
except Exception as e:
|
||||
log.error(f"Failed to download '{entry['description']}' from {url}: {e}")
|
||||
results[name] = (None, entry)
|
||||
return results
|
||||
log.error(f"Failed to fetch '{name}': {e}")
|
||||
any_fail = True
|
||||
continue
|
||||
|
||||
h = content_hash(raw)
|
||||
if h == get_stored_hash(db, name):
|
||||
log.info(f"Unchanged: '{name}' -- skipping")
|
||||
continue
|
||||
|
||||
def _parse_local_domains(content):
    """Parse a local blocklist (one domain per line) into a set of domains.

    Blank lines and ``#`` comments are ignored, whether the comment fills the
    whole line or trails a domain. Domains are lower-cased so that case
    variants of the same name collapse into one entry.
    """
    domains = set()
    for ln in content.splitlines():
        # Keep only the text before the first '#': a domain name can never
        # contain one, so anything after it is a comment.
        domain = ln.partition("#")[0].strip().lower()
        if domain:
            domains.add(domain)
    return domains
|
||||
|
||||
|
||||
def update_blocklists(data):
|
||||
BLOCKLIST_DIR.mkdir(exist_ok=True)
|
||||
|
||||
log.info("Downloading blocklists...")
|
||||
downloaded = download_all_blocklists(data)
|
||||
|
||||
domains_by_name = {}
|
||||
for name, (content, entry) in downloaded.items():
|
||||
if entry.get("bl_type") == "local":
|
||||
save_as = entry.get("save_as", "")
|
||||
local_file = BLOCKLIST_DIR / save_as if save_as else None
|
||||
try:
|
||||
local_content = local_file.read_text() if local_file else ""
|
||||
domains = _parse_local_domains(local_content)
|
||||
log.info(f"Local blocklist '{name}': {len(domains):,} domains")
|
||||
except Exception as e:
|
||||
log.error(f"Local blocklist '{name}' could not be read: {e}")
|
||||
domains = set()
|
||||
domains_by_name[name] = domains
|
||||
elif content is None:
|
||||
log.error(f"Blocklist '{name}' failed to download -- it will be skipped.")
|
||||
domains_by_name[name] = set()
|
||||
else:
|
||||
(BLOCKLIST_DIR / entry["save_as"]).write_text(content)
|
||||
domains = parse_blocklist(content)
|
||||
log.info(f"Parsed {len(domains):,} domains from '{name}'")
|
||||
domains_by_name[name] = domains
|
||||
domains = parse_blocklist(raw, is_local=is_local)
|
||||
upsert_blocklist(db, name, domains, h)
|
||||
log.info(f"Updated '{name}': {len(domains):,} domains")
|
||||
changed.add(name)
|
||||
|
||||
active_hashes = set()
|
||||
combos = {}
|
||||
|
|
@ -227,17 +300,13 @@ def update_blocklists(data):
|
|||
combos[h] = names
|
||||
|
||||
for h, names in combos.items():
|
||||
combo_domains = set()
|
||||
for name in names:
|
||||
combo_domains.update(domains_by_name.get(name, set()))
|
||||
|
||||
merged = build_merged_conf(combo_domains, names)
|
||||
merged_path(h).write_text(merged)
|
||||
active_hashes.add(h)
|
||||
log.info(
|
||||
f"Merged [{h}] ({', '.join(sorted(names))}): "
|
||||
f"{len(combo_domains):,} unique domains."
|
||||
)
|
||||
if not changed.intersection(names) and merged_path(h).exists():
|
||||
log.info(f"Combo [{h}] unchanged -- skipping rewrite")
|
||||
continue
|
||||
domains = query_merged_domains(db, names)
|
||||
merged_path(h).write_text(build_merged_conf(domains, names))
|
||||
log.info(f"Merged [{h}] ({', '.join(sorted(names))}): {len(domains):,} unique domains")
|
||||
|
||||
for f in BLOCKLIST_DIR.glob("merged-*.conf"):
|
||||
h = f.stem.removeprefix("merged-")
|
||||
|
|
@ -245,11 +314,8 @@ def update_blocklists(data):
|
|||
f.unlink()
|
||||
log.info(f"Removed stale merged file: {f.name}")
|
||||
|
||||
any_failed = any(
|
||||
content is None and entry.get("bl_type") != "local"
|
||||
for content, entry in downloaded.values()
|
||||
)
|
||||
return not any_failed
|
||||
db.close()
|
||||
return not any_fail
|
||||
|
||||
|
||||
def reload_dnsmasq_instances():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue