From 0983e14de48fe73e3b486f023847b3e2f92fdf31 Mon Sep 17 00:00:00 2001 From: Matthew Grotke Date: Tue, 9 Jun 2026 21:28:38 -0400 Subject: [PATCH] Development --- .../app/pages/dnsserver/content.json | 70 ++++++ .../routlin-dash/app/pages/dnsserver/view.py | 19 +- .../app/pages/overview/content.json | 58 ----- .../routlin-dash/app/pages/overview/view.py | 77 ++++-- routlin/maintenance.py | 12 +- routlin/mod_dns_queries.py | 182 ++++++++++++++ routlin/mod_metrics.py | 236 ++++++++++++------ 7 files changed, 494 insertions(+), 160 deletions(-) create mode 100644 routlin/mod_dns_queries.py diff --git a/docker/routlin-dash/app/pages/dnsserver/content.json b/docker/routlin-dash/app/pages/dnsserver/content.json index c71a9b4..1405c1d 100644 --- a/docker/routlin-dash/app/pages/dnsserver/content.json +++ b/docker/routlin-dash/app/pages/dnsserver/content.json @@ -99,5 +99,75 @@ } ] } + , + { + "type": "card", + "label": "DNS Statistics", + "items": [ + { + "type": "grid", + "rows": [ + { + "cells": [ + {"type": "grid_label", "text": "Tracking Since"}, + {"type": "grid_value", "text": "%DNS_METRICS_SINCE%"} + ] + }, + { + "cells": [ + {"type": "grid_label", "text": "Last Updated"}, + {"type": "grid_value", "text": "%DNS_METRICS_UPDATED%"} + ] + }, + { + "cells": [ + {"type": "grid_label", "text": "Total Queries"}, + {"type": "grid_value", "text": "%DNS_STAT_QUERIES%"} + ] + }, + { + "cells": [ + {"type": "grid_label", "text": "Cache Hits"}, + {"type": "grid_value", "text": "%DNS_STAT_HITS% (%DNS_STAT_HIT_RATE% hit rate)"} + ] + }, + { + "cells": [ + {"type": "grid_label", "text": "Forwarded to Upstream"}, + {"type": "grid_value", "text": "%DNS_STAT_FORWARDED%"} + ] + }, + { + "cells": [ + {"type": "grid_label", "text": "Authoritative Answers"}, + {"type": "grid_value", "text": "%DNS_STAT_AUTH%"} + ] + }, + { + "cells": [ + {"type": "grid_label", "text": "TCP Peak"}, + {"type": "grid_value", "text": "%DNS_STAT_TCP_PEAK%"} + ] + }, + { + "cells": [ + {"type": "grid_label", "text": "Cache Capacity"}, + {"type": "grid_value", "text": "%DNS_CACHE_SIZE% entries"} + ] + }, + { + "cells": [ + {"type": "grid_label", "text": "Cache Evictions"}, + {"type": "grid_value", "text": "%DNS_STAT_CACHE_EVICTIONS%"} + ] + } + ] + }, + { + "type": "raw_html", + "html": "%DNS_PROVIDERS_TABLE%" + } + ] + } ] } \ No newline at end of file diff --git a/docker/routlin-dash/app/pages/dnsserver/view.py b/docker/routlin-dash/app/pages/dnsserver/view.py index a841f8f..7c659e9 100644 --- a/docker/routlin-dash/app/pages/dnsserver/view.py +++ b/docker/routlin-dash/app/pages/dnsserver/view.py @@ -1,12 +1,25 @@ import json import config_utils +from pages.overview.view import load_dns_metrics, _dns_providers_table def collect_tokens(cfg): tokens = config_utils.collect_layout_tokens(cfg) - dns = cfg.get('upstream_dns', {}) + dns = cfg.get('upstream_dns', {}) servers = dns.get('upstream_servers', []) - tokens['DNS_STRICT_ORDER'] = 'true' if dns.get('strict_order') else 'false' - tokens['DNS_CACHE_SIZE'] = str(dns.get('cache_size', '-')) + tokens['DNS_STRICT_ORDER'] = 'true' if dns.get('strict_order') else 'false' + tokens['DNS_CACHE_SIZE'] = str(dns.get('cache_size', '-')) tokens['DNS_UPSTREAM_SERVERS_JSON'] = json.dumps(servers) + + dns_stats = load_dns_metrics() + tokens['DNS_METRICS_SINCE'] = dns_stats['since'] + tokens['DNS_METRICS_UPDATED'] = dns_stats['updated'] + tokens['DNS_STAT_QUERIES'] = dns_stats['queries'] + tokens['DNS_STAT_HITS'] = dns_stats['hits'] + tokens['DNS_STAT_HIT_RATE'] = dns_stats['hit_rate'] + tokens['DNS_STAT_FORWARDED'] = dns_stats['forwarded'] + tokens['DNS_STAT_AUTH'] = dns_stats['auth'] + tokens['DNS_STAT_TCP_PEAK'] = dns_stats['tcp_peak'] + tokens['DNS_STAT_CACHE_EVICTIONS'] = dns_stats['cache_evictions'] + tokens['DNS_PROVIDERS_TABLE'] = _dns_providers_table(dns_stats['servers']) return tokens diff --git a/docker/routlin-dash/app/pages/overview/content.json b/docker/routlin-dash/app/pages/overview/content.json index 4029ccd..0cee686 100644 --- a/docker/routlin-dash/app/pages/overview/content.json +++ b/docker/routlin-dash/app/pages/overview/content.json @@ -81,64 +81,6 @@ } ] }, - { - "type": "card", - "label": "DNS Statistics", - "client_requirement": "client_is_viewer+", - "items": [ - { - "type": "grid", - "rows": [ - { - "cells": [ - {"type": "grid_label", "text": "Tracking Since"}, - {"type": "grid_value", "text": "%DNS_METRICS_SINCE%"} - ] - }, - { - "cells": [ - {"type": "grid_label", "text": "Last Updated"}, - {"type": "grid_value", "text": "%DNS_METRICS_UPDATED%"} - ] - }, - { - "cells": [ - {"type": "grid_label", "text": "Total Queries"}, - {"type": "grid_value", "text": "%DNS_STAT_QUERIES%"} - ] - }, - { - "cells": [ - {"type": "grid_label", "text": "Cache Hits"}, - {"type": "grid_value", "text": "%DNS_STAT_HITS% (%DNS_STAT_HIT_RATE% hit rate)"} - ] - }, - { - "cells": [ - {"type": "grid_label", "text": "Forwarded to Upstream"}, - {"type": "grid_value", "text": "%DNS_STAT_FORWARDED%"} - ] - }, - { - "cells": [ - {"type": "grid_label", "text": "Cache Capacity"}, - {"type": "grid_value", "text": "%DNS_CACHE_SIZE% entries"} - ] - }, - { - "cells": [ - {"type": "grid_label", "text": "Cache Evictions"}, - {"type": "grid_value", "text": "%DNS_STAT_CACHE_EVICTIONS%"} - ] - } - ] - }, - { - "type": "raw_html", - "html": "%DNS_PROVIDERS_TABLE%" - } - ] - }, { "type": "card", "label": "Blocked Domains", diff --git a/docker/routlin-dash/app/pages/overview/view.py b/docker/routlin-dash/app/pages/overview/view.py index 57708c3..3b9b419 100644 --- a/docker/routlin-dash/app/pages/overview/view.py +++ b/docker/routlin-dash/app/pages/overview/view.py @@ -1,12 +1,14 @@ import json import os +import threading from datetime import datetime import config_utils import factory +import mod_dns_queries from pages.ddns.view import public_ip_info from pages.dhcpleases.view import live_dhcp_leases -METRICS_FILE = f'{config_utils.CONFIGS_DIR}/.dns-metrics' +METRICS_DB = f'{config_utils.CONFIGS_DIR}/.dns-metrics2' def _fmt_since(since_str): @@ -62,28 +64,69 @@ def _dns_providers_table(servers): def load_dns_metrics(): - empty = {'queries': '-', 'hits': '-', 'hit_rate': '-', 'forwarded': '-', - 'tcp_peak': '-', 'cache_evictions': '-', 'updated': '-', 'since': '-', 'servers': []} + import sqlite3 + empty = { + 'queries': '-', 'hits': '-', 'hit_rate': '-', 'forwarded': '-', + 'auth': '-', 'tcp_peak': '-', 'cache_evictions': '-', + 'updated': '-', 'since': '-', 'servers': [], + } try: - with open(METRICS_FILE) as f: - data = json.load(f) - t = data.get('totals', {}) - meta = data.get('metadata', {}) - fwd = t.get('queries_forwarded', 0) - hits = t.get('queries_answered_locally', 0) + con = sqlite3.connect(METRICS_DB, timeout=5) + con.execute('PRAGMA journal_mode=WAL') + row = con.execute(''' + SELECT + MIN(date), MAX(date), + SUM(queries_forwarded), SUM(queries_answered_locally), + SUM(queries_authoritative), SUM(cache_reused), MAX(tcp_hwm) + FROM daily_totals + ''').fetchone() + srv_rows = con.execute(''' + SELECT + ds.address, + SUM(ds.queries_sent), + SUM(ds.retried), + SUM(ds.failed), + SUM(ds.nxdomain), + (SELECT avg_latency_ms FROM daily_servers d2 + WHERE d2.address = ds.address AND d2.avg_latency_ms > 0 + ORDER BY d2.date DESC LIMIT 1) + FROM daily_servers ds + GROUP BY ds.address + ORDER BY SUM(ds.queries_sent) DESC + ''').fetchall() + con.close() + + if not row or row[0] is None: + return empty + + since_raw, updated_raw, fwd, hits, auth, reused, tcp_hwm = row + fwd = fwd or 0 + hits = hits or 0 total = fwd + hits - since_raw = meta.get('first_recorded', '-') - updated_raw = meta.get('last_recorded', '-') + + servers = [ + { + 'address': r[0], + 'queries_sent': r[1] or 0, + 'retried': r[2] or 0, + 'failed': r[3] or 0, + 'nxdomain': r[4] or 0, + 'avg_latency_ms': r[5] or 0, + } + for r in srv_rows + ] + return { 'queries': f'{total:,}' if total else '-', 'hits': f'{hits:,}' if hits else '-', 'hit_rate': f'{hits / total * 100:.0f}%' if total > 0 else '-', 'forwarded': f'{fwd:,}' if fwd else '-', - 'tcp_peak': str(t.get('tcp_hwm', 0)), - 'cache_evictions': f'{t.get("cache_reused", 0):,}', + 'auth': f'{(auth or 0):,}', + 'tcp_peak': str(tcp_hwm or 0), + 'cache_evictions': f'{(reused or 0):,}', 'updated': _fmt_updated(updated_raw), 'since': _fmt_since(since_raw), - 'servers': t.get('servers', []), + 'servers': servers, } except Exception: return empty @@ -200,6 +243,9 @@ def bl_last_update(): def collect_tokens(cfg): + if has_query_logging(cfg): + threading.Thread(target=mod_dns_queries.collect, args=(cfg,), daemon=True).start() + tokens = config_utils.collect_layout_tokens(cfg) non_vpn_vlans = [v for v in cfg.get('vlans', []) if not v.get('is_vpn')] dns = cfg.get('upstream_dns', {}) @@ -218,11 +264,8 @@ def collect_tokens(cfg): tokens['DNS_STAT_QUERIES'] = dns_stats['queries'] tokens['DNS_STAT_HITS'] = dns_stats['hits'] tokens['DNS_STAT_HIT_RATE'] = dns_stats['hit_rate'] - tokens['DNS_STAT_FORWARDED'] = dns_stats['forwarded'] tokens['DNS_STAT_CACHE_EVICTIONS'] = dns_stats['cache_evictions'] - tokens['DNS_METRICS_UPDATED'] = dns_stats['updated'] tokens['DNS_METRICS_SINCE'] = dns_stats['since'] - tokens['DNS_PROVIDERS_TABLE'] = _dns_providers_table(dns_stats['servers']) tokens['STAT_BLOCKED_ALLTIME'] = all_time_blocked_display() tokens['HAS_QUERY_LOGGING'] = '1' if has_query_logging(cfg) else '' diff --git a/routlin/maintenance.py b/routlin/maintenance.py index d8b11b9..4911360 100644 --- a/routlin/maintenance.py +++ b/routlin/maintenance.py @@ -35,6 +35,7 @@ import logging from pathlib import Path import mod_metrics as metrics +import mod_dns_queries as dns_queries SCRIPT_DIR = Path(__file__).parent CONFIG_FILE = SCRIPT_DIR / "config.json" @@ -657,9 +658,18 @@ def main(): full_cfg = json.load(f) new_metrics = metrics.collect_metrics(full_cfg) if new_metrics: - metrics.update_metrics_file(new_metrics) + metrics.update_metrics_db(new_metrics) except Exception as e: log.warning(f"DNS metrics collection failed: {e}") + try: + with open(CONFIG_FILE) as f: + full_cfg = json.load(f) + inserted = dns_queries.collect(full_cfg) + if inserted: + log.info(f"DNS query collector: inserted {inserted} new rows.") + except Exception as e: + log.warning(f"DNS query collection failed: {e}") + if __name__ == "__main__": main() diff --git a/routlin/mod_dns_queries.py b/routlin/mod_dns_queries.py new file mode 100644 index 0000000..bc2a036 --- /dev/null +++ b/routlin/mod_dns_queries.py @@ -0,0 +1,182 @@ +""" +mod_dns_queries.py -- DNS query log collector. + +Reads dnsmasq query logs from journalctl using a cursor bookmark, +parses query/result line pairs, and appends rows to a SQLite database. + +Called by: + - maintenance.py on each timer tick + - routlin-dash overview page on each page load (background thread) + +Only VLANs with dnsmasq_log_queries=true are collected. +""" + +import json +import re +import sqlite3 +import subprocess +from collections import defaultdict, deque +from pathlib import Path + +import mod_shared as shared +import mod_validation as validation + +DB_FILE = shared.SCRIPT_DIR / ".dns-queries" + +QUERY_RE = re.compile(r'query\[(\w+)\] (\S+) from ([\d.]+)') +BLOCK_RE = re.compile(r'(\S+) is 0\.0\.0\.0$') +CACHED_RE = re.compile(r'cached (\S+) is ') +FWD_RE = re.compile(r'forwarded (\S+) to ') +REPLY_RE = re.compile(r'\breply (\S+) is ') +LOCAL_RE = re.compile(r'/\S+ (\S+) is ') + + +# =================================================================== +# Database +# =================================================================== + +def open_db(): + con = sqlite3.connect(DB_FILE, timeout=10) + con.execute('PRAGMA journal_mode=WAL') + con.executescript(''' + CREATE TABLE IF NOT EXISTS dns_queries ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ts INTEGER NOT NULL, + domain TEXT NOT NULL, + qtype TEXT NOT NULL, + client_ip TEXT NOT NULL, + vlan TEXT NOT NULL, + blocked INTEGER NOT NULL DEFAULT 0 + ); + CREATE INDEX IF NOT EXISTS idx_dq_ts ON dns_queries(ts); + CREATE INDEX IF NOT EXISTS idx_dq_domain ON dns_queries(domain, blocked); + CREATE INDEX IF NOT EXISTS idx_dq_client ON dns_queries(client_ip); + CREATE TABLE IF NOT EXISTS collector_state ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + ''') + con.commit() + return con + + +def _get_cursor(con): + row = con.execute("SELECT value FROM collector_state WHERE key='cursor'").fetchone() + return row[0] if row else None + + +def _save_cursor(con, cursor_val): + con.execute( + "INSERT OR REPLACE INTO collector_state(key, value) VALUES ('cursor', ?)", + (cursor_val,) + ) + + +# =================================================================== +# Collection +# =================================================================== + +def collect(data): + """ + Fetch new dnsmasq log entries from journalctl since the last stored + cursor, parse query/result pairs, and insert into dns_queries. + Returns the number of rows inserted. + """ + unit_to_vlan = {} + for vlan in data.get('vlans', []): + if not vlan.get('dnsmasq_log_queries'): + continue + iface = validation.derive_interface(vlan, data) + svc = shared.vlan_service_name(vlan, iface) + unit_to_vlan[svc] = vlan['name'] + unit_to_vlan[svc + '.service'] = vlan['name'] + + if not unit_to_vlan: + return 0 + + con = open_db() + journal_cursor = _get_cursor(con) + + cmd = ['journalctl', '-u', 'dnsmasq-routlin-*', '--no-pager', '-o', 'json'] + if journal_cursor: + cmd += ['--after-cursor', journal_cursor] + + result = subprocess.run(cmd, capture_output=True, text=True) + + # pending[domain] = deque of {ts, qtype, client_ip, vlan} + # FIFO so concurrent same-domain queries from different clients pair correctly. + pending = defaultdict(deque) + rows = [] + last_cursor = journal_cursor + + for line in result.stdout.splitlines(): + try: + entry = json.loads(line) + except Exception: + continue + + msg = entry.get('MESSAGE', '') + if not isinstance(msg, str): + continue + + raw_unit = entry.get('_SYSTEMD_UNIT', '') + vlan_name = unit_to_vlan.get(raw_unit) or unit_to_vlan.get(raw_unit.removesuffix('.service')) + jcursor = entry.get('__CURSOR', '') + ts = int(entry.get('__REALTIME_TIMESTAMP', 0)) // 1_000_000 + + if vlan_name: + m = QUERY_RE.search(msg) + if m: + # Incoming query line -- push to pending, wait for result line + pending[m.group(2)].append({ + 'ts': ts, 'qtype': m.group(1), + 'client_ip': m.group(3), 'vlan': vlan_name, + }) + else: + # Result line -- identify domain and whether it was blocked + domain = None + blocked = 0 + + bm = BLOCK_RE.search(msg) + if bm: + domain = bm.group(1) + blocked = 1 + else: + for pat in (CACHED_RE, FWD_RE, REPLY_RE, LOCAL_RE): + pm = pat.search(msg) + if pm: + domain = pm.group(1) + break + + if domain and pending.get(domain): + p = pending[domain].popleft() + if not pending[domain]: + del pending[domain] + rows.append((p['ts'], domain, p['qtype'], p['client_ip'], p['vlan'], blocked)) + + if jcursor: + last_cursor = jcursor + + # Flush any pending entries that never received a result line. + # This can happen when the collector runs mid-transaction. We + # record them as not-blocked since if they were blocked dnsmasq + # would have answered synchronously and the result line would be + # in the same journal batch. + for domain, q in pending.items(): + for p in q: + rows.append((p['ts'], domain, p['qtype'], p['client_ip'], p['vlan'], 0)) + + if rows: + con.executemany( + 'INSERT INTO dns_queries(ts, domain, qtype, client_ip, vlan, blocked)' + ' VALUES(?,?,?,?,?,?)', + rows + ) + + if last_cursor and last_cursor != journal_cursor: + _save_cursor(con, last_cursor) + + con.commit() + shared.chown_to_script_dir_owner(DB_FILE) + con.close() + return len(rows) diff --git a/routlin/mod_metrics.py b/routlin/mod_metrics.py index 3984d0d..98ab669 100644 --- a/routlin/mod_metrics.py +++ b/routlin/mod_metrics.py @@ -2,31 +2,71 @@ mod_metrics.py -- DNS metrics collection and display. Sends SIGUSR1 to running dnsmasq instances, parses stats from journalctl, -and accumulates lifetime totals in a JSON file. +and stores daily-aggregated totals in a SQLite database (.dns-metrics2). + +Each maintenance tick upserts into today's row, accumulating additive +counters and taking MAX for high-water marks. All-time totals are +derived with SUM/MAX across rows at read time. """ -import json import os import re import signal +import sqlite3 import subprocess import time -from datetime import datetime +from datetime import date import mod_shared as shared import mod_validation as validation -METRICS_FILE = shared.SCRIPT_DIR / ".dns-metrics" +DB_FILE = shared.SCRIPT_DIR / ".dns-metrics2" # =================================================================== -# Collect and store +# Database +# =================================================================== + +def open_db(): + con = sqlite3.connect(DB_FILE, timeout=10) + con.execute('PRAGMA journal_mode=WAL') + con.executescript(''' + CREATE TABLE IF NOT EXISTS daily_totals ( + date TEXT PRIMARY KEY, + queries_forwarded INTEGER NOT NULL DEFAULT 0, + queries_answered_locally INTEGER NOT NULL DEFAULT 0, + queries_authoritative INTEGER NOT NULL DEFAULT 0, + cache_reused INTEGER NOT NULL DEFAULT 0, + tcp_hwm INTEGER NOT NULL DEFAULT 0, + tcp_max_allowed INTEGER NOT NULL DEFAULT 0, + pool_memory_max INTEGER NOT NULL DEFAULT 0, + dnssec_subqueries_hwm INTEGER NOT NULL DEFAULT 0, + dnssec_crypto_hwm INTEGER NOT NULL DEFAULT 0, + dnssec_sig_fails_hwm INTEGER NOT NULL DEFAULT 0 + ); + CREATE TABLE IF NOT EXISTS daily_servers ( + date TEXT NOT NULL, + address TEXT NOT NULL, + queries_sent INTEGER NOT NULL DEFAULT 0, + retried INTEGER NOT NULL DEFAULT 0, + failed INTEGER NOT NULL DEFAULT 0, + nxdomain INTEGER NOT NULL DEFAULT 0, + avg_latency_ms INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (date, address) + ); + ''') + con.commit() + return con + + +# =================================================================== +# Collect # =================================================================== def collect_metrics(data): """ Send SIGUSR1 to each running dnsmasq instance and parse stats from - journalctl. Returns a combined metrics dict, or None if unavailable. + journalctl. Returns a combined metrics dict, or None if unavailable. """ metrics = { "queries_forwarded": 0, @@ -101,66 +141,79 @@ def collect_metrics(data): "address": addr, "queries_sent": 0, "retried": 0, "failed": 0, "nxdomain": 0, "avg_latency_ms": 0 } - server_map[addr]["queries_sent"] += int(m.group(2)) - server_map[addr]["retried"] += int(m.group(3)) - server_map[addr]["failed"] += int(m.group(4)) - server_map[addr]["nxdomain"] += int(m.group(5)) - server_map[addr]["avg_latency_ms"] = int(m.group(6)) + server_map[addr]["queries_sent"] += int(m.group(2)) + server_map[addr]["retried"] += int(m.group(3)) + server_map[addr]["failed"] += int(m.group(4)) + server_map[addr]["nxdomain"] += int(m.group(5)) + if int(m.group(6)) > 0: + server_map[addr]["avg_latency_ms"] = int(m.group(6)) metrics["servers"] = list(server_map.values()) return metrics -def update_metrics_file(new_metrics): - now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") +# =================================================================== +# Store +# =================================================================== - if METRICS_FILE.exists(): - with open(METRICS_FILE) as f: - stored = json.load(f) - else: - stored = { - "metadata": {"first_recorded": now_str, "last_recorded": now_str, "total_updates": 0}, - "totals": { - "queries_forwarded": 0, "queries_answered_locally": 0, - "queries_authoritative": 0, "cache_reused": 0, - "tcp_hwm": 0, "tcp_max_allowed": 0, "pool_memory_max": 0, - "dnssec_subqueries_hwm": 0, "dnssec_crypto_hwm": 0, - "dnssec_sig_fails_hwm": 0, "servers": [] - } - } +def update_metrics_db(new_metrics): + today = date.today().isoformat() + con = open_db() - t = stored["totals"] - t["queries_forwarded"] += new_metrics["queries_forwarded"] - t["queries_answered_locally"] += new_metrics["queries_answered_locally"] - t["queries_authoritative"] += new_metrics["queries_authoritative"] - t["cache_reused"] += new_metrics["cache_reused"] - t["tcp_hwm"] = max(t["tcp_hwm"], new_metrics["tcp_hwm"]) - t["pool_memory_max"] = max(t["pool_memory_max"], new_metrics["pool_memory_max"]) - t["dnssec_subqueries_hwm"] = max(t["dnssec_subqueries_hwm"], new_metrics["dnssec_subqueries_hwm"]) - t["dnssec_crypto_hwm"] = max(t["dnssec_crypto_hwm"], new_metrics["dnssec_crypto_hwm"]) - t["dnssec_sig_fails_hwm"] = max(t["dnssec_sig_fails_hwm"], new_metrics["dnssec_sig_fails_hwm"]) - if new_metrics["tcp_max_allowed"]: - t["tcp_max_allowed"] = new_metrics["tcp_max_allowed"] + con.execute(''' + INSERT INTO daily_totals( + date, + queries_forwarded, queries_answered_locally, queries_authoritative, + cache_reused, tcp_hwm, tcp_max_allowed, pool_memory_max, + dnssec_subqueries_hwm, dnssec_crypto_hwm, dnssec_sig_fails_hwm + ) VALUES (?,?,?,?,?,?,?,?,?,?,?) + ON CONFLICT(date) DO UPDATE SET + queries_forwarded = queries_forwarded + excluded.queries_forwarded, + queries_answered_locally = queries_answered_locally + excluded.queries_answered_locally, + queries_authoritative = queries_authoritative + excluded.queries_authoritative, + cache_reused = cache_reused + excluded.cache_reused, + tcp_hwm = MAX(tcp_hwm, excluded.tcp_hwm), + tcp_max_allowed = CASE WHEN excluded.tcp_max_allowed > 0 + THEN excluded.tcp_max_allowed ELSE tcp_max_allowed END, + pool_memory_max = MAX(pool_memory_max, excluded.pool_memory_max), + dnssec_subqueries_hwm = MAX(dnssec_subqueries_hwm, excluded.dnssec_subqueries_hwm), + dnssec_crypto_hwm = MAX(dnssec_crypto_hwm, excluded.dnssec_crypto_hwm), + dnssec_sig_fails_hwm = MAX(dnssec_sig_fails_hwm, excluded.dnssec_sig_fails_hwm) + ''', ( + today, + new_metrics["queries_forwarded"], + new_metrics["queries_answered_locally"], + new_metrics["queries_authoritative"], + new_metrics["cache_reused"], + new_metrics["tcp_hwm"], + new_metrics["tcp_max_allowed"], + new_metrics["pool_memory_max"], + new_metrics["dnssec_subqueries_hwm"], + new_metrics["dnssec_crypto_hwm"], + new_metrics["dnssec_sig_fails_hwm"], + )) - existing = {s["address"]: s for s in t["servers"]} for srv in new_metrics["servers"]: - addr = srv["address"] - if addr in existing: - existing[addr]["queries_sent"] += srv["queries_sent"] - existing[addr]["retried"] += srv["retried"] - existing[addr]["failed"] += srv["failed"] - existing[addr]["nxdomain"] += srv["nxdomain"] - existing[addr]["avg_latency_ms"] = srv["avg_latency_ms"] - else: - existing[addr] = srv.copy() - t["servers"] = list(existing.values()) + con.execute(''' + INSERT INTO daily_servers(date, address, queries_sent, retried, failed, nxdomain, avg_latency_ms) + VALUES (?,?,?,?,?,?,?) + ON CONFLICT(date, address) DO UPDATE SET + queries_sent = queries_sent + excluded.queries_sent, + retried = retried + excluded.retried, + failed = failed + excluded.failed, + nxdomain = nxdomain + excluded.nxdomain, + avg_latency_ms = CASE WHEN excluded.avg_latency_ms > 0 + THEN excluded.avg_latency_ms + ELSE avg_latency_ms END + ''', ( + today, srv["address"], + srv["queries_sent"], srv["retried"], srv["failed"], + srv["nxdomain"], srv["avg_latency_ms"], + )) - stored["metadata"]["last_recorded"] = now_str - stored["metadata"]["total_updates"] += 1 - - with open(METRICS_FILE, "w") as f: - json.dump(stored, f, indent=2) - shared.chown_to_script_dir_owner(METRICS_FILE) + con.commit() + shared.chown_to_script_dir_owner(DB_FILE) + con.close() # =================================================================== @@ -171,37 +224,58 @@ def show_metrics(data): new = collect_metrics(data) if new is None: return - update_metrics_file(new) + update_metrics_db(new) - with open(METRICS_FILE) as f: - data_m = json.load(f) + con = open_db() + row = con.execute(''' + SELECT + MIN(date), MAX(date), COUNT(*), + SUM(queries_forwarded), SUM(queries_answered_locally), + SUM(queries_authoritative), SUM(cache_reused), + MAX(tcp_hwm), MAX(tcp_max_allowed), MAX(pool_memory_max) + FROM daily_totals + ''').fetchone() + servers = con.execute(''' + SELECT + ds.address, + SUM(ds.queries_sent), + SUM(ds.retried), + SUM(ds.failed), + SUM(ds.nxdomain), + (SELECT avg_latency_ms FROM daily_servers d2 + WHERE d2.address = ds.address AND d2.avg_latency_ms > 0 + ORDER BY d2.date DESC LIMIT 1) + FROM daily_servers ds + GROUP BY ds.address + ORDER BY SUM(ds.queries_sent) DESC + ''').fetchall() + con.close() - m = data_m["metadata"] - t = data_m["totals"] + first, last, days, fwd, local, auth, reused, tcp_hwm, tcp_max, pool = row - print("DNS Metrics (lifetime totals across all VLAN instances)") - print(f" First recorded : {m['first_recorded']}") - print(f" Last recorded : {m['last_recorded']}") - print(f" Total updates : {m['total_updates']}") + print("DNS Metrics (all-time totals across all VLAN instances)") + print(f" First recorded : {first or '-'}") + print(f" Last recorded : {last or '-'}") + print(f" Days tracked : {days or 0}") print() print("Queries") - print(f" Forwarded to upstream : {t['queries_forwarded']:,}") - print(f" Answered from cache : {t['queries_answered_locally']:,}") - print(f" Authoritative : {t['queries_authoritative']:,}") - print(f" Cache reused : {t['cache_reused']:,}") + print(f" Forwarded to upstream : {(fwd or 0):,}") + print(f" Answered from cache : {(local or 0):,}") + print(f" Authoritative : {(auth or 0):,}") + print(f" Cache reused : {(reused or 0):,}") print() print("TCP") - print(f" Peak concurrent (HWM) : {t['tcp_hwm']}") - print(f" Max allowed : {t['tcp_max_allowed']}") + print(f" Peak concurrent (HWM) : {tcp_hwm or 0}") + print(f" Max allowed : {tcp_max or 0}") print() - print(f"Pool memory peak : {t['pool_memory_max']} bytes") - if t["servers"]: + print(f"Pool memory peak : {pool or 0} bytes") + if servers: print() - print("Upstream servers") - for s in t["servers"]: - print(f" {s['address']}") - print(f" Sent : {s['queries_sent']:,}") - print(f" Retried : {s['retried']:,}") - print(f" Failed : {s['failed']:,}") - print(f" NXDOMAIN : {s['nxdomain']:,}") - print(f" Latency : {s['avg_latency_ms']}ms (last recorded)") + print("Upstream servers (all-time)") + for addr, sent, retried, failed, nxdomain, latency in servers: + print(f" {addr}") + print(f" Sent : {(sent or 0):,}") + print(f" Retried : {(retried or 0):,}") + print(f" Failed : {(failed or 0):,}") + print(f" NXDOMAIN : {(nxdomain or 0):,}") + print(f" Latency : {latency}ms (last recorded)" if latency else " Latency : -")