Development
This commit is contained in:
parent
e9166d8a6a
commit
0983e14de4
7 changed files with 494 additions and 160 deletions
|
|
@ -99,5 +99,75 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
,
|
||||||
|
{
|
||||||
|
"type": "card",
|
||||||
|
"label": "DNS Statistics",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"type": "grid",
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "Tracking Since"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_METRICS_SINCE%"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "Last Updated"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_METRICS_UPDATED%"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "Total Queries"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_STAT_QUERIES%"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "Cache Hits"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_STAT_HITS% (%DNS_STAT_HIT_RATE% hit rate)"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "Forwarded to Upstream"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_STAT_FORWARDED%"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "Authoritative Answers"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_STAT_AUTH%"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "TCP Peak"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_STAT_TCP_PEAK%"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "Cache Capacity"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_CACHE_SIZE% entries"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{"type": "grid_label", "text": "Cache Evictions"},
|
||||||
|
{"type": "grid_value", "text": "%DNS_STAT_CACHE_EVICTIONS%"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "raw_html",
|
||||||
|
"html": "%DNS_PROVIDERS_TABLE%"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -1,12 +1,25 @@
|
||||||
import json
|
import json
|
||||||
import config_utils
|
import config_utils
|
||||||
|
from pages.overview.view import load_dns_metrics, _dns_providers_table
|
||||||
|
|
||||||
|
|
||||||
def collect_tokens(cfg):
|
def collect_tokens(cfg):
|
||||||
tokens = config_utils.collect_layout_tokens(cfg)
|
tokens = config_utils.collect_layout_tokens(cfg)
|
||||||
dns = cfg.get('upstream_dns', {})
|
dns = cfg.get('upstream_dns', {})
|
||||||
servers = dns.get('upstream_servers', [])
|
servers = dns.get('upstream_servers', [])
|
||||||
tokens['DNS_STRICT_ORDER'] = 'true' if dns.get('strict_order') else 'false'
|
tokens['DNS_STRICT_ORDER'] = 'true' if dns.get('strict_order') else 'false'
|
||||||
tokens['DNS_CACHE_SIZE'] = str(dns.get('cache_size', '-'))
|
tokens['DNS_CACHE_SIZE'] = str(dns.get('cache_size', '-'))
|
||||||
tokens['DNS_UPSTREAM_SERVERS_JSON'] = json.dumps(servers)
|
tokens['DNS_UPSTREAM_SERVERS_JSON'] = json.dumps(servers)
|
||||||
|
|
||||||
|
dns_stats = load_dns_metrics()
|
||||||
|
tokens['DNS_METRICS_SINCE'] = dns_stats['since']
|
||||||
|
tokens['DNS_METRICS_UPDATED'] = dns_stats['updated']
|
||||||
|
tokens['DNS_STAT_QUERIES'] = dns_stats['queries']
|
||||||
|
tokens['DNS_STAT_HITS'] = dns_stats['hits']
|
||||||
|
tokens['DNS_STAT_HIT_RATE'] = dns_stats['hit_rate']
|
||||||
|
tokens['DNS_STAT_FORWARDED'] = dns_stats['forwarded']
|
||||||
|
tokens['DNS_STAT_AUTH'] = dns_stats['auth']
|
||||||
|
tokens['DNS_STAT_TCP_PEAK'] = dns_stats['tcp_peak']
|
||||||
|
tokens['DNS_STAT_CACHE_EVICTIONS'] = dns_stats['cache_evictions']
|
||||||
|
tokens['DNS_PROVIDERS_TABLE'] = _dns_providers_table(dns_stats['servers'])
|
||||||
return tokens
|
return tokens
|
||||||
|
|
|
||||||
|
|
@ -81,64 +81,6 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"type": "card",
|
|
||||||
"label": "DNS Statistics",
|
|
||||||
"client_requirement": "client_is_viewer+",
|
|
||||||
"items": [
|
|
||||||
{
|
|
||||||
"type": "grid",
|
|
||||||
"rows": [
|
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{"type": "grid_label", "text": "Tracking Since"},
|
|
||||||
{"type": "grid_value", "text": "%DNS_METRICS_SINCE%"}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{"type": "grid_label", "text": "Last Updated"},
|
|
||||||
{"type": "grid_value", "text": "%DNS_METRICS_UPDATED%"}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{"type": "grid_label", "text": "Total Queries"},
|
|
||||||
{"type": "grid_value", "text": "%DNS_STAT_QUERIES%"}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{"type": "grid_label", "text": "Cache Hits"},
|
|
||||||
{"type": "grid_value", "text": "%DNS_STAT_HITS% (%DNS_STAT_HIT_RATE% hit rate)"}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{"type": "grid_label", "text": "Forwarded to Upstream"},
|
|
||||||
{"type": "grid_value", "text": "%DNS_STAT_FORWARDED%"}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{"type": "grid_label", "text": "Cache Capacity"},
|
|
||||||
{"type": "grid_value", "text": "%DNS_CACHE_SIZE% entries"}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{"type": "grid_label", "text": "Cache Evictions"},
|
|
||||||
{"type": "grid_value", "text": "%DNS_STAT_CACHE_EVICTIONS%"}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "raw_html",
|
|
||||||
"html": "%DNS_PROVIDERS_TABLE%"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "card",
|
"type": "card",
|
||||||
"label": "Blocked Domains",
|
"label": "Blocked Domains",
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import threading
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import config_utils
|
import config_utils
|
||||||
import factory
|
import factory
|
||||||
|
import mod_dns_queries
|
||||||
from pages.ddns.view import public_ip_info
|
from pages.ddns.view import public_ip_info
|
||||||
from pages.dhcpleases.view import live_dhcp_leases
|
from pages.dhcpleases.view import live_dhcp_leases
|
||||||
|
|
||||||
METRICS_FILE = f'{config_utils.CONFIGS_DIR}/.dns-metrics'
|
METRICS_DB = f'{config_utils.CONFIGS_DIR}/.dns-metrics2'
|
||||||
|
|
||||||
|
|
||||||
def _fmt_since(since_str):
|
def _fmt_since(since_str):
|
||||||
|
|
@ -62,28 +64,69 @@ def _dns_providers_table(servers):
|
||||||
|
|
||||||
|
|
||||||
def load_dns_metrics():
|
def load_dns_metrics():
|
||||||
empty = {'queries': '-', 'hits': '-', 'hit_rate': '-', 'forwarded': '-',
|
import sqlite3
|
||||||
'tcp_peak': '-', 'cache_evictions': '-', 'updated': '-', 'since': '-', 'servers': []}
|
empty = {
|
||||||
|
'queries': '-', 'hits': '-', 'hit_rate': '-', 'forwarded': '-',
|
||||||
|
'auth': '-', 'tcp_peak': '-', 'cache_evictions': '-',
|
||||||
|
'updated': '-', 'since': '-', 'servers': [],
|
||||||
|
}
|
||||||
try:
|
try:
|
||||||
with open(METRICS_FILE) as f:
|
con = sqlite3.connect(METRICS_DB, timeout=5)
|
||||||
data = json.load(f)
|
con.execute('PRAGMA journal_mode=WAL')
|
||||||
t = data.get('totals', {})
|
row = con.execute('''
|
||||||
meta = data.get('metadata', {})
|
SELECT
|
||||||
fwd = t.get('queries_forwarded', 0)
|
MIN(date), MAX(date),
|
||||||
hits = t.get('queries_answered_locally', 0)
|
SUM(queries_forwarded), SUM(queries_answered_locally),
|
||||||
|
SUM(queries_authoritative), SUM(cache_reused), MAX(tcp_hwm)
|
||||||
|
FROM daily_totals
|
||||||
|
''').fetchone()
|
||||||
|
srv_rows = con.execute('''
|
||||||
|
SELECT
|
||||||
|
ds.address,
|
||||||
|
SUM(ds.queries_sent),
|
||||||
|
SUM(ds.retried),
|
||||||
|
SUM(ds.failed),
|
||||||
|
SUM(ds.nxdomain),
|
||||||
|
(SELECT avg_latency_ms FROM daily_servers d2
|
||||||
|
WHERE d2.address = ds.address AND d2.avg_latency_ms > 0
|
||||||
|
ORDER BY d2.date DESC LIMIT 1)
|
||||||
|
FROM daily_servers ds
|
||||||
|
GROUP BY ds.address
|
||||||
|
ORDER BY SUM(ds.queries_sent) DESC
|
||||||
|
''').fetchall()
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
if not row or row[0] is None:
|
||||||
|
return empty
|
||||||
|
|
||||||
|
since_raw, updated_raw, fwd, hits, auth, reused, tcp_hwm = row
|
||||||
|
fwd = fwd or 0
|
||||||
|
hits = hits or 0
|
||||||
total = fwd + hits
|
total = fwd + hits
|
||||||
since_raw = meta.get('first_recorded', '-')
|
|
||||||
updated_raw = meta.get('last_recorded', '-')
|
servers = [
|
||||||
|
{
|
||||||
|
'address': r[0],
|
||||||
|
'queries_sent': r[1] or 0,
|
||||||
|
'retried': r[2] or 0,
|
||||||
|
'failed': r[3] or 0,
|
||||||
|
'nxdomain': r[4] or 0,
|
||||||
|
'avg_latency_ms': r[5] or 0,
|
||||||
|
}
|
||||||
|
for r in srv_rows
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'queries': f'{total:,}' if total else '-',
|
'queries': f'{total:,}' if total else '-',
|
||||||
'hits': f'{hits:,}' if hits else '-',
|
'hits': f'{hits:,}' if hits else '-',
|
||||||
'hit_rate': f'{hits / total * 100:.0f}%' if total > 0 else '-',
|
'hit_rate': f'{hits / total * 100:.0f}%' if total > 0 else '-',
|
||||||
'forwarded': f'{fwd:,}' if fwd else '-',
|
'forwarded': f'{fwd:,}' if fwd else '-',
|
||||||
'tcp_peak': str(t.get('tcp_hwm', 0)),
|
'auth': f'{(auth or 0):,}',
|
||||||
'cache_evictions': f'{t.get("cache_reused", 0):,}',
|
'tcp_peak': str(tcp_hwm or 0),
|
||||||
|
'cache_evictions': f'{(reused or 0):,}',
|
||||||
'updated': _fmt_updated(updated_raw),
|
'updated': _fmt_updated(updated_raw),
|
||||||
'since': _fmt_since(since_raw),
|
'since': _fmt_since(since_raw),
|
||||||
'servers': t.get('servers', []),
|
'servers': servers,
|
||||||
}
|
}
|
||||||
except Exception:
|
except Exception:
|
||||||
return empty
|
return empty
|
||||||
|
|
@ -200,6 +243,9 @@ def bl_last_update():
|
||||||
|
|
||||||
|
|
||||||
def collect_tokens(cfg):
|
def collect_tokens(cfg):
|
||||||
|
if has_query_logging(cfg):
|
||||||
|
threading.Thread(target=mod_dns_queries.collect, args=(cfg,), daemon=True).start()
|
||||||
|
|
||||||
tokens = config_utils.collect_layout_tokens(cfg)
|
tokens = config_utils.collect_layout_tokens(cfg)
|
||||||
non_vpn_vlans = [v for v in cfg.get('vlans', []) if not v.get('is_vpn')]
|
non_vpn_vlans = [v for v in cfg.get('vlans', []) if not v.get('is_vpn')]
|
||||||
dns = cfg.get('upstream_dns', {})
|
dns = cfg.get('upstream_dns', {})
|
||||||
|
|
@ -218,11 +264,8 @@ def collect_tokens(cfg):
|
||||||
tokens['DNS_STAT_QUERIES'] = dns_stats['queries']
|
tokens['DNS_STAT_QUERIES'] = dns_stats['queries']
|
||||||
tokens['DNS_STAT_HITS'] = dns_stats['hits']
|
tokens['DNS_STAT_HITS'] = dns_stats['hits']
|
||||||
tokens['DNS_STAT_HIT_RATE'] = dns_stats['hit_rate']
|
tokens['DNS_STAT_HIT_RATE'] = dns_stats['hit_rate']
|
||||||
tokens['DNS_STAT_FORWARDED'] = dns_stats['forwarded']
|
|
||||||
tokens['DNS_STAT_CACHE_EVICTIONS'] = dns_stats['cache_evictions']
|
tokens['DNS_STAT_CACHE_EVICTIONS'] = dns_stats['cache_evictions']
|
||||||
tokens['DNS_METRICS_UPDATED'] = dns_stats['updated']
|
|
||||||
tokens['DNS_METRICS_SINCE'] = dns_stats['since']
|
tokens['DNS_METRICS_SINCE'] = dns_stats['since']
|
||||||
tokens['DNS_PROVIDERS_TABLE'] = _dns_providers_table(dns_stats['servers'])
|
|
||||||
|
|
||||||
tokens['STAT_BLOCKED_ALLTIME'] = all_time_blocked_display()
|
tokens['STAT_BLOCKED_ALLTIME'] = all_time_blocked_display()
|
||||||
tokens['HAS_QUERY_LOGGING'] = '1' if has_query_logging(cfg) else ''
|
tokens['HAS_QUERY_LOGGING'] = '1' if has_query_logging(cfg) else ''
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import mod_metrics as metrics
|
import mod_metrics as metrics
|
||||||
|
import mod_dns_queries as dns_queries
|
||||||
|
|
||||||
SCRIPT_DIR = Path(__file__).parent
|
SCRIPT_DIR = Path(__file__).parent
|
||||||
CONFIG_FILE = SCRIPT_DIR / "config.json"
|
CONFIG_FILE = SCRIPT_DIR / "config.json"
|
||||||
|
|
@ -657,9 +658,18 @@ def main():
|
||||||
full_cfg = json.load(f)
|
full_cfg = json.load(f)
|
||||||
new_metrics = metrics.collect_metrics(full_cfg)
|
new_metrics = metrics.collect_metrics(full_cfg)
|
||||||
if new_metrics:
|
if new_metrics:
|
||||||
metrics.update_metrics_file(new_metrics)
|
metrics.update_metrics_db(new_metrics)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.warning(f"DNS metrics collection failed: {e}")
|
log.warning(f"DNS metrics collection failed: {e}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(CONFIG_FILE) as f:
|
||||||
|
full_cfg = json.load(f)
|
||||||
|
inserted = dns_queries.collect(full_cfg)
|
||||||
|
if inserted:
|
||||||
|
log.info(f"DNS query collector: inserted {inserted} new rows.")
|
||||||
|
except Exception as e:
|
||||||
|
log.warning(f"DNS query collection failed: {e}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
182
routlin/mod_dns_queries.py
Normal file
182
routlin/mod_dns_queries.py
Normal file
|
|
@ -0,0 +1,182 @@
|
||||||
|
"""
|
||||||
|
mod_dns_queries.py -- DNS query log collector.
|
||||||
|
|
||||||
|
Reads dnsmasq query logs from journalctl using a cursor bookmark,
|
||||||
|
parses query/result line pairs, and appends rows to a SQLite database.
|
||||||
|
|
||||||
|
Called by:
|
||||||
|
- maintenance.py on each timer tick
|
||||||
|
- routlin-dash overview page on each page load (background thread)
|
||||||
|
|
||||||
|
Only VLANs with dnsmasq_log_queries=true are collected.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sqlite3
|
||||||
|
import subprocess
|
||||||
|
from collections import defaultdict, deque
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import mod_shared as shared
|
||||||
|
import mod_validation as validation
|
||||||
|
|
||||||
|
DB_FILE = shared.SCRIPT_DIR / ".dns-queries"
|
||||||
|
|
||||||
|
QUERY_RE = re.compile(r'query\[(\w+)\] (\S+) from ([\d.]+)')
|
||||||
|
BLOCK_RE = re.compile(r'(\S+) is 0\.0\.0\.0$')
|
||||||
|
CACHED_RE = re.compile(r'cached (\S+) is ')
|
||||||
|
FWD_RE = re.compile(r'forwarded (\S+) to ')
|
||||||
|
REPLY_RE = re.compile(r'\breply (\S+) is ')
|
||||||
|
LOCAL_RE = re.compile(r'/\S+ (\S+) is ')
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# Database
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
|
def open_db():
|
||||||
|
con = sqlite3.connect(DB_FILE, timeout=10)
|
||||||
|
con.execute('PRAGMA journal_mode=WAL')
|
||||||
|
con.executescript('''
|
||||||
|
CREATE TABLE IF NOT EXISTS dns_queries (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
ts INTEGER NOT NULL,
|
||||||
|
domain TEXT NOT NULL,
|
||||||
|
qtype TEXT NOT NULL,
|
||||||
|
client_ip TEXT NOT NULL,
|
||||||
|
vlan TEXT NOT NULL,
|
||||||
|
blocked INTEGER NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dq_ts ON dns_queries(ts);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dq_domain ON dns_queries(domain, blocked);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dq_client ON dns_queries(client_ip);
|
||||||
|
CREATE TABLE IF NOT EXISTS collector_state (
|
||||||
|
key TEXT PRIMARY KEY,
|
||||||
|
value TEXT NOT NULL
|
||||||
|
);
|
||||||
|
''')
|
||||||
|
con.commit()
|
||||||
|
return con
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cursor(con):
|
||||||
|
row = con.execute("SELECT value FROM collector_state WHERE key='cursor'").fetchone()
|
||||||
|
return row[0] if row else None
|
||||||
|
|
||||||
|
|
||||||
|
def _save_cursor(con, cursor_val):
|
||||||
|
con.execute(
|
||||||
|
"INSERT OR REPLACE INTO collector_state(key, value) VALUES ('cursor', ?)",
|
||||||
|
(cursor_val,)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# Collection
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
|
def collect(data):
|
||||||
|
"""
|
||||||
|
Fetch new dnsmasq log entries from journalctl since the last stored
|
||||||
|
cursor, parse query/result pairs, and insert into dns_queries.
|
||||||
|
Returns the number of rows inserted.
|
||||||
|
"""
|
||||||
|
unit_to_vlan = {}
|
||||||
|
for vlan in data.get('vlans', []):
|
||||||
|
if not vlan.get('dnsmasq_log_queries'):
|
||||||
|
continue
|
||||||
|
iface = validation.derive_interface(vlan, data)
|
||||||
|
svc = shared.vlan_service_name(vlan, iface)
|
||||||
|
unit_to_vlan[svc] = vlan['name']
|
||||||
|
unit_to_vlan[svc + '.service'] = vlan['name']
|
||||||
|
|
||||||
|
if not unit_to_vlan:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
con = open_db()
|
||||||
|
journal_cursor = _get_cursor(con)
|
||||||
|
|
||||||
|
cmd = ['journalctl', '-u', 'dnsmasq-routlin-*', '--no-pager', '-o', 'json']
|
||||||
|
if journal_cursor:
|
||||||
|
cmd += ['--after-cursor', journal_cursor]
|
||||||
|
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
|
|
||||||
|
# pending[domain] = deque of {ts, qtype, client_ip, vlan}
|
||||||
|
# FIFO so concurrent same-domain queries from different clients pair correctly.
|
||||||
|
pending = defaultdict(deque)
|
||||||
|
rows = []
|
||||||
|
last_cursor = journal_cursor
|
||||||
|
|
||||||
|
for line in result.stdout.splitlines():
|
||||||
|
try:
|
||||||
|
entry = json.loads(line)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
msg = entry.get('MESSAGE', '')
|
||||||
|
if not isinstance(msg, str):
|
||||||
|
continue
|
||||||
|
|
||||||
|
raw_unit = entry.get('_SYSTEMD_UNIT', '')
|
||||||
|
vlan_name = unit_to_vlan.get(raw_unit) or unit_to_vlan.get(raw_unit.removesuffix('.service'))
|
||||||
|
jcursor = entry.get('__CURSOR', '')
|
||||||
|
ts = int(entry.get('__REALTIME_TIMESTAMP', 0)) // 1_000_000
|
||||||
|
|
||||||
|
if vlan_name:
|
||||||
|
m = QUERY_RE.search(msg)
|
||||||
|
if m:
|
||||||
|
# Incoming query line -- push to pending, wait for result line
|
||||||
|
pending[m.group(2)].append({
|
||||||
|
'ts': ts, 'qtype': m.group(1),
|
||||||
|
'client_ip': m.group(3), 'vlan': vlan_name,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# Result line -- identify domain and whether it was blocked
|
||||||
|
domain = None
|
||||||
|
blocked = 0
|
||||||
|
|
||||||
|
bm = BLOCK_RE.search(msg)
|
||||||
|
if bm:
|
||||||
|
domain = bm.group(1)
|
||||||
|
blocked = 1
|
||||||
|
else:
|
||||||
|
for pat in (CACHED_RE, FWD_RE, REPLY_RE, LOCAL_RE):
|
||||||
|
pm = pat.search(msg)
|
||||||
|
if pm:
|
||||||
|
domain = pm.group(1)
|
||||||
|
break
|
||||||
|
|
||||||
|
if domain and pending.get(domain):
|
||||||
|
p = pending[domain].popleft()
|
||||||
|
if not pending[domain]:
|
||||||
|
del pending[domain]
|
||||||
|
rows.append((p['ts'], domain, p['qtype'], p['client_ip'], p['vlan'], blocked))
|
||||||
|
|
||||||
|
if jcursor:
|
||||||
|
last_cursor = jcursor
|
||||||
|
|
||||||
|
# Flush any pending entries that never received a result line.
|
||||||
|
# This can happen when the collector runs mid-transaction. We
|
||||||
|
# record them as not-blocked since if they were blocked dnsmasq
|
||||||
|
# would have answered synchronously and the result line would be
|
||||||
|
# in the same journal batch.
|
||||||
|
for domain, q in pending.items():
|
||||||
|
for p in q:
|
||||||
|
rows.append((p['ts'], domain, p['qtype'], p['client_ip'], p['vlan'], 0))
|
||||||
|
|
||||||
|
if rows:
|
||||||
|
con.executemany(
|
||||||
|
'INSERT INTO dns_queries(ts, domain, qtype, client_ip, vlan, blocked)'
|
||||||
|
' VALUES(?,?,?,?,?,?)',
|
||||||
|
rows
|
||||||
|
)
|
||||||
|
|
||||||
|
if last_cursor and last_cursor != journal_cursor:
|
||||||
|
_save_cursor(con, last_cursor)
|
||||||
|
|
||||||
|
con.commit()
|
||||||
|
shared.chown_to_script_dir_owner(DB_FILE)
|
||||||
|
con.close()
|
||||||
|
return len(rows)
|
||||||
|
|
@ -2,31 +2,71 @@
|
||||||
mod_metrics.py -- DNS metrics collection and display.
|
mod_metrics.py -- DNS metrics collection and display.
|
||||||
|
|
||||||
Sends SIGUSR1 to running dnsmasq instances, parses stats from journalctl,
|
Sends SIGUSR1 to running dnsmasq instances, parses stats from journalctl,
|
||||||
and accumulates lifetime totals in a JSON file.
|
and stores daily-aggregated totals in a SQLite database (.dns-metrics2).
|
||||||
|
|
||||||
|
Each maintenance tick upserts into today's row, accumulating additive
|
||||||
|
counters and taking MAX for high-water marks. All-time totals are
|
||||||
|
derived with SUM/MAX across rows at read time.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import signal
|
import signal
|
||||||
|
import sqlite3
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import date
|
||||||
|
|
||||||
import mod_shared as shared
|
import mod_shared as shared
|
||||||
import mod_validation as validation
|
import mod_validation as validation
|
||||||
|
|
||||||
METRICS_FILE = shared.SCRIPT_DIR / ".dns-metrics"
|
DB_FILE = shared.SCRIPT_DIR / ".dns-metrics2"
|
||||||
|
|
||||||
|
|
||||||
# ===================================================================
|
# ===================================================================
|
||||||
# Collect and store
|
# Database
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
|
def open_db():
|
||||||
|
con = sqlite3.connect(DB_FILE, timeout=10)
|
||||||
|
con.execute('PRAGMA journal_mode=WAL')
|
||||||
|
con.executescript('''
|
||||||
|
CREATE TABLE IF NOT EXISTS daily_totals (
|
||||||
|
date TEXT PRIMARY KEY,
|
||||||
|
queries_forwarded INTEGER NOT NULL DEFAULT 0,
|
||||||
|
queries_answered_locally INTEGER NOT NULL DEFAULT 0,
|
||||||
|
queries_authoritative INTEGER NOT NULL DEFAULT 0,
|
||||||
|
cache_reused INTEGER NOT NULL DEFAULT 0,
|
||||||
|
tcp_hwm INTEGER NOT NULL DEFAULT 0,
|
||||||
|
tcp_max_allowed INTEGER NOT NULL DEFAULT 0,
|
||||||
|
pool_memory_max INTEGER NOT NULL DEFAULT 0,
|
||||||
|
dnssec_subqueries_hwm INTEGER NOT NULL DEFAULT 0,
|
||||||
|
dnssec_crypto_hwm INTEGER NOT NULL DEFAULT 0,
|
||||||
|
dnssec_sig_fails_hwm INTEGER NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
CREATE TABLE IF NOT EXISTS daily_servers (
|
||||||
|
date TEXT NOT NULL,
|
||||||
|
address TEXT NOT NULL,
|
||||||
|
queries_sent INTEGER NOT NULL DEFAULT 0,
|
||||||
|
retried INTEGER NOT NULL DEFAULT 0,
|
||||||
|
failed INTEGER NOT NULL DEFAULT 0,
|
||||||
|
nxdomain INTEGER NOT NULL DEFAULT 0,
|
||||||
|
avg_latency_ms INTEGER NOT NULL DEFAULT 0,
|
||||||
|
PRIMARY KEY (date, address)
|
||||||
|
);
|
||||||
|
''')
|
||||||
|
con.commit()
|
||||||
|
return con
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# Collect
|
||||||
# ===================================================================
|
# ===================================================================
|
||||||
|
|
||||||
def collect_metrics(data):
|
def collect_metrics(data):
|
||||||
"""
|
"""
|
||||||
Send SIGUSR1 to each running dnsmasq instance and parse stats from
|
Send SIGUSR1 to each running dnsmasq instance and parse stats from
|
||||||
journalctl. Returns a combined metrics dict, or None if unavailable.
|
journalctl. Returns a combined metrics dict, or None if unavailable.
|
||||||
"""
|
"""
|
||||||
metrics = {
|
metrics = {
|
||||||
"queries_forwarded": 0,
|
"queries_forwarded": 0,
|
||||||
|
|
@ -101,66 +141,79 @@ def collect_metrics(data):
|
||||||
"address": addr, "queries_sent": 0, "retried": 0,
|
"address": addr, "queries_sent": 0, "retried": 0,
|
||||||
"failed": 0, "nxdomain": 0, "avg_latency_ms": 0
|
"failed": 0, "nxdomain": 0, "avg_latency_ms": 0
|
||||||
}
|
}
|
||||||
server_map[addr]["queries_sent"] += int(m.group(2))
|
server_map[addr]["queries_sent"] += int(m.group(2))
|
||||||
server_map[addr]["retried"] += int(m.group(3))
|
server_map[addr]["retried"] += int(m.group(3))
|
||||||
server_map[addr]["failed"] += int(m.group(4))
|
server_map[addr]["failed"] += int(m.group(4))
|
||||||
server_map[addr]["nxdomain"] += int(m.group(5))
|
server_map[addr]["nxdomain"] += int(m.group(5))
|
||||||
server_map[addr]["avg_latency_ms"] = int(m.group(6))
|
if int(m.group(6)) > 0:
|
||||||
|
server_map[addr]["avg_latency_ms"] = int(m.group(6))
|
||||||
|
|
||||||
metrics["servers"] = list(server_map.values())
|
metrics["servers"] = list(server_map.values())
|
||||||
return metrics
|
return metrics
|
||||||
|
|
||||||
|
|
||||||
def update_metrics_file(new_metrics):
|
# ===================================================================
|
||||||
now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
# Store
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
if METRICS_FILE.exists():
|
def update_metrics_db(new_metrics):
|
||||||
with open(METRICS_FILE) as f:
|
today = date.today().isoformat()
|
||||||
stored = json.load(f)
|
con = open_db()
|
||||||
else:
|
|
||||||
stored = {
|
|
||||||
"metadata": {"first_recorded": now_str, "last_recorded": now_str, "total_updates": 0},
|
|
||||||
"totals": {
|
|
||||||
"queries_forwarded": 0, "queries_answered_locally": 0,
|
|
||||||
"queries_authoritative": 0, "cache_reused": 0,
|
|
||||||
"tcp_hwm": 0, "tcp_max_allowed": 0, "pool_memory_max": 0,
|
|
||||||
"dnssec_subqueries_hwm": 0, "dnssec_crypto_hwm": 0,
|
|
||||||
"dnssec_sig_fails_hwm": 0, "servers": []
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t = stored["totals"]
|
con.execute('''
|
||||||
t["queries_forwarded"] += new_metrics["queries_forwarded"]
|
INSERT INTO daily_totals(
|
||||||
t["queries_answered_locally"] += new_metrics["queries_answered_locally"]
|
date,
|
||||||
t["queries_authoritative"] += new_metrics["queries_authoritative"]
|
queries_forwarded, queries_answered_locally, queries_authoritative,
|
||||||
t["cache_reused"] += new_metrics["cache_reused"]
|
cache_reused, tcp_hwm, tcp_max_allowed, pool_memory_max,
|
||||||
t["tcp_hwm"] = max(t["tcp_hwm"], new_metrics["tcp_hwm"])
|
dnssec_subqueries_hwm, dnssec_crypto_hwm, dnssec_sig_fails_hwm
|
||||||
t["pool_memory_max"] = max(t["pool_memory_max"], new_metrics["pool_memory_max"])
|
) VALUES (?,?,?,?,?,?,?,?,?,?,?)
|
||||||
t["dnssec_subqueries_hwm"] = max(t["dnssec_subqueries_hwm"], new_metrics["dnssec_subqueries_hwm"])
|
ON CONFLICT(date) DO UPDATE SET
|
||||||
t["dnssec_crypto_hwm"] = max(t["dnssec_crypto_hwm"], new_metrics["dnssec_crypto_hwm"])
|
queries_forwarded = queries_forwarded + excluded.queries_forwarded,
|
||||||
t["dnssec_sig_fails_hwm"] = max(t["dnssec_sig_fails_hwm"], new_metrics["dnssec_sig_fails_hwm"])
|
queries_answered_locally = queries_answered_locally + excluded.queries_answered_locally,
|
||||||
if new_metrics["tcp_max_allowed"]:
|
queries_authoritative = queries_authoritative + excluded.queries_authoritative,
|
||||||
t["tcp_max_allowed"] = new_metrics["tcp_max_allowed"]
|
cache_reused = cache_reused + excluded.cache_reused,
|
||||||
|
tcp_hwm = MAX(tcp_hwm, excluded.tcp_hwm),
|
||||||
|
tcp_max_allowed = CASE WHEN excluded.tcp_max_allowed > 0
|
||||||
|
THEN excluded.tcp_max_allowed ELSE tcp_max_allowed END,
|
||||||
|
pool_memory_max = MAX(pool_memory_max, excluded.pool_memory_max),
|
||||||
|
dnssec_subqueries_hwm = MAX(dnssec_subqueries_hwm, excluded.dnssec_subqueries_hwm),
|
||||||
|
dnssec_crypto_hwm = MAX(dnssec_crypto_hwm, excluded.dnssec_crypto_hwm),
|
||||||
|
dnssec_sig_fails_hwm = MAX(dnssec_sig_fails_hwm, excluded.dnssec_sig_fails_hwm)
|
||||||
|
''', (
|
||||||
|
today,
|
||||||
|
new_metrics["queries_forwarded"],
|
||||||
|
new_metrics["queries_answered_locally"],
|
||||||
|
new_metrics["queries_authoritative"],
|
||||||
|
new_metrics["cache_reused"],
|
||||||
|
new_metrics["tcp_hwm"],
|
||||||
|
new_metrics["tcp_max_allowed"],
|
||||||
|
new_metrics["pool_memory_max"],
|
||||||
|
new_metrics["dnssec_subqueries_hwm"],
|
||||||
|
new_metrics["dnssec_crypto_hwm"],
|
||||||
|
new_metrics["dnssec_sig_fails_hwm"],
|
||||||
|
))
|
||||||
|
|
||||||
existing = {s["address"]: s for s in t["servers"]}
|
|
||||||
for srv in new_metrics["servers"]:
|
for srv in new_metrics["servers"]:
|
||||||
addr = srv["address"]
|
con.execute('''
|
||||||
if addr in existing:
|
INSERT INTO daily_servers(date, address, queries_sent, retried, failed, nxdomain, avg_latency_ms)
|
||||||
existing[addr]["queries_sent"] += srv["queries_sent"]
|
VALUES (?,?,?,?,?,?,?)
|
||||||
existing[addr]["retried"] += srv["retried"]
|
ON CONFLICT(date, address) DO UPDATE SET
|
||||||
existing[addr]["failed"] += srv["failed"]
|
queries_sent = queries_sent + excluded.queries_sent,
|
||||||
existing[addr]["nxdomain"] += srv["nxdomain"]
|
retried = retried + excluded.retried,
|
||||||
existing[addr]["avg_latency_ms"] = srv["avg_latency_ms"]
|
failed = failed + excluded.failed,
|
||||||
else:
|
nxdomain = nxdomain + excluded.nxdomain,
|
||||||
existing[addr] = srv.copy()
|
avg_latency_ms = CASE WHEN excluded.avg_latency_ms > 0
|
||||||
t["servers"] = list(existing.values())
|
THEN excluded.avg_latency_ms
|
||||||
|
ELSE avg_latency_ms END
|
||||||
|
''', (
|
||||||
|
today, srv["address"],
|
||||||
|
srv["queries_sent"], srv["retried"], srv["failed"],
|
||||||
|
srv["nxdomain"], srv["avg_latency_ms"],
|
||||||
|
))
|
||||||
|
|
||||||
stored["metadata"]["last_recorded"] = now_str
|
con.commit()
|
||||||
stored["metadata"]["total_updates"] += 1
|
shared.chown_to_script_dir_owner(DB_FILE)
|
||||||
|
con.close()
|
||||||
with open(METRICS_FILE, "w") as f:
|
|
||||||
json.dump(stored, f, indent=2)
|
|
||||||
shared.chown_to_script_dir_owner(METRICS_FILE)
|
|
||||||
|
|
||||||
|
|
||||||
# ===================================================================
|
# ===================================================================
|
||||||
|
|
@ -171,37 +224,58 @@ def show_metrics(data):
|
||||||
new = collect_metrics(data)
|
new = collect_metrics(data)
|
||||||
if new is None:
|
if new is None:
|
||||||
return
|
return
|
||||||
update_metrics_file(new)
|
update_metrics_db(new)
|
||||||
|
|
||||||
with open(METRICS_FILE) as f:
|
con = open_db()
|
||||||
data_m = json.load(f)
|
row = con.execute('''
|
||||||
|
SELECT
|
||||||
|
MIN(date), MAX(date), COUNT(*),
|
||||||
|
SUM(queries_forwarded), SUM(queries_answered_locally),
|
||||||
|
SUM(queries_authoritative), SUM(cache_reused),
|
||||||
|
MAX(tcp_hwm), MAX(tcp_max_allowed), MAX(pool_memory_max)
|
||||||
|
FROM daily_totals
|
||||||
|
''').fetchone()
|
||||||
|
servers = con.execute('''
|
||||||
|
SELECT
|
||||||
|
ds.address,
|
||||||
|
SUM(ds.queries_sent),
|
||||||
|
SUM(ds.retried),
|
||||||
|
SUM(ds.failed),
|
||||||
|
SUM(ds.nxdomain),
|
||||||
|
(SELECT avg_latency_ms FROM daily_servers d2
|
||||||
|
WHERE d2.address = ds.address AND d2.avg_latency_ms > 0
|
||||||
|
ORDER BY d2.date DESC LIMIT 1)
|
||||||
|
FROM daily_servers ds
|
||||||
|
GROUP BY ds.address
|
||||||
|
ORDER BY SUM(ds.queries_sent) DESC
|
||||||
|
''').fetchall()
|
||||||
|
con.close()
|
||||||
|
|
||||||
m = data_m["metadata"]
|
first, last, days, fwd, local, auth, reused, tcp_hwm, tcp_max, pool = row
|
||||||
t = data_m["totals"]
|
|
||||||
|
|
||||||
print("DNS Metrics (lifetime totals across all VLAN instances)")
|
print("DNS Metrics (all-time totals across all VLAN instances)")
|
||||||
print(f" First recorded : {m['first_recorded']}")
|
print(f" First recorded : {first or '-'}")
|
||||||
print(f" Last recorded : {m['last_recorded']}")
|
print(f" Last recorded : {last or '-'}")
|
||||||
print(f" Total updates : {m['total_updates']}")
|
print(f" Days tracked : {days or 0}")
|
||||||
print()
|
print()
|
||||||
print("Queries")
|
print("Queries")
|
||||||
print(f" Forwarded to upstream : {t['queries_forwarded']:,}")
|
print(f" Forwarded to upstream : {(fwd or 0):,}")
|
||||||
print(f" Answered from cache : {t['queries_answered_locally']:,}")
|
print(f" Answered from cache : {(local or 0):,}")
|
||||||
print(f" Authoritative : {t['queries_authoritative']:,}")
|
print(f" Authoritative : {(auth or 0):,}")
|
||||||
print(f" Cache reused : {t['cache_reused']:,}")
|
print(f" Cache reused : {(reused or 0):,}")
|
||||||
print()
|
print()
|
||||||
print("TCP")
|
print("TCP")
|
||||||
print(f" Peak concurrent (HWM) : {t['tcp_hwm']}")
|
print(f" Peak concurrent (HWM) : {tcp_hwm or 0}")
|
||||||
print(f" Max allowed : {t['tcp_max_allowed']}")
|
print(f" Max allowed : {tcp_max or 0}")
|
||||||
print()
|
print()
|
||||||
print(f"Pool memory peak : {t['pool_memory_max']} bytes")
|
print(f"Pool memory peak : {pool or 0} bytes")
|
||||||
if t["servers"]:
|
if servers:
|
||||||
print()
|
print()
|
||||||
print("Upstream servers")
|
print("Upstream servers (all-time)")
|
||||||
for s in t["servers"]:
|
for addr, sent, retried, failed, nxdomain, latency in servers:
|
||||||
print(f" {s['address']}")
|
print(f" {addr}")
|
||||||
print(f" Sent : {s['queries_sent']:,}")
|
print(f" Sent : {(sent or 0):,}")
|
||||||
print(f" Retried : {s['retried']:,}")
|
print(f" Retried : {(retried or 0):,}")
|
||||||
print(f" Failed : {s['failed']:,}")
|
print(f" Failed : {(failed or 0):,}")
|
||||||
print(f" NXDOMAIN : {s['nxdomain']:,}")
|
print(f" NXDOMAIN : {(nxdomain or 0):,}")
|
||||||
print(f" Latency : {s['avg_latency_ms']}ms (last recorded)")
|
print(f" Latency : {latency}ms (last recorded)" if latency else " Latency : -")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue