perf: faster page loads, live-recording playback and seeking fixes

Server (web.py):
- /api/analyze no longer returns the full per-window RMS array (~45x
  larger than the rms_display the UI actually renders); old caches are
  stripped on read
- /api/files reads only the first 256 bytes of each analysis cache to
  get threshold/min_gap instead of parsing the whole JSON
- durations cached by (mtime, size) instead of re-opening every audio
  header per request; stat() race with deleted files guarded
- /api/storage no longer walks the recordings tree (used bytes now
  computed client-side from the file list)
- HTTP/1.1 keep-alive enabled; short writes force-close the connection;
  client-disconnect tracebacks from aborted seeks silenced
- all file copies bounded by the advertised Content-Length so files
  growing during a response cannot desync the connection

Live recording playback:
- /stream/ patches in-progress WAV headers to the current file size so
  browsers show real duration and can seek (on-disk header says 0
  frames until the recorder closes the file)
- active files served with Cache-Control: no-store
- reopening the player for a recording file reloads the source to pick
  up newly captured audio

UI loading:
- analyses lazy-load only for expanded day groups; collapsed days defer
  fetching until opened, and auto-load only when cached parameters
  match the current controls (no surprise mass recompute)
- client-side analysis cache shared by file rows and day highlights, so
  re-renders and filters never refetch
- filename filter debounced (200 ms)
- file list auto-refreshes when the active recording set changes,
  unless audio is playing

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 12:29:13 +02:00
parent c445eb3e04
commit 8e496ec2c4
4 changed files with 214 additions and 46 deletions
+134 -28
View File
@@ -21,6 +21,7 @@ import shutil
import struct
import subprocess
import tempfile
import threading
import wave
from datetime import datetime
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
@@ -62,6 +63,36 @@ MIME_TYPES = {
# Audio analysis helpers
# ---------------------------------------------------------------------------
def _live_wav_header(path: Path, size: int):
    """Return a size-patched copy of an in-progress WAV file's header.

    While a WAV file is still being recorded its on-disk header claims ~0
    frames, so browsers show no duration and refuse to seek. This returns the
    header bytes (offset 0 through the 8-byte 'data' chunk header) with the
    RIFF and data chunk sizes rewritten as if the file ended at *size*. The
    patch is the same length as the original header, so all byte offsets and
    Range math against the real file stay valid.

    Args:
        path: WAV file to read the header from.
        size: Current total size of the file in bytes.

    Returns:
        The patched header as bytes, or None when the file cannot be read, is
        not RIFF/WAVE, has no 'data' chunk header within the first 512 bytes,
        or *size* does not fit the 32-bit little-endian size fields.
    """
    # Only the read is guarded: I/O failures are expected for a best-effort
    # helper, while anything else would be a programming error worth seeing.
    try:
        with open(path, 'rb') as fh:
            hdr = fh.read(512)
    except (OSError, ValueError):
        return None

    if len(hdr) < 44 or hdr[:4] != b'RIFF' or hdr[8:12] != b'WAVE':
        return None

    # Walk the chunk list that follows the 12-byte RIFF/WAVE preamble.
    pos = 12
    while pos + 8 <= len(hdr):
        chunk_size = int.from_bytes(hdr[pos + 4:pos + 8], 'little')
        if hdr[pos:pos + 4] == b'data':
            data_off = pos + 8
            riff_size = size - 8
            data_size = size - data_off
            # riff_size is always >= data_size, so these two bounds cover
            # both fields of the unsigned 32-bit range.
            if data_size < 0 or riff_size > 0xFFFFFFFF:
                return None
            patched = bytearray(hdr[:data_off])
            patched[4:8] = riff_size.to_bytes(4, 'little')
            patched[pos + 4:pos + 8] = data_size.to_bytes(4, 'little')
            return bytes(patched)
        # Chunks are word-aligned: an odd-sized chunk has one pad byte.
        pos += 8 + chunk_size + (chunk_size & 1)
    return None
def _get_audio_duration(path: Path):
"""Return duration in seconds for any supported audio file, or None."""
ext = path.suffix.lower()
@@ -142,8 +173,9 @@ def _package_result(rms_values: list, framerate: int, n_frames: int,
else:
rms_display = rms_values
# Note: the full per-window RMS list is deliberately NOT returned — the UI
# only renders rms_display (~800 points), and the full list is ~45x larger.
return {
'rms': rms_values,
'rms_display': rms_display,
'sections': _loud_sections(rms_values, window_dur, duration, threshold, min_gap),
'duration': round(duration, 2),
@@ -204,6 +236,21 @@ def _analysis_cache_path(analyses_base: Path, recordings_base: Path, audio_path:
return analyses_base / rel.parent / (rel.name + '.analysis.json')
def _cached_analysis_params(cache_path: Path):
    """Read just threshold/min_gap from an analysis cache file.

    Only the first 256 characters are read and matched with a regex instead
    of parsing the whole JSON (the embedded result can be hundreds of KB).
    Relies on the writer in _api_analyze putting these two keys first.

    Args:
        cache_path: Path of the ``.analysis.json`` cache file.

    Returns:
        ``{'threshold': float, 'min_gap': float}``, or None when the file is
        missing, unreadable, not valid UTF-8, or does not start with the two
        expected numeric keys.
    """
    try:
        with open(cache_path, 'r', encoding='utf-8') as fh:
            head = fh.read(256)
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError: a corrupt or binary cache file
        # must not take down the whole file listing.
        return None
    m = re.search(r'"threshold":\s*([0-9.eE+-]+),\s*"min_gap":\s*([0-9.eE+-]+)', head)
    if not m:
        return None
    try:
        return {'threshold': float(m.group(1)), 'min_gap': float(m.group(2))}
    except ValueError:
        # The character class also matches non-numbers such as "1.2.3" or "-".
        return None
def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
if not analyses_base.exists():
return
@@ -225,6 +272,24 @@ def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
# File listing
# ---------------------------------------------------------------------------
# rel-path -> ((mtime_ns, size), duration); avoids re-opening every audio
# header on each /api/files request
_DURATION_CACHE: dict = {}
_DURATION_CACHE_LOCK = threading.Lock()
def _cached_duration(path: Path, rel: str, stat) -> 'float | None':
    """Return the audio duration for *path*, reusing a cached value if fresh.

    The cache entry for *rel* is valid only while the file's
    ``(st_mtime_ns, st_size)`` signature is unchanged; otherwise the duration
    is recomputed with ``_get_audio_duration`` and the entry is replaced.

    Args:
        path: Audio file to measure on a cache miss.
        rel: Cache key for the file (the caller passes its relative path).
        stat: Stat result for *path*; only ``st_mtime_ns`` and ``st_size``
            are read.

    Returns:
        Whatever ``_get_audio_duration`` returned for this signature: the
        duration in seconds, or None when it could not be determined.
    """
    sig = (stat.st_mtime_ns, stat.st_size)
    with _DURATION_CACHE_LOCK:
        hit = _DURATION_CACHE.get(rel)
    if hit is not None and hit[0] == sig:
        return hit[1]
    # Computed outside the lock so header reads do not serialize requests;
    # two threads may both compute the same value, which is harmless.
    duration = _get_audio_duration(path)
    with _DURATION_CACHE_LOCK:
        _DURATION_CACHE[rel] = (sig, duration)
    return duration
def list_files(recordings_dir: str):
"""Return list of audio file metadata dicts, sorted newest first."""
base = Path(recordings_dir)
@@ -245,14 +310,17 @@ def list_files(recordings_dir: str):
for path in base.rglob('*'):
if path.suffix.lower() not in AUDIO_EXTENSIONS:
continue
stat = path.stat()
try:
stat = path.stat()
except OSError:
continue # deleted between rglob and stat
rel = str(path.relative_to(base)).replace('\\', '/')
is_active = rel in active_files
# Skip reading partial headers for in-progress files — the WAV nframes
# field and FLAC total_samples are both unfinalized while recording,
# producing wildly incorrect values (e.g. 53375995583:39:01).
duration = None if is_active else _get_audio_duration(path)
duration = None if is_active else _cached_duration(path, rel, stat)
files.append({
'name': rel,
@@ -273,6 +341,10 @@ def list_files(recordings_dir: str):
# ---------------------------------------------------------------------------
class _Handler(BaseHTTPRequestHandler):
# Keep-alive: browsers reuse connections instead of a TCP handshake per
# request. Safe because every response sets Content-Length.
protocol_version = 'HTTP/1.1'
recordings_dir: str = 'recordings'
analyses_dir: str = 'recordings/analyses'
threshold: float = LOUD_THRESHOLD
@@ -323,14 +395,7 @@ class _Handler(BaseHTTPRequestHandler):
if f.get('ext') in ('wav', 'flac') and not f.get('recording'):
cache_path = _analysis_cache_path(
analyses_base, recordings_base, recordings_base / f['name'])
try:
cached = json.loads(cache_path.read_text('utf-8'))
f['cached_analysis'] = {
'threshold': cached['threshold'],
'min_gap': cached['min_gap'],
}
except Exception:
f['cached_analysis'] = None
f['cached_analysis'] = _cached_analysis_params(cache_path)
else:
f['cached_analysis'] = None
self._send(200, json.dumps(files).encode('utf-8'), 'application/json')
@@ -367,7 +432,9 @@ class _Handler(BaseHTTPRequestHandler):
try:
cached = json.loads(cache_path.read_text('utf-8'))
if cached.get('threshold') == threshold and cached.get('min_gap') == min_gap:
payload = dict(cached['result']); payload['cached'] = True
payload = dict(cached['result'])
payload.pop('rms', None) # caches written before the full-RMS field was dropped
payload['cached'] = True
self._send(200, json.dumps(payload).encode('utf-8'), 'application/json')
return
except Exception:
@@ -418,16 +485,27 @@ class _Handler(BaseHTTPRequestHandler):
self.end_headers()
with open(path, 'rb') as fh:
self._copy_to_response(fh)
self._copy_to_response(fh, size)
def _stream(self, filename: str):
"""Serve audio for inline playback with HTTP Range support."""
"""Serve audio for inline playback with HTTP Range support.
In-progress recordings are served with Cache-Control: no-store (the
content is still growing) and, for WAV, with a header patched to the
current size so the browser can show a duration and seek.
"""
path = self._safe_path(filename)
if path is None:
return
content_type = MIME_TYPES.get(path.suffix.lower(), 'application/octet-stream')
size = path.stat().st_size
size = path.stat().st_size
is_active = self._is_active(filename)
prefix = b''
if is_active and path.suffix.lower() == '.wav':
prefix = _live_wav_header(path, size) or b''
range_header = self.headers.get('Range', '')
m = re.match(r'bytes=(\d+)-(\d*)', range_header) if range_header else None
@@ -445,36 +523,48 @@ class _Handler(BaseHTTPRequestHandler):
self.send_header('Content-Range', f'bytes {start}-{end}/{size}')
self.send_header('Content-Length', str(length))
self.send_header('Accept-Ranges', 'bytes')
if is_active:
self.send_header('Cache-Control', 'no-store')
self.end_headers()
with open(path, 'rb') as fh:
fh.seek(start)
self._copy_to_response(fh, length)
sent = 0
if start < len(prefix):
head = prefix[start:start + length]
self.wfile.write(head)
sent = len(head)
if sent < length:
fh.seek(start + sent)
self._copy_to_response(fh, length - sent)
else:
self.send_response(200)
self.send_header('Content-Type', content_type)
self.send_header('Content-Length', str(size))
self.send_header('Accept-Ranges', 'bytes')
if is_active:
self.send_header('Cache-Control', 'no-store')
self.end_headers()
with open(path, 'rb') as fh:
self._copy_to_response(fh)
if prefix:
self.wfile.write(prefix)
fh.seek(len(prefix))
self._copy_to_response(fh, size - len(prefix))
else:
# Bound the copy: the file may grow while we serve it, and
# writing more than Content-Length desyncs keep-alive.
self._copy_to_response(fh, size)
def _api_storage(self):
    """Send free/total disk space for the recordings location as JSON.

    Responds 200 with ``{"disk_free": ..., "disk_total": ...}`` via
    ``self._send``; both values are None when disk usage cannot be read.
    Falls back to the current directory when the recordings directory does
    not exist.
    """
    # 'used' is computed client-side from the file list; walking the whole
    # tree again here doubled the I/O of every page load.
    base = Path(self.recordings_dir)
    try:
        du = shutil.disk_usage(str(base) if base.exists() else '.')
        disk_free, disk_total = du.free, du.total
    except OSError:
        disk_free = disk_total = None
    data = json.dumps({'disk_free': disk_free, 'disk_total': disk_total})
    self._send(200, data.encode(), 'application/json')
def _api_config(self):
@@ -569,7 +659,7 @@ class _Handler(BaseHTTPRequestHandler):
self.send_header('Content-Length', str(tmp_size))
self.end_headers()
with open(tmp_path, 'rb') as fh:
self._copy_to_response(fh)
self._copy_to_response(fh, tmp_size)
except subprocess.TimeoutExpired:
self._json_err(504, 'ffmpeg timed out — file may be too large')
finally:
@@ -596,6 +686,10 @@ class _Handler(BaseHTTPRequestHandler):
self.wfile.write(chunk)
if remaining is not None:
remaining -= len(chunk)
# Sent fewer bytes than Content-Length promised (file truncated while
# serving): the keep-alive connection is desynced, force it closed.
if remaining is not None and remaining > 0:
self.close_connection = True
def _safe_path(self, filename: str):
base = Path(self.recordings_dir).resolve()
@@ -626,6 +720,18 @@ class _Handler(BaseHTTPRequestHandler):
pass
class _Server(ThreadingHTTPServer):
    """ThreadingHTTPServer that does not report errors for dropped clients.

    Browsers abort audio range requests constantly while seeking, so
    connection failures and timeouts are treated as expected noise.
    """

    def handle_error(self, request, client_address):
        """Delegate to the default error report unless the active exception
        is a ConnectionError or TimeoutError, which is ignored."""
        from sys import exc_info

        _, current, _ = exc_info()
        if not isinstance(current, (ConnectionError, TimeoutError)):
            super().handle_error(request, client_address)
# ---------------------------------------------------------------------------
# UI page — single-page HTML/CSS/JS, loaded once at startup
# ---------------------------------------------------------------------------
@@ -668,7 +774,7 @@ def main():
threshold = args.threshold
min_gap = args.min_gap
server = ThreadingHTTPServer((args.host, args.port), Handler)
server = _Server((args.host, args.port), Handler)
print(f"ISR Web running on http://{args.host}:{args.port}/")
print(f"Recordings dir: {rec_dir}")