perf: faster page loads, live-recording playback and seeking fixes

Server (web.py):
- /api/analyze no longer returns the full per-window RMS array (~45x larger than the rms_display the UI actually renders); old caches are stripped on read
- /api/files reads only the first 256 bytes of each analysis cache to get threshold/min_gap instead of parsing the whole JSON
- durations cached by (mtime, size) instead of re-opening every audio header per request; stat() race with deleted files guarded
- /api/storage no longer walks the recordings tree (used bytes now computed client-side from the file list)
- HTTP/1.1 keep-alive enabled; short writes force-close the connection; client-disconnect tracebacks from aborted seeks silenced
- all file copies bounded by the advertised Content-Length so files growing during a response cannot desync the connection

Live recording playback:
- /stream/ patches in-progress WAV headers to the current file size so browsers show real duration and can seek (on-disk header says 0 frames until the recorder closes the file)
- active files served with Cache-Control: no-store
- reopening the player for a recording file reloads the source to pick up newly captured audio

UI loading:
- analyses lazy-load only for expanded day groups; collapsed days defer fetching until opened, and auto-load only when cached parameters match the current controls (no surprise mass recompute)
- client-side analysis cache shared by file rows and day highlights, so re-renders and filters never refetch
- filename filter debounced (200 ms)
- file list auto-refreshes when the active recording set changes, unless audio is playing

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 12:29:13 +02:00
parent c445eb3e04
commit 8e496ec2c4
4 changed files with 214 additions and 46 deletions
@@ -21,6 +21,7 @@ import shutil
 import struct
 import subprocess
 import tempfile
+import threading
 import wave
 from datetime import datetime
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
@@ -62,6 +63,36 @@ MIME_TYPES = {
 # Audio analysis helpers
 # ---------------------------------------------------------------------------

+def _live_wav_header(path: Path, size: int):
+    """Return the WAV header (through the 'data' chunk header) with RIFF and
+    data sizes rewritten to match the current file size, or None.
+
+    While a WAV file is still being recorded its header claims ~0 frames, so
+    browsers show no duration and refuse to seek. Serving a header patched to
+    the bytes recorded so far fixes both; the patch is the same length as the
+    original header, so all byte offsets and Range math stay valid.
+
+    Args:
+        path: WAV file to read the existing header from.
+        size: Total file size in bytes that the patched header should
+            describe (supplied by the caller, not re-read here).
+
+    Returns:
+        The header bytes up to and including the 8-byte 'data' chunk header,
+        or None when the file is not RIFF/WAVE, no 'data' chunk header lies
+        within the first 512 bytes, or any error occurs.
+    """
+    try:
+        # Only the first 512 bytes are inspected; a 'data' chunk that starts
+        # beyond that is not found and the function returns None.
+        with open(path, 'rb') as fh:
+            hdr = fh.read(512)
+        if len(hdr) < 44 or hdr[:4] != b'RIFF' or hdr[8:12] != b'WAVE':
+            return None
+        # Chunks start right after the 12-byte 'RIFF' <size> 'WAVE' preamble.
+        pos = 12
+        while pos + 8 <= len(hdr):
+            chunk_id   = hdr[pos:pos + 4]
+            chunk_size = int.from_bytes(hdr[pos + 4:pos + 8], 'little')
+            if chunk_id == b'data':
+                data_off = pos + 8
+                patched = bytearray(hdr[:data_off])
+                # RIFF size field: everything after the 8-byte 'RIFF' + size prefix.
+                patched[4:8] = (size - 8).to_bytes(4, 'little')
+                # 'data' size field: everything after the 'data' chunk header.
+                patched[pos + 4:pos + 8] = (size - data_off).to_bytes(4, 'little')
+                return bytes(patched)
+            # Skip this chunk: 8-byte header + payload, plus one pad byte when
+            # the payload length is odd.
+            pos += 8 + chunk_size + (chunk_size & 1)
+        return None
+    except Exception:
+        # Best effort: I/O errors and to_bytes() overflow (a size that is
+        # negative after subtraction or does not fit in 4 bytes) all mean
+        # "no patched header".
+        return None
+
+
 def _get_audio_duration(path: Path):
    """Return duration in seconds for any supported audio file, or None."""
    ext = path.suffix.lower()
@@ -142,8 +173,9 @@ def _package_result(rms_values: list, framerate: int, n_frames: int,
    else:
        rms_display = rms_values

+    # Note: the full per-window RMS list is deliberately NOT returned — the UI
+    # only renders rms_display (~800 points), and the full list is ~45x larger.
    return {
-        'rms':         rms_values,
        'rms_display': rms_display,
        'sections':    _loud_sections(rms_values, window_dur, duration, threshold, min_gap),
        'duration':    round(duration, 2),
@@ -204,6 +236,21 @@ def _analysis_cache_path(analyses_base: Path, recordings_base: Path, audio_path:
    return analyses_base / rel.parent / (rel.name + '.analysis.json')


+def _cached_analysis_params(cache_path: Path):
+    """Read just threshold/min_gap from a cache file without parsing the whole
+    JSON (the embedded result can be hundreds of KB). Relies on the writer in
+    _api_analyze putting these two keys first.
+
+    Returns a dict {'threshold': float, 'min_gap': float}, or None when the
+    file cannot be opened/read or the two keys are not found, adjacent and in
+    this order, within the first 256 characters.
+    """
+    try:
+        # Text mode: read(256) returns up to 256 decoded characters.
+        with open(cache_path, 'r', encoding='utf-8') as fh:
+            head = fh.read(256)
+    except OSError:
+        # NOTE(review): only OSError is handled; a cache file that is not
+        # valid UTF-8 would raise UnicodeDecodeError out of this function.
+        return None
+    # "min_gap" must immediately follow "threshold" for the pattern to match.
+    m = re.search(r'"threshold":\s*([0-9.eE+-]+),\s*"min_gap":\s*([0-9.eE+-]+)', head)
+    if not m:
+        return None
+    # NOTE(review): the character class also matches non-numbers such as
+    # "1.2.3" or "-"; float() would then raise ValueError, which is not
+    # caught here.
+    return {'threshold': float(m.group(1)), 'min_gap': float(m.group(2))}
+
+
 def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
    if not analyses_base.exists():
        return
@@ -225,6 +272,24 @@ def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
 # File listing
 # ---------------------------------------------------------------------------

+# rel-path -> ((mtime_ns, size), duration); avoids re-opening every audio
+# header on each /api/files request
+_DURATION_CACHE: dict = {}
+_DURATION_CACHE_LOCK = threading.Lock()
+
+
+def _cached_duration(path: Path, rel: str, stat) -> float:
+    # Return the duration for `path`, reusing the value stored under `rel` in
+    # _DURATION_CACHE when the file's (st_mtime_ns, st_size) is unchanged.
+    # `stat` is the caller's already-fetched stat result for `path`.
+    # NOTE(review): _get_audio_duration is documented as returning "or None",
+    # so the `-> float` annotation may be too narrow.
+    sig = (stat.st_mtime_ns, stat.st_size)
+    with _DURATION_CACHE_LOCK:
+        hit = _DURATION_CACHE.get(rel)
+    if hit is not None and hit[0] == sig:
+        return hit[1]
+    # The lock is not held while the audio header is read, so two threads can
+    # compute the same duration concurrently; the later write overwrites the
+    # earlier entry.
+    duration = _get_audio_duration(path)
+    with _DURATION_CACHE_LOCK:
+        _DURATION_CACHE[rel] = (sig, duration)
+    return duration
+
+
 def list_files(recordings_dir: str):
    """Return list of audio file metadata dicts, sorted newest first."""
    base = Path(recordings_dir)
@@ -245,14 +310,17 @@ def list_files(recordings_dir: str):
    for path in base.rglob('*'):
        if path.suffix.lower() not in AUDIO_EXTENSIONS:
            continue
-        stat     = path.stat()
+        try:
+            stat = path.stat()
+        except OSError:
+            continue  # deleted between rglob and stat
        rel      = str(path.relative_to(base)).replace('\\', '/')
        is_active = rel in active_files

        # Skip reading partial headers for in-progress files — the WAV nframes
        # field and FLAC total_samples are both unfinalized while recording,
        # producing wildly incorrect values (e.g. 53375995583:39:01).
-        duration = None if is_active else _get_audio_duration(path)
+        duration = None if is_active else _cached_duration(path, rel, stat)

        files.append({
            'name':      rel,
@@ -273,6 +341,10 @@ def list_files(recordings_dir: str):
 # ---------------------------------------------------------------------------

 class _Handler(BaseHTTPRequestHandler):
+    # Keep-alive: browsers reuse connections instead of a TCP handshake per
+    # request. Safe because every response sets Content-Length.
+    protocol_version = 'HTTP/1.1'
+
    recordings_dir: str = 'recordings'
    analyses_dir:   str = 'recordings/analyses'
    threshold: float    = LOUD_THRESHOLD
@@ -323,14 +395,7 @@ class _Handler(BaseHTTPRequestHandler):
            if f.get('ext') in ('wav', 'flac') and not f.get('recording'):
                cache_path = _analysis_cache_path(
                    analyses_base, recordings_base, recordings_base / f['name'])
-                try:
-                    cached = json.loads(cache_path.read_text('utf-8'))
-                    f['cached_analysis'] = {
-                        'threshold': cached['threshold'],
-                        'min_gap':   cached['min_gap'],
-                    }
-                except Exception:
-                    f['cached_analysis'] = None
+                f['cached_analysis'] = _cached_analysis_params(cache_path)
            else:
                f['cached_analysis'] = None
        self._send(200, json.dumps(files).encode('utf-8'), 'application/json')
@@ -367,7 +432,9 @@ class _Handler(BaseHTTPRequestHandler):
        try:
            cached = json.loads(cache_path.read_text('utf-8'))
            if cached.get('threshold') == threshold and cached.get('min_gap') == min_gap:
-                payload = dict(cached['result']); payload['cached'] = True
+                payload = dict(cached['result'])
+                payload.pop('rms', None)  # caches written before the full-RMS field was dropped
+                payload['cached'] = True
                self._send(200, json.dumps(payload).encode('utf-8'), 'application/json')
                return
        except Exception:
@@ -418,16 +485,27 @@ class _Handler(BaseHTTPRequestHandler):
        self.end_headers()

        with open(path, 'rb') as fh:
-            self._copy_to_response(fh)
+            self._copy_to_response(fh, size)

    def _stream(self, filename: str):
-        """Serve audio for inline playback with HTTP Range support."""
+        """Serve audio for inline playback with HTTP Range support.
+
+        In-progress recordings are served with Cache-Control: no-store (the
+        content is still growing) and, for WAV, with a header patched to the
+        current size so the browser can show a duration and seek.
+        """
        path = self._safe_path(filename)
        if path is None:
            return

        content_type = MIME_TYPES.get(path.suffix.lower(), 'application/octet-stream')
-        size = path.stat().st_size
+        size      = path.stat().st_size
+        is_active = self._is_active(filename)
+
+        prefix = b''
+        if is_active and path.suffix.lower() == '.wav':
+            prefix = _live_wav_header(path, size) or b''
+
        range_header = self.headers.get('Range', '')
        m = re.match(r'bytes=(\d+)-(\d*)', range_header) if range_header else None

@@ -445,36 +523,48 @@ class _Handler(BaseHTTPRequestHandler):
            self.send_header('Content-Range', f'bytes {start}-{end}/{size}')
            self.send_header('Content-Length', str(length))
            self.send_header('Accept-Ranges', 'bytes')
+            if is_active:
+                self.send_header('Cache-Control', 'no-store')
            self.end_headers()

            with open(path, 'rb') as fh:
-                fh.seek(start)
-                self._copy_to_response(fh, length)
+                sent = 0
+                if start < len(prefix):
+                    head = prefix[start:start + length]
+                    self.wfile.write(head)
+                    sent = len(head)
+                if sent < length:
+                    fh.seek(start + sent)
+                    self._copy_to_response(fh, length - sent)
        else:
            self.send_response(200)
            self.send_header('Content-Type', content_type)
            self.send_header('Content-Length', str(size))
            self.send_header('Accept-Ranges', 'bytes')
+            if is_active:
+                self.send_header('Cache-Control', 'no-store')
            self.end_headers()

            with open(path, 'rb') as fh:
-                self._copy_to_response(fh)
+                if prefix:
+                    self.wfile.write(prefix)
+                    fh.seek(len(prefix))
+                    self._copy_to_response(fh, size - len(prefix))
+                else:
+                    # Bound the copy: the file may grow while we serve it, and
+                    # writing more than Content-Length desyncs keep-alive.
+                    self._copy_to_response(fh, size)

    def _api_storage(self):
+        # 'used' is computed client-side from the file list; walking the whole
+        # tree again here doubled the I/O of every page load.
        base = Path(self.recordings_dir)
-        used = 0
-        if base.exists():
-            used = sum(
-                p.stat().st_size
-                for p in base.rglob('*')
-                if p.is_file() and p.suffix.lower() in AUDIO_EXTENSIONS
-            )
        try:
            du = shutil.disk_usage(str(base) if base.exists() else '.')
            disk_free, disk_total = du.free, du.total
        except Exception:
            disk_free = disk_total = None
-        data = json.dumps({'used': used, 'disk_free': disk_free, 'disk_total': disk_total})
+        data = json.dumps({'disk_free': disk_free, 'disk_total': disk_total})
        self._send(200, data.encode(), 'application/json')

    def _api_config(self):
@@ -569,7 +659,7 @@ class _Handler(BaseHTTPRequestHandler):
            self.send_header('Content-Length', str(tmp_size))
            self.end_headers()
            with open(tmp_path, 'rb') as fh:
-                self._copy_to_response(fh)
+                self._copy_to_response(fh, tmp_size)
        except subprocess.TimeoutExpired:
            self._json_err(504, 'ffmpeg timed out — file may be too large')
        finally:
@@ -596,6 +686,10 @@ class _Handler(BaseHTTPRequestHandler):
            self.wfile.write(chunk)
            if remaining is not None:
                remaining -= len(chunk)
+        # Sent fewer bytes than Content-Length promised (file truncated while
+        # serving): the keep-alive connection is desynced, force it closed.
+        if remaining is not None and remaining > 0:
+            self.close_connection = True

    def _safe_path(self, filename: str):
        base = Path(self.recordings_dir).resolve()
@@ -626,6 +720,18 @@ class _Handler(BaseHTTPRequestHandler):
        pass


+class _Server(ThreadingHTTPServer):
+    """ThreadingHTTPServer that stays quiet when clients disconnect mid-stream
+    (browsers abort audio range requests constantly while seeking)."""
+
+    def handle_error(self, request, client_address):
+        # Suppress ConnectionError and TimeoutError; every other exception is
+        # passed to the base-class handler.
+        import sys
+        # The exception currently being handled (None if there is none).
+        exc = sys.exc_info()[1]
+        if isinstance(exc, (ConnectionError, TimeoutError)):
+            return
+        super().handle_error(request, client_address)
+
+
 # ---------------------------------------------------------------------------
 # UI page — single-page HTML/CSS/JS, loaded once at startup
 # ---------------------------------------------------------------------------
@@ -668,7 +774,7 @@ def main():
        threshold      = args.threshold
        min_gap        = args.min_gap

-    server = ThreadingHTTPServer((args.host, args.port), Handler)
+    server = _Server((args.host, args.port), Handler)

    print(f"ISR Web running on http://{args.host}:{args.port}/")
    print(f"Recordings dir:    {rec_dir}")