From 8e496ec2c4dd41986050cd64692b2dae9b7c8ea8 Mon Sep 17 00:00:00 2001 From: jonathan Date: Wed, 10 Jun 2026 12:29:13 +0200 Subject: [PATCH] perf: faster page loads, live-recording playback and seeking fixes Server (web.py): - /api/analyze no longer returns the full per-window RMS array (~45x larger than the rms_display the UI actually renders); old caches are stripped on read - /api/files reads only the first 256 bytes of each analysis cache to get threshold/min_gap instead of parsing the whole JSON - durations cached by (mtime, size) instead of re-opening every audio header per request; stat() race with deleted files guarded - /api/storage no longer walks the recordings tree (used bytes now computed client-side from the file list) - HTTP/1.1 keep-alive enabled; short writes force-close the connection; client-disconnect tracebacks from aborted seeks silenced - all file copies bounded by the advertised Content-Length so files growing during a response cannot desync the connection Live recording playback: - /stream/ patches in-progress WAV headers to the current file size so browsers show real duration and can seek (on-disk header says 0 frames until the recorder closes the file) - active files served with Cache-Control: no-store - reopening the player for a recording file reloads the source to pick up newly captured audio UI loading: - analyses lazy-load only for expanded day groups; collapsed days defer fetching until opened, and auto-load only when cached parameters match the current controls (no surprise mass recompute) - client-side analysis cache shared by file rows and day highlights, so re-renders and filters never refetch - filename filter debounced (200 ms) - file list auto-refreshes when the active recording set changes, unless audio is playing Co-Authored-By: Claude Fable 5 --- CLAUDE.md | 5 +- README.md | 4 +- web.py | 162 ++++++++++++++++++++++++++++++++++++++++++++--------- webui.html | 89 +++++++++++++++++++++++------ 4 files changed, 214 insertions(+), 46 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 56e0ff2..95f1a29 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,6 +35,9 @@ Dependencies: `requests` (streams), `numpy` + `soundfile` (FLAC output and FLAC/ - **Split timing:** files split at clock-aligned boundaries (`get_next_split_time()`), e.g. `split_minutes = 60` → on the hour. - **ALSA:** capture spawns `arecord` as a subprocess, raw PCM read in 100 ms chunks by a thread. Device spec resolution: `default` → exact `hw:X,Y` → partial name → fallback to any literal ALSA PCM name (so `shared_mic` from asound.conf works without appearing in `arecord -l`). - **Shutdown:** SIGTERM is converted to KeyboardInterrupt in `main()`; `RecorderManager.stop()` joins all threads against a single shared 25 s deadline to stay inside Docker's `stop_grace_period: 30s`. -- **Analysis cache:** results stored as `/.analysis.json` keyed by threshold+min_gap; orphans pruned at web startup. In Docker the recordings mount is **read-only** for the web container, so the cache uses a separate `./analyses` bind mount. +- **Analysis cache:** results stored as `/.analysis.json` keyed by threshold+min_gap; orphans pruned at web startup. In Docker the recordings mount is **read-only** for the web container, so the cache uses a separate `./analyses` bind mount. The `threshold` and `min_gap` keys MUST stay first in the cache JSON — `_cached_analysis_params()` reads only the first 256 bytes to avoid parsing the large embedded result. +- **Analyze responses:** `/api/analyze` returns `rms_display` (~800 points), never the full per-window RMS list — the UI doesn't use it and it is ~45x larger. +- **HTTP/1.1 keep-alive:** `_Handler.protocol_version = 'HTTP/1.1'`; every response path must set an accurate `Content-Length`. `_copy_to_response()` force-closes the connection if it under-delivers (file truncated mid-serve). +- **Live playback:** for files listed in status.json, `/stream/` patches the WAV header on the fly (`_live_wav_header`) so the browser sees the duration recorded so far and can seek; responses get `Cache-Control: no-store`. - **Path safety:** every file parameter in `web.py` goes through `_safe_path()`, which resolves and verifies the path stays inside the recordings dir. - **dsnoop in Docker:** sharing the soundcard requires `asound.conf` on the host *and* `ipc: host` in docker-compose (dsnoop uses shared memory across the container boundary). diff --git a/README.md b/README.md index 0bb6ec4..b71ece0 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,9 @@ Shows recordings grouped by day with collapsible sections. Features: - **Cut & download** — `✂ Cut` button opens the player row and reveals a cut panel. Enter start and end times in `m:ss` or `h:mm:ss` format and click **↓ Download cut** to receive an ffmpeg-trimmed copy without re-encoding. Requires ffmpeg (included in the Docker image). - **Filters** — live filename search and from/to date pickers above the table; applied client-side with no additional requests. Shows `N of M shown` when a filter is active. - **Delete** — `✕ Delete` button per row with confirmation prompt; disabled for files currently being recorded; sends `DELETE /api/files/` and re-renders the table. -- **Live REC badge** — files currently being written by `isr.py` show an animated REC indicator, polled every 5 seconds via `/api/status`. Duration for in-progress files shows `—` (header is unfinalized until recording stops). +- **Live REC badge** — files currently being written by `isr.py` show an animated REC indicator, polled every 5 seconds via `/api/status`. Duration for in-progress files shows `—` in the table (header is unfinalized until recording stops). The file list refreshes automatically when a recording starts, stops, or rolls over to a new split file (unless audio is playing). +- **Listen while recording** — in-progress files are playable and seekable. For WAV the server patches the (still unfinalized) header on the fly so the browser sees the real duration-so-far; reopening the player reloads the source to pick up newly recorded audio. Live responses are sent with `Cache-Control: no-store`. +- **Fast loading** — analysis results are cached server-side on disk and client-side per session; cached waveforms load only for expanded day groups, and collapsed days fetch nothing until opened. - **WCAG-compliant** — skip link, `aria-expanded`/`aria-controls` on the player toggle, `aria-live` status, focus management, `role=img` on SVG waveforms. --- diff --git a/web.py b/web.py index fad7e06..1dd1a51 100644 --- a/web.py +++ b/web.py @@ -21,6 +21,7 @@ import shutil import struct import subprocess import tempfile +import threading import wave from datetime import datetime from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer @@ -62,6 +63,36 @@ MIME_TYPES = { # Audio analysis helpers # --------------------------------------------------------------------------- +def _live_wav_header(path: Path, size: int): + """Return the WAV header (through the 'data' chunk header) with RIFF and + data sizes rewritten to match the current file size, or None. + + While a WAV file is still being recorded its header claims ~0 frames, so + browsers show no duration and refuse to seek. Serving a header patched to + the bytes recorded so far fixes both; the patch is the same length as the + original header, so all byte offsets and Range math stay valid. + """ + try: + with open(path, 'rb') as fh: + hdr = fh.read(512) + if len(hdr) < 44 or hdr[:4] != b'RIFF' or hdr[8:12] != b'WAVE': + return None + pos = 12 + while pos + 8 <= len(hdr): + chunk_id = hdr[pos:pos + 4] + chunk_size = int.from_bytes(hdr[pos + 4:pos + 8], 'little') + if chunk_id == b'data': + data_off = pos + 8 + patched = bytearray(hdr[:data_off]) + patched[4:8] = (size - 8).to_bytes(4, 'little') + patched[pos + 4:pos + 8] = (size - data_off).to_bytes(4, 'little') + return bytes(patched) + pos += 8 + chunk_size + (chunk_size & 1) + return None + except Exception: + return None + + def _get_audio_duration(path: Path): """Return duration in seconds for any supported audio file, or None.""" ext = path.suffix.lower() @@ -142,8 +173,9 @@ def _package_result(rms_values: list, framerate: int, n_frames: int, else: rms_display = rms_values + # Note: the full per-window RMS list is deliberately NOT returned — the UI + # only renders rms_display (~800 points), and the full list is ~45x larger. return { - 'rms': rms_values, 'rms_display': rms_display, 'sections': _loud_sections(rms_values, window_dur, duration, threshold, min_gap), 'duration': round(duration, 2), @@ -204,6 +236,21 @@ def _analysis_cache_path(analyses_base: Path, recordings_base: Path, audio_path: return analyses_base / rel.parent / (rel.name + '.analysis.json') +def _cached_analysis_params(cache_path: Path): + """Read just threshold/min_gap from a cache file without parsing the whole + JSON (the embedded result can be hundreds of KB). Relies on the writer in + _api_analyze putting these two keys first.""" + try: + with open(cache_path, 'r', encoding='utf-8') as fh: + head = fh.read(256) + except OSError: + return None + m = re.search(r'"threshold":\s*([0-9.eE+-]+),\s*"min_gap":\s*([0-9.eE+-]+)', head) + if not m: + return None + return {'threshold': float(m.group(1)), 'min_gap': float(m.group(2))} + + def prune_orphan_analyses(analyses_base: Path, recordings_base: Path): if not analyses_base.exists(): return @@ -225,6 +272,24 @@ def prune_orphan_analyses(analyses_base: Path, recordings_base: Path): # File listing # --------------------------------------------------------------------------- +# rel-path -> ((mtime_ns, size), duration); avoids re-opening every audio +# header on each /api/files request +_DURATION_CACHE: dict = {} +_DURATION_CACHE_LOCK = threading.Lock() + + +def _cached_duration(path: Path, rel: str, stat) -> float: + sig = (stat.st_mtime_ns, stat.st_size) + with _DURATION_CACHE_LOCK: + hit = _DURATION_CACHE.get(rel) + if hit is not None and hit[0] == sig: + return hit[1] + duration = _get_audio_duration(path) + with _DURATION_CACHE_LOCK: + _DURATION_CACHE[rel] = (sig, duration) + return duration + + def list_files(recordings_dir: str): """Return list of audio file metadata dicts, sorted newest first.""" base = Path(recordings_dir) @@ -245,14 +310,17 @@ def list_files(recordings_dir: str): for path in base.rglob('*'): if path.suffix.lower() not in AUDIO_EXTENSIONS: continue - stat = path.stat() + try: + stat = path.stat() + except OSError: + continue # deleted between rglob and stat rel = str(path.relative_to(base)).replace('\\', '/') is_active = rel in active_files # Skip reading partial headers for in-progress files — the WAV nframes # field and FLAC total_samples are both unfinalized while recording, # producing wildly incorrect values (e.g. 53375995583:39:01). - duration = None if is_active else _get_audio_duration(path) + duration = None if is_active else _cached_duration(path, rel, stat) files.append({ 'name': rel, @@ -273,6 +341,10 @@ def list_files(recordings_dir: str): # --------------------------------------------------------------------------- class _Handler(BaseHTTPRequestHandler): + # Keep-alive: browsers reuse connections instead of a TCP handshake per + # request. Safe because every response sets Content-Length. + protocol_version = 'HTTP/1.1' + recordings_dir: str = 'recordings' analyses_dir: str = 'recordings/analyses' threshold: float = LOUD_THRESHOLD @@ -323,14 +395,7 @@ class _Handler(BaseHTTPRequestHandler): if f.get('ext') in ('wav', 'flac') and not f.get('recording'): cache_path = _analysis_cache_path( analyses_base, recordings_base, recordings_base / f['name']) - try: - cached = json.loads(cache_path.read_text('utf-8')) - f['cached_analysis'] = { - 'threshold': cached['threshold'], - 'min_gap': cached['min_gap'], - } - except Exception: - f['cached_analysis'] = None + f['cached_analysis'] = _cached_analysis_params(cache_path) else: f['cached_analysis'] = None self._send(200, json.dumps(files).encode('utf-8'), 'application/json') @@ -367,7 +432,9 @@ class _Handler(BaseHTTPRequestHandler): try: cached = json.loads(cache_path.read_text('utf-8')) if cached.get('threshold') == threshold and cached.get('min_gap') == min_gap: - payload = dict(cached['result']); payload['cached'] = True + payload = dict(cached['result']) + payload.pop('rms', None) # caches written before the full-RMS field was dropped + payload['cached'] = True self._send(200, json.dumps(payload).encode('utf-8'), 'application/json') return except Exception: @@ -418,16 +485,27 @@ class _Handler(BaseHTTPRequestHandler): self.end_headers() with open(path, 'rb') as fh: - self._copy_to_response(fh) + self._copy_to_response(fh, size) def _stream(self, filename: str): - """Serve audio for inline playback with HTTP Range support.""" + """Serve audio for inline playback with HTTP Range support. + + In-progress recordings are served with Cache-Control: no-store (the + content is still growing) and, for WAV, with a header patched to the + current size so the browser can show a duration and seek. + """ path = self._safe_path(filename) if path is None: return content_type = MIME_TYPES.get(path.suffix.lower(), 'application/octet-stream') - size = path.stat().st_size + size = path.stat().st_size + is_active = self._is_active(filename) + + prefix = b'' + if is_active and path.suffix.lower() == '.wav': + prefix = _live_wav_header(path, size) or b'' + range_header = self.headers.get('Range', '') m = re.match(r'bytes=(\d+)-(\d*)', range_header) if range_header else None @@ -445,36 +523,48 @@ class _Handler(BaseHTTPRequestHandler): self.send_header('Content-Range', f'bytes {start}-{end}/{size}') self.send_header('Content-Length', str(length)) self.send_header('Accept-Ranges', 'bytes') + if is_active: + self.send_header('Cache-Control', 'no-store') self.end_headers() with open(path, 'rb') as fh: - fh.seek(start) - self._copy_to_response(fh, length) + sent = 0 + if start < len(prefix): + head = prefix[start:start + length] + self.wfile.write(head) + sent = len(head) + if sent < length: + fh.seek(start + sent) + self._copy_to_response(fh, length - sent) else: self.send_response(200) self.send_header('Content-Type', content_type) self.send_header('Content-Length', str(size)) self.send_header('Accept-Ranges', 'bytes') + if is_active: + self.send_header('Cache-Control', 'no-store') self.end_headers() with open(path, 'rb') as fh: - self._copy_to_response(fh) + if prefix: + self.wfile.write(prefix) + fh.seek(len(prefix)) + self._copy_to_response(fh, size - len(prefix)) + else: + # Bound the copy: the file may grow while we serve it, and + # writing more than Content-Length desyncs keep-alive. + self._copy_to_response(fh, size) def _api_storage(self): + # 'used' is computed client-side from the file list; walking the whole + # tree again here doubled the I/O of every page load. base = Path(self.recordings_dir) - used = 0 - if base.exists(): - used = sum( - p.stat().st_size - for p in base.rglob('*') - if p.is_file() and p.suffix.lower() in AUDIO_EXTENSIONS - ) try: du = shutil.disk_usage(str(base) if base.exists() else '.') disk_free, disk_total = du.free, du.total except Exception: disk_free = disk_total = None - data = json.dumps({'used': used, 'disk_free': disk_free, 'disk_total': disk_total}) + data = json.dumps({'disk_free': disk_free, 'disk_total': disk_total}) self._send(200, data.encode(), 'application/json') def _api_config(self): @@ -569,7 +659,7 @@ class _Handler(BaseHTTPRequestHandler): self.send_header('Content-Length', str(tmp_size)) self.end_headers() with open(tmp_path, 'rb') as fh: - self._copy_to_response(fh) + self._copy_to_response(fh, tmp_size) except subprocess.TimeoutExpired: self._json_err(504, 'ffmpeg timed out — file may be too large') finally: @@ -596,6 +686,10 @@ class _Handler(BaseHTTPRequestHandler): self.wfile.write(chunk) if remaining is not None: remaining -= len(chunk) + # Sent fewer bytes than Content-Length promised (file truncated while + # serving): the keep-alive connection is desynced, force it closed. + if remaining is not None and remaining > 0: + self.close_connection = True def _safe_path(self, filename: str): base = Path(self.recordings_dir).resolve() @@ -626,6 +720,18 @@ class _Handler(BaseHTTPRequestHandler): pass +class _Server(ThreadingHTTPServer): + """ThreadingHTTPServer that stays quiet when clients disconnect mid-stream + (browsers abort audio range requests constantly while seeking).""" + + def handle_error(self, request, client_address): + import sys + exc = sys.exc_info()[1] + if isinstance(exc, (ConnectionError, TimeoutError)): + return + super().handle_error(request, client_address) + + # --------------------------------------------------------------------------- # UI page — single-page HTML/CSS/JS, loaded once at startup # --------------------------------------------------------------------------- @@ -668,7 +774,7 @@ def main(): threshold = args.threshold min_gap = args.min_gap - server = ThreadingHTTPServer((args.host, args.port), Handler) + server = _Server((args.host, args.port), Handler) print(f"ISR Web running on http://{args.host}:{args.port}/") print(f"Recordings dir: {rec_dir}") diff --git a/webui.html b/webui.html index 266bbbf..b127fb0 100644 --- a/webui.html +++ b/webui.html @@ -246,6 +246,10 @@ function togglePlayer(idx, filename) { audio.src = '/stream/' + encodeURIComponent(filename); audio.load(); audio.setAttribute('data-src-set','1'); + } else if (!document.getElementById('rec-'+idx)?.hidden) { + // Still recording: re-fetch so duration and seek range cover the audio + // captured since the source was last loaded + audio.load(); } activePlayerIdx = idx; prow.hidden = false; @@ -317,7 +321,22 @@ function seekToSection(idx, filename, startSec, endSec, sectionIdx) { } } -async function analyse(idx, filename, cell, btn) { +// filename|threshold|gap -> analysis result, so re-renders (filtering, +// refresh) never refetch what this session already has +const analysisCache = new Map(); + +async function fetchAnalysis(filename, threshold, minGap, force = false) { + const key = `${filename}|${threshold}|${minGap}`; + if (!force && analysisCache.has(key)) return analysisCache.get(key); + const r = await fetch('/api/analyze?file='+encodeURIComponent(filename) + +'&threshold='+encodeURIComponent(threshold) + +'&min_gap='+encodeURIComponent(minGap)); + const d = await r.json(); + if (!d.error) analysisCache.set(key, d); + return d; +} + +async function analyse(idx, filename, cell, btn, force = false) { btn.disabled = true; btn.textContent = '…'; cell.innerHTML = '
Analysing…
'; @@ -329,10 +348,7 @@ async function analyse(idx, filename, cell, btn) { if (!cell.contains(btn)) cell.appendChild(btn); }; try { - const r = await fetch('/api/analyze?file='+encodeURIComponent(filename) - +'&threshold='+encodeURIComponent(threshold) - +'&min_gap='+encodeURIComponent(minGap)); - const d = await r.json(); + const d = await fetchAnalysis(filename, threshold, minGap, force); if (d.error) { cell.innerHTML = ``; restoreBtn(); return; @@ -369,7 +385,7 @@ async function analyse(idx, filename, cell, btn) { const rebtn = document.createElement('button'); rebtn.className='reanalyse-btn'; rebtn.textContent='Re-analyse'; - rebtn.onclick = () => analyse(idx, filename, cell, rebtn); + rebtn.onclick = () => analyse(idx, filename, cell, rebtn, true); box.appendChild(rebtn); cell.innerHTML=''; cell.appendChild(box); @@ -447,6 +463,8 @@ async function deleteFile(idx, filename) { if (r.ok) { allFiles = allFiles.filter(f => f._idx !== idx); recMap.delete(idx); + for (const k of [...analysisCache.keys()]) + if (k.startsWith(filename + '|')) analysisCache.delete(k); applyFilters(); updateStorage(); } else { @@ -464,13 +482,31 @@ async function updateStorage() { try { const s = await (await fetch('/api/storage')).json(); const el = document.getElementById('storage-info'); - let txt = fmtSize(s.used) + ' used'; + const used = allFiles.reduce((a, f) => a + f.size, 0); + let txt = fmtSize(used) + ' used'; if (s.disk_free != null) txt += ' · ' + fmtSize(s.disk_free) + ' free'; if (s.disk_total != null) txt += ' of ' + fmtSize(s.disk_total); el.textContent = txt; } catch(e) {} } +// Does a server-side cached analysis match the current control values? +// Auto-loading on a mismatch would silently recompute every file. +function cachedParamsMatch(ca) { + return ca != null + && Number(ca.threshold) === parseFloat(document.getElementById('threshold-input').value) + && Number(ca.min_gap) === parseFloat(document.getElementById('min-gap-input').value); +} + +// Run the deferred analyses of a freshly expanded day +function autoloadDayAnalyses(dayId) { + document.querySelectorAll('#daytbl-' + dayId + ' td[data-autoload="1"]').forEach(cell => { + cell.removeAttribute('data-autoload'); + const btn = cell.querySelector('button') || document.createElement('button'); + analyse(Number(cell.dataset.idx), cell.dataset.fname, cell, btn); + }); +} + function _attachFileRowHandlers(f, isRec, expanded, dayId) { const i = f._idx; const ext = f.ext; @@ -485,10 +521,19 @@ function _attachFileRowHandlers(f, isRec, expanded, dayId) { abtn.disabled = true; abtn.title = 'Recording in progress — analyse after recording stops'; cell.appendChild(abtn); - } else if (f.cached_analysis) { - abtn.textContent = 'Re-analyse'; - cell.innerHTML = '
Loading…
'; - analyse(i, f.name, cell, abtn); + } else if (cachedParamsMatch(f.cached_analysis)) { + if (expanded) { + abtn.textContent = 'Re-analyse'; + analyse(i, f.name, cell, abtn); + } else { + // Collapsed day: defer the fetch until the day is opened + cell.setAttribute('data-autoload', '1'); + cell.dataset.idx = i; + cell.dataset.fname = f.name; + abtn.textContent = 'Analyse'; + abtn.onclick = () => analyse(i, f.name, cell, abtn); + cell.appendChild(abtn); + } } else { abtn.textContent = 'Analyse'; abtn.onclick = () => analyse(i, f.name, cell, abtn); @@ -683,6 +728,7 @@ function renderFiles(files) { tgl.querySelector('.day-arrow').textContent = nowExp ? '▼' : '▶'; headBar.classList.toggle('open', nowExp); document.getElementById('daytbl-' + dayId).hidden = !nowExp; + if (nowExp) autoloadDayAnalyses(dayId); if (!nowExp) { dayFiles.forEach(f => closePlayer(f._idx)); document.getElementById('dayhl-' + dayId).hidden = true; @@ -733,10 +779,7 @@ async function dayHighlights(dayId, analyzableFiles) { progFile.textContent = `${i + 1} / ${n} — ${f.name}`; progFill.style.width = `${(i / n) * 100}%`; try { - const r = await fetch('/api/analyze?file=' + encodeURIComponent(f.name) - + '&threshold=' + encodeURIComponent(threshold) - + '&min_gap=' + encodeURIComponent(minGap)); - const d = await r.json(); + const d = await fetchAnalysis(f.name, threshold, minGap); if (!d.error) { results.push({ f, data: d }); d.cached ? nCached++ : nLive++; } } catch(e) {} } @@ -929,6 +972,7 @@ async function load() { } // Poll recording status every 5 s to update REC badges +let lastActiveKey = null; async function pollStatus() { try { const s = await (await fetch('/api/status')).json(); @@ -937,12 +981,25 @@ async function pollStatus() { const badge = document.getElementById('rec-'+idx); if (badge) badge.hidden = !active.has(filename); }); + // Active set changed (recording started/stopped or rolled over to a new + // split file): refresh the list so new files appear — but never yank the + // DOM out from under an in-progress playback. + const key = [...active].sort().join('|'); + if (lastActiveKey === null) { lastActiveKey = key; return; } + if (key !== lastActiveKey) { + const playing = [...document.querySelectorAll('audio')].some(a => !a.paused && !a.ended); + if (!playing) { lastActiveKey = key; load(); } + } } catch(e) {} } document.getElementById('refresh-btn').addEventListener('click', load); -document.getElementById('filter-name').addEventListener('input', applyFilters); +let filterDebounce; +document.getElementById('filter-name').addEventListener('input', () => { + clearTimeout(filterDebounce); + filterDebounce = setTimeout(applyFilters, 200); +}); document.getElementById('filter-from').addEventListener('change', applyFilters); document.getElementById('filter-to').addEventListener('change', applyFilters); document.getElementById('filter-clear').addEventListener('click', () => {