perf: faster page loads, live-recording playback and seeking fixes
Server (web.py):
- /api/analyze no longer returns the full per-window RMS array (~45x larger than the rms_display the UI actually renders); old caches are stripped on read
- /api/files reads only the first 256 bytes of each analysis cache to get threshold/min_gap instead of parsing the whole JSON
- durations cached by (mtime, size) instead of re-opening every audio header per request; stat() race with deleted files guarded
- /api/storage no longer walks the recordings tree (used bytes now computed client-side from the file list)
- HTTP/1.1 keep-alive enabled; short writes force-close the connection; client-disconnect tracebacks from aborted seeks silenced
- all file copies bounded by the advertised Content-Length so files growing during a response cannot desync the connection

Live recording playback:
- /stream/ patches in-progress WAV headers to the current file size so browsers show real duration and can seek (on-disk header says 0 frames until the recorder closes the file)
- active files served with Cache-Control: no-store
- reopening the player for a recording file reloads the source to pick up newly captured audio

UI loading:
- analyses lazy-load only for expanded day groups; collapsed days defer fetching until opened, and auto-load only when cached parameters match the current controls (no surprise mass recompute)
- client-side analysis cache shared by file rows and day highlights, so re-renders and filters never refetch
- filename filter debounced (200 ms)
- file list auto-refreshes when the active recording set changes, unless audio is playing

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,7 @@ import shutil
|
||||
import struct
|
||||
import subprocess
|
||||
import tempfile
|
||||
import threading
|
||||
import wave
|
||||
from datetime import datetime
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
@@ -62,6 +63,36 @@ MIME_TYPES = {
|
||||
# Audio analysis helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _live_wav_header(path: Path, size: int):
    """Build a size-corrected copy of the leading bytes of a WAV file.

    The returned bytes run from the start of the file through the 8-byte
    'data' chunk header, with the RIFF size field and the data size field
    rewritten as if the file were exactly ``size`` bytes long.

    A WAV that is still being recorded advertises ~0 frames in its header, so
    browsers show no duration and will not seek. Serving this patched header
    in place of the on-disk one fixes both. The patch has the same length as
    the bytes it replaces, so byte offsets and Range arithmetic are unchanged.

    Args:
        path: WAV file to inspect; only its first 512 bytes are read.
        size: File size in bytes that the patched header should describe.

    Returns:
        The patched header bytes, or None when the file cannot be read, is
        not RIFF/WAVE, has no 'data' chunk header within the first 512 bytes,
        or the sizes do not fit in the unsigned 32-bit header fields.
    """
    try:
        with open(path, 'rb') as src:
            head = src.read(512)

        looks_like_wav = (
            len(head) >= 44
            and head[:4] == b'RIFF'
            and head[8:12] == b'WAVE'
        )
        if not looks_like_wav:
            return None

        # Walk the chunk list that follows the 12-byte RIFF/WAVE preamble.
        offset = 12
        while offset + 8 <= len(head):
            tag, body_len = struct.unpack_from('<4sI', head, offset)
            if tag != b'data':
                # Skip this chunk; odd-sized chunks carry one pad byte.
                offset += 8 + body_len + (body_len & 1)
                continue

            payload_start = offset + 8
            fixed = bytearray(head[:payload_start])
            # RIFF size counts everything after the first 8 bytes of the file.
            struct.pack_into('<I', fixed, 4, size - 8)
            # data size counts everything after the 'data' chunk header.
            struct.pack_into('<I', fixed, offset + 4, size - payload_start)
            return bytes(fixed)
    except Exception:
        # Best effort: any failure means "serve the file unpatched".
        return None
    return None
|
||||
|
||||
|
||||
def _get_audio_duration(path: Path):
|
||||
"""Return duration in seconds for any supported audio file, or None."""
|
||||
ext = path.suffix.lower()
|
||||
@@ -142,8 +173,9 @@ def _package_result(rms_values: list, framerate: int, n_frames: int,
|
||||
else:
|
||||
rms_display = rms_values
|
||||
|
||||
# Note: the full per-window RMS list is deliberately NOT returned — the UI
|
||||
# only renders rms_display (~800 points), and the full list is ~45x larger.
|
||||
return {
|
||||
'rms': rms_values,
|
||||
'rms_display': rms_display,
|
||||
'sections': _loud_sections(rms_values, window_dur, duration, threshold, min_gap),
|
||||
'duration': round(duration, 2),
|
||||
@@ -204,6 +236,21 @@ def _analysis_cache_path(analyses_base: Path, recordings_base: Path, audio_path:
|
||||
return analyses_base / rel.parent / (rel.name + '.analysis.json')
|
||||
|
||||
|
||||
def _cached_analysis_params(cache_path: Path):
    """Read just threshold/min_gap from an analysis cache file.

    Only the first 256 characters are read and matched with a regex, instead
    of parsing the whole JSON (the embedded result can be hundreds of KB).
    This relies on the writer in _api_analyze putting these two keys first,
    in the order threshold, min_gap.

    Args:
        cache_path: Path of the ``.analysis.json`` cache file.

    Returns:
        ``{'threshold': float, 'min_gap': float}``, or None when the file is
        missing, unreadable, not valid UTF-8, or does not start with the two
        expected numeric keys. A damaged cache is treated as "no cache" and
        never raises.
    """
    try:
        with open(cache_path, 'r', encoding='utf-8') as fh:
            head = fh.read(256)
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError: a corrupt or binary cache must not fail the
        # whole file listing.
        return None

    m = re.search(r'"threshold":\s*([0-9.eE+-]+),\s*"min_gap":\s*([0-9.eE+-]+)', head)
    if not m:
        return None

    try:
        return {'threshold': float(m.group(1)), 'min_gap': float(m.group(2))}
    except ValueError:
        # The character class also matches non-numbers such as '-' or 'e'.
        return None
|
||||
|
||||
|
||||
def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
|
||||
if not analyses_base.exists():
|
||||
return
|
||||
@@ -225,6 +272,24 @@ def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
|
||||
# File listing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maps rel-path -> ((mtime_ns, size), duration) so that /api/files does not
# have to re-open every audio header on each request.
_DURATION_CACHE: dict = {}
_DURATION_CACHE_LOCK = threading.Lock()


def _cached_duration(path: Path, rel: str, stat) -> float:
    """Return the duration of ``path``, reusing a cached value when possible.

    The cache is keyed by ``rel``. An entry is reused only while its stored
    (st_mtime_ns, st_size) pair equals the one taken from ``stat``; otherwise
    the duration is recomputed with _get_audio_duration and the entry is
    overwritten. The lock is held only around cache reads and writes, not
    while the duration is being computed.

    Args:
        path: Audio file to measure on a cache miss.
        rel: Cache key for the file (its path relative to the recordings dir).
        stat: Stat result for ``path``; only st_mtime_ns and st_size are read.

    Returns:
        Whatever _get_audio_duration produced for this file version. That
        helper is documented as returning None for files it cannot read, and
        such a None is cached and returned like any other value.
    """
    fingerprint = (stat.st_mtime_ns, stat.st_size)

    with _DURATION_CACHE_LOCK:
        entry = _DURATION_CACHE.get(rel)
        if entry is not None:
            known_sig, known_duration = entry
            if known_sig == fingerprint:
                return known_duration

    fresh = _get_audio_duration(path)

    with _DURATION_CACHE_LOCK:
        _DURATION_CACHE[rel] = (fingerprint, fresh)
    return fresh
|
||||
|
||||
|
||||
def list_files(recordings_dir: str):
|
||||
"""Return list of audio file metadata dicts, sorted newest first."""
|
||||
base = Path(recordings_dir)
|
||||
@@ -245,14 +310,17 @@ def list_files(recordings_dir: str):
|
||||
for path in base.rglob('*'):
|
||||
if path.suffix.lower() not in AUDIO_EXTENSIONS:
|
||||
continue
|
||||
stat = path.stat()
|
||||
try:
|
||||
stat = path.stat()
|
||||
except OSError:
|
||||
continue # deleted between rglob and stat
|
||||
rel = str(path.relative_to(base)).replace('\\', '/')
|
||||
is_active = rel in active_files
|
||||
|
||||
# Skip reading partial headers for in-progress files — the WAV nframes
|
||||
# field and FLAC total_samples are both unfinalized while recording,
|
||||
# producing wildly incorrect values (e.g. 53375995583:39:01).
|
||||
duration = None if is_active else _get_audio_duration(path)
|
||||
duration = None if is_active else _cached_duration(path, rel, stat)
|
||||
|
||||
files.append({
|
||||
'name': rel,
|
||||
@@ -273,6 +341,10 @@ def list_files(recordings_dir: str):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _Handler(BaseHTTPRequestHandler):
|
||||
# Keep-alive: browsers reuse connections instead of a TCP handshake per
|
||||
# request. Safe because every response sets Content-Length.
|
||||
protocol_version = 'HTTP/1.1'
|
||||
|
||||
recordings_dir: str = 'recordings'
|
||||
analyses_dir: str = 'recordings/analyses'
|
||||
threshold: float = LOUD_THRESHOLD
|
||||
@@ -323,14 +395,7 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
if f.get('ext') in ('wav', 'flac') and not f.get('recording'):
|
||||
cache_path = _analysis_cache_path(
|
||||
analyses_base, recordings_base, recordings_base / f['name'])
|
||||
try:
|
||||
cached = json.loads(cache_path.read_text('utf-8'))
|
||||
f['cached_analysis'] = {
|
||||
'threshold': cached['threshold'],
|
||||
'min_gap': cached['min_gap'],
|
||||
}
|
||||
except Exception:
|
||||
f['cached_analysis'] = None
|
||||
f['cached_analysis'] = _cached_analysis_params(cache_path)
|
||||
else:
|
||||
f['cached_analysis'] = None
|
||||
self._send(200, json.dumps(files).encode('utf-8'), 'application/json')
|
||||
@@ -367,7 +432,9 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
try:
|
||||
cached = json.loads(cache_path.read_text('utf-8'))
|
||||
if cached.get('threshold') == threshold and cached.get('min_gap') == min_gap:
|
||||
payload = dict(cached['result']); payload['cached'] = True
|
||||
payload = dict(cached['result'])
|
||||
payload.pop('rms', None) # caches written before the full-RMS field was dropped
|
||||
payload['cached'] = True
|
||||
self._send(200, json.dumps(payload).encode('utf-8'), 'application/json')
|
||||
return
|
||||
except Exception:
|
||||
@@ -418,16 +485,27 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
self.end_headers()
|
||||
|
||||
with open(path, 'rb') as fh:
|
||||
self._copy_to_response(fh)
|
||||
self._copy_to_response(fh, size)
|
||||
|
||||
def _stream(self, filename: str):
|
||||
"""Serve audio for inline playback with HTTP Range support."""
|
||||
"""Serve audio for inline playback with HTTP Range support.
|
||||
|
||||
In-progress recordings are served with Cache-Control: no-store (the
|
||||
content is still growing) and, for WAV, with a header patched to the
|
||||
current size so the browser can show a duration and seek.
|
||||
"""
|
||||
path = self._safe_path(filename)
|
||||
if path is None:
|
||||
return
|
||||
|
||||
content_type = MIME_TYPES.get(path.suffix.lower(), 'application/octet-stream')
|
||||
size = path.stat().st_size
|
||||
size = path.stat().st_size
|
||||
is_active = self._is_active(filename)
|
||||
|
||||
prefix = b''
|
||||
if is_active and path.suffix.lower() == '.wav':
|
||||
prefix = _live_wav_header(path, size) or b''
|
||||
|
||||
range_header = self.headers.get('Range', '')
|
||||
m = re.match(r'bytes=(\d+)-(\d*)', range_header) if range_header else None
|
||||
|
||||
@@ -445,36 +523,48 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
self.send_header('Content-Range', f'bytes {start}-{end}/{size}')
|
||||
self.send_header('Content-Length', str(length))
|
||||
self.send_header('Accept-Ranges', 'bytes')
|
||||
if is_active:
|
||||
self.send_header('Cache-Control', 'no-store')
|
||||
self.end_headers()
|
||||
|
||||
with open(path, 'rb') as fh:
|
||||
fh.seek(start)
|
||||
self._copy_to_response(fh, length)
|
||||
sent = 0
|
||||
if start < len(prefix):
|
||||
head = prefix[start:start + length]
|
||||
self.wfile.write(head)
|
||||
sent = len(head)
|
||||
if sent < length:
|
||||
fh.seek(start + sent)
|
||||
self._copy_to_response(fh, length - sent)
|
||||
else:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', content_type)
|
||||
self.send_header('Content-Length', str(size))
|
||||
self.send_header('Accept-Ranges', 'bytes')
|
||||
if is_active:
|
||||
self.send_header('Cache-Control', 'no-store')
|
||||
self.end_headers()
|
||||
|
||||
with open(path, 'rb') as fh:
|
||||
self._copy_to_response(fh)
|
||||
if prefix:
|
||||
self.wfile.write(prefix)
|
||||
fh.seek(len(prefix))
|
||||
self._copy_to_response(fh, size - len(prefix))
|
||||
else:
|
||||
# Bound the copy: the file may grow while we serve it, and
|
||||
# writing more than Content-Length desyncs keep-alive.
|
||||
self._copy_to_response(fh, size)
|
||||
|
||||
def _api_storage(self):
    """Send free and total disk space for the recordings volume as JSON.

    The response body is ``{"disk_free": ..., "disk_total": ...}`` with a 200
    status. Both values are None when disk usage cannot be determined. If the
    recordings directory does not exist, the current directory is measured
    instead.
    """
    # 'used' is computed client-side from the file list; walking the whole
    # tree again here doubled the I/O of every page load. The walk and its
    # discarded payload are therefore not performed at all.
    base = Path(self.recordings_dir)
    try:
        du = shutil.disk_usage(str(base) if base.exists() else '.')
        disk_free, disk_total = du.free, du.total
    except Exception:
        # Best effort: report unknown rather than failing the request.
        disk_free = disk_total = None
    data = json.dumps({'disk_free': disk_free, 'disk_total': disk_total})
    self._send(200, data.encode(), 'application/json')
|
||||
|
||||
def _api_config(self):
|
||||
@@ -569,7 +659,7 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
self.send_header('Content-Length', str(tmp_size))
|
||||
self.end_headers()
|
||||
with open(tmp_path, 'rb') as fh:
|
||||
self._copy_to_response(fh)
|
||||
self._copy_to_response(fh, tmp_size)
|
||||
except subprocess.TimeoutExpired:
|
||||
self._json_err(504, 'ffmpeg timed out — file may be too large')
|
||||
finally:
|
||||
@@ -596,6 +686,10 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
self.wfile.write(chunk)
|
||||
if remaining is not None:
|
||||
remaining -= len(chunk)
|
||||
# Sent fewer bytes than Content-Length promised (file truncated while
|
||||
# serving): the keep-alive connection is desynced, force it closed.
|
||||
if remaining is not None and remaining > 0:
|
||||
self.close_connection = True
|
||||
|
||||
def _safe_path(self, filename: str):
|
||||
base = Path(self.recordings_dir).resolve()
|
||||
@@ -626,6 +720,18 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
pass
|
||||
|
||||
|
||||
class _Server(ThreadingHTTPServer):
    """ThreadingHTTPServer that does not report client disconnects.

    Browsers abort audio range requests constantly while seeking, which would
    otherwise produce an error report for every aborted request.
    """

    def handle_error(self, request, client_address):
        """Drop connection and timeout errors; report everything else.

        The exception being handled is read from sys.exc_info(). Instances of
        ConnectionError or TimeoutError are ignored; any other error is
        passed to the base class's handle_error.
        """
        import sys

        _, failure, _ = sys.exc_info()
        if not isinstance(failure, (ConnectionError, TimeoutError)):
            super().handle_error(request, client_address)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# UI page — single-page HTML/CSS/JS, loaded once at startup
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -668,7 +774,7 @@ def main():
|
||||
threshold = args.threshold
|
||||
min_gap = args.min_gap
|
||||
|
||||
server = ThreadingHTTPServer((args.host, args.port), Handler)
|
||||
server = _Server((args.host, args.port), Handler)
|
||||
|
||||
print(f"ISR Web running on http://{args.host}:{args.port}/")
|
||||
print(f"Recordings dir: {rec_dir}")
|
||||
|
||||
Reference in New Issue
Block a user