feat: move analysis cache to recordings/analyses/, prune orphans on startup

- Cache files now live in recordings/analyses/<filename>.analysis.json
  (mirroring the relative path for files in subdirectories) rather than
  alongside each audio file.
- _api_delete now removes the corresponding cache file after deleting audio.
- prune_orphan_analyses() runs at startup and removes any .analysis.json
  whose audio file no longer exists (handles files deleted outside the UI).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-02 22:33:26 +02:00
parent e22c0059f6
commit eb774a0876
2 changed files with 38 additions and 2 deletions
+1 -1
View File
@@ -159,7 +159,7 @@ Shows recordings grouped by day with collapsible sections. Features:
- **Day groups** — recordings are grouped under a collapsible day heading showing date, file count, total duration, and total size. The most recent day is expanded by default; older days start collapsed. Expanded state is preserved across filter changes. - **Day groups** — recordings are grouped under a collapsible day heading showing date, file count, total duration, and total size. The most recent day is expanded by default; older days start collapsed. Expanded state is preserved across filter changes.
- **Day highlights** — click **★ Highlights** on any day heading to run loudness analysis across all WAV/FLAC files in that day and display a combined activity timeline SVG. Orange segments show when loud sections occurred relative to the day's time span; blue shows the file extents. Labels show the start, midpoint, and end times. - **Day highlights** — click **★ Highlights** on any day heading to run loudness analysis across all WAV/FLAC files in that day and display a combined activity timeline SVG. Orange segments show when loud sections occurred relative to the day's time span; blue shows the file extents. Labels show the start, midpoint, and end times.
- **Inline playback** — collapsible `▶ Play` button per row; audio loads lazily via a seekable `/stream/` endpoint with HTTP Range support. Metadata is fetched immediately so the duration is visible without pressing play. - **Inline playback** — collapsible `▶ Play` button per row; audio loads lazily via a seekable `/stream/` endpoint with HTTP Range support. Metadata is fetched immediately so the duration is visible without pressing play.
- **Waveform analysis** — on demand per file; computes RMS per 100 ms window and highlights loud sections. Supported for WAV and FLAC (FLAC requires `numpy` + `soundfile`). Pure-Python fallback for WAV when numpy is absent. Results are cached alongside the audio file as `<filename>.analysis.json`; subsequent requests at the same threshold and min-gap settings return instantly without re-reading the audio. - **Waveform analysis** — on demand per file; computes RMS per 100 ms window and highlights loud sections. Supported for WAV and FLAC (FLAC requires `numpy` + `soundfile`). Pure-Python fallback for WAV when numpy is absent. Results are cached in `recordings/analyses/<filename>.analysis.json`; subsequent requests at the same threshold and min-gap settings return instantly without re-reading the audio. The cache file is deleted automatically when the audio file is deleted. Orphaned cache files (audio deleted outside the UI) are pruned on startup.
- **Grace period** — configurable in the controls bar (default 2 s). Loud sections separated by less than this gap are merged into one. Raise this (e.g. to 15–30 s) when a single event generates many timestamps due to brief quiet gaps within it. - **Grace period** — configurable in the controls bar (default 2 s). Loud sections separated by less than this gap are merged into one. Raise this (e.g. to 15–30 s) when a single event generates many timestamps due to brief quiet gaps within it.
- **Timestamp jump** — after analysis, click any loud-section chip to seek the player to that position and pre-fill the cut panel. Use **J** / **K** keyboard shortcuts to jump to the previous / next section while audio is playing. - **Timestamp jump** — after analysis, click any loud-section chip to seek the player to that position and pre-fill the cut panel. Use **J** / **K** keyboard shortcuts to jump to the previous / next section while audio is playing.
- **Cut & download** — `✂ Cut` button opens the player row and reveals a cut panel. Enter start and end times in `m:ss` or `h:mm:ss` format and click **↓ Download cut** to receive an ffmpeg-trimmed copy without re-encoding. Requires ffmpeg (included in the Docker image). - **Cut & download** — `✂ Cut` button opens the player row and reveals a cut panel. Enter start and end times in `m:ss` or `h:mm:ss` format and click **↓ Download cut** to receive an ffmpeg-trimmed copy without re-encoding. Requires ffmpeg (included in the Docker image).
+37 -1
View File
@@ -203,6 +203,33 @@ def analyze_flac(path: Path, window_samples: int = WINDOW_SAMPLES,
return _package_result(rms_values, framerate, n_frames, window_samples, threshold, min_gap) return _package_result(rms_values, framerate, n_frames, window_samples, threshold, min_gap)
# ---------------------------------------------------------------------------
# Analysis cache helpers
# ---------------------------------------------------------------------------
def _analysis_cache_path(base: Path, audio_path: Path) -> Path:
    """Return the cache-file location for *audio_path* under ``base/analyses``.

    The audio file's path relative to *base* is mirrored beneath the
    ``analyses`` directory, and ``.analysis.json`` is appended to the file
    name. ``Path.relative_to`` raises ``ValueError`` when *audio_path* does
    not lie under *base*.
    """
    relative = audio_path.relative_to(base)
    cache_name = f'{relative.name}.analysis.json'
    return base.joinpath('analyses', relative.parent, cache_name)
def prune_orphan_analyses(base: Path):
    """Delete cached analysis files whose audio file no longer exists.

    The cache tree under ``base / 'analyses'`` mirrors the layout of *base*:
    ``analyses/<rel>/<name>.analysis.json`` belongs to ``<rel>/<name>``.
    Every cache file without a matching audio path is removed, and a summary
    line is printed when anything was deleted.

    Pruning is best-effort: a filesystem error on one entry is reported and
    that entry is skipped, so a single unreadable or undeletable file cannot
    abort the caller.

    Args:
        base: Root recordings directory that contains ``analyses/``.
    """
    suffix = '.analysis.json'
    analyses_dir = base / 'analyses'
    if not analyses_dir.is_dir():
        return

    removed = 0
    for cache in analyses_dir.rglob('*' + suffix):
        rel = cache.relative_to(analyses_dir)
        # Strip the cache suffix to recover the audio file name.
        audio_path = base / rel.parent / rel.name[:-len(suffix)]
        try:
            # Directories can match the glob too; they are never cache files.
            if cache.is_dir() or audio_path.exists():
                continue
            cache.unlink()
        except OSError as exc:
            print(f'Could not prune analysis cache {cache}: {exc}')
            continue
        removed += 1

    if removed:
        print(f'Pruned {removed} orphaned analysis cache file(s)')
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# File listing # File listing
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -331,7 +358,8 @@ class _Handler(BaseHTTPRequestHandler):
except Exception: except Exception:
pass pass
cache_path = path.parent / (path.name + '.analysis.json') base = Path(self.recordings_dir).resolve()
cache_path = _analysis_cache_path(base, path)
try: try:
cached = json.loads(cache_path.read_text('utf-8')) cached = json.loads(cache_path.read_text('utf-8'))
if cached.get('threshold') == threshold and cached.get('min_gap') == min_gap: if cached.get('threshold') == threshold and cached.get('min_gap') == min_gap:
@@ -353,6 +381,7 @@ class _Handler(BaseHTTPRequestHandler):
return return
try: try:
cache_path.parent.mkdir(parents=True, exist_ok=True)
tmp = cache_path.with_suffix('.tmp') tmp = cache_path.with_suffix('.tmp')
tmp.write_text(json.dumps({'threshold': threshold, 'min_gap': min_gap, 'result': result}), 'utf-8') tmp.write_text(json.dumps({'threshold': threshold, 'min_gap': min_gap, 'result': result}), 'utf-8')
os.replace(tmp, cache_path) os.replace(tmp, cache_path)
@@ -481,6 +510,11 @@ class _Handler(BaseHTTPRequestHandler):
self._json_err(500, f'Failed to delete: {e}') self._json_err(500, f'Failed to delete: {e}')
return return
try:
_analysis_cache_path(Path(self.recordings_dir).resolve(), path).unlink()
except Exception:
pass
self._send(200, json.dumps({'deleted': filename}).encode(), 'application/json') self._send(200, json.dumps({'deleted': filename}).encode(), 'application/json')
def _api_cut(self, qs): def _api_cut(self, qs):
@@ -1567,6 +1601,8 @@ def main():
if not rec_dir.exists(): if not rec_dir.exists():
print(f"Warning: recordings directory '{args.dir}' does not exist yet.") print(f"Warning: recordings directory '{args.dir}' does not exist yet.")
prune_orphan_analyses(rec_dir.resolve())
class Handler(_Handler): class Handler(_Handler):
recordings_dir = str(rec_dir.resolve()) recordings_dir = str(rec_dir.resolve())
threshold = args.threshold threshold = args.threshold