feat: onset-aware section scoring so slow swells rank at the bottom

A section's score is now its peak dB above the noise floor, capped by the sharpest dB rise observed within ONSET_SECONDS (0.5 s). Real events (voices, impacts, barks) rise fast and keep their full prominence; a gradual swell that outruns the 30 s noise-floor blocks (gusts, distant approaching cars) is still flagged but scores near zero, so score-ranked review (chips, U/I highlights, "Highlights only" mode) surfaces events first.

Windows in a file's first 0.5 s have no earlier level to compare against, so their rise is measured against the noise floor instead; an event cut off by a file split is therefore not punished as a swell.

Old cached analyses carry now-wrong scores, so the cache gains a leading "detector" version key (DETECTOR_VERSION = 2) that is checked by both _cached_analysis_params() and the /api/analyze cache-hit path; v1 caches (which lack the key) never match and are recomputed on the next analyse.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 14:57:19 +02:00
parent 6431918989
commit f6031cfa16
4 changed files with 76 additions and 20 deletions
@@ -55,6 +55,13 @@ NOISE_BLOCK_SECONDS = 30.0 # noise floor is estimated per block of this length
 NOISE_PERCENTILE    = 20   # percentile of windowed dB levels taken as the floor
 MIN_RMS             = 0.002  # ≈ −54 dBFS; the floor never drops below this, so
                             # digital silence does not make every tiny sound loud
+ONSET_SECONDS       = 0.5  # a section's score is capped by its sharpest dB rise
+                           # within this span, so slow swells rank low
+
+# Bumped whenever section detection/scoring changes in a way that makes old
+# cached results wrong (not just differently parameterised). Caches written
+# with another version never match and are recomputed on the next analyse.
+DETECTOR_VERSION = 2

 CLIP_MAX_SECONDS = 600     # upper bound on /api/clip length

@@ -331,8 +338,12 @@ def _loud_sections(rms_values: list, window_dur: float, duration: float,
                   margin_db: float, min_gap: float = MIN_GAP_SECONDS,
                   min_duration: float = MIN_DURATION_SECONDS) -> list:
    """Sections whose level rises at least margin_db above the local noise
-    floor. Each section carries a 'score': its peak dB above the floor, used
-    by the UI to rank sections by how much they stand out.
+    floor. Each section carries a 'score': its peak dB above the floor,
+    capped by the sharpest rise observed within ONSET_SECONDS. Real events
+    (voices, impacts, barks) have steep onsets, so their cap equals their
+    peak; a swell that drifts up slower than the noise-floor blocks can track
+    (wind, a distant approaching car) still flags but scores near zero, so
+    score-ranked review (chips, U/I highlights) surfaces events first.

    Sections shorter than min_duration (after min_gap merging) are discarded:
    without this, every isolated 100 ms window that pops above the floor — a
@@ -341,11 +352,16 @@ def _loud_sections(rms_values: list, window_dur: float, duration: float,
    db = [20 * math.log10(max(r, 1e-6)) for r in rms_values]
    floor = _noise_floor_db(db, window_dur)
    min_db = 20 * math.log10(MIN_RMS)
+    onset_win = max(1, int(round(ONSET_SECONDS / window_dur)))

    sections = []
    start_t = None
    last_loud_t = None
    peak = 0.0
+    onset = 0.0
+
+    def _score():
+        return round(max(0.0, min(peak, onset)), 1)

    for i, d in enumerate(db):
        t = i * window_dur
@@ -354,21 +370,27 @@ def _loud_sections(rms_values: list, window_dur: float, duration: float,
            if start_t is None:
                start_t = t
                peak = 0.0
+                onset = 0.0
            last_loud_t = t
            peak = max(peak, d - floor_eff)
+            # Rise within the onset span; a section starting before the file
+            # has history is measured against the floor instead (an event cut
+            # off by a file split must not be punished as a swell).
+            rise = d - db[i - onset_win] if i >= onset_win else d - floor_eff
+            onset = max(onset, rise)
        else:
            if start_t is not None and (t - last_loud_t) > min_gap:
                end_t = last_loud_t + window_dur
                if end_t - start_t >= min_duration - 1e-9:
                    sections.append({'start': round(start_t, 1),
                                     'end':   round(end_t, 1),
-                                     'score': round(peak, 1)})
+                                     'score': _score()})
                start_t = None
                last_loud_t = None

    if start_t is not None and (last_loud_t + window_dur - start_t) >= min_duration - 1e-9:
        sections.append({'start': round(start_t, 1), 'end': round(duration, 1),
-                         'score': round(peak, 1)})
+                         'score': _score()})

    return sections

@@ -452,21 +474,22 @@ def _analysis_cache_path(analyses_base: Path, recordings_base: Path, audio_path:


 def _cached_analysis_params(cache_path: Path):
-    """Read just margin/min_gap/min_duration from a cache file without parsing
-    the whole JSON (the embedded result can be hundreds of KB). Relies on the
-    writer in _api_analyze putting these three keys first. Caches written by
-    older detector versions lack one of the keys and simply never match."""
+    """Read just detector/margin/min_gap/min_duration from a cache file
+    without parsing the whole JSON (the embedded result can be hundreds of
+    KB). Relies on the writer in _api_analyze putting these keys first.
+    Caches written by other detector versions (or so old they lack a key)
+    simply never match and get recomputed on the next analyse."""
    try:
        with open(cache_path, 'r', encoding='utf-8') as fh:
            head = fh.read(256)
    except OSError:
        return None
-    m = re.search(r'"margin":\s*([0-9.eE+-]+),\s*"min_gap":\s*([0-9.eE+-]+),'
-                  r'\s*"min_duration":\s*([0-9.eE+-]+)', head)
-    if not m:
+    m = re.search(r'"detector":\s*(\d+),\s*"margin":\s*([0-9.eE+-]+),'
+                  r'\s*"min_gap":\s*([0-9.eE+-]+),\s*"min_duration":\s*([0-9.eE+-]+)', head)
+    if not m or int(m.group(1)) != DETECTOR_VERSION:
        return None
-    return {'margin': float(m.group(1)), 'min_gap': float(m.group(2)),
-            'min_duration': float(m.group(3))}
+    return {'margin': float(m.group(2)), 'min_gap': float(m.group(3)),
+            'min_duration': float(m.group(4))}


 def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
@@ -663,7 +686,8 @@ class _Handler(BaseHTTPRequestHandler):
        cache_path = _analysis_cache_path(analyses_base, recordings_base, path)
        try:
            cached = json.loads(cache_path.read_text('utf-8'))
-            if (cached.get('margin') == margin and cached.get('min_gap') == min_gap
+            if (cached.get('detector') == DETECTOR_VERSION
+                    and cached.get('margin') == margin and cached.get('min_gap') == min_gap
                    and cached.get('min_duration') == min_duration):
                payload = dict(cached['result'])
                payload.pop('rms', None)  # caches written before the full-RMS field was dropped
@@ -688,9 +712,10 @@ class _Handler(BaseHTTPRequestHandler):
        try:
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            tmp = cache_path.with_suffix('.tmp')
-            # margin, min_gap and min_duration MUST stay first:
+            # detector, margin, min_gap and min_duration MUST stay first:
            # _cached_analysis_params reads only the first 256 bytes of this file
-            tmp.write_text(json.dumps({'margin': margin, 'min_gap': min_gap,
+            tmp.write_text(json.dumps({'detector': DETECTOR_VERSION,
+                                       'margin': margin, 'min_gap': min_gap,
                                       'min_duration': min_duration, 'result': result}), 'utf-8')
            os.replace(tmp, cache_path)
        except Exception as e: