feat: onset-aware section scoring so slow swells rank at the bottom

A section's score is now its peak dB above the noise floor, capped by
the sharpest rise within ONSET_SECONDS (0.5 s). Real events (voices,
impacts, barks) rise fast and keep their full prominence; a gradual
swell that outruns the 30 s floor blocks (gusts, distant approaching
cars) still flags but scores near zero, so score-ranked review (chips,
U/I highlights, "Highlights only" mode) surfaces events first. A
section starting in a file's first 0.5 s is scored against the floor
instead, so events cut off by a file split are not punished as swells.

Old cached analyses carry now-wrong scores, so the cache gains a
leading "detector" version key (DETECTOR_VERSION = 2) checked by both
_cached_analysis_params() and the /api/analyze cache hit path; v1
caches never match and are recomputed on the next analyse.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-06-11 14:57:19 +02:00
parent 6431918989
commit f6031cfa16
4 changed files with 76 additions and 20 deletions
+41 -16
View File
@@ -55,6 +55,13 @@ NOISE_BLOCK_SECONDS = 30.0 # noise floor is estimated per block of this length
NOISE_PERCENTILE = 20 # percentile of windowed dB levels taken as the floor
MIN_RMS = 0.002 # ≈ -54 dBFS; the floor never drops below this, so
# digital silence does not make every tiny sound loud
ONSET_SECONDS = 0.5 # a section's score is capped by its sharpest dB rise
# within this span, so slow swells rank low
# Bumped whenever section detection/scoring changes in a way that makes old
# cached results wrong (not just differently parameterised). Caches written
# with another version never match and are recomputed on the next analyse.
DETECTOR_VERSION = 2
CLIP_MAX_SECONDS = 600 # upper bound on /api/clip length
@@ -331,8 +338,12 @@ def _loud_sections(rms_values: list, window_dur: float, duration: float,
margin_db: float, min_gap: float = MIN_GAP_SECONDS,
min_duration: float = MIN_DURATION_SECONDS) -> list:
"""Sections whose level rises at least margin_db above the local noise
floor. Each section carries a 'score': its peak dB above the floor, used
by the UI to rank sections by how much they stand out.
floor. Each section carries a 'score': its peak dB above the floor,
capped by the sharpest rise observed within ONSET_SECONDS. Real events
(voices, impacts, barks) have steep onsets, so their cap equals their
peak; a swell that drifts up slower than the noise-floor blocks can track
(wind, a distant approaching car) still flags but scores near zero, so
score-ranked review (chips, U/I highlights) surfaces events first.
Sections shorter than min_duration (after min_gap merging) are discarded:
without this, every isolated 100 ms window that pops above the floor — a
@@ -341,11 +352,16 @@ def _loud_sections(rms_values: list, window_dur: float, duration: float,
db = [20 * math.log10(max(r, 1e-6)) for r in rms_values]
floor = _noise_floor_db(db, window_dur)
min_db = 20 * math.log10(MIN_RMS)
onset_win = max(1, int(round(ONSET_SECONDS / window_dur)))
sections = []
start_t = None
last_loud_t = None
peak = 0.0
onset = 0.0
def _score():
return round(max(0.0, min(peak, onset)), 1)
for i, d in enumerate(db):
t = i * window_dur
@@ -354,21 +370,27 @@ def _loud_sections(rms_values: list, window_dur: float, duration: float,
if start_t is None:
start_t = t
peak = 0.0
onset = 0.0
last_loud_t = t
peak = max(peak, d - floor_eff)
# Rise within the onset span; a section starting before the file
# has history is measured against the floor instead (an event cut
# off by a file split must not be punished as a swell).
rise = d - db[i - onset_win] if i >= onset_win else d - floor_eff
onset = max(onset, rise)
else:
if start_t is not None and (t - last_loud_t) > min_gap:
end_t = last_loud_t + window_dur
if end_t - start_t >= min_duration - 1e-9:
sections.append({'start': round(start_t, 1),
'end': round(end_t, 1),
'score': round(peak, 1)})
'score': _score()})
start_t = None
last_loud_t = None
if start_t is not None and (last_loud_t + window_dur - start_t) >= min_duration - 1e-9:
sections.append({'start': round(start_t, 1), 'end': round(duration, 1),
'score': round(peak, 1)})
'score': _score()})
return sections
@@ -452,21 +474,22 @@ def _analysis_cache_path(analyses_base: Path, recordings_base: Path, audio_path:
def _cached_analysis_params(cache_path: Path):
"""Read just margin/min_gap/min_duration from a cache file without parsing
the whole JSON (the embedded result can be hundreds of KB). Relies on the
writer in _api_analyze putting these three keys first. Caches written by
older detector versions lack one of the keys and simply never match."""
"""Read just detector/margin/min_gap/min_duration from a cache file
without parsing the whole JSON (the embedded result can be hundreds of
KB). Relies on the writer in _api_analyze putting these keys first.
Caches written by other detector versions (or so old they lack a key)
simply never match and get recomputed on the next analyse."""
try:
with open(cache_path, 'r', encoding='utf-8') as fh:
head = fh.read(256)
except OSError:
return None
m = re.search(r'"margin":\s*([0-9.eE+-]+),\s*"min_gap":\s*([0-9.eE+-]+),'
r'\s*"min_duration":\s*([0-9.eE+-]+)', head)
if not m:
m = re.search(r'"detector":\s*(\d+),\s*"margin":\s*([0-9.eE+-]+),'
r'\s*"min_gap":\s*([0-9.eE+-]+),\s*"min_duration":\s*([0-9.eE+-]+)', head)
if not m or int(m.group(1)) != DETECTOR_VERSION:
return None
return {'margin': float(m.group(1)), 'min_gap': float(m.group(2)),
'min_duration': float(m.group(3))}
return {'margin': float(m.group(2)), 'min_gap': float(m.group(3)),
'min_duration': float(m.group(4))}
def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
@@ -663,7 +686,8 @@ class _Handler(BaseHTTPRequestHandler):
cache_path = _analysis_cache_path(analyses_base, recordings_base, path)
try:
cached = json.loads(cache_path.read_text('utf-8'))
if (cached.get('margin') == margin and cached.get('min_gap') == min_gap
if (cached.get('detector') == DETECTOR_VERSION
and cached.get('margin') == margin and cached.get('min_gap') == min_gap
and cached.get('min_duration') == min_duration):
payload = dict(cached['result'])
payload.pop('rms', None) # caches written before the full-RMS field was dropped
@@ -688,9 +712,10 @@ class _Handler(BaseHTTPRequestHandler):
try:
cache_path.parent.mkdir(parents=True, exist_ok=True)
tmp = cache_path.with_suffix('.tmp')
# margin, min_gap and min_duration MUST stay first:
# detector, margin, min_gap and min_duration MUST stay first:
# _cached_analysis_params reads only the first 256 bytes of this file
tmp.write_text(json.dumps({'margin': margin, 'min_gap': min_gap,
tmp.write_text(json.dumps({'detector': DETECTOR_VERSION,
'margin': margin, 'min_gap': min_gap,
'min_duration': min_duration, 'result': result}), 'utf-8')
os.replace(tmp, cache_path)
except Exception as e: