feat: adaptive noise-floor loudness detection with section scoring
Replace the fixed RMS threshold with prominence over a rolling noise floor (20th percentile per 30 s block, min-smoothed so events cannot raise their own floor, clamped to -54 dBFS). Slow ambience changes such as rain or daytime traffic hum move the floor instead of flagging everything; sections now need `margin` dB (default 12) of prominence. Each section carries a score (peak dB above floor); day-highlight chips show the top 50 by score when there are too many to list, so the most striking events are reviewed first. --threshold is replaced by --margin; analysis caches are now keyed by margin+min_gap, so old threshold-keyed caches never match and are overwritten on the next analyse. The detector is covered by tests/test_web.py. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -3,13 +3,14 @@
|
||||
ISR Web — Browse and download recorded audio files.
|
||||
|
||||
Shows a chronological table of all recordings, allows inline playback,
|
||||
download, and analyses WAV/FLAC files for loud sections using RMS.
|
||||
download, and analyses WAV/FLAC files for sections that stand out above the
|
||||
background noise.
|
||||
|
||||
Usage:
|
||||
python web.py # serves recordings/ on port 8080
|
||||
python web.py --dir /path/to/audio # custom recordings directory
|
||||
python web.py --port 8888 # custom port
|
||||
python web.py --threshold 0.03 # loudness threshold (0-1, default 0.05)
|
||||
python web.py --margin 15 # dB above noise floor (default 12)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -46,9 +47,14 @@ except ImportError:
|
||||
|
||||
AUDIO_EXTENSIONS = {'.wav', '.mp3', '.ogg', '.flac', '.aac', '.opus'}
|
||||
WINDOW_SAMPLES = 4800 # 100 ms at 48 kHz
|
||||
LOUD_THRESHOLD = 0.05 # RMS 0–1 scale; sections above this are "interesting"
|
||||
MARGIN_DB = 12.0 # sections must rise this many dB above the noise floor
|
||||
MIN_GAP_SECONDS = 2.0 # merge loud sections separated by less than this
|
||||
|
||||
NOISE_BLOCK_SECONDS = 30.0 # noise floor is estimated per block of this length
|
||||
NOISE_PERCENTILE = 20 # percentile of windowed dB levels taken as the floor
|
||||
MIN_RMS = 0.002 # ≈ −54 dBFS; the floor never drops below this, so
|
||||
# digital silence does not make every tiny sound loud
|
||||
|
||||
MIME_TYPES = {
|
||||
'.wav': 'audio/wav',
|
||||
'.mp3': 'audio/mpeg',
|
||||
@@ -254,33 +260,64 @@ def _compute_rms_windows_wav(wf, channels: int, sampwidth: int, framerate: int,
|
||||
yield round(rms, 5)
|
||||
|
||||
|
||||
def _noise_floor_db(db_values: list, window_dur: float,
                    block_seconds=None, percentile=None) -> list:
    """Return the per-window background noise floor in dBFS.

    The input is cut into consecutive blocks of roughly ``block_seconds``.
    Within each block the floor is the ``percentile``-th percentile of the
    windowed dB levels.  The block floors are then min-smoothed over the two
    neighbouring blocks on either side, so an event that fills a whole block
    cannot raise its own floor.  This tracks slow ambience changes
    (day/night, rain, traffic hum) so that detection is relative to "how
    loud it normally is right now".

    Args:
        db_values: Level of each analysis window in dB.
        window_dur: Duration of one analysis window in seconds.
        block_seconds: Block length in seconds; defaults to
            NOISE_BLOCK_SECONDS when None.
        percentile: Percentile (0-100) taken as the floor of a block;
            defaults to NOISE_PERCENTILE when None.

    Returns:
        A list the same length as ``db_values`` holding the smoothed floor of
        the block each window belongs to.  Empty input yields an empty list.
    """
    if block_seconds is None:
        block_seconds = NOISE_BLOCK_SECONDS
    if percentile is None:
        percentile = NOISE_PERCENTILE

    n = len(db_values)
    # A block always holds at least one window, even for very long windows.
    block = max(1, int(round(block_seconds / window_dur)))

    floors = []
    for start in range(0, n, block):
        chunk = sorted(db_values[start:start + block])
        # Clamp the index: percentile == 100 would otherwise address one
        # element past the end of the chunk.
        idx = int(len(chunk) * percentile / 100)
        idx = min(len(chunk) - 1, max(0, idx))
        floors.append(chunk[idx])

    # Minimum over blocks b-2 .. b+2 (fewer at either end of the file).
    smoothed = [min(floors[max(0, b - 2):b + 3]) for b in range(len(floors))]

    # i // block is always a valid block index because the blocks tile range(n).
    return [smoothed[i // block] for i in range(n)]
|
||||
|
||||
|
||||
def _loud_sections(rms_values: list, window_dur: float, duration: float,
                   margin_db: float, min_gap: float = MIN_GAP_SECONDS) -> list:
    """Find sections whose level rises at least margin_db above the local
    noise floor.

    Args:
        rms_values: RMS level of each analysis window.
        window_dur: Duration of one analysis window in seconds.
        duration: Total duration of the recording in seconds.
        margin_db: Required prominence over the noise floor, in dB.
        min_gap: Loud windows separated by no more than this many seconds
            are merged into one section.

    Returns:
        A list of dicts with 'start' and 'end' (seconds, one decimal) and
        'score': the section's peak dB above the floor, used by the UI to
        rank sections by how much they stand out.
    """
    # Clamp before the log so digital silence maps to -120 dB instead of
    # raising a math domain error.
    db = [20 * math.log10(max(r, 1e-6)) for r in rms_values]
    floor = _noise_floor_db(db, window_dur)
    # Lower bound for the floor, so that near-silent stretches do not make
    # every tiny sound count as loud.
    min_db = 20 * math.log10(MIN_RMS)

    sections = []
    start_t = None
    last_loud_t = None
    peak = 0.0

    for i, d in enumerate(db):
        t = i * window_dur
        floor_eff = max(floor[i], min_db)
        if d >= floor_eff + margin_db:
            if start_t is None:
                # A new section opens here; its peak starts from scratch.
                start_t = t
                peak = 0.0
            last_loud_t = t
            peak = max(peak, d - floor_eff)
        elif start_t is not None and (t - last_loud_t) > min_gap:
            # Quiet for longer than min_gap: close the section at the end of
            # its last loud window.
            sections.append({'start': round(start_t, 1),
                             'end': round(last_loud_t + window_dur, 1),
                             'score': round(peak, 1)})
            start_t = None
            last_loud_t = None

    if start_t is not None:
        # The recording ended while a section was still open.
        # NOTE(review): this extends the section to the end of the file even
        # if the last loud window was earlier (within min_gap) - confirm that
        # is intended rather than last_loud_t + window_dur.
        sections.append({'start': round(start_t, 1), 'end': round(duration, 1),
                         'score': round(peak, 1)})

    return sections
|
||||
|
||||
|
||||
def _package_result(rms_values: list, framerate: int, n_frames: int,
|
||||
window_samples: int, threshold: float,
|
||||
window_samples: int, margin_db: float,
|
||||
min_gap: float = MIN_GAP_SECONDS) -> dict:
|
||||
window_dur = window_samples / framerate
|
||||
duration = n_frames / framerate
|
||||
@@ -295,14 +332,14 @@ def _package_result(rms_values: list, framerate: int, n_frames: int,
|
||||
# only renders rms_display (~800 points), and the full list is ~45x larger.
|
||||
return {
|
||||
'rms_display': rms_display,
|
||||
'sections': _loud_sections(rms_values, window_dur, duration, threshold, min_gap),
|
||||
'sections': _loud_sections(rms_values, window_dur, duration, margin_db, min_gap),
|
||||
'duration': round(duration, 2),
|
||||
'window': round(window_dur, 4),
|
||||
}
|
||||
|
||||
|
||||
def analyze_wav(path: Path, window_samples: int = WINDOW_SAMPLES,
|
||||
threshold: float = LOUD_THRESHOLD,
|
||||
margin_db: float = MARGIN_DB,
|
||||
min_gap: float = MIN_GAP_SECONDS) -> dict:
|
||||
try:
|
||||
with wave.open(str(path), 'rb') as wf:
|
||||
@@ -315,11 +352,11 @@ def analyze_wav(path: Path, window_samples: int = WINDOW_SAMPLES,
|
||||
except Exception as e:
|
||||
return {'error': str(e)}
|
||||
|
||||
return _package_result(rms_values, framerate, n_frames, window_samples, threshold, min_gap)
|
||||
return _package_result(rms_values, framerate, n_frames, window_samples, margin_db, min_gap)
|
||||
|
||||
|
||||
def analyze_flac(path: Path, window_samples: int = WINDOW_SAMPLES,
|
||||
threshold: float = LOUD_THRESHOLD,
|
||||
margin_db: float = MARGIN_DB,
|
||||
min_gap: float = MIN_GAP_SECONDS) -> dict:
|
||||
"""Analyse a FLAC file for loudness. Requires numpy and soundfile."""
|
||||
if not NUMPY_AVAILABLE or not SOUNDFILE_AVAILABLE:
|
||||
@@ -342,7 +379,7 @@ def analyze_flac(path: Path, window_samples: int = WINDOW_SAMPLES,
|
||||
except Exception as e:
|
||||
return {'error': str(e)}
|
||||
|
||||
return _package_result(rms_values, framerate, n_frames, window_samples, threshold, min_gap)
|
||||
return _package_result(rms_values, framerate, n_frames, window_samples, margin_db, min_gap)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -355,18 +392,19 @@ def _analysis_cache_path(analyses_base: Path, recordings_base: Path, audio_path:
|
||||
|
||||
|
||||
def _cached_analysis_params(cache_path: Path):
    """Read just margin/min_gap from a cache file without parsing the whole
    JSON (the embedded result can be hundreds of KB).

    Relies on the writer in _api_analyze putting these two keys first: only
    the first 256 characters of the file are inspected.  Caches written by
    the old fixed-threshold detector have no margin key and simply never
    match.

    Returns:
        {'margin': float, 'min_gap': float}, or None when the file cannot be
        read, is not valid UTF-8, does not start with the two keys, or holds
        a value that is not a valid number.
    """
    try:
        with open(cache_path, 'r', encoding='utf-8') as fh:
            head = fh.read(256)
    except (OSError, UnicodeDecodeError):
        # Unreadable or corrupted cache: treat it as "no cache".
        return None

    m = re.search(r'"margin":\s*([0-9.eE+-]+),\s*"min_gap":\s*([0-9.eE+-]+)', head)
    if not m:
        return None

    try:
        # The character class is permissive (it would match e.g. "1.2.3"),
        # so the conversion itself has to be guarded.
        return {'margin': float(m.group(1)), 'min_gap': float(m.group(2))}
    except ValueError:
        return None
|
||||
|
||||
|
||||
def prune_orphan_analyses(analyses_base: Path, recordings_base: Path):
|
||||
@@ -465,7 +503,7 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
|
||||
recordings_dir: str = 'recordings'
|
||||
analyses_dir: str = 'recordings/analyses'
|
||||
threshold: float = LOUD_THRESHOLD
|
||||
margin_db: float = MARGIN_DB
|
||||
min_gap: float = MIN_GAP_SECONDS
|
||||
|
||||
def do_DELETE(self):
|
||||
@@ -529,10 +567,10 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
return
|
||||
|
||||
try:
|
||||
threshold = float(qs.get('threshold', [self.threshold])[0])
|
||||
threshold = max(0.0, min(1.0, threshold))
|
||||
margin = float(qs.get('margin', [self.margin_db])[0])
|
||||
margin = max(1.0, min(60.0, margin))
|
||||
except (ValueError, TypeError):
|
||||
threshold = self.threshold
|
||||
margin = self.margin_db
|
||||
|
||||
try:
|
||||
min_gap = float(qs.get('min_gap', [self.min_gap])[0])
|
||||
@@ -549,7 +587,7 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
cache_path = _analysis_cache_path(analyses_base, recordings_base, path)
|
||||
try:
|
||||
cached = json.loads(cache_path.read_text('utf-8'))
|
||||
if cached.get('threshold') == threshold and cached.get('min_gap') == min_gap:
|
||||
if cached.get('margin') == margin and cached.get('min_gap') == min_gap:
|
||||
payload = dict(cached['result'])
|
||||
payload.pop('rms', None) # caches written before the full-RMS field was dropped
|
||||
payload['cached'] = True
|
||||
@@ -560,12 +598,12 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
|
||||
ext = path.suffix.lower()
|
||||
if ext == '.wav':
|
||||
result = analyze_wav(path, threshold=threshold, min_gap=min_gap)
|
||||
result = analyze_wav(path, margin_db=margin, min_gap=min_gap)
|
||||
elif ext == '.flac':
|
||||
if not (NUMPY_AVAILABLE and SOUNDFILE_AVAILABLE):
|
||||
self._json_err(400, 'FLAC analysis requires: pip install numpy soundfile')
|
||||
return
|
||||
result = analyze_flac(path, threshold=threshold, min_gap=min_gap)
|
||||
result = analyze_flac(path, margin_db=margin, min_gap=min_gap)
|
||||
else:
|
||||
self._json_err(400, f'Loudness analysis is not available for {ext} files')
|
||||
return
|
||||
@@ -573,7 +611,9 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
try:
|
||||
cache_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = cache_path.with_suffix('.tmp')
|
||||
tmp.write_text(json.dumps({'threshold': threshold, 'min_gap': min_gap, 'result': result}), 'utf-8')
|
||||
# margin and min_gap MUST stay first: _cached_analysis_params reads
|
||||
# only the first 256 bytes of this file
|
||||
tmp.write_text(json.dumps({'margin': margin, 'min_gap': min_gap, 'result': result}), 'utf-8')
|
||||
os.replace(tmp, cache_path)
|
||||
except Exception as e:
|
||||
print(f'Warning: could not write analysis cache {cache_path}: {e}', flush=True)
|
||||
@@ -690,7 +730,7 @@ class _Handler(BaseHTTPRequestHandler):
|
||||
self._send(200, data.encode(), 'application/json')
|
||||
|
||||
def _api_config(self):
    """Send the server's default detection parameters as JSON.

    The payload has two keys: 'margin' (the handler's margin_db) and
    'min_gap' (the handler's min_gap).
    """
    data = json.dumps({'margin': self.margin_db, 'min_gap': self.min_gap})
    self._send(200, data.encode(), 'application/json')
|
||||
|
||||
def _api_delete(self, filename: str):
|
||||
@@ -873,8 +913,9 @@ def main():
|
||||
help='HTTP port (default: 8080)')
|
||||
parser.add_argument('--host', default='0.0.0.0',
|
||||
help='Bind address (default: 0.0.0.0)')
|
||||
parser.add_argument('--threshold', type=float, default=LOUD_THRESHOLD,
|
||||
help=f'RMS loudness threshold 0–1 (default: {LOUD_THRESHOLD})')
|
||||
parser.add_argument('--margin', type=float, default=MARGIN_DB,
|
||||
help=f'dB above the background noise floor for a section '
|
||||
f'to count as loud (default: {MARGIN_DB})')
|
||||
parser.add_argument('--min-gap', type=float, default=MIN_GAP_SECONDS,
|
||||
help=f'Seconds gap for merging loud sections (default: {MIN_GAP_SECONDS})')
|
||||
parser.add_argument('--analyses-dir', default=None,
|
||||
@@ -893,7 +934,7 @@ def main():
|
||||
class Handler(_Handler):
|
||||
recordings_dir = str(rec_dir)
|
||||
analyses_dir = str(_analyses_dir)
|
||||
threshold = args.threshold
|
||||
margin_db = args.margin
|
||||
min_gap = args.min_gap
|
||||
|
||||
server = _Server((args.host, args.port), Handler)
|
||||
@@ -901,7 +942,7 @@ def main():
|
||||
print(f"ISR Web running on http://{args.host}:{args.port}/")
|
||||
print(f"Recordings dir: {rec_dir}")
|
||||
print(f"Analyses dir: {analyses_dir}")
|
||||
print(f"Loud threshold: {args.threshold}")
|
||||
print(f"Loudness margin: {args.margin} dB above noise floor")
|
||||
if not NUMPY_AVAILABLE:
|
||||
print("Note: numpy not installed — WAV RMS uses pure Python (slower); FLAC analysis unavailable")
|
||||
elif not SOUNDFILE_AVAILABLE:
|
||||
|
||||
Reference in New Issue
Block a user