Replace fine thumbnail grid with mpv/vlc/ffplay handoff

Watching the video play turns out to be much faster than scanning a
thumbnail grid. The coarse 10-min thumbnail grid still does rough
localisation; after picking, a video player launches at coarse_t-30s
paused with frame-accurate scrubbing controls. The analyst reads the
exact opening time off the player's OSD and types it into the
terminal prompt (default = the coarse pick, so a single Enter keeps
the coarse pick if the player is hard to use).

Backend auto-detects mpv > vlc > ffplay; gracefully degrades to "use
the coarse pick" if no player is installed.

New `bad_rois` column captures non-opening sub-arenas (partial-opening
videos like the 2024-10-21 set where only the lower half opens). The
prompt validates entries are integers in 1..6.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-01 12:20:09 +01:00
parent 125f187187
commit 2b75daa783

View file

@ -1,25 +1,25 @@
"""Interactive picker for barrier-opening time, frame-by-frame thumbnail style. """Interactive picker for barrier-opening time.
For each video that doesn't yet have a barrier-opening annotation, show a
10x6 grid of timestamped thumbnails extracted directly from the .mp4.
The analyst clicks the thumbnail at (or just after) the moment the
barrier opens; the picker then refines with a second tighter grid for
sub-second precision.
Two-stage flow per video: Two-stage flow per video:
1. Coarse grid: 60 thumbs spanning the 5-min search window (5 s spacing). 1. Coarse: 10×6 thumbnail grid spanning 10 min (~10 s spacing) lets
Click pick that 5 s slot. you click the rough moment where the barrier opens.
2. Fine grid: 60 thumbs spanning ±6 s of the coarse pick (0.2 s spacing). 2. Fine: launches mpv at the coarse pick, paused with on-screen
Click final answer with 0.2 s precision. fractional time. You scrub to the exact frame; on close, type the
time you saw on the OSD into the terminal prompt. Default is the
coarse pick.
After the time is set, the picker also prompts for non-opening ROIs
(comma-separated list, e.g. "1,2,3"). Useful for the partial-opening
videos where only some sub-arenas open. Saved to the `bad_rois` column.
Output: data/metadata/barrier_opening.csv with columns Output: data/metadata/barrier_opening.csv with columns
machine_name, session_date, session_time, opening_s, trim_first_s, notes machine_name, session_date, session_time, opening_s, trim_first_s,
bad_rois, notes
Window keys: Coarse-grid keys:
click select thumbnail at that timestamp click pick that timestamp
n skip this video for THIS run n skip this video for THIS run
u mark unusable (opening_s = NaN) u mark unusable (opening_s = NaN)
b back to coarse grid (after seeing fine grid)
q / ESC save+quit q / ESC save+quit
Usage: Usage:
@ -27,13 +27,16 @@ Usage:
python pick_barrier.py --redo python pick_barrier.py --redo
python pick_barrier.py --limit 10 python pick_barrier.py --limit 10
python pick_barrier.py --db /path/to/specific_tracking.db python pick_barrier.py --db /path/to/specific_tracking.db
python pick_barrier.py --no-player # skip the video-player refinement step
""" """
from __future__ import annotations from __future__ import annotations
import argparse import argparse
import re import re
import shutil
import sqlite3 import sqlite3
import subprocess
import sys import sys
from pathlib import Path from pathlib import Path
@ -50,7 +53,7 @@ from detect_barrier_opening import (
OUT_CSV = DATA_METADATA / "barrier_opening.csv" OUT_CSV = DATA_METADATA / "barrier_opening.csv"
OUT_COLS = ["machine_name", "session_date", "session_time", OUT_COLS = ["machine_name", "session_date", "session_time",
"opening_s", "trim_first_s", "notes"] "opening_s", "trim_first_s", "bad_rois", "notes"]
DB_NAME_RE = re.compile( DB_NAME_RE = re.compile(
r"^(\d{4}-\d{2}-\d{2})_(\d{2}-\d{2}-\d{2})_([0-9a-f]{32})__" r"^(\d{4}-\d{2}-\d{2})_(\d{2}-\d{2}-\d{2})_([0-9a-f]{32})__"
@ -213,6 +216,96 @@ def show_thumbnail_grid(
return state["time"], state["action"] or "skip" return state["time"], state["action"] or "skip"
def parse_time_input(s: str) -> float | None:
    """Parse a user-typed timestamp into seconds.

    Accepted forms are plain seconds ('290'), m:ss ('4:50'),
    m:ss.ss ('4:50.40') and h:mm:ss ('0:04:50').

    Returns the time in seconds, or None when the text is empty, is not
    one of the accepted forms, or contains a negative or non-finite
    component (float() alone would accept '-5', 'nan' and 'inf', none of
    which is a meaningful position in a video).
    """
    s = s.strip()
    if not s:
        return None
    fields = s.split(":")
    if len(fields) > 3:
        return None
    try:
        values = [float(field) for field in fields]
    except ValueError:
        return None
    # The chained comparison is False for NaN, so a single test rejects
    # NaN, +/-inf and negative components.
    if not all(0.0 <= v < float("inf") for v in values):
        return None
    if len(values) == 3:
        return values[0] * 3600 + values[1] * 60 + values[2]
    if len(values) == 2:
        return values[0] * 60 + values[1]
    return values[0]
def play_video(video_path: Path, start_t: float) -> str | None:
    """Open *video_path* in an external player, 30 s before *start_t*.

    The first player found on PATH is used, in the order mpv, vlc,
    ffplay. mpv and vlc are started paused (``--pause`` and
    ``--start-paused`` respectively); ffplay is launched without a pause
    flag. The call blocks until the player process exits. The user is
    expected to scrub to the exact frame, read the timestamp off the
    player, close it, and type the time at the terminal prompt.

    Args:
        video_path: video file handed to the player.
        start_t: rough opening time in seconds; the seek position is
            ``start_t - 30``, clamped at 0.

    Returns:
        The name of the player that was run, or None when no player is
        installed or the process could not be started. The player's exit
        status is not inspected.
    """
    seek = max(0.0, start_t - 30.0)
    target = str(video_path)
    candidates = (
        ("mpv", ["mpv", "--no-resume-playback", "--osd-level=3", "--osd-fractions",
                 "--pause", f"--start={seek:.1f}", target]),
        # --play-and-pause only pauses on the last frame; --start-paused is
        # what makes VLC open paused, matching the mpv behaviour.
        ("vlc", ["vlc", "--no-video-title-show", f"--start-time={seek:.1f}",
                 "--start-paused", "--play-and-pause", target]),
        ("ffplay", ["ffplay", "-hide_banner", "-loglevel", "error",
                    "-ss", f"{seek:.1f}", target]),
    )
    for name, cmd in candidates:
        if shutil.which(name):
            break
    else:
        print(" ! no video player found (tried mpv, vlc, ffplay)")
        return None
    print(f" launching {name} at {seek:.1f}s — pause on the opening frame, "
          "read the time off the player, then close it.")
    try:
        # Player chatter is discarded so it does not bury the terminal prompt.
        subprocess.run(cmd, check=False,
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError as e:
        print(f" {name} launch failed: {e}")
        return None
    return name
def prompt_opening_time(default_s: float) -> float | str:
    """Ask on the terminal for the exact opening time.

    An empty reply keeps *default_s*. The replies n/skip, u/unusable and
    q/quit (case-insensitive) return 'skip', 'unusable' and 'quit'.
    Anything else is handed to parse_time_input; if that yields None the
    question is asked again.

    Returns the chosen time as a float, or one of the three keywords.
    """
    keywords = {
        "n": "skip", "skip": "skip",
        "u": "unusable", "unusable": "unusable",
        "q": "quit", "quit": "quit",
    }
    question = f" opening time [enter={default_s:.1f}s · n=skip · u=unusable · q=quit]: "
    while True:
        reply = input(question).strip()
        if reply == "":
            return default_s
        keyword = keywords.get(reply.lower())
        if keyword is not None:
            return keyword
        seconds = parse_time_input(reply)
        if seconds is not None:
            return seconds
        print(" ? enter seconds, m:ss, or m:ss.ss")
def prompt_bad_rois() -> str:
    """Ask on the terminal which ROIs did not open.

    The reply is a comma-separated list of integers; empty items are
    ignored and duplicates collapse. The question is repeated until every
    item is an integer in 1..6. An empty reply means "none".

    Returns the ROI numbers sorted ascending and joined with commas, or
    an empty string when there are none.
    """
    while True:
        reply = input(" non-opening ROIs (e.g. '1,2,3') [enter=none]: ").strip()
        if reply == "":
            return ""
        tokens = [tok.strip() for tok in reply.split(",")]
        try:
            numbers = {int(tok) for tok in tokens if tok}
        except ValueError:
            print(" ? enter integers separated by commas")
            continue
        # Only ROI numbers 1..6 are accepted.
        if any(n < 1 or n > 6 for n in numbers):
            print(" ? ROI numbers must be 1..6")
            continue
        return ",".join(map(str, sorted(numbers)))
def pick_for_video( def pick_for_video(
video_path: Path, video_path: Path,
db_path: Path | None, db_path: Path | None,
@ -220,16 +313,17 @@ def pick_for_video(
session_date: str, session_date: str,
session_time: str, session_time: str,
coarse_span_s: float = DEFAULT_COARSE_SPAN_S, coarse_span_s: float = DEFAULT_COARSE_SPAN_S,
use_player: bool = True,
) -> dict | str | None: ) -> dict | str | None:
"""Run the two-stage thumbnail picker. Return dict, 'skip', or 'quit'.""" """Run the picker. Return result dict, 'skip', or 'quit'."""
auto_t = auto_suggest(db_path) if db_path else None auto_t = auto_suggest(db_path) if db_path else None
print(f" auto-suggest: {f'{auto_t:.1f}s' if auto_t else '(none)'}") print(f" auto-suggest: {f'{auto_t:.1f}s' if auto_t else '(none)'}")
# Stage 1: coarse grid centred on auto-suggest, or middle of span. # Stage 1: coarse thumbnail grid for rough localisation.
coarse_center = auto_t if auto_t is not None else coarse_span_s / 2 coarse_center = auto_t if auto_t is not None else coarse_span_s / 2
title_coarse = (f"COARSE {machine_name} {session_date} {session_time} " title_coarse = (f"COARSE {machine_name} {session_date} {session_time} "
f"· spanning {coarse_span_s/60:.0f} min") f"· spanning {coarse_span_s/60:.0f} min "
while True: f"· click ≈ where the barrier opens")
coarse_t, action = show_thumbnail_grid( coarse_t, action = show_thumbnail_grid(
video_path, coarse_center, coarse_span_s, title_coarse video_path, coarse_center, coarse_span_s, title_coarse
) )
@ -239,40 +333,37 @@ def pick_for_video(
return { return {
"machine_name": machine_name, "session_date": session_date, "machine_name": machine_name, "session_date": session_date,
"session_time": session_time, "opening_s": np.nan, "session_time": session_time, "opening_s": np.nan,
"trim_first_s": 0, "notes": "unusable", "trim_first_s": 0, "bad_rois": "", "notes": "unusable",
} }
if action == "quit": if action == "quit":
return "quit" return "quit"
if action == "back": if action != "pick" or coarse_t is None:
continue # already at the top stage; redraw
if action == "pick" and coarse_t is not None:
break
# Stage 2: fine grid around the coarse pick.
title_fine = (f"FINE {machine_name} {session_date} {session_time} "
f"· ±{FINE_SPAN_S/2:.0f} s around {coarse_t:.1f} s")
while True:
fine_t, action = show_thumbnail_grid(
video_path, coarse_t, FINE_SPAN_S, title_fine
)
if action == "back":
return pick_for_video(video_path, db_path, machine_name,
session_date, session_time)
if action == "skip":
return "skip" return "skip"
if action == "unusable":
# Stage 2: hand off to a video player for frame-accurate refinement.
if use_player:
played = play_video(video_path, coarse_t)
if played is None:
print(" ! using coarse pick as the answer.")
fine_t = prompt_opening_time(default_s=coarse_t)
if fine_t == "skip":
return "skip"
if fine_t == "unusable":
return { return {
"machine_name": machine_name, "session_date": session_date, "machine_name": machine_name, "session_date": session_date,
"session_time": session_time, "opening_s": np.nan, "session_time": session_time, "opening_s": np.nan,
"trim_first_s": 0, "notes": "unusable", "trim_first_s": 0, "bad_rois": "", "notes": "unusable",
} }
if action == "quit": if fine_t == "quit":
return "quit" return "quit"
if action == "pick" and fine_t is not None:
bad_rois = prompt_bad_rois()
return { return {
"machine_name": machine_name, "session_date": session_date, "machine_name": machine_name, "session_date": session_date,
"session_time": session_time, "opening_s": round(fine_t, 1), "session_time": session_time, "opening_s": round(float(fine_t), 1),
"trim_first_s": 0, "notes": "", "trim_first_s": 0, "bad_rois": bad_rois, "notes": "",
} }
@ -299,6 +390,8 @@ def main() -> None:
help="annotate this specific tracking DB only") help="annotate this specific tracking DB only")
parser.add_argument("--coarse-span", type=float, default=DEFAULT_COARSE_SPAN_S, parser.add_argument("--coarse-span", type=float, default=DEFAULT_COARSE_SPAN_S,
help=f"coarse-grid time span in seconds (default {DEFAULT_COARSE_SPAN_S:.0f})") help=f"coarse-grid time span in seconds (default {DEFAULT_COARSE_SPAN_S:.0f})")
parser.add_argument("--no-player", action="store_true",
help="skip the video-player refinement step (use the coarse pick directly)")
args = parser.parse_args() args = parser.parse_args()
OUT_CSV.parent.mkdir(parents=True, exist_ok=True) OUT_CSV.parent.mkdir(parents=True, exist_ok=True)
@ -306,6 +399,10 @@ def main() -> None:
out = pd.read_csv(OUT_CSV) out = pd.read_csv(OUT_CSV)
else: else:
out = pd.DataFrame(columns=OUT_COLS) out = pd.DataFrame(columns=OUT_COLS)
# Reason: backfill bad_rois column for older CSVs without it.
for col in OUT_COLS:
if col not in out.columns:
out[col] = ""
done = set(zip(out["machine_name"], out["session_date"], out["session_time"])) done = set(zip(out["machine_name"], out["session_date"], out["session_time"]))
if not INVENTORY_CSV.exists(): if not INVENTORY_CSV.exists():
@ -363,7 +460,8 @@ def main() -> None:
print(f"\n{prefix}") print(f"\n{prefix}")
result = pick_for_video(video, db, machine_name, session_date, session_time, result = pick_for_video(video, db, machine_name, session_date, session_time,
coarse_span_s=args.coarse_span) coarse_span_s=args.coarse_span,
use_player=not args.no_player)
if result is None or result == "skip": if result is None or result == "skip":
skipped += 1 skipped += 1