Add offline tracking pipeline for video backlog

The 2024 video set in all_video_info_merged.xlsx covers 63 (date, machine)
sessions — 129 video instances — that have no auto-detectable targets, so
ROI placement requires manual reference-point selection. This commit adds
the three-stage pipeline that lets a user click for an hour, then walk
away while the tracker grinds overnight:

  1. build_video_inventory.py — scan /mnt/ethoscope_data/videos/ and join
     against the xlsx, producing data/metadata/video_inventory.csv

  2. pick_targets.py — interactive matplotlib/Tk picker. User clicks
     TOP/CORNER/LEFT (the L-shape ethoscope expects); after the third
     click the 6 ROI rectangles are drawn on top of the frame so geometry
     can be verified before saving. Also supports marking a video
     'unusable' (FOV wrong) so it's permanently skipped, frame stepping
     by ±1s/±5%/midpoint, point editing in --redo mode, and a crosshair
     cursor that survives matplotlib's per-motion cursor reset.

  3. track_videos.py — headless batch tracker. Reads the JSON sidecars,
     builds 6 ROIs from the HD-mating-arena geometry, runs MultiFlyTracker
     against the merged.mp4 via MovieVirtualCamera, writes SQLite DBs to
     data/tracked/. Idempotent (skips done DBs), parallel via --jobs,
     subclasses MovieVirtualCamera so frames stay BGR (MultiFlyTracker
     calls cvtColor(BGR2GRAY) without checking channel count).

Plus auto_detect_targets.py (fallback that runs ethoscope's auto-detector
in case any videos do have visible target dots), monitor_tracking.py
(progress + ETA from data/tracked/ ground truth, --watch for live view),
and tracking_geometry.py (single source of truth for the affine math
shared by picker and tracker).

requirements-tracking.txt pins the extra deps (opencv-python, openpyxl,
gitpython, netifaces, mysql-connector-python) — these are only needed
for the tracking pipeline, not the existing analysis notebooks.

Verified end-to-end on one of the user-picked videos: ~4000 rows/ROI in
a 120s slice, fly bounding boxes in the expected 800-2000 px² band.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-04-27 17:25:26 +01:00
parent e7e4db264d
commit e4da7691d5
11 changed files with 1296 additions and 0 deletions

155
scripts/monitor_tracking.py Normal file
View file

@ -0,0 +1,155 @@
"""Live progress + ETA for the offline tracker batch.
Counts ground-truth (DBs on disk) rather than parsing log lines, so it works
whether the batch is running fresh or was resumed after a crash. Errors are
parsed out of any *.log files in data/logs/.
Usage:
python monitor_tracking.py # one snapshot, exit
python monitor_tracking.py --watch # refresh every 10 s
python monitor_tracking.py --watch 30 # refresh every 30 s
"""
from __future__ import annotations
import argparse
import json
import re
import time
from datetime import datetime, timedelta
from pathlib import Path
from config import LOGS_DIR, TARGETS_DIR, TRACKING_OUTPUT_DIR
def count_target_jsons() -> tuple[int, int, list[str]]:
"""Return (n_pickable, n_unusable, unusable_video_stems)."""
pickable = 0
unusable_stems: list[str] = []
for j in TARGETS_DIR.glob("*.json"):
try:
d = json.loads(j.read_text())
except Exception:
continue
if d.get("unusable"):
unusable_stems.append(j.stem)
elif d.get("reference_points"):
pickable += 1
return pickable, len(unusable_stems), unusable_stems
def count_tracked_dbs() -> tuple[int, datetime | None, str | None]:
"""Return (n_dbs, mtime_of_newest, name_of_newest)."""
dbs = list(TRACKING_OUTPUT_DIR.glob("*_tracking.db"))
if not dbs:
return 0, None, None
newest = max(dbs, key=lambda p: p.stat().st_mtime)
return len(dbs), datetime.fromtimestamp(newest.stat().st_mtime), newest.stem
def parse_recent_errors(log_dir: Path, tail_lines: int = 5000) -> list[str]:
"""Scan the most recent *.log file for lines reporting errors."""
if not log_dir.exists():
return []
logs = sorted(log_dir.glob("*.log"), key=lambda p: p.stat().st_mtime)
if not logs:
return []
latest = logs[-1]
try:
with latest.open() as f:
tail = f.readlines()[-tail_lines:]
except Exception:
return []
out = []
for line in tail:
if re.search(r":\s*error\b", line) or " error: " in line.lower():
out.append(line.rstrip())
return out
def db_completion_history() -> list[float]:
"""Return mtimes of all tracking DBs, sorted ascending. Used for rate."""
return sorted(p.stat().st_mtime for p in TRACKING_OUTPUT_DIR.glob("*_tracking.db"))
def fmt_duration(seconds: float) -> str:
if seconds < 60:
return f"{int(seconds)} s"
if seconds < 3600:
return f"{int(seconds // 60)} min"
h = int(seconds // 3600)
m = int((seconds % 3600) // 60)
return f"{h} h {m} min"
def snapshot() -> str:
pickable, unusable, _ = count_target_jsons()
tracked, last_mtime, last_name = count_tracked_dbs()
history = db_completion_history()
errors = parse_recent_errors(LOGS_DIR)
lines = [f"tracking progress @ {datetime.now():%Y-%m-%d %H:%M:%S}"]
lines.append(f" pickable JSONs: {pickable}")
lines.append(f" unusable JSONs: {unusable} (skipped by tracker)")
pct = (tracked / pickable * 100) if pickable else 0
lines.append(
f" DBs on disk: {tracked} / {pickable} ({pct:.0f}%)"
)
lines.append(f" errors in log: {len(errors)}")
# Rate from the last 10 completions, when available.
if len(history) >= 2:
window = history[-min(10, len(history)) :]
span = window[-1] - window[0]
if span > 0:
rate_per_hour = (len(window) - 1) / span * 3600
lines.append(f" rate (last {len(window) - 1}): {rate_per_hour:.1f} videos/hour")
remaining = max(0, pickable - tracked)
if rate_per_hour > 0 and remaining > 0:
eta_sec = remaining * 3600 / rate_per_hour
eta_at = datetime.now() + timedelta(seconds=eta_sec)
lines.append(
f" ETA remaining: {fmt_duration(eta_sec)} "
f"(done by {eta_at:%H:%M %a})"
)
if last_mtime is not None and last_name is not None:
ago = (datetime.now() - last_mtime).total_seconds()
lines.append(
f" most recent DB: {last_name[:60]}... ({fmt_duration(ago)} ago)"
)
if errors:
lines.append("")
lines.append(f" recent errors ({min(5, len(errors))} of {len(errors)}):")
for e in errors[-5:]:
lines.append(f" {e[:120]}")
return "\n".join(lines)
def main() -> None:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--watch", nargs="?", type=int, const=10, default=None,
help="refresh every N seconds (default 10 if flag given without value)",
)
args = parser.parse_args()
if args.watch is None:
print(snapshot())
return
try:
while True:
# Clear screen and reprint
print("\033[2J\033[H", end="")
print(snapshot())
print(f"\n(refreshing every {args.watch}s — Ctrl-C to exit)")
time.sleep(args.watch)
except KeyboardInterrupt:
print()
if __name__ == "__main__":
main()