Add offline tracking pipeline for video backlog
The 2024 video set in all_video_info_merged.xlsx covers 63 (date, machine)
sessions — 129 video instances — that have no auto-detectable targets, so
ROI placement requires manual reference-point selection. This commit adds
the three-stage pipeline that lets a user click for an hour, then walk
away while the tracker grinds overnight:
1. build_video_inventory.py — scan /mnt/ethoscope_data/videos/ and join
against the xlsx, producing data/metadata/video_inventory.csv
2. pick_targets.py — interactive matplotlib/Tk picker. User clicks
TOP/CORNER/LEFT (the L-shape ethoscope expects); after the third
click the 6 ROI rectangles are drawn on top of the frame so geometry
can be verified before saving. Also supports marking a video
'unusable' (FOV wrong) so it's permanently skipped, frame stepping
by ±1s/±5%/midpoint, point editing in --redo mode, and a crosshair
cursor that survives matplotlib's per-motion cursor reset.
3. track_videos.py — headless batch tracker. Reads the JSON sidecars,
builds 6 ROIs from the HD-mating-arena geometry, runs MultiFlyTracker
against the merged.mp4 via MovieVirtualCamera, writes SQLite DBs to
data/tracked/. Idempotent (skips done DBs), parallel via --jobs,
subclasses MovieVirtualCamera so frames stay BGR (MultiFlyTracker
calls cvtColor(BGR2GRAY) without checking channel count).
Plus auto_detect_targets.py (fallback that runs ethoscope's auto-detector
in case any videos do have visible target dots), monitor_tracking.py
(progress + ETA from data/tracked/ ground truth, --watch for live view),
and tracking_geometry.py (single source of truth for the affine math
shared by picker and tracker).
requirements-tracking.txt pins the extra deps (opencv-python, openpyxl,
gitpython, netifaces, mysql-connector-python) — these are only needed
for the tracking pipeline, not the existing analysis notebooks.
Verified end-to-end on one of the user-picked videos: ~4000 rows/ROI in
a 120s slice, fly bounding boxes in the expected 800-2000 px² band.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
e7e4db264d
commit
e4da7691d5
11 changed files with 1296 additions and 0 deletions
155
scripts/monitor_tracking.py
Normal file
155
scripts/monitor_tracking.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
"""Live progress + ETA for the offline tracker batch.
|
||||
|
||||
Counts ground-truth (DBs on disk) rather than parsing log lines, so it works
|
||||
whether the batch is running fresh or was resumed after a crash. Errors are
|
||||
parsed out of any *.log files in data/logs/.
|
||||
|
||||
Usage:
|
||||
python monitor_tracking.py # one snapshot, exit
|
||||
python monitor_tracking.py --watch # refresh every 10 s
|
||||
python monitor_tracking.py --watch 30 # refresh every 30 s
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from config import LOGS_DIR, TARGETS_DIR, TRACKING_OUTPUT_DIR
|
||||
|
||||
|
||||
def count_target_jsons() -> tuple[int, int, list[str]]:
|
||||
"""Return (n_pickable, n_unusable, unusable_video_stems)."""
|
||||
pickable = 0
|
||||
unusable_stems: list[str] = []
|
||||
for j in TARGETS_DIR.glob("*.json"):
|
||||
try:
|
||||
d = json.loads(j.read_text())
|
||||
except Exception:
|
||||
continue
|
||||
if d.get("unusable"):
|
||||
unusable_stems.append(j.stem)
|
||||
elif d.get("reference_points"):
|
||||
pickable += 1
|
||||
return pickable, len(unusable_stems), unusable_stems
|
||||
|
||||
|
||||
def count_tracked_dbs() -> tuple[int, datetime | None, str | None]:
|
||||
"""Return (n_dbs, mtime_of_newest, name_of_newest)."""
|
||||
dbs = list(TRACKING_OUTPUT_DIR.glob("*_tracking.db"))
|
||||
if not dbs:
|
||||
return 0, None, None
|
||||
newest = max(dbs, key=lambda p: p.stat().st_mtime)
|
||||
return len(dbs), datetime.fromtimestamp(newest.stat().st_mtime), newest.stem
|
||||
|
||||
|
||||
def parse_recent_errors(log_dir: Path, tail_lines: int = 5000) -> list[str]:
|
||||
"""Scan the most recent *.log file for lines reporting errors."""
|
||||
if not log_dir.exists():
|
||||
return []
|
||||
logs = sorted(log_dir.glob("*.log"), key=lambda p: p.stat().st_mtime)
|
||||
if not logs:
|
||||
return []
|
||||
latest = logs[-1]
|
||||
try:
|
||||
with latest.open() as f:
|
||||
tail = f.readlines()[-tail_lines:]
|
||||
except Exception:
|
||||
return []
|
||||
out = []
|
||||
for line in tail:
|
||||
if re.search(r":\s*error\b", line) or " error: " in line.lower():
|
||||
out.append(line.rstrip())
|
||||
return out
|
||||
|
||||
|
||||
def db_completion_history() -> list[float]:
|
||||
"""Return mtimes of all tracking DBs, sorted ascending. Used for rate."""
|
||||
return sorted(p.stat().st_mtime for p in TRACKING_OUTPUT_DIR.glob("*_tracking.db"))
|
||||
|
||||
|
||||
def fmt_duration(seconds: float) -> str:
|
||||
if seconds < 60:
|
||||
return f"{int(seconds)} s"
|
||||
if seconds < 3600:
|
||||
return f"{int(seconds // 60)} min"
|
||||
h = int(seconds // 3600)
|
||||
m = int((seconds % 3600) // 60)
|
||||
return f"{h} h {m} min"
|
||||
|
||||
|
||||
def snapshot() -> str:
|
||||
pickable, unusable, _ = count_target_jsons()
|
||||
tracked, last_mtime, last_name = count_tracked_dbs()
|
||||
history = db_completion_history()
|
||||
errors = parse_recent_errors(LOGS_DIR)
|
||||
|
||||
lines = [f"tracking progress @ {datetime.now():%Y-%m-%d %H:%M:%S}"]
|
||||
lines.append(f" pickable JSONs: {pickable}")
|
||||
lines.append(f" unusable JSONs: {unusable} (skipped by tracker)")
|
||||
pct = (tracked / pickable * 100) if pickable else 0
|
||||
lines.append(
|
||||
f" DBs on disk: {tracked} / {pickable} ({pct:.0f}%)"
|
||||
)
|
||||
lines.append(f" errors in log: {len(errors)}")
|
||||
|
||||
# Rate from the last 10 completions, when available.
|
||||
if len(history) >= 2:
|
||||
window = history[-min(10, len(history)) :]
|
||||
span = window[-1] - window[0]
|
||||
if span > 0:
|
||||
rate_per_hour = (len(window) - 1) / span * 3600
|
||||
lines.append(f" rate (last {len(window) - 1}): {rate_per_hour:.1f} videos/hour")
|
||||
remaining = max(0, pickable - tracked)
|
||||
if rate_per_hour > 0 and remaining > 0:
|
||||
eta_sec = remaining * 3600 / rate_per_hour
|
||||
eta_at = datetime.now() + timedelta(seconds=eta_sec)
|
||||
lines.append(
|
||||
f" ETA remaining: {fmt_duration(eta_sec)} "
|
||||
f"(done by {eta_at:%H:%M %a})"
|
||||
)
|
||||
|
||||
if last_mtime is not None and last_name is not None:
|
||||
ago = (datetime.now() - last_mtime).total_seconds()
|
||||
lines.append(
|
||||
f" most recent DB: {last_name[:60]}... ({fmt_duration(ago)} ago)"
|
||||
)
|
||||
|
||||
if errors:
|
||||
lines.append("")
|
||||
lines.append(f" recent errors ({min(5, len(errors))} of {len(errors)}):")
|
||||
for e in errors[-5:]:
|
||||
lines.append(f" {e[:120]}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
"--watch", nargs="?", type=int, const=10, default=None,
|
||||
help="refresh every N seconds (default 10 if flag given without value)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.watch is None:
|
||||
print(snapshot())
|
||||
return
|
||||
|
||||
try:
|
||||
while True:
|
||||
# Clear screen and reprint
|
||||
print("\033[2J\033[H", end="")
|
||||
print(snapshot())
|
||||
print(f"\n(refreshing every {args.watch}s — Ctrl-C to exit)")
|
||||
time.sleep(args.watch)
|
||||
except KeyboardInterrupt:
|
||||
print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue