Add offline tracking pipeline for video backlog
The 2024 video set in all_video_info_merged.xlsx covers 63 (date, machine)
sessions — 129 video instances — that have no auto-detectable targets, so
ROI placement requires manual reference-point selection. This commit adds
the three-stage pipeline that lets a user click for an hour, then walk
away while the tracker grinds overnight:
1. build_video_inventory.py — scan /mnt/ethoscope_data/videos/ and join
against the xlsx, producing data/metadata/video_inventory.csv
2. pick_targets.py — interactive matplotlib/Tk picker. User clicks
TOP/CORNER/LEFT (the L-shape ethoscope expects); after the third
click the 6 ROI rectangles are drawn on top of the frame so geometry
can be verified before saving. Also supports marking a video
'unusable' (FOV wrong) so it's permanently skipped, frame stepping
by ±1s/±5%/midpoint, point editing in --redo mode, and a crosshair
cursor that survives matplotlib's per-motion cursor reset.
3. track_videos.py — headless batch tracker. Reads the JSON sidecars,
builds 6 ROIs from the HD-mating-arena geometry, runs MultiFlyTracker
against the merged.mp4 via MovieVirtualCamera, writes SQLite DBs to
data/tracked/. Idempotent (skips done DBs), parallel via --jobs,
subclasses MovieVirtualCamera so frames stay BGR (MultiFlyTracker
calls cvtColor(BGR2GRAY) without checking channel count).
Plus auto_detect_targets.py (fallback that runs ethoscope's auto-detector
in case any videos do have visible target dots), monitor_tracking.py
(progress + ETA from data/tracked/ ground truth, --watch for live view),
and tracking_geometry.py (single source of truth for the affine math
shared by picker and tracker).
requirements-tracking.txt pins the extra deps (opencv-python, openpyxl,
gitpython, netifaces, mysql-connector-python) — these are only needed
for the tracking pipeline, not the existing analysis notebooks.
Verified end-to-end on one of the user-picked videos: ~4000 rows/ROI in
a 120s slice, fly bounding boxes in the expected 800-2000 px² band.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
e7e4db264d
commit
e4da7691d5
11 changed files with 1296 additions and 0 deletions
467
scripts/pick_targets.py
Normal file
467
scripts/pick_targets.py
Normal file
|
|
@ -0,0 +1,467 @@
|
|||
"""Interactive target picker for offline tracking (matplotlib/Tk GUI).
|
||||
|
||||
Loops through videos that need tracking and lets the user click 3 reference
|
||||
points per video in L-shape order:
|
||||
|
||||
1) TOP target (above the corner)
|
||||
2) CORNER target (the right-angle vertex)
|
||||
3) LEFT target (to the left of the corner)
|
||||
|
||||
These three points are the same reference layout used by ethoscope's
|
||||
`TargetGridROIBuilder`: dst_points = [(0, -1), (0, 0), (-1, 0)] in unit
|
||||
coordinates. Saving them as a JSON sidecar lets the offline tracker build the
|
||||
6-ROI HD mating arena grid without needing auto-target detection.
|
||||
|
||||
Output JSON sidecar: data/targets/<video_basename>.json
|
||||
{
|
||||
"video_path": "/mnt/.../*.mp4",
|
||||
"frame_index": <int>,
|
||||
"reference_points": [[x0, y0], [x1, y1], [x2, y2]],
|
||||
"order": ["top", "corner", "left"],
|
||||
"picked_at": "<isoformat>"
|
||||
}
|
||||
|
||||
Keys (in the picker window):
|
||||
LEFT-CLICK add a point (top → corner → left)
|
||||
r reset clicks for current video
|
||||
d skip this video for THIS run only (no JSON written)
|
||||
u mark this video unusable (FOV wrong etc.); skipped forever
|
||||
. / , advance / rewind by 25 frames (≈ 1 s @ 25 fps)
|
||||
] / [ advance / rewind by 5% of the video (~3 min in a 1 h video)
|
||||
# jump to the middle of the video
|
||||
enter save the 3 points and move on
|
||||
q / ESC quit picker
|
||||
|
||||
After the 3rd click, the 6 ROI rectangles are drawn over the frame so you
|
||||
can sanity-check the geometry before pressing ENTER.
|
||||
|
||||
With --redo, if a JSON sidecar exists, its points are pre-loaded so you can
|
||||
nudge them rather than restart from scratch.
|
||||
|
||||
Why matplotlib instead of cv2.imshow:
|
||||
OpenCV's bundled GUI uses Qt, which needs XKeyboard + a fonts directory and
|
||||
is fragile over SSH X11-forwarding. matplotlib's TkAgg backend uses pure
|
||||
Tk/X11 and works out of the box on any DISPLAY (and gives free pan/zoom
|
||||
via the toolbar — useful for clicking small targets precisely).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import datetime as dt
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Force TkAgg BEFORE importing matplotlib. We override even if MPLBACKEND is
# already set, because the script is unusable with a non-interactive backend.
# This assignment has to stay above the matplotlib imports that follow, which
# is why those imports carry `noqa: E402`.
os.environ["MPLBACKEND"] = "TkAgg"
|
||||
|
||||
import cv2 # noqa: E402
|
||||
import matplotlib # noqa: E402
|
||||
import matplotlib.pyplot as plt # noqa: E402
|
||||
import numpy as np # noqa: E402
|
||||
import pandas as pd # noqa: E402
|
||||
|
||||
# matplotlib.backend_bases exposes the cursor identifiers under different
# names depending on version: `Cursors` enum on 3.5+, lowercase `cursors`
# instance on older releases. Both have the same integer attributes.
# If neither name can be imported, `_Cursors` is set to None and the picker
# falls back to configuring the Tk widget cursor directly.
try:
    from matplotlib.backend_bases import Cursors as _Cursors  # 3.5+
except ImportError:
    try:
        from matplotlib.backend_bases import cursors as _Cursors  # older
    except ImportError:
        _Cursors = None
|
||||
|
||||
# Verify we ended up on an interactive backend; bail loud (with a concrete
# explanation) if not. matplotlib silently falls back to 'agg' when its
# requested backend can't load, which is hard to debug without help.
_backend = matplotlib.get_backend()
if _backend.lower() in ("agg", "headless", "template", "pdf", "svg", "ps"):
    # Probe tkinter separately for "module missing" vs "module present but no
    # usable display", so the error message can point at the actual cause.
    diag = []
    try:
        import tkinter as _tk
        try:
            # Creating and immediately destroying a root window is enough to
            # prove that Tk can talk to the display.
            _tk.Tk().destroy()
            diag.append("tkinter import + Tk() instantiation: OK")
        except Exception as e:
            diag.append(f"tkinter imported but Tk() failed: {e!r}")
    except Exception as e:
        diag.append(f"tkinter import FAILED: {e!r}")
        diag.append(" → on Manjaro/Arch, run: sudo pacman -S tk")
    # Everything goes to stderr so stdout stays clean; exit status 2 marks
    # the environment problem.
    print(
        f"ERROR: matplotlib loaded the non-interactive backend {_backend!r}.\n"
        f" Expected 'TkAgg'. Diagnostic info:\n"
        f" DISPLAY = {os.environ.get('DISPLAY')!r}\n"
        f" MPLBACKEND = {os.environ.get('MPLBACKEND')!r}\n"
        f" matplotlib ver = {matplotlib.__version__}\n"
        + "\n".join(f" {d}" for d in diag),
        file=sys.stderr,
    )
    sys.exit(2)
|
||||
|
||||
from config import INVENTORY_CSV, TARGETS_DIR # noqa: E402
|
||||
from tracking_geometry import compute_roi_polygons # noqa: E402
|
||||
|
||||
# Strip default matplotlib keybindings that would conflict with ours.
# Each listed rcParam is set to an empty list; a KeyError (rcParam not known
# to the installed matplotlib) is ignored so the loop keeps going.
for k in ("keymap.home", "keymap.save", "keymap.quit", "keymap.fullscreen",
          "keymap.pan", "keymap.zoom", "keymap.back", "keymap.forward"):
    try:
        plt.rcParams[k] = []
    except KeyError:
        pass
|
||||
|
||||
# Label and marker colour for each reference point, indexed by click order
# (0 = TOP, 1 = CORNER, 2 = LEFT). The two tuples are read in parallel.
CLICK_LABELS = ("TOP", "CORNER", "LEFT")
CLICK_COLORS = ("red", "lime", "deepskyblue")
|
||||
|
||||
|
||||
def grab_frame(
    video_path: Path, frame_idx: int
) -> tuple[np.ndarray, int, int] | None:
    """Return (RGB frame, actual_frame_idx, n_frames) from the video, or None.

    The requested index is clamped so callers can step blindly: it is never
    negative, and when the container reports a positive frame count it is
    also capped at ``n_frames - 1``. When the frame count is not reported
    (``n_frames <= 0``) only the lower bound can be enforced.

    Returns None if the video cannot be opened or the frame cannot be read.
    The capture handle is released on every exit path.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            return None
        n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Lower bound applies even when the frame count is unknown, so a
        # "rewind" from frame 0 never seeks to a negative position.
        frame_idx = max(0, frame_idx)
        if n > 0:
            frame_idx = min(frame_idx, n - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ok, frame = cap.read()
    finally:
        cap.release()
    if not ok or frame is None:
        return None
    # OpenCV decodes to BGR; matplotlib's imshow expects RGB.
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), frame_idx, n
|
||||
|
||||
|
||||
def pick_one(
    video_path: Path,
    frame_idx: int,
    status_prefix: str,
    initial_points: list[tuple[float, float]] | None = None,
) -> dict | None:
    """Show the picker UI for a single video; return the result dict or None.

    Args:
        video_path: video to grab the display frame from.
        frame_idx: frame index to show first (clamped by ``grab_frame``).
        status_prefix: text shown at the start of the window title.
        initial_points: optional previously saved points (top, corner, left)
            to pre-load. They are drawn as soon as the window opens; at most
            the first three are used.

    Returns:
        None if the initial frame cannot be read. Otherwise a dict whose
        ``"action"`` key is one of:

        * ``"save"``     - also carries ``"frame_idx"`` and ``"points"``
        * ``"unusable"`` - also carries ``"frame_idx"``
        * ``"skip"`` / ``"quit"`` - no extra keys

        Closing the window without pressing a key is reported as ``"quit"``.
    """
    grabbed = grab_frame(video_path, frame_idx)
    if grabbed is None:
        print(f" ! cannot read {video_path}")
        return None
    frame, frame_idx, n_frames = grabbed
    # Big-step size for ] / [ : 5% of total length, ~3 min in a 1h video.
    big_step = max(1, int(round(0.05 * n_frames))) if n_frames > 0 else 250

    fig, ax = plt.subplots(figsize=(14, 8))
    try:
        fig.canvas.manager.set_window_title("pick targets")
    except Exception:
        # Cosmetic only; some backends/managers may not support it.
        pass
    # Use a crosshair cursor over the axes so it's obvious where the click
    # will land. matplotlib's toolbar resets the cursor to POINTER (arrow) on
    # every mouse-move when no tool is active, so we intercept set_cursor:
    # whenever it asks for POINTER, we substitute SELECT_REGION (crosshair).
    # Tool modes (zoom/pan) keep their native cursors.
    if _Cursors is not None:
        _orig_set_cursor = fig.canvas.set_cursor

        def _set_cursor_with_crosshair(cursor):
            if cursor == _Cursors.POINTER:
                cursor = _Cursors.SELECT_REGION
            return _orig_set_cursor(cursor)

        fig.canvas.set_cursor = _set_cursor_with_crosshair
        try:
            fig.canvas.set_cursor(_Cursors.SELECT_REGION)
        except Exception:
            pass
    else:
        # Last-ditch: just set the Tk widget's cursor once and hope the
        # toolbar doesn't immediately overwrite it.
        try:
            fig.canvas.get_tk_widget().config(cursor="tcross")
        except Exception:
            pass
    img_artist = ax.imshow(frame)
    ax.set_axis_off()
    fig.tight_layout()

    state = {
        # Cap at 3: the drawing code indexes CLICK_LABELS / CLICK_COLORS by
        # point position, and both have exactly three entries.
        "points": list(initial_points)[:3] if initial_points else [],
        "action": None,  # 'save' | 'skip' | 'quit' | 'unusable'
        "frame": frame,
        "frame_idx": frame_idx,
        "drawn": [],  # artists drawn on top of the image
    }

    def update_title():
        """Show the current frame index and the next expected action."""
        nb = len(state["points"])
        nxt = (
            f"click {CLICK_LABELS[nb]}"
            if nb < 3
            else "ENTER=save | r=reset d=skip u=unusable q=quit | . , [ ] # = step frame"
        )
        ax.set_title(
            f'{status_prefix} frame {state["frame_idx"]} | {nxt}',
            fontsize=10,
        )

    def redraw_points():
        """Remove all overlay artists and redraw them from state['points']."""
        for a in state["drawn"]:
            try:
                a.remove()
            except Exception:
                pass
        state["drawn"].clear()
        for i, (x, y) in enumerate(state["points"]):
            color = CLICK_COLORS[i]
            label = CLICK_LABELS[i]
            (cross,) = ax.plot(x, y, marker="+", color=color, markersize=22, mew=2)
            (ring,) = ax.plot(
                x, y, marker="o", color=color, markersize=22,
                fillstyle="none", mew=2,
            )
            txt = ax.text(
                x + 14, y - 14, label,
                color=color, fontsize=10, weight="bold",
            )
            state["drawn"].extend([cross, ring, txt])
        if len(state["points"]) >= 2:
            # TOP -> CORNER leg of the L.
            (line1,) = ax.plot(
                [state["points"][0][0], state["points"][1][0]],
                [state["points"][0][1], state["points"][1][1]],
                color="white", linewidth=0.7, alpha=0.6,
            )
            state["drawn"].append(line1)
        if len(state["points"]) == 3:
            # CORNER -> LEFT leg of the L.
            (line2,) = ax.plot(
                [state["points"][1][0], state["points"][2][0]],
                [state["points"][1][1], state["points"][2][1]],
                color="white", linewidth=0.7, alpha=0.6,
            )
            state["drawn"].append(line2)
            # ROI overlay — draw the 6 computed rectangles on top of the frame
            try:
                polys = compute_roi_polygons(state["points"])
            except Exception as e:
                # Preview is best-effort; the points can still be saved.
                polys = []
                print(f" (ROI preview failed: {e})")
            for j, poly in enumerate(polys):
                # Close the polygon by repeating the first point
                xs = list(poly[:, 0]) + [poly[0, 0]]
                ys = list(poly[:, 1]) + [poly[0, 1]]
                (line,) = ax.plot(
                    xs, ys, color="yellow", linewidth=1.5, alpha=0.9,
                )
                state["drawn"].append(line)
                cx = float(np.mean(poly[:, 0]))
                cy = float(np.mean(poly[:, 1]))
                lbl = ax.text(
                    cx, cy, str(j + 1),
                    color="yellow", fontsize=14, weight="bold",
                    ha="center", va="center",
                )
                state["drawn"].append(lbl)
        update_title()
        fig.canvas.draw_idle()

    def reload_frame(new_idx: int):
        """Swap the displayed frame; keep the current frame on read failure."""
        grabbed = grab_frame(video_path, new_idx)
        if grabbed is None:
            return
        new_frame, new_idx, _ = grabbed
        state["frame"] = new_frame
        state["frame_idx"] = new_idx
        img_artist.set_data(new_frame)
        # Keep clicked targets + ROI overlay in place across frame-stepping —
        # press 'r' to clear them explicitly.
        redraw_points()

    def on_click(event):
        """Add a point, or move the nearest one once three exist."""
        if event.inaxes is not ax:
            return
        if event.button != 1:  # left click only
            return
        if event.xdata is None or event.ydata is None:
            return
        # Skip clicks fired while the toolbar's pan/zoom is active.
        toolbar = getattr(fig.canvas, "toolbar", None)
        if toolbar is not None and getattr(toolbar, "mode", ""):
            return
        x, y = float(event.xdata), float(event.ydata)
        if len(state["points"]) < 3:
            state["points"].append((x, y))
        else:
            # 3 points already there — replace the nearest one. Lets the user
            # nudge pre-loaded targets in --redo mode, or correct a bad click.
            dists = [(x - px) ** 2 + (y - py) ** 2 for px, py in state["points"]]
            i_nearest = min(range(3), key=dists.__getitem__)
            state["points"][i_nearest] = (x, y)
        redraw_points()

    def on_key(event):
        """Dispatch picker key bindings (see the module docstring)."""
        k = event.key or ""
        if k in ("escape", "q"):
            state["action"] = "quit"
            plt.close(fig)
        elif k == "r":
            state["points"].clear()
            redraw_points()
        elif k == "d":
            state["action"] = "skip"
            plt.close(fig)
        elif k == "u":
            state["action"] = "unusable"
            plt.close(fig)
        elif k == "enter":
            # Saving is only allowed once all three points are placed.
            if len(state["points"]) == 3:
                state["action"] = "save"
                plt.close(fig)
        elif k == ".":
            reload_frame(state["frame_idx"] + 25)
        elif k == ",":
            reload_frame(state["frame_idx"] - 25)
        elif k == "]":
            reload_frame(state["frame_idx"] + big_step)
        elif k == "[":
            reload_frame(state["frame_idx"] - big_step)
        elif k == "#":
            if n_frames > 0:
                reload_frame(n_frames // 2)

    fig.canvas.mpl_connect("button_press_event", on_click)
    fig.canvas.mpl_connect("key_press_event", on_key)
    # Draw any pre-loaded points (and the ROI overlay when there are three)
    # before the window appears, so they are visible from the start.
    # redraw_points() also refreshes the title.
    redraw_points()
    plt.show()  # blocks until the figure is closed

    if state["action"] == "save":
        return {
            "action": "save",
            "frame_idx": state["frame_idx"],
            "points": state["points"],
        }
    if state["action"] == "unusable":
        return {"action": "unusable", "frame_idx": state["frame_idx"]}
    if state["action"] in ("skip", "quit"):
        return {"action": state["action"]}
    # Window closed via the WM "X" button — treat as quit so the loop stops
    return {"action": "quit"}
|
||||
|
||||
|
||||
def main() -> None:
    """Run the interactive picker over every inventory video that needs targets.

    Reads the inventory CSV, keeps rows that are in the xlsx and not already
    tracked, drops videos that already have a JSON sidecar (unless --redo),
    then opens the picker for each remaining video and writes one sidecar per
    saved or unusable video. A video whose frame cannot be read is counted
    as skipped and the loop moves on to the next one.

    Exits via ``sys.exit`` with a message if the inventory CSV is missing.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--redo", action="store_true",
        help="re-pick videos that already have JSON sidecars",
    )
    parser.add_argument(
        "--frame", type=int, default=125,
        help="default frame index to display (default 125 ≈ 5 s @ 25 fps)",
    )
    parser.add_argument(
        "--limit", type=int, default=None,
        help="only process the first N videos",
    )
    args = parser.parse_args()

    if not INVENTORY_CSV.exists():
        sys.exit(
            f"Inventory not found at {INVENTORY_CSV}. "
            "Run build_video_inventory.py first."
        )

    inv = pd.read_csv(INVENTORY_CSV)
    todo = inv[inv["in_xlsx"] & ~inv["already_tracked"]].copy()
    todo = todo.sort_values(
        ["session_date", "machine_name", "session_time"]
    ).reset_index(drop=True)

    TARGETS_DIR.mkdir(parents=True, exist_ok=True)

    def sidecar_for(mp4_path: str) -> Path:
        """Return the JSON sidecar path for a video (named after its stem)."""
        return TARGETS_DIR / (Path(mp4_path).stem + ".json")

    if not args.redo:
        todo = todo[
            ~todo["mp4_path"].apply(lambda p: sidecar_for(p).exists())
        ].reset_index(drop=True)

    if args.limit:
        todo = todo.head(args.limit)

    n = len(todo)
    if n == 0:
        print("Nothing to pick. All eligible videos already have target JSONs.")
        return

    print(
        f"Picking targets for {n} videos. "
        "Window keys: ENTER=save r=reset d=skip u=unusable q=quit "
        ".,[]=step frame | pan/zoom via toolbar"
    )
    saved = skipped = unusable = 0
    for i, row in todo.iterrows():
        mp4 = Path(row["mp4_path"])
        prefix = f"[{i + 1}/{n}] {row['machine_name']} {row['session_datetime']}"
        print(f"\n{prefix}")

        # If --redo and a JSON sidecar exists, pre-load its points (only for
        # regular saves — unusable sidecars are left as-is and shown empty).
        initial_points = None
        existing = sidecar_for(row["mp4_path"])
        if args.redo and existing.exists():
            try:
                prev = json.loads(existing.read_text())
                if not prev.get("unusable") and prev.get("reference_points"):
                    initial_points = [tuple(p) for p in prev["reference_points"]]
                    print(f" pre-loaded {len(initial_points)} previous point(s)")
            except Exception as e:
                # A corrupt sidecar must not block re-picking; start empty.
                print(f" ! could not read previous sidecar: {e}")

        result = pick_one(mp4, args.frame, prefix, initial_points=initial_points)
        if result is None:
            # pick_one could not read the video (it has already printed why).
            # Move on instead of aborting: the list is re-sorted the same way
            # every run and no sidecar is written, so stopping here would
            # leave every later video unreachable.
            skipped += 1
            print(" unreadable; skipped (no JSON written).")
            continue
        if result.get("action") == "quit":
            print(" quitting picker.")
            break
        if result["action"] == "skip":
            skipped += 1
            print(" skipped (no JSON written, will be re-asked next run).")
            continue
        if result["action"] == "unusable":
            try:
                reason = input(" reason for marking unusable (Enter to skip): ").strip()
            except EOFError:
                # stdin closed / not interactive: record an empty reason.
                reason = ""
            payload = {
                "video_path": str(mp4),
                "unusable": True,
                "reason": reason,
                "marked_at": dt.datetime.now().isoformat(timespec="seconds"),
            }
            out_path = sidecar_for(row["mp4_path"])
            out_path.write_text(json.dumps(payload, indent=2))
            unusable += 1
            print(f" marked unusable → {out_path.name}")
            continue
        if result["action"] == "save":
            payload = {
                "video_path": str(mp4),
                "frame_index": int(result["frame_idx"]),
                # Round to the nearest pixel; plain int() would truncate and
                # shift every saved point toward the origin.
                "reference_points": [
                    [int(round(c)) for c in p] for p in result["points"]
                ],
                "order": ["top", "corner", "left"],
                "picked_at": dt.datetime.now().isoformat(timespec="seconds"),
            }
            out_path = sidecar_for(row["mp4_path"])
            out_path.write_text(json.dumps(payload, indent=2))
            saved += 1
            print(f" saved → {out_path.name}")

    remaining = n - saved - skipped - unusable
    print(
        f"\nDone. saved={saved} unusable={unusable} "
        f"skipped(this run)={skipped} remaining={remaining}"
    )
|
||||
|
||||
|
||||
# Script entry point: run the picker only when executed directly, not when
# the module is imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue