The 2024 video set in all_video_info_merged.xlsx covers 63 (date, machine)
sessions — 129 video instances — that have no auto-detectable targets, so
ROI placement requires manual reference-point selection. This commit adds
the three-stage pipeline that lets a user click for an hour, then walk
away while the tracker grinds overnight:
1. build_video_inventory.py — scan /mnt/ethoscope_data/videos/ and join
against the xlsx, producing data/metadata/video_inventory.csv
2. pick_targets.py — interactive matplotlib/Tk picker. User clicks
TOP/CORNER/LEFT (the L-shape ethoscope expects); after the third
click the 6 ROI rectangles are drawn on top of the frame so geometry
can be verified before saving. Also supports marking a video
'unusable' (FOV wrong) so it's permanently skipped, frame stepping
by ±1s/±5%/midpoint, point editing in --redo mode, and a crosshair
cursor that survives matplotlib's per-motion cursor reset.
3. track_videos.py — headless batch tracker. Reads the JSON sidecars,
builds 6 ROIs from the HD-mating-arena geometry, runs MultiFlyTracker
against the merged.mp4 via MovieVirtualCamera, writes SQLite DBs to
data/tracked/. Idempotent (skips done DBs), parallel via --jobs,
subclasses MovieVirtualCamera so frames stay BGR (MultiFlyTracker
calls cvtColor(BGR2GRAY) without checking channel count).
Plus auto_detect_targets.py (fallback that runs ethoscope's auto-detector
in case any videos do have visible target dots), monitor_tracking.py
(progress + ETA from data/tracked/ ground truth, --watch for live view),
and tracking_geometry.py (single source of truth for the affine math
shared by picker and tracker).
requirements-tracking.txt pins the extra deps (opencv-python, openpyxl,
gitpython, netifaces, mysql-connector-python) — these are only needed
for the tracking pipeline, not the existing analysis notebooks.
Verified end-to-end on one of the user-picked videos: ~4000 rows/ROI in
a 120s slice, fly bounding boxes in the expected 800-2000 px² band.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
467 lines
17 KiB
Python
467 lines
17 KiB
Python
"""Interactive target picker for offline tracking (matplotlib/Tk GUI).

Loops through videos that need tracking and lets the user click 3 reference
points per video in L-shape order:

    1) TOP    target (above the corner)
    2) CORNER target (the right-angle vertex)
    3) LEFT   target (to the left of the corner)

These three points are the same reference layout used by ethoscope's
`TargetGridROIBuilder`: dst_points = [(0, -1), (0, 0), (-1, 0)] in unit
coordinates. Saving them as a JSON sidecar lets the offline tracker build the
6-ROI HD mating arena grid without needing auto-target detection.

Output JSON sidecar: data/targets/<video_basename>.json
    {
      "video_path": "/mnt/.../*.mp4",
      "frame_index": <int>,
      "reference_points": [[x0, y0], [x1, y1], [x2, y2]],
      "order": ["top", "corner", "left"],
      "picked_at": "<isoformat>"
    }

Keys (in the picker window):
    LEFT-CLICK   add a point (top → corner → left)
    r            reset clicks for current video
    d            skip this video for THIS run only (no JSON written)
    u            mark this video unusable (FOV wrong etc.); skipped forever
    . / ,        advance / rewind by 25 frames (≈ 1 s @ 25 fps)
    ] / [        advance / rewind by 5% of the video (~3 min in a 1 h video)
    #            jump to the middle of the video
    enter        save the 3 points and move on
    q / ESC      quit picker

After the 3rd click, the 6 ROI rectangles are drawn over the frame so you
can sanity-check the geometry before pressing ENTER.

With --redo, if a JSON sidecar exists, its points are pre-loaded so you can
nudge them rather than restart from scratch.

Why matplotlib instead of cv2.imshow:
    OpenCV's bundled GUI uses Qt, which needs XKeyboard + a fonts directory and
    is fragile over SSH X11-forwarding. matplotlib's TkAgg backend uses pure
    Tk/X11 and works out of the box on any DISPLAY (and gives free pan/zoom
    via the toolbar — useful for clicking small targets precisely).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import datetime as dt
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Force TkAgg BEFORE importing matplotlib. We override even if MPLBACKEND is
# already set, because the script is unusable with a non-interactive backend.
# NOTE: this must stay above the matplotlib imports below, which is why they
# carry `noqa: E402` (module-level import not at top of file).
os.environ["MPLBACKEND"] = "TkAgg"

import cv2 # noqa: E402
import matplotlib # noqa: E402
import matplotlib.pyplot as plt # noqa: E402
import numpy as np # noqa: E402
import pandas as pd # noqa: E402

# matplotlib.backend_bases exposes the cursor identifiers under different
# names depending on version: `Cursors` enum on 3.5+, lowercase `cursors`
# instance on older releases. Both have the same integer attributes.
# `_Cursors` ends up as None when neither name can be imported; pick_one()
# checks for that and falls back to setting the Tk widget cursor directly.
try:
    from matplotlib.backend_bases import Cursors as _Cursors # 3.5+
except ImportError:
    try:
        from matplotlib.backend_bases import cursors as _Cursors # older
    except ImportError:
        _Cursors = None

# Verify we ended up on an interactive backend; bail loud (with a concrete
# explanation) if not. matplotlib silently falls back to 'agg' when its
# requested backend can't load, which is hard to debug without help.
_backend = matplotlib.get_backend()
if _backend.lower() in ("agg", "headless", "template", "pdf", "svg", "ps"):
    # Collect human-readable findings about the Tk installation so the error
    # message says *why* TkAgg was unavailable, not just that it was.
    diag = []
    try:
        import tkinter as _tk
        try:
            # Creating (and immediately destroying) a root window proves that
            # Tk can actually talk to the display, not merely be imported.
            _tk.Tk().destroy()
            diag.append("tkinter import + Tk() instantiation: OK")
        except Exception as e:
            diag.append(f"tkinter imported but Tk() failed: {e!r}")
    except Exception as e:
        diag.append(f"tkinter import FAILED: {e!r}")
        diag.append(" → on Manjaro/Arch, run: sudo pacman -S tk")
    print(
        f"ERROR: matplotlib loaded the non-interactive backend {_backend!r}.\n"
        f" Expected 'TkAgg'. Diagnostic info:\n"
        f" DISPLAY = {os.environ.get('DISPLAY')!r}\n"
        f" MPLBACKEND = {os.environ.get('MPLBACKEND')!r}\n"
        f" matplotlib ver = {matplotlib.__version__}\n"
        + "\n".join(f" {d}" for d in diag),
        file=sys.stderr,
    )
    # Exit status 2: the picker cannot run at all without an interactive GUI.
    sys.exit(2)

# Project-local imports come after the backend check so a broken GUI setup is
# reported before anything else is loaded.
from config import INVENTORY_CSV, TARGETS_DIR # noqa: E402
from tracking_geometry import compute_roi_polygons # noqa: E402

# Strip default matplotlib keybindings that would conflict with ours.
# An rcParam that does not exist in the installed matplotlib version raises
# KeyError, which is ignored: there is then nothing to unbind.
for k in ("keymap.home", "keymap.save", "keymap.quit", "keymap.fullscreen",
          "keymap.pan", "keymap.zoom", "keymap.back", "keymap.forward"):
    try:
        plt.rcParams[k] = []
    except KeyError:
        pass

# Label and marker colour for each of the three clicks, indexed by click order
# (0 = TOP, 1 = CORNER, 2 = LEFT).
CLICK_LABELS = ("TOP", "CORNER", "LEFT")
CLICK_COLORS = ("red", "lime", "deepskyblue")
|
|
|
|
|
|
def grab_frame(
    video_path: Path, frame_idx: int
) -> tuple[np.ndarray, int, int] | None:
    """Return (RGB frame, actual_frame_idx, n_frames) from the video, or None.

    ``frame_idx`` is clamped so callers can step blindly: it is never allowed
    below 0, and when the container reports a positive frame count it is also
    capped at ``n_frames - 1``. The returned ``actual_frame_idx`` is the
    clamped value that was requested from OpenCV.

    Returns None when the video cannot be opened or the frame cannot be read.
    ``n_frames`` is whatever ``CAP_PROP_FRAME_COUNT`` reports and may be <= 0
    when the count is unknown.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            return None
        n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Lower bound always applies; the upper bound only when the frame
        # count is known. Without the unconditional lower bound a negative
        # index would leak into the caller's state (and the saved sidecar)
        # for videos whose frame count is reported as 0.
        frame_idx = max(0, frame_idx)
        if n > 0:
            frame_idx = min(frame_idx, n - 1)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ok, frame = cap.read()
    finally:
        # Release the decoder even if seeking/reading raises.
        cap.release()
    if not ok or frame is None:
        return None
    # OpenCV decodes to BGR; matplotlib's imshow expects RGB.
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), frame_idx, n
|
|
|
|
|
|
def pick_one(
    video_path: Path,
    frame_idx: int,
    status_prefix: str,
    initial_points: list[tuple[float, float]] | None = None,
) -> dict | None:
    """Show the picker UI for a single video; return the result dict or None.

    Args:
        video_path: video to display.
        frame_idx: frame to show first (clamped by ``grab_frame``).
        status_prefix: text shown at the start of the window title.
        initial_points: optional previously saved points (TOP, CORNER, LEFT
            order) to pre-load; at most the first three are used.

    Returns:
        None if the initial frame cannot be read. Otherwise a dict with an
        ``"action"`` key:

        * ``{"action": "save", "frame_idx": int, "points": [(x, y), ...]}``
        * ``{"action": "unusable", "frame_idx": int}``
        * ``{"action": "skip"}`` or ``{"action": "quit"}``

        Closing the window without pressing a key is reported as ``"quit"``.
    """
    grabbed = grab_frame(video_path, frame_idx)
    if grabbed is None:
        print(f" ! cannot read {video_path}")
        return None
    frame, frame_idx, n_frames = grabbed
    # Big-step size for ] / [ : 5% of total length, ~3 min in a 1h video.
    big_step = max(1, int(round(0.05 * n_frames))) if n_frames > 0 else 250

    fig, ax = plt.subplots(figsize=(14, 8))
    try:
        fig.canvas.manager.set_window_title("pick targets")
    except Exception:
        # Purely cosmetic; some backends/managers lack this method.
        pass

    # Use a crosshair cursor over the axes so it's obvious where the click
    # will land. matplotlib's toolbar resets the cursor to POINTER (arrow) on
    # every mouse-move when no tool is active, so we intercept set_cursor:
    # whenever it asks for POINTER, we substitute SELECT_REGION (crosshair).
    # Tool modes (zoom/pan) keep their native cursors.
    #
    # The canvas method is looked up defensively: the module accepts the
    # pre-3.5 `cursors` object, and canvases on those releases may not expose
    # `set_cursor` at all, in which case we use the Tk fallback below.
    _orig_set_cursor = getattr(fig.canvas, "set_cursor", None)
    if _Cursors is not None and callable(_orig_set_cursor):

        def _set_cursor_with_crosshair(cursor):
            if cursor == _Cursors.POINTER:
                cursor = _Cursors.SELECT_REGION
            return _orig_set_cursor(cursor)

        fig.canvas.set_cursor = _set_cursor_with_crosshair
        try:
            fig.canvas.set_cursor(_Cursors.SELECT_REGION)
        except Exception:
            # Best effort: a missing crosshair must not block picking.
            pass
    else:
        # Last-ditch: just set the Tk widget's cursor once and hope the
        # toolbar doesn't immediately overwrite it.
        try:
            fig.canvas.get_tk_widget().config(cursor="tcross")
        except Exception:
            pass

    img_artist = ax.imshow(frame)
    ax.set_axis_off()
    fig.tight_layout()

    state = {
        # Never hold more than three points: the drawing code indexes
        # CLICK_LABELS / CLICK_COLORS by point position.
        "points": list(initial_points)[:3] if initial_points else [],
        "action": None,  # 'save' | 'skip' | 'quit' | 'unusable'
        "frame": frame,
        "frame_idx": frame_idx,
        "drawn": [],  # artists drawn on top of the image
    }

    def update_title():
        """Show the current frame number and the next expected action."""
        nb = len(state["points"])
        nxt = (
            f"click {CLICK_LABELS[nb]}"
            if nb < 3
            else "ENTER=save | r=reset d=skip u=unusable q=quit | . , [ ] # = step frame"
        )
        ax.set_title(
            f'{status_prefix} frame {state["frame_idx"]} | {nxt}',
            fontsize=10,
        )

    def redraw_points():
        """Remove all overlay artists and redraw them from state["points"]."""
        for a in state["drawn"]:
            try:
                a.remove()
            except Exception:
                # Artist may already be detached; nothing more to do.
                pass
        state["drawn"].clear()

        points = state["points"]
        for i, (x, y) in enumerate(points):
            color = CLICK_COLORS[i]
            label = CLICK_LABELS[i]
            (cross,) = ax.plot(x, y, marker="+", color=color, markersize=22, mew=2)
            (ring,) = ax.plot(
                x, y, marker="o", color=color, markersize=22,
                fillstyle="none", mew=2,
            )
            txt = ax.text(
                x + 14, y - 14, label,
                color=color, fontsize=10, weight="bold",
            )
            state["drawn"].extend([cross, ring, txt])

        if len(points) >= 2:
            # TOP → CORNER leg of the L.
            (line1,) = ax.plot(
                [points[0][0], points[1][0]],
                [points[0][1], points[1][1]],
                color="white", linewidth=0.7, alpha=0.6,
            )
            state["drawn"].append(line1)

        if len(points) == 3:
            # CORNER → LEFT leg of the L.
            (line2,) = ax.plot(
                [points[1][0], points[2][0]],
                [points[1][1], points[2][1]],
                color="white", linewidth=0.7, alpha=0.6,
            )
            state["drawn"].append(line2)
            # ROI overlay — draw the 6 computed rectangles on top of the frame
            try:
                polys = compute_roi_polygons(points)
            except Exception as e:
                # The preview is advisory; report and carry on without it.
                polys = []
                print(f" (ROI preview failed: {e})")
            for j, poly in enumerate(polys):
                # Close the polygon by repeating the first point
                xs = list(poly[:, 0]) + [poly[0, 0]]
                ys = list(poly[:, 1]) + [poly[0, 1]]
                (line,) = ax.plot(
                    xs, ys, color="yellow", linewidth=1.5, alpha=0.9,
                )
                state["drawn"].append(line)
                cx = float(np.mean(poly[:, 0]))
                cy = float(np.mean(poly[:, 1]))
                lbl = ax.text(
                    cx, cy, str(j + 1),
                    color="yellow", fontsize=14, weight="bold",
                    ha="center", va="center",
                )
                state["drawn"].append(lbl)

        update_title()
        fig.canvas.draw_idle()

    def reload_frame(new_idx: int):
        """Display another frame of the same video, keeping the overlay."""
        grabbed = grab_frame(video_path, new_idx)
        if grabbed is None:
            # Keep showing the current frame if the requested one is unreadable.
            return
        new_frame, new_idx, _ = grabbed
        state["frame"] = new_frame
        state["frame_idx"] = new_idx
        img_artist.set_data(new_frame)
        # Keep clicked targets + ROI overlay in place across frame-stepping —
        # press 'r' to clear them explicitly.
        redraw_points()

    def on_click(event):
        """Add a point, or move the nearest one once three are placed."""
        if event.inaxes is not ax:
            return
        if event.button != 1:  # left click only
            return
        if event.xdata is None or event.ydata is None:
            return
        # Skip clicks fired while the toolbar's pan/zoom is active.
        toolbar = getattr(fig.canvas, "toolbar", None)
        if toolbar is not None and getattr(toolbar, "mode", ""):
            return
        x, y = float(event.xdata), float(event.ydata)
        if len(state["points"]) < 3:
            state["points"].append((x, y))
        else:
            # 3 points already there — replace the nearest one. Lets the user
            # nudge pre-loaded targets in --redo mode, or correct a bad click.
            dists = [(x - px) ** 2 + (y - py) ** 2 for px, py in state["points"]]
            i_nearest = min(range(3), key=dists.__getitem__)
            state["points"][i_nearest] = (x, y)
        redraw_points()

    def on_key(event):
        """Dispatch the picker's keyboard shortcuts."""
        k = event.key or ""
        if k in ("escape", "q"):
            state["action"] = "quit"
            plt.close(fig)
        elif k == "r":
            state["points"].clear()
            redraw_points()
        elif k == "d":
            state["action"] = "skip"
            plt.close(fig)
        elif k == "u":
            state["action"] = "unusable"
            plt.close(fig)
        elif k == "enter":
            # Saving is only meaningful once all three targets are placed.
            if len(state["points"]) == 3:
                state["action"] = "save"
                plt.close(fig)
        elif k == ".":
            reload_frame(state["frame_idx"] + 25)
        elif k == ",":
            reload_frame(state["frame_idx"] - 25)
        elif k == "]":
            reload_frame(state["frame_idx"] + big_step)
        elif k == "[":
            reload_frame(state["frame_idx"] - big_step)
        elif k == "#":
            if n_frames > 0:
                reload_frame(n_frames // 2)

    fig.canvas.mpl_connect("button_press_event", on_click)
    fig.canvas.mpl_connect("key_press_event", on_key)
    # Draw any pre-loaded points (and their ROI preview) before the window
    # appears; this also sets the title. Without it, --redo would show an
    # empty frame while the title already claims the points are in place.
    redraw_points()
    plt.show()  # blocks until the figure is closed

    if state["action"] == "save":
        return {
            "action": "save",
            "frame_idx": state["frame_idx"],
            "points": state["points"],
        }
    if state["action"] == "unusable":
        return {"action": "unusable", "frame_idx": state["frame_idx"]}
    if state["action"] in ("skip", "quit"):
        return {"action": state["action"]}
    # Window closed via the WM "X" button — treat as quit so the loop stops
    return {"action": "quit"}
|
|
|
|
|
|
def main() -> None:
    """Run the interactive picking session over all videos still needing targets.

    Reads the inventory CSV, keeps rows flagged ``in_xlsx`` and not
    ``already_tracked``, drops videos that already have a sidecar (unless
    ``--redo``), and shows the picker for each remaining video in
    (session_date, machine_name, session_time) order. Depending on the user's
    action a JSON sidecar is written to ``TARGETS_DIR`` (saved points or an
    "unusable" marker), the video is skipped for this run, or the loop stops.

    Exits via ``sys.exit`` with a message when the inventory CSV is missing.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--redo", action="store_true",
        help="re-pick videos that already have JSON sidecars",
    )
    parser.add_argument(
        "--frame", type=int, default=125,
        help="default frame index to display (default 125 ≈ 5 s @ 25 fps)",
    )
    parser.add_argument(
        "--limit", type=int, default=None,
        help="only process the first N videos",
    )
    args = parser.parse_args()

    if not INVENTORY_CSV.exists():
        sys.exit(
            f"Inventory not found at {INVENTORY_CSV}. "
            "Run build_video_inventory.py first."
        )

    inv = pd.read_csv(INVENTORY_CSV)
    # NOTE(review): assumes `in_xlsx` / `already_tracked` load as boolean
    # columns without missing values — confirm against build_video_inventory.
    todo = inv[inv["in_xlsx"] & ~inv["already_tracked"]].copy()
    todo = todo.sort_values(
        ["session_date", "machine_name", "session_time"]
    ).reset_index(drop=True)

    TARGETS_DIR.mkdir(parents=True, exist_ok=True)

    def sidecar_for(mp4_path: str) -> Path:
        """Return the JSON sidecar path for a video (same stem, in TARGETS_DIR)."""
        return TARGETS_DIR / (Path(mp4_path).stem + ".json")

    if not args.redo:
        todo = todo[
            ~todo["mp4_path"].apply(lambda p: sidecar_for(p).exists())
        ].reset_index(drop=True)

    if args.limit:
        todo = todo.head(args.limit)

    n = len(todo)
    if n == 0:
        print("Nothing to pick. All eligible videos already have target JSONs.")
        return

    print(
        f"Picking targets for {n} videos. "
        "Window keys: ENTER=save r=reset d=skip u=unusable q=quit "
        ".,[]=step frame | pan/zoom via toolbar"
    )
    saved = skipped = unusable = 0
    # Use the enumeration position for the progress counter rather than the
    # DataFrame index, so the display does not depend on index bookkeeping.
    for pos, (_, row) in enumerate(todo.iterrows(), start=1):
        mp4 = Path(row["mp4_path"])
        prefix = f"[{pos}/{n}] {row['machine_name']} {row['session_datetime']}"
        print(f"\n{prefix}")

        # If --redo and a JSON sidecar exists, pre-load its points (only for
        # regular saves — unusable sidecars are left as-is and shown empty).
        initial_points = None
        existing = sidecar_for(row["mp4_path"])
        if args.redo and existing.exists():
            try:
                prev = json.loads(existing.read_text())
                if not prev.get("unusable") and prev.get("reference_points"):
                    initial_points = [tuple(p) for p in prev["reference_points"]]
                    print(f" pre-loaded {len(initial_points)} previous point(s)")
            except Exception as e:
                # A corrupt sidecar must not stop the session; start empty.
                print(f" ! could not read previous sidecar: {e}")

        result = pick_one(mp4, args.frame, prefix, initial_points=initial_points)
        if result is None:
            # pick_one could not read the video (it already printed why).
            # Treat it as skipped instead of quitting: the list order is
            # deterministic and no sidecar is written, so aborting here would
            # stop every future run at this same file and block the videos
            # that come after it.
            skipped += 1
            print(" unreadable video, skipping (no JSON written).")
            continue
        if result.get("action") == "quit":
            print(" quitting picker.")
            break
        if result["action"] == "skip":
            skipped += 1
            print(" skipped (no JSON written, will be re-asked next run).")
            continue
        if result["action"] == "unusable":
            try:
                reason = input(" reason for marking unusable (Enter to skip): ").strip()
            except EOFError:
                # stdin closed / non-interactive: record no reason.
                reason = ""
            payload = {
                "video_path": str(mp4),
                "unusable": True,
                "reason": reason,
                "marked_at": dt.datetime.now().isoformat(timespec="seconds"),
            }
            out_path = sidecar_for(row["mp4_path"])
            out_path.write_text(json.dumps(payload, indent=2))
            unusable += 1
            print(f" marked unusable → {out_path.name}")
            continue
        if result["action"] == "save":
            payload = {
                "video_path": str(mp4),
                "frame_index": int(result["frame_idx"]),
                # imshow places pixel centres on integer coordinates, so the
                # pixel under the cursor is the *rounded* coordinate; plain
                # int() truncation would bias clicks by up to one pixel.
                "reference_points": [
                    [int(round(v)) for v in p] for p in result["points"]
                ],
                "order": ["top", "corner", "left"],
                "picked_at": dt.datetime.now().isoformat(timespec="seconds"),
            }
            out_path = sidecar_for(row["mp4_path"])
            out_path.write_text(json.dumps(payload, indent=2))
            saved += 1
            print(f" saved → {out_path.name}")

    # Videos not reached in this run (e.g. after quitting early).
    remaining = n - saved - skipped - unusable
    print(
        f"\nDone. saved={saved} unusable={unusable} "
        f"skipped(this run)={skipped} remaining={remaining}"
    )
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|