Unify analysis pipeline around the TSV; move tracked DBs out of cloud sync
- Tracked DBs now live at /mnt/data/projects/cupido/tracked/ (outside
ownCloud, to avoid sync conflicts and bandwidth churn). TRACKING_OUTPUT_DIR
in config.py points there; the docker-compose for ethoscope-lab
mounts it world-readable for JupyterHub users.
- New scripts/export_video_db_index.py joins all_video_info_merged.xlsx
with the video inventory and the on-disk DBs, producing a TSV that has
one row per fly/ROI plus training/testing video and DB paths. Handles
approximate xlsx times, cross-day training/testing, the 12 AM/PM
ambiguity, and date typos.
- scripts/load_roi_data.py rewritten as a TSV-driven loader returning a
single DataFrame with session and metadata columns. calculate_distances
and the two flies_analysis notebooks migrated to use it; downstream
trained/naive splits remain available via simple equality filters.
- Metadata vocabulary canonicalized: the variants {naïve, niave, untrained,
test} each resolve to one of the two canonical values {trained, naive}.
Normalization happens at the TSV-export boundary (idempotent); the xlsx and
the 2025-07-15 legacy CSV were edited in place to remove the worst variants.
- scripts/monitor_tracking.py rate calculation fixed: with N parallel
workers, completions arrive in bursts; the old formula divided by burst
width and reported nonsense rates. Now uses a 6 h window denominator.
- scripts/track_videos.py: BGRMovieCamera now retries
cv2.VideoCapture.read() on transient NFS hiccups, and a new post-tracking
completeness gate (≥ 90 % of expected duration via MAX(t) across all 6 ROIs)
deletes silent partial DBs.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
e4da7691d5
commit
f60a9d0530
13 changed files with 569 additions and 237 deletions
|
|
@ -3,7 +3,7 @@
|
|||
Reads target JSONs produced by `pick_targets.py`, builds the 6 ROIs of the
|
||||
HD mating arena from the L-shape reference points, runs ethoscope's
|
||||
`MultiFlyTracker` against the merged.mp4 file via `MovieVirtualCamera`, and
|
||||
writes a SQLite DB to `data/tracked/<video_basename>_tracking.db`.
|
||||
writes a SQLite DB to `TRACKING_OUTPUT_DIR/<video_basename>_tracking.db`.
|
||||
|
||||
Idempotent: skips videos whose tracking DB already exists (unless --redo).
|
||||
|
||||
|
|
@ -58,17 +58,46 @@ def track_one(json_path: Path, output_dir: Path, max_duration: float | None,
|
|||
from ethoscope.io.sqlite import SQLiteResultWriter
|
||||
from ethoscope.trackers.multi_fly_tracker import MultiFlyTracker
|
||||
|
||||
class BGRMovieCamera(MovieVirtualCamera):
|
||||
"""MovieVirtualCamera variant that keeps BGR frames.
|
||||
import time as _time
|
||||
|
||||
MultiFlyTracker calls cv2.cvtColor(img, COLOR_BGR2GRAY) without checking
|
||||
whether img is already grayscale, so we must feed it 3-channel input.
|
||||
class BGRMovieCamera(MovieVirtualCamera):
|
||||
"""MovieVirtualCamera that keeps BGR frames AND retries on transient
|
||||
read failures.
|
||||
|
||||
Two reasons for the override:
|
||||
|
||||
1. MultiFlyTracker calls cv2.cvtColor(img, COLOR_BGR2GRAY) without
|
||||
checking whether img is already grayscale, so we must feed it
|
||||
3-channel input.
|
||||
|
||||
2. cv2.VideoCapture.read() can return False on transient I/O hiccups
|
||||
(NFS contention when 8 workers pull big mp4s in parallel) without
|
||||
the file actually being at EOF. A naive "False -> StopIteration"
|
||||
handling makes the tracker silently exit mid-video and write a
|
||||
short, lying DB. We retry a few times and only treat persistent
|
||||
failures within the *interior* of the video as real EOF.
|
||||
"""
|
||||
|
||||
_retry_count = 5
|
||||
_retry_backoff_s = 0.25
|
||||
_eof_safety_frames = 50 # near end-of-file, treat False as legitimate
|
||||
|
||||
def _next_image(self):
|
||||
ret, frame = self.capture.read()
|
||||
if not ret or frame is None:
|
||||
return None
|
||||
return frame # BGR, untouched
|
||||
for attempt in range(self._retry_count):
|
||||
ret, frame = self.capture.read()
|
||||
if ret and frame is not None:
|
||||
return frame # BGR, untouched
|
||||
# If we're near the genuine end of the file, accept it.
|
||||
if (
|
||||
self._has_end_of_file
|
||||
and self._frame_idx >= self._total_n_frames - self._eof_safety_frames
|
||||
):
|
||||
return None
|
||||
# Otherwise, this is a suspected transient hiccup — back off
|
||||
# and try again. The capture is still open; cv2 will pick up
|
||||
# the next decoded frame.
|
||||
_time.sleep(self._retry_backoff_s)
|
||||
return None # truly persistent failure
|
||||
|
||||
payload = json.loads(json_path.read_text())
|
||||
if payload.get("unusable"):
|
||||
|
|
@ -146,6 +175,42 @@ def track_one(json_path: Path, output_dir: Path, max_duration: float | None,
|
|||
|
||||
if not out_db.exists():
|
||||
return "error", "tracking finished but DB was not created"
|
||||
|
||||
# Post-tracking sanity check: did we cover most of the source video?
|
||||
# If not (cv2 retry exhausted, codec corruption, etc.), reject the DB so
|
||||
# it doesn't get cached as "done" — better an explicit failure than a
|
||||
# silent partial write.
|
||||
expected_ms = (cam._total_n_frames / 25.0) * 1000.0
|
||||
if max_duration is not None:
|
||||
expected_ms = min(expected_ms, max_duration * 1000.0)
|
||||
completeness_threshold = 0.90 # require ≥ 90 % of expected duration
|
||||
|
||||
# Use MAX(t) across all ROIs — a single ROI can run dry early if its fly
|
||||
# stops moving, so the latest detection anywhere in the arena is the
|
||||
# better signal of how far the iterator actually got.
|
||||
import sqlite3 as _sqlite3
|
||||
try:
|
||||
_con = _sqlite3.connect(f"file:{out_db}?mode=ro", uri=True)
|
||||
t_max = 0
|
||||
for _i in range(1, 7):
|
||||
_v = _con.execute(f"SELECT MAX(t) FROM ROI_{_i}").fetchone()[0]
|
||||
if _v and _v > t_max:
|
||||
t_max = _v
|
||||
_con.close()
|
||||
except Exception:
|
||||
t_max = 0
|
||||
|
||||
if expected_ms > 0 and t_max < expected_ms * completeness_threshold:
|
||||
out_db.unlink()
|
||||
for sidecar in (str(out_db) + "-wal", str(out_db) + "-shm"):
|
||||
Path(sidecar).unlink(missing_ok=True)
|
||||
ratio = t_max / expected_ms if expected_ms else 0
|
||||
return (
|
||||
"error",
|
||||
f"short output: t_max={t_max} ms vs expected {int(expected_ms)} ms "
|
||||
f"({ratio*100:.0f}%); DB removed",
|
||||
)
|
||||
|
||||
return "ok", str(out_db)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue