From b8f23a48842c995e8d548aebadbf061963ff544b Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Tue, 12 May 2026 09:42:44 +0100 Subject: [PATCH 1/2] Annotations: complete barrier_opening.csv for all 110 sessions Output of the web picker (RB 64, GG 46). 103 sessions have a usable opening time; 7 are flagged unusable (6 of those in the 2024-10-04 batch). 56 sessions had only the lower row release; 52 had all six barriers open; 2 had only the upper row. --- data/metadata/barrier_opening.csv | 91 +++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/data/metadata/barrier_opening.csv b/data/metadata/barrier_opening.csv index e657e91..55632f5 100644 --- a/data/metadata/barrier_opening.csv +++ b/data/metadata/barrier_opening.csv @@ -1,17 +1,13 @@ machine_name,session_date,session_time,opening_s,trim_first_s,bad_rois,analyst,notes ETHOSCOPE_076,2025-07-15,16-03-10,52.0,0,,GG,hand-annotated 2025-07-15 batch -ETHOSCOPE_076,2025-07-15,16-31-34,94.0,69,,GG,first ~66s misframed (arena partly out of frame) ETHOSCOPE_145,2025-07-15,16-03-27,42.0,0,,GG,hand-annotated 2025-07-15 batch -ETHOSCOPE_145,2025-07-15,16-31-41,89.0,69,,GG,first ~60s misframed (arena partly out of frame) ETHOSCOPE_268,2025-07-15,16-32-05,75.0,0,,GG,hand-annotated 2025-07-15 batch -ETHOSCOPE_076,2024-10-21,11-07-54,346.8,0,,GG, ETHOSCOPE_181,2024-10-21,11-08-57,287.3,0,,GG, ETHOSCOPE_225,2024-10-21,11-09-12,277.9,0,"1,3,5",GG, ETHOSCOPE_082,2024-10-21,11-07-46,365.3,0,"1,3,5",GG, ETHOSCOPE_140,2024-10-21,11-06-58,423.9,0,"1,3,5",GG, ETHOSCOPE_083,2024-10-21,11-09-07,306.4,0,"1,3,5",GG, ETHOSCOPE_145,2024-10-21,11-08-35,341.0,0,"1,3,5",GG, -ETHOSCOPE_076,2024-09-17,10-32-10,1875.8,0,"1,3,5",GG, ETHOSCOPE_082,2024-09-17,10-53-16,646.8,0,"1,3,5",GG, ETHOSCOPE_140,2024-09-17,11-03-05,86.2,0,"1,3,5",GG, ETHOSCOPE_181,2024-09-17,10-33-12,1824.3,0,"1,3,5",GG, @@ -26,3 +22,90 @@ ETHOSCOPE_083,2024-10-01,11-07-41,560.3,0,"1,3,5",GG, 
ETHOSCOPE_113,2024-10-01,11-07-48,565.7,0,"2,4,6",GG, ETHOSCOPE_140,2024-10-01,11-04-07,755.0,0,"1,3,5",GG, ETHOSCOPE_167,2024-10-01,11-07-55,564.5,0,"1,3,5",GG, +ETHOSCOPE_076,2024-09-18,10-15-53,175.5,0,"1,3,5",GG, +ETHOSCOPE_169,2024-10-01,11-09-49,437.9,0,"1,3,5",GG, +ETHOSCOPE_181,2024-10-01,11-04-44,694.0,0,"1,3,5",GG, +ETHOSCOPE_225,2024-10-01,11-03-45,758.7,0,"1,3,5",GG, +ETHOSCOPE_282,2024-10-01,11-09-43,436.3,0,"1,3,5",GG, +ETHOSCOPE_076,2024-10-02,10-44-41,1547.5,0,"1,3,5",GG, +ETHOSCOPE_082,2024-10-02,10-44-59,1540.5,0,"1,3,5",GG, +ETHOSCOPE_083,2024-10-02,10-45-19,1530.1,0,"1,3,5",GG, +ETHOSCOPE_140,2024-10-02,10-45-13,1531.6,0,"1,3,5",GG, +ETHOSCOPE_167,2024-10-02,10-45-37,1521.3,0,"1,3,5",GG, +ETHOSCOPE_169,2024-10-02,10-45-31,1522.3,0,"1,3,5",GG, +ETHOSCOPE_181,2024-10-02,10-44-47,1544.2,0,"1,3,5",GG, +ETHOSCOPE_225,2024-10-02,10-44-53,1540.4,0,"1,3,5",GG, +ETHOSCOPE_282,2024-10-02,10-45-25,1525.4,0,"1,3,5",GG, +ETHOSCOPE_067,2024-10-15,10-51-44,590.9,0,"1,3,5",GG, +ETHOSCOPE_076,2024-10-15,10-50-12,545.5,0,"1,3,5",GG, +ETHOSCOPE_082,2024-10-15,10-50-33,566.9,0,"1,3,5",GG, +ETHOSCOPE_083,2024-10-15,10-58-55,110.3,0,"1,3,5",GG, +ETHOSCOPE_113,2024-10-15,10-51-36,623.4,0,"2,4,6",GG, +ETHOSCOPE_139,2024-10-15,10-51-15,592.9,0,"1,3,5",RB, +ETHOSCOPE_140,2024-10-15,10-50-45,567.2,0,"1,3,5",RB, +ETHOSCOPE_145,2024-10-15,10-51-09,597.4,0,"1,3,5",RB, +ETHOSCOPE_169,2024-10-15,10-51-28,601.0,0,"1,3,5",GG, +ETHOSCOPE_181,2024-10-15,10-50-19,545.5,0,"1,3,5",RB, +ETHOSCOPE_225,2024-10-15,10-50-25,546.5,0,"1,3,5",RB, +ETHOSCOPE_076,2024-10-21,11-07-54,346.2,0,"1,3,5",RB, +ETHOSCOPE_139,2024-10-21,11-07-55,385.6,0,"1,3,5",RB, +ETHOSCOPE_169,2024-10-21,11-09-30,293.2,0,"1,3,5",RB, +ETHOSCOPE_268,2024-10-21,11-09-59,271.2,0,"1,3,5",RB, +ETHOSCOPE_139,2025-07-15,16-31-52,84.8,0,,RB, +ETHOSCOPE_076,2024-09-17,13-10-59,84.1,0,,RB, +ETHOSCOPE_082,2024-09-17,13-10-54,154.7,0,,RB, +ETHOSCOPE_140,2024-09-17,13-10-45,183.2,0,,RB, 
+ETHOSCOPE_181,2024-09-17,13-11-03,107.1,0,,RB, +ETHOSCOPE_225,2024-09-17,13-10-51,134.6,0,,RB, +ETHOSCOPE_076,2024-09-18,12-34-16,,0,,RB,unusable +ETHOSCOPE_082,2024-09-18,12-34-12,133.4,0,,RB, +ETHOSCOPE_140,2024-09-18,12-34-04,130.1,0,,RB, +ETHOSCOPE_181,2024-09-18,12-34-20,94.3,0,,RB, +ETHOSCOPE_225,2024-09-18,12-34-08,113.4,0,,RB, +ETHOSCOPE_076,2024-10-01,13-27-24,94.8,0,,RB, +ETHOSCOPE_082,2024-10-01,13-27-35,131.8,0,,RB, +ETHOSCOPE_083,2024-10-01,13-27-06,227.1,0,,RB, +ETHOSCOPE_113,2024-10-01,13-26-57,293.6,0,,RB, +ETHOSCOPE_140,2024-10-01,13-27-44,147.5,0,,RB, +ETHOSCOPE_167,2024-10-01,13-27-03,301.2,0,,RB, +ETHOSCOPE_169,2024-10-01,13-27-24,251.9,0,,RB, +ETHOSCOPE_181,2024-10-01,13-27-27,101.8,0,,RB, +ETHOSCOPE_225,2024-10-01,13-27-32,111.2,0,,RB, +ETHOSCOPE_282,2024-10-01,13-27-14,236.0,0,,RB, +ETHOSCOPE_076,2024-10-02,14-23-32,63.6,0,,RB, +ETHOSCOPE_082,2024-10-02,14-23-44,71.4,0,,RB, +ETHOSCOPE_083,2024-10-02,14-23-54,75.7,0,,RB, +ETHOSCOPE_140,2024-10-02,14-23-51,73.5,0,,RB, +ETHOSCOPE_167,2024-10-02,14-24-05,84.3,0,,RB, +ETHOSCOPE_169,2024-10-02,14-24-02,79.5,0,,RB, +ETHOSCOPE_181,2024-10-02,14-23-36,67.0,0,,RB, +ETHOSCOPE_225,2024-10-02,14-23-40,69.1,0,,RB, +ETHOSCOPE_282,2024-10-02,14-23-58,78.3,0,,RB, +ETHOSCOPE_076,2024-10-04,16-11-56,,0,,RB,unusable +ETHOSCOPE_181,2024-10-04,16-12-10,,0,,RB,unusable +ETHOSCOPE_225,2024-10-04,16-12-21,,0,,RB,unusable +ETHOSCOPE_067,2024-10-15,13-16-18,206.9,0,,RB, +ETHOSCOPE_082,2024-10-15,13-15-36,172.8,0,,RB, +ETHOSCOPE_083,2024-10-15,13-17-37,90.5,0,,RB, +ETHOSCOPE_113,2024-10-15,13-16-24,212.2,0,,RB, +ETHOSCOPE_139,2024-10-15,13-16-07,203.4,0,,RB, +ETHOSCOPE_140,2024-10-15,13-15-50,176.2,0,,RB, +ETHOSCOPE_145,2024-10-15,13-16-01,201.9,0,,RB, +ETHOSCOPE_169,2024-10-15,13-16-13,202.8,0,,RB, +ETHOSCOPE_181,2024-10-15,13-15-23,166.0,0,,RB, +ETHOSCOPE_225,2024-10-15,13-15-30,171.3,0,,RB, +ETHOSCOPE_076,2024-10-21,13-25-18,442.6,0,"1,3,5",RB, +ETHOSCOPE_082,2024-10-21,13-28-01,296.7,0,"1,3,5",RB, 
+ETHOSCOPE_083,2024-10-21,13-30-11,183.4,0,"1,3,5",RB, +ETHOSCOPE_139,2024-10-21,13-29-41,220.4,0,"1,3,5",RB, +ETHOSCOPE_140,2024-10-21,13-28-03,301.4,0,"1,3,5",RB, +ETHOSCOPE_145,2024-10-21,13-28-17,299.3,0,"1,3,5",RB, +ETHOSCOPE_169,2024-10-21,13-28-31,295.5,0,"1,3,5",RB, +ETHOSCOPE_225,2024-10-21,13-30-10,166.3,0,"1,3,5",RB, +ETHOSCOPE_268,2024-10-21,13-29-14,257.1,0,"1,3,5",RB, +ETHOSCOPE_076,2025-07-15,16-31-34,96.0,0,,RB, +ETHOSCOPE_145,2025-07-15,16-31-41,90.5,0,,RB, +ETHOSCOPE_076,2024-09-17,10-32-10,1871.3,0,"1,3,5",RB, +ETHOSCOPE_082,2024-10-04,16-12-30,,0,,GG,unusable +ETHOSCOPE_086,2024-10-04,16-18-12,,0,,GG,unusable +ETHOSCOPE_140,2024-10-04,16-18-22,,0,,GG,unusable From 28b7a227c0f722adb6edee1d22cb0059f343830d Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Tue, 12 May 2026 09:45:59 +0100 Subject: [PATCH 2/2] load_roi_data: filter on barrier_opening.csv and stamp opening_s For every session (training and testing alike), the loader now looks up the corresponding row in barrier_opening.csv and: - drops the read if the ROI is in bad_rois (barrier never opened for that fly so its tracking has no biological meaning) - drops the read if the session is flagged unusable - stamps the session's opening_s onto every sample so downstream code can compute t_from_opening = t - opening_s Tested against ETHOSCOPE_082 2024-09-17: training (bad_rois=1,3,5) correctly drops ROIs 1/3/5; testing keeps all six; opening_s differs between sessions as expected (646.8 vs 154.7). Opt out with apply_barrier_filter=False if you need raw data. --- scripts/config.py | 5 +++ scripts/load_roi_data.py | 92 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/scripts/config.py b/scripts/config.py index 18e89ef..9b72a29 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -26,6 +26,11 @@ VIDEO_INFO_TSV = DATA_VOLUME / "all_video_info_merged.tsv" # A small CSV listing every video file we know about (built locally). 
INVENTORY_CSV = DATA_METADATA / "video_inventory.csv" +# Hand-annotated barrier-opening times (output of the picker app). One +# row per session (training or testing); columns: machine_name, session_date, +# session_time, opening_s, trim_first_s, bad_rois, analyst, notes. +BARRIER_OPENING_CSV = DATA_METADATA / "barrier_opening.csv" + # Where the ethoscope source tree is checked out (used by track_videos.py # and auto_detect_targets.py — host-side scripts that import ethoscope # from a local clone rather than from pip). Default assumes the standard diff --git a/scripts/load_roi_data.py b/scripts/load_roi_data.py index ee2263c..309709a 100644 --- a/scripts/load_roi_data.py +++ b/scripts/load_roi_data.py @@ -8,12 +8,48 @@ The TSV is the single source of truth for what data exists and how it maps to flies and conditions. """ +import re import sqlite3 from pathlib import Path import pandas as pd -from config import VIDEO_INFO_TSV +from config import BARRIER_OPENING_CSV, VIDEO_INFO_TSV + +# DB filenames start with `YYYY-MM-DD_HH-MM-SS__...` — pull the +# session date/time out so we can join against barrier_opening.csv. +_DB_TIMESTAMP_RE = re.compile(r"(\d{4}-\d{2}-\d{2})_(\d{2}-\d{2}-\d{2})_") + + +def _session_key(db_path: str) -> tuple[str, str] | None: + """Extract (session_date, session_time) from a tracking DB filename.""" + if not isinstance(db_path, str) or not db_path: + return None + m = _DB_TIMESTAMP_RE.search(Path(db_path).name) + return (m.group(1), m.group(2)) if m else None + + +def _load_barrier_lookup(csv_path: Path) -> dict[tuple[str, str, str], dict]: + """Build (machine, session_date, session_time) → opening/bad_rois lookup. + + Returns an empty dict if the CSV is missing — callers should treat + that as "no per-session annotations available" rather than an error. 
+ """ + if not Path(csv_path).exists(): + return {} + df = pd.read_csv(csv_path) + lookup: dict[tuple[str, str, str], dict] = {} + for r in df.itertuples(index=False): + bad = set() + if isinstance(r.bad_rois, str) and r.bad_rois.strip(): + bad = {int(x) for x in r.bad_rois.split(",") if x.strip()} + lookup[(r.machine_name, r.session_date, r.session_time)] = { + "opening_s": float(r.opening_s) if pd.notna(r.opening_s) else float("nan"), + "trim_first_s": float(r.trim_first_s) if pd.notna(r.trim_first_s) else 0.0, + "bad_rois": bad, + "unusable": pd.isna(r.opening_s), + } + return lookup # Reason: prefer the explicit Jupyter-widget tqdm when available (it # updates reliably in JupyterLab, where text \r-style bars sometimes @@ -66,6 +102,7 @@ def _open_ro(db_path: str, cache: dict) -> sqlite3.Connection | None: def load_roi_data( meta: pd.DataFrame | None = None, progress: bool = True, + apply_barrier_filter: bool = True, ) -> pd.DataFrame: """Load ROI tracking data joined with experimental metadata. @@ -75,6 +112,14 @@ def load_roi_data( (``"training"`` or ``"testing"``). Rows with empty DB paths (unusable videos, or videos that didn't pass the completeness gate) are skipped. + Both training and testing reads are filtered against + ``barrier_opening.csv`` (the picker annotates both video types): + flies whose ROI never released (listed in ``bad_rois``) and entire + sessions flagged unusable are dropped. The session's ``opening_s`` + is stamped onto its samples so downstream code can compute + ``t_from_opening = t - opening_s``. Sessions missing from the CSV + are still loaded, but with ``opening_s = NaN``. + Args: meta: optional DataFrame with the same schema as ``all_video_info_merged.tsv``. Pass a filtered slice to load a @@ -82,11 +127,17 @@ def load_roi_data( Defaults to the full TSV. progress: show a tqdm progress bar (one tick per fly/ROI row). Defaults to True. Set False for silent batch jobs. 
+ apply_barrier_filter: if True (default), drop unusable sessions + and flies whose barrier never opened, and stamp ``opening_s`` + onto every sample. Set False to load raw data without any + barrier filtering; ``opening_s`` is then NaN on every sample. Returns: DataFrame with columns ``id, t, x, y, w, h, phi, is_inferred, - has_interacted, session, `` — one row per tracking - sample. Empty if nothing could be loaded. + has_interacted, session, ROI, opening_s, <metadata columns>`` — + one row per tracking sample. ``opening_s`` is NaN for sessions + not covered by ``barrier_opening.csv``. Empty if nothing could + be loaded. """ if meta is None: meta = pd.read_csv(VIDEO_INFO_TSV, sep="\t") @@ -97,8 +148,12 @@ def load_roi_data( if "include" in meta.columns: meta = meta[meta["include"].astype(bool)] + barrier_lookup = _load_barrier_lookup(BARRIER_OPENING_CSV) if apply_barrier_filter else {} + db_cache: dict = {} chunks: list[pd.DataFrame] = [] + n_skipped_bad_roi = 0 + n_skipped_unusable = 0 n_rows = len(meta) if progress: @@ -125,7 +180,28 @@ def load_roi_data( for row in meta.itertuples(index=False): for session in ("training", "testing"): pbar.set_postfix_str(f"{row.machine_name} ROI {int(row.roi)} {session}") - conn = _open_ro(getattr(row, f"{session}_db_path"), db_cache) + db_path = getattr(row, f"{session}_db_path") + + # The picker annotates barrier_opening per video, and both + # the training and testing videos have their own entries. + # Apply the same per-session filter to both. 
+ opening_s = float("nan") + if barrier_lookup: + key = _session_key(db_path) + if key is not None: + bo = barrier_lookup.get((row.machine_name, key[0], key[1])) + if bo is not None: + if bo["unusable"]: + n_skipped_unusable += 1 + pbar.update(1) + continue + if int(row.roi) in bo["bad_rois"]: + n_skipped_bad_roi += 1 + pbar.update(1) + continue + opening_s = bo["opening_s"] + + conn = _open_ro(db_path, db_cache) if conn is None: pbar.update(1) continue @@ -141,6 +217,7 @@ def load_roi_data( continue df["session"] = session df["ROI"] = int(row.roi) + df["opening_s"] = opening_s for col in _META_COLS: df[col] = getattr(row, col) chunks.append(df) @@ -148,6 +225,13 @@ def load_roi_data( pbar.close() + if apply_barrier_filter and (n_skipped_bad_roi or n_skipped_unusable): + print( + f"Barrier filter: dropped {n_skipped_bad_roi} ROI loads (barrier " + f"never opened) and {n_skipped_unusable} unusable sessions.", + flush=True, + ) + for conn in db_cache.values(): if conn is not None: conn.close()