diff --git a/data/metadata/barrier_opening.csv b/data/metadata/barrier_opening.csv index 55632f5..e657e91 100644 --- a/data/metadata/barrier_opening.csv +++ b/data/metadata/barrier_opening.csv @@ -1,13 +1,17 @@ machine_name,session_date,session_time,opening_s,trim_first_s,bad_rois,analyst,notes ETHOSCOPE_076,2025-07-15,16-03-10,52.0,0,,GG,hand-annotated 2025-07-15 batch +ETHOSCOPE_076,2025-07-15,16-31-34,94.0,69,,GG,first ~66s misframed (arena partly out of frame) ETHOSCOPE_145,2025-07-15,16-03-27,42.0,0,,GG,hand-annotated 2025-07-15 batch +ETHOSCOPE_145,2025-07-15,16-31-41,89.0,69,,GG,first ~60s misframed (arena partly out of frame) ETHOSCOPE_268,2025-07-15,16-32-05,75.0,0,,GG,hand-annotated 2025-07-15 batch +ETHOSCOPE_076,2024-10-21,11-07-54,346.8,0,,GG, ETHOSCOPE_181,2024-10-21,11-08-57,287.3,0,,GG, ETHOSCOPE_225,2024-10-21,11-09-12,277.9,0,"1,3,5",GG, ETHOSCOPE_082,2024-10-21,11-07-46,365.3,0,"1,3,5",GG, ETHOSCOPE_140,2024-10-21,11-06-58,423.9,0,"1,3,5",GG, ETHOSCOPE_083,2024-10-21,11-09-07,306.4,0,"1,3,5",GG, ETHOSCOPE_145,2024-10-21,11-08-35,341.0,0,"1,3,5",GG, +ETHOSCOPE_076,2024-09-17,10-32-10,1875.8,0,"1,3,5",GG, ETHOSCOPE_082,2024-09-17,10-53-16,646.8,0,"1,3,5",GG, ETHOSCOPE_140,2024-09-17,11-03-05,86.2,0,"1,3,5",GG, ETHOSCOPE_181,2024-09-17,10-33-12,1824.3,0,"1,3,5",GG, @@ -22,90 +26,3 @@ ETHOSCOPE_083,2024-10-01,11-07-41,560.3,0,"1,3,5",GG, ETHOSCOPE_113,2024-10-01,11-07-48,565.7,0,"2,4,6",GG, ETHOSCOPE_140,2024-10-01,11-04-07,755.0,0,"1,3,5",GG, ETHOSCOPE_167,2024-10-01,11-07-55,564.5,0,"1,3,5",GG, -ETHOSCOPE_076,2024-09-18,10-15-53,175.5,0,"1,3,5",GG, -ETHOSCOPE_169,2024-10-01,11-09-49,437.9,0,"1,3,5",GG, -ETHOSCOPE_181,2024-10-01,11-04-44,694.0,0,"1,3,5",GG, -ETHOSCOPE_225,2024-10-01,11-03-45,758.7,0,"1,3,5",GG, -ETHOSCOPE_282,2024-10-01,11-09-43,436.3,0,"1,3,5",GG, -ETHOSCOPE_076,2024-10-02,10-44-41,1547.5,0,"1,3,5",GG, -ETHOSCOPE_082,2024-10-02,10-44-59,1540.5,0,"1,3,5",GG, -ETHOSCOPE_083,2024-10-02,10-45-19,1530.1,0,"1,3,5",GG, -ETHOSCOPE_140,2024-10-02,10-45-13,1531.6,0,"1,3,5",GG, -ETHOSCOPE_167,2024-10-02,10-45-37,1521.3,0,"1,3,5",GG, -ETHOSCOPE_169,2024-10-02,10-45-31,1522.3,0,"1,3,5",GG, -ETHOSCOPE_181,2024-10-02,10-44-47,1544.2,0,"1,3,5",GG, -ETHOSCOPE_225,2024-10-02,10-44-53,1540.4,0,"1,3,5",GG, -ETHOSCOPE_282,2024-10-02,10-45-25,1525.4,0,"1,3,5",GG, -ETHOSCOPE_067,2024-10-15,10-51-44,590.9,0,"1,3,5",GG, -ETHOSCOPE_076,2024-10-15,10-50-12,545.5,0,"1,3,5",GG, -ETHOSCOPE_082,2024-10-15,10-50-33,566.9,0,"1,3,5",GG, -ETHOSCOPE_083,2024-10-15,10-58-55,110.3,0,"1,3,5",GG, -ETHOSCOPE_113,2024-10-15,10-51-36,623.4,0,"2,4,6",GG, -ETHOSCOPE_139,2024-10-15,10-51-15,592.9,0,"1,3,5",RB, -ETHOSCOPE_140,2024-10-15,10-50-45,567.2,0,"1,3,5",RB, -ETHOSCOPE_145,2024-10-15,10-51-09,597.4,0,"1,3,5",RB, -ETHOSCOPE_169,2024-10-15,10-51-28,601.0,0,"1,3,5",GG, -ETHOSCOPE_181,2024-10-15,10-50-19,545.5,0,"1,3,5",RB, -ETHOSCOPE_225,2024-10-15,10-50-25,546.5,0,"1,3,5",RB, -ETHOSCOPE_076,2024-10-21,11-07-54,346.2,0,"1,3,5",RB, -ETHOSCOPE_139,2024-10-21,11-07-55,385.6,0,"1,3,5",RB, -ETHOSCOPE_169,2024-10-21,11-09-30,293.2,0,"1,3,5",RB, -ETHOSCOPE_268,2024-10-21,11-09-59,271.2,0,"1,3,5",RB, -ETHOSCOPE_139,2025-07-15,16-31-52,84.8,0,,RB, -ETHOSCOPE_076,2024-09-17,13-10-59,84.1,0,,RB, -ETHOSCOPE_082,2024-09-17,13-10-54,154.7,0,,RB, -ETHOSCOPE_140,2024-09-17,13-10-45,183.2,0,,RB, -ETHOSCOPE_181,2024-09-17,13-11-03,107.1,0,,RB, -ETHOSCOPE_225,2024-09-17,13-10-51,134.6,0,,RB, -ETHOSCOPE_076,2024-09-18,12-34-16,,0,,RB,unusable -ETHOSCOPE_082,2024-09-18,12-34-12,133.4,0,,RB, -ETHOSCOPE_140,2024-09-18,12-34-04,130.1,0,,RB, -ETHOSCOPE_181,2024-09-18,12-34-20,94.3,0,,RB, -ETHOSCOPE_225,2024-09-18,12-34-08,113.4,0,,RB, -ETHOSCOPE_076,2024-10-01,13-27-24,94.8,0,,RB, -ETHOSCOPE_082,2024-10-01,13-27-35,131.8,0,,RB, -ETHOSCOPE_083,2024-10-01,13-27-06,227.1,0,,RB, -ETHOSCOPE_113,2024-10-01,13-26-57,293.6,0,,RB, -ETHOSCOPE_140,2024-10-01,13-27-44,147.5,0,,RB, -ETHOSCOPE_167,2024-10-01,13-27-03,301.2,0,,RB, -ETHOSCOPE_169,2024-10-01,13-27-24,251.9,0,,RB, -ETHOSCOPE_181,2024-10-01,13-27-27,101.8,0,,RB, -ETHOSCOPE_225,2024-10-01,13-27-32,111.2,0,,RB, -ETHOSCOPE_282,2024-10-01,13-27-14,236.0,0,,RB, -ETHOSCOPE_076,2024-10-02,14-23-32,63.6,0,,RB, -ETHOSCOPE_082,2024-10-02,14-23-44,71.4,0,,RB, -ETHOSCOPE_083,2024-10-02,14-23-54,75.7,0,,RB, -ETHOSCOPE_140,2024-10-02,14-23-51,73.5,0,,RB, -ETHOSCOPE_167,2024-10-02,14-24-05,84.3,0,,RB, -ETHOSCOPE_169,2024-10-02,14-24-02,79.5,0,,RB, -ETHOSCOPE_181,2024-10-02,14-23-36,67.0,0,,RB, -ETHOSCOPE_225,2024-10-02,14-23-40,69.1,0,,RB, -ETHOSCOPE_282,2024-10-02,14-23-58,78.3,0,,RB, -ETHOSCOPE_076,2024-10-04,16-11-56,,0,,RB,unusable -ETHOSCOPE_181,2024-10-04,16-12-10,,0,,RB,unusable -ETHOSCOPE_225,2024-10-04,16-12-21,,0,,RB,unusable -ETHOSCOPE_067,2024-10-15,13-16-18,206.9,0,,RB, -ETHOSCOPE_082,2024-10-15,13-15-36,172.8,0,,RB, -ETHOSCOPE_083,2024-10-15,13-17-37,90.5,0,,RB, -ETHOSCOPE_113,2024-10-15,13-16-24,212.2,0,,RB, -ETHOSCOPE_139,2024-10-15,13-16-07,203.4,0,,RB, -ETHOSCOPE_140,2024-10-15,13-15-50,176.2,0,,RB, -ETHOSCOPE_145,2024-10-15,13-16-01,201.9,0,,RB, -ETHOSCOPE_169,2024-10-15,13-16-13,202.8,0,,RB, -ETHOSCOPE_181,2024-10-15,13-15-23,166.0,0,,RB, -ETHOSCOPE_225,2024-10-15,13-15-30,171.3,0,,RB, -ETHOSCOPE_076,2024-10-21,13-25-18,442.6,0,"1,3,5",RB, -ETHOSCOPE_082,2024-10-21,13-28-01,296.7,0,"1,3,5",RB, -ETHOSCOPE_083,2024-10-21,13-30-11,183.4,0,"1,3,5",RB, -ETHOSCOPE_139,2024-10-21,13-29-41,220.4,0,"1,3,5",RB, -ETHOSCOPE_140,2024-10-21,13-28-03,301.4,0,"1,3,5",RB, -ETHOSCOPE_145,2024-10-21,13-28-17,299.3,0,"1,3,5",RB, -ETHOSCOPE_169,2024-10-21,13-28-31,295.5,0,"1,3,5",RB, -ETHOSCOPE_225,2024-10-21,13-30-10,166.3,0,"1,3,5",RB, -ETHOSCOPE_268,2024-10-21,13-29-14,257.1,0,"1,3,5",RB, -ETHOSCOPE_076,2025-07-15,16-31-34,96.0,0,,RB, -ETHOSCOPE_145,2025-07-15,16-31-41,90.5,0,,RB, -ETHOSCOPE_076,2024-09-17,10-32-10,1871.3,0,"1,3,5",RB, -ETHOSCOPE_082,2024-10-04,16-12-30,,0,,GG,unusable -ETHOSCOPE_086,2024-10-04,16-18-12,,0,,GG,unusable -ETHOSCOPE_140,2024-10-04,16-18-22,,0,,GG,unusable diff --git a/scripts/config.py b/scripts/config.py index 9b72a29..18e89ef 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -26,11 +26,6 @@ VIDEO_INFO_TSV = DATA_VOLUME / "all_video_info_merged.tsv" # A small CSV listing every video file we know about (built locally). INVENTORY_CSV = DATA_METADATA / "video_inventory.csv" -# Hand-annotated barrier-opening times (output of the picker app). One -# row per testing session; columns: machine_name, session_date, -# session_time, opening_s, trim_first_s, bad_rois, analyst, notes. -BARRIER_OPENING_CSV = DATA_METADATA / "barrier_opening.csv" - # Where the ethoscope source tree is checked out (used by track_videos.py # and auto_detect_targets.py — host-side scripts that import ethoscope # from a local clone rather than from pip). Default assumes the standard diff --git a/scripts/load_roi_data.py b/scripts/load_roi_data.py index 309709a..ee2263c 100644 --- a/scripts/load_roi_data.py +++ b/scripts/load_roi_data.py @@ -8,48 +8,12 @@ The TSV is the single source of truth for what data exists and how it maps to flies and conditions. """ -import re import sqlite3 from pathlib import Path import pandas as pd -from config import BARRIER_OPENING_CSV, VIDEO_INFO_TSV - -# DB filenames start with `YYYY-MM-DD_HH-MM-SS__...` — pull the -# session date/time out so we can join against barrier_opening.csv. -_DB_TIMESTAMP_RE = re.compile(r"(\d{4}-\d{2}-\d{2})_(\d{2}-\d{2}-\d{2})_") - - -def _session_key(db_path: str) -> tuple[str, str] | None: - """Extract (session_date, session_time) from a tracking DB filename.""" - if not isinstance(db_path, str) or not db_path: - return None - m = _DB_TIMESTAMP_RE.search(Path(db_path).name) - return (m.group(1), m.group(2)) if m else None - - -def _load_barrier_lookup(csv_path: Path) -> dict[tuple[str, str, str], dict]: - """Build (machine, session_date, session_time) → opening/bad_rois lookup. - - Returns an empty dict if the CSV is missing — callers should treat - that as "no per-session annotations available" rather than an error. - """ - if not Path(csv_path).exists(): - return {} - df = pd.read_csv(csv_path) - lookup: dict[tuple[str, str, str], dict] = {} - for r in df.itertuples(index=False): - bad = set() - if isinstance(r.bad_rois, str) and r.bad_rois.strip(): - bad = {int(x) for x in r.bad_rois.split(",") if x.strip()} - lookup[(r.machine_name, r.session_date, r.session_time)] = { - "opening_s": float(r.opening_s) if pd.notna(r.opening_s) else float("nan"), - "trim_first_s": float(r.trim_first_s) if pd.notna(r.trim_first_s) else 0.0, - "bad_rois": bad, - "unusable": pd.isna(r.opening_s), - } - return lookup +from config import VIDEO_INFO_TSV # Reason: prefer the explicit Jupyter-widget tqdm when available (it # updates reliably in JupyterLab, where text \r-style bars sometimes @@ -102,7 +66,6 @@ def _open_ro(db_path: str, cache: dict) -> sqlite3.Connection | None: def load_roi_data( meta: pd.DataFrame | None = None, progress: bool = True, - apply_barrier_filter: bool = True, ) -> pd.DataFrame: """Load ROI tracking data joined with experimental metadata. @@ -112,14 +75,6 @@ def load_roi_data( (``"training"`` or ``"testing"``). Rows with empty DB paths (unusable videos, or videos that didn't pass the completeness gate) are skipped. - Both training and testing reads are filtered against - ``barrier_opening.csv`` (the picker annotates both video types): - flies whose ROI never released (listed in ``bad_rois``) and entire - sessions flagged unusable are dropped. The session's ``opening_s`` - is stamped onto its samples so downstream code can compute - ``t_from_opening = t - opening_s``. Sessions missing from the CSV - are still loaded, but with ``opening_s = NaN``. - Args: meta: optional DataFrame with the same schema as ``all_video_info_merged.tsv``. Pass a filtered slice to load a @@ -127,17 +82,11 @@ def load_roi_data( Defaults to the full TSV. progress: show a tqdm progress bar (one tick per fly/ROI row). Defaults to True. Set False for silent batch jobs. - apply_barrier_filter: if True (default), drop session data for - flies whose barrier never opened and stamp ``opening_s`` - onto every sample. Set False to load raw data without any - barrier-derived filtering or columns. Returns: DataFrame with columns ``id, t, x, y, w, h, phi, is_inferred, - has_interacted, session, ROI, opening_s, `` — one row - per tracking sample. ``opening_s`` is NaN for sessions not - covered by ``barrier_opening.csv``. Empty if nothing could be - loaded. + has_interacted, session, `` — one row per tracking + sample. Empty if nothing could be loaded. """ if meta is None: meta = pd.read_csv(VIDEO_INFO_TSV, sep="\t") @@ -148,12 +97,8 @@ def load_roi_data( if "include" in meta.columns: meta = meta[meta["include"].astype(bool)] - barrier_lookup = _load_barrier_lookup(BARRIER_OPENING_CSV) if apply_barrier_filter else {} - db_cache: dict = {} chunks: list[pd.DataFrame] = [] - n_skipped_bad_roi = 0 - n_skipped_unusable = 0 n_rows = len(meta) if progress: @@ -180,28 +125,7 @@ def load_roi_data( for row in meta.itertuples(index=False): for session in ("training", "testing"): pbar.set_postfix_str(f"{row.machine_name} ROI {int(row.roi)} {session}") - db_path = getattr(row, f"{session}_db_path") - - # The picker annotates barrier_opening per video, and both - # the training and testing videos have their own entries. - # Apply the same per-session filter to both. - opening_s = float("nan") - if barrier_lookup: - key = _session_key(db_path) - if key is not None: - bo = barrier_lookup.get((row.machine_name, key[0], key[1])) - if bo is not None: - if bo["unusable"]: - n_skipped_unusable += 1 - pbar.update(1) - continue - if int(row.roi) in bo["bad_rois"]: - n_skipped_bad_roi += 1 - pbar.update(1) - continue - opening_s = bo["opening_s"] - - conn = _open_ro(db_path, db_cache) + conn = _open_ro(getattr(row, f"{session}_db_path"), db_cache) if conn is None: pbar.update(1) continue @@ -217,7 +141,6 @@ def load_roi_data( continue df["session"] = session df["ROI"] = int(row.roi) - df["opening_s"] = opening_s for col in _META_COLS: df[col] = getattr(row, col) chunks.append(df) @@ -225,13 +148,6 @@ def load_roi_data( pbar.close() - if apply_barrier_filter and (n_skipped_bad_roi or n_skipped_unusable): - print( - f"Barrier filter: dropped {n_skipped_bad_roi} ROI loads (barrier " - f"never opened) and {n_skipped_unusable} unusable sessions.", - flush=True, - ) - for conn in db_cache.values(): if conn is not None: conn.close()