Compare commits

..

2 commits

Author SHA1 Message Date
28b7a227c0 load_roi_data: filter on barrier_opening.csv and stamp opening_s
For every session (training and testing alike), the loader now looks
up the corresponding row in barrier_opening.csv and:
  - drops the read if the ROI is in bad_rois (barrier never opened
    for that fly so its tracking has no biological meaning)
  - drops the read if the session is flagged unusable
  - stamps the session's opening_s onto every sample so downstream
    code can compute t_from_opening = t - opening_s

Tested against ETHOSCOPE_082 2024-09-17: training (bad_rois=1,3,5)
correctly drops ROIs 1/3/5; testing keeps all six; opening_s differs
between sessions as expected (646.8 vs 154.7).

Opt out with apply_barrier_filter=False if you need raw data.
2026-05-12 09:45:59 +01:00
b8f23a4884 Annotations: complete barrier_opening.csv for all 110 sessions
Output of the web picker (RB 64, GG 46). 103 sessions have a usable
opening time; 7 are flagged unusable (6 of those in the 2024-10-04
batch). 56 sessions had only the lower row release; 52 had all six
barriers open; 2 had only the upper row.
2026-05-12 09:42:44 +01:00
3 changed files with 180 additions and 8 deletions

View file

@ -1,17 +1,13 @@
machine_name,session_date,session_time,opening_s,trim_first_s,bad_rois,analyst,notes
ETHOSCOPE_076,2025-07-15,16-03-10,52.0,0,,GG,hand-annotated 2025-07-15 batch
ETHOSCOPE_076,2025-07-15,16-31-34,94.0,69,,GG,first ~66s misframed (arena partly out of frame)
ETHOSCOPE_145,2025-07-15,16-03-27,42.0,0,,GG,hand-annotated 2025-07-15 batch
ETHOSCOPE_145,2025-07-15,16-31-41,89.0,69,,GG,first ~60s misframed (arena partly out of frame)
ETHOSCOPE_268,2025-07-15,16-32-05,75.0,0,,GG,hand-annotated 2025-07-15 batch
ETHOSCOPE_076,2024-10-21,11-07-54,346.8,0,,GG,
ETHOSCOPE_181,2024-10-21,11-08-57,287.3,0,,GG,
ETHOSCOPE_225,2024-10-21,11-09-12,277.9,0,"1,3,5",GG,
ETHOSCOPE_082,2024-10-21,11-07-46,365.3,0,"1,3,5",GG,
ETHOSCOPE_140,2024-10-21,11-06-58,423.9,0,"1,3,5",GG,
ETHOSCOPE_083,2024-10-21,11-09-07,306.4,0,"1,3,5",GG,
ETHOSCOPE_145,2024-10-21,11-08-35,341.0,0,"1,3,5",GG,
ETHOSCOPE_076,2024-09-17,10-32-10,1875.8,0,"1,3,5",GG,
ETHOSCOPE_082,2024-09-17,10-53-16,646.8,0,"1,3,5",GG,
ETHOSCOPE_140,2024-09-17,11-03-05,86.2,0,"1,3,5",GG,
ETHOSCOPE_181,2024-09-17,10-33-12,1824.3,0,"1,3,5",GG,
@ -26,3 +22,90 @@ ETHOSCOPE_083,2024-10-01,11-07-41,560.3,0,"1,3,5",GG,
ETHOSCOPE_113,2024-10-01,11-07-48,565.7,0,"2,4,6",GG,
ETHOSCOPE_140,2024-10-01,11-04-07,755.0,0,"1,3,5",GG,
ETHOSCOPE_167,2024-10-01,11-07-55,564.5,0,"1,3,5",GG,
ETHOSCOPE_076,2024-09-18,10-15-53,175.5,0,"1,3,5",GG,
ETHOSCOPE_169,2024-10-01,11-09-49,437.9,0,"1,3,5",GG,
ETHOSCOPE_181,2024-10-01,11-04-44,694.0,0,"1,3,5",GG,
ETHOSCOPE_225,2024-10-01,11-03-45,758.7,0,"1,3,5",GG,
ETHOSCOPE_282,2024-10-01,11-09-43,436.3,0,"1,3,5",GG,
ETHOSCOPE_076,2024-10-02,10-44-41,1547.5,0,"1,3,5",GG,
ETHOSCOPE_082,2024-10-02,10-44-59,1540.5,0,"1,3,5",GG,
ETHOSCOPE_083,2024-10-02,10-45-19,1530.1,0,"1,3,5",GG,
ETHOSCOPE_140,2024-10-02,10-45-13,1531.6,0,"1,3,5",GG,
ETHOSCOPE_167,2024-10-02,10-45-37,1521.3,0,"1,3,5",GG,
ETHOSCOPE_169,2024-10-02,10-45-31,1522.3,0,"1,3,5",GG,
ETHOSCOPE_181,2024-10-02,10-44-47,1544.2,0,"1,3,5",GG,
ETHOSCOPE_225,2024-10-02,10-44-53,1540.4,0,"1,3,5",GG,
ETHOSCOPE_282,2024-10-02,10-45-25,1525.4,0,"1,3,5",GG,
ETHOSCOPE_067,2024-10-15,10-51-44,590.9,0,"1,3,5",GG,
ETHOSCOPE_076,2024-10-15,10-50-12,545.5,0,"1,3,5",GG,
ETHOSCOPE_082,2024-10-15,10-50-33,566.9,0,"1,3,5",GG,
ETHOSCOPE_083,2024-10-15,10-58-55,110.3,0,"1,3,5",GG,
ETHOSCOPE_113,2024-10-15,10-51-36,623.4,0,"2,4,6",GG,
ETHOSCOPE_139,2024-10-15,10-51-15,592.9,0,"1,3,5",RB,
ETHOSCOPE_140,2024-10-15,10-50-45,567.2,0,"1,3,5",RB,
ETHOSCOPE_145,2024-10-15,10-51-09,597.4,0,"1,3,5",RB,
ETHOSCOPE_169,2024-10-15,10-51-28,601.0,0,"1,3,5",GG,
ETHOSCOPE_181,2024-10-15,10-50-19,545.5,0,"1,3,5",RB,
ETHOSCOPE_225,2024-10-15,10-50-25,546.5,0,"1,3,5",RB,
ETHOSCOPE_076,2024-10-21,11-07-54,346.2,0,"1,3,5",RB,
ETHOSCOPE_139,2024-10-21,11-07-55,385.6,0,"1,3,5",RB,
ETHOSCOPE_169,2024-10-21,11-09-30,293.2,0,"1,3,5",RB,
ETHOSCOPE_268,2024-10-21,11-09-59,271.2,0,"1,3,5",RB,
ETHOSCOPE_139,2025-07-15,16-31-52,84.8,0,,RB,
ETHOSCOPE_076,2024-09-17,13-10-59,84.1,0,,RB,
ETHOSCOPE_082,2024-09-17,13-10-54,154.7,0,,RB,
ETHOSCOPE_140,2024-09-17,13-10-45,183.2,0,,RB,
ETHOSCOPE_181,2024-09-17,13-11-03,107.1,0,,RB,
ETHOSCOPE_225,2024-09-17,13-10-51,134.6,0,,RB,
ETHOSCOPE_076,2024-09-18,12-34-16,,0,,RB,unusable
ETHOSCOPE_082,2024-09-18,12-34-12,133.4,0,,RB,
ETHOSCOPE_140,2024-09-18,12-34-04,130.1,0,,RB,
ETHOSCOPE_181,2024-09-18,12-34-20,94.3,0,,RB,
ETHOSCOPE_225,2024-09-18,12-34-08,113.4,0,,RB,
ETHOSCOPE_076,2024-10-01,13-27-24,94.8,0,,RB,
ETHOSCOPE_082,2024-10-01,13-27-35,131.8,0,,RB,
ETHOSCOPE_083,2024-10-01,13-27-06,227.1,0,,RB,
ETHOSCOPE_113,2024-10-01,13-26-57,293.6,0,,RB,
ETHOSCOPE_140,2024-10-01,13-27-44,147.5,0,,RB,
ETHOSCOPE_167,2024-10-01,13-27-03,301.2,0,,RB,
ETHOSCOPE_169,2024-10-01,13-27-24,251.9,0,,RB,
ETHOSCOPE_181,2024-10-01,13-27-27,101.8,0,,RB,
ETHOSCOPE_225,2024-10-01,13-27-32,111.2,0,,RB,
ETHOSCOPE_282,2024-10-01,13-27-14,236.0,0,,RB,
ETHOSCOPE_076,2024-10-02,14-23-32,63.6,0,,RB,
ETHOSCOPE_082,2024-10-02,14-23-44,71.4,0,,RB,
ETHOSCOPE_083,2024-10-02,14-23-54,75.7,0,,RB,
ETHOSCOPE_140,2024-10-02,14-23-51,73.5,0,,RB,
ETHOSCOPE_167,2024-10-02,14-24-05,84.3,0,,RB,
ETHOSCOPE_169,2024-10-02,14-24-02,79.5,0,,RB,
ETHOSCOPE_181,2024-10-02,14-23-36,67.0,0,,RB,
ETHOSCOPE_225,2024-10-02,14-23-40,69.1,0,,RB,
ETHOSCOPE_282,2024-10-02,14-23-58,78.3,0,,RB,
ETHOSCOPE_076,2024-10-04,16-11-56,,0,,RB,unusable
ETHOSCOPE_181,2024-10-04,16-12-10,,0,,RB,unusable
ETHOSCOPE_225,2024-10-04,16-12-21,,0,,RB,unusable
ETHOSCOPE_067,2024-10-15,13-16-18,206.9,0,,RB,
ETHOSCOPE_082,2024-10-15,13-15-36,172.8,0,,RB,
ETHOSCOPE_083,2024-10-15,13-17-37,90.5,0,,RB,
ETHOSCOPE_113,2024-10-15,13-16-24,212.2,0,,RB,
ETHOSCOPE_139,2024-10-15,13-16-07,203.4,0,,RB,
ETHOSCOPE_140,2024-10-15,13-15-50,176.2,0,,RB,
ETHOSCOPE_145,2024-10-15,13-16-01,201.9,0,,RB,
ETHOSCOPE_169,2024-10-15,13-16-13,202.8,0,,RB,
ETHOSCOPE_181,2024-10-15,13-15-23,166.0,0,,RB,
ETHOSCOPE_225,2024-10-15,13-15-30,171.3,0,,RB,
ETHOSCOPE_076,2024-10-21,13-25-18,442.6,0,"1,3,5",RB,
ETHOSCOPE_082,2024-10-21,13-28-01,296.7,0,"1,3,5",RB,
ETHOSCOPE_083,2024-10-21,13-30-11,183.4,0,"1,3,5",RB,
ETHOSCOPE_139,2024-10-21,13-29-41,220.4,0,"1,3,5",RB,
ETHOSCOPE_140,2024-10-21,13-28-03,301.4,0,"1,3,5",RB,
ETHOSCOPE_145,2024-10-21,13-28-17,299.3,0,"1,3,5",RB,
ETHOSCOPE_169,2024-10-21,13-28-31,295.5,0,"1,3,5",RB,
ETHOSCOPE_225,2024-10-21,13-30-10,166.3,0,"1,3,5",RB,
ETHOSCOPE_268,2024-10-21,13-29-14,257.1,0,"1,3,5",RB,
ETHOSCOPE_076,2025-07-15,16-31-34,96.0,0,,RB,
ETHOSCOPE_145,2025-07-15,16-31-41,90.5,0,,RB,
ETHOSCOPE_076,2024-09-17,10-32-10,1871.3,0,"1,3,5",RB,
ETHOSCOPE_082,2024-10-04,16-12-30,,0,,GG,unusable
ETHOSCOPE_086,2024-10-04,16-18-12,,0,,GG,unusable
ETHOSCOPE_140,2024-10-04,16-18-22,,0,,GG,unusable

1 machine_name session_date session_time opening_s trim_first_s bad_rois analyst notes
2 ETHOSCOPE_076 2025-07-15 16-03-10 52.0 0 GG hand-annotated 2025-07-15 batch
ETHOSCOPE_076 2025-07-15 16-31-34 94.0 69 GG first ~66s misframed (arena partly out of frame)
3 ETHOSCOPE_145 2025-07-15 16-03-27 42.0 0 GG hand-annotated 2025-07-15 batch
ETHOSCOPE_145 2025-07-15 16-31-41 89.0 69 GG first ~60s misframed (arena partly out of frame)
4 ETHOSCOPE_268 2025-07-15 16-32-05 75.0 0 GG hand-annotated 2025-07-15 batch
ETHOSCOPE_076 2024-10-21 11-07-54 346.8 0 GG
5 ETHOSCOPE_181 2024-10-21 11-08-57 287.3 0 GG
6 ETHOSCOPE_225 2024-10-21 11-09-12 277.9 0 1,3,5 GG
7 ETHOSCOPE_082 2024-10-21 11-07-46 365.3 0 1,3,5 GG
8 ETHOSCOPE_140 2024-10-21 11-06-58 423.9 0 1,3,5 GG
9 ETHOSCOPE_083 2024-10-21 11-09-07 306.4 0 1,3,5 GG
10 ETHOSCOPE_145 2024-10-21 11-08-35 341.0 0 1,3,5 GG
ETHOSCOPE_076 2024-09-17 10-32-10 1875.8 0 1,3,5 GG
11 ETHOSCOPE_082 2024-09-17 10-53-16 646.8 0 1,3,5 GG
12 ETHOSCOPE_140 2024-09-17 11-03-05 86.2 0 1,3,5 GG
13 ETHOSCOPE_181 2024-09-17 10-33-12 1824.3 0 1,3,5 GG
22 ETHOSCOPE_113 2024-10-01 11-07-48 565.7 0 2,4,6 GG
23 ETHOSCOPE_140 2024-10-01 11-04-07 755.0 0 1,3,5 GG
24 ETHOSCOPE_167 2024-10-01 11-07-55 564.5 0 1,3,5 GG
25 ETHOSCOPE_076 2024-09-18 10-15-53 175.5 0 1,3,5 GG
26 ETHOSCOPE_169 2024-10-01 11-09-49 437.9 0 1,3,5 GG
27 ETHOSCOPE_181 2024-10-01 11-04-44 694.0 0 1,3,5 GG
28 ETHOSCOPE_225 2024-10-01 11-03-45 758.7 0 1,3,5 GG
29 ETHOSCOPE_282 2024-10-01 11-09-43 436.3 0 1,3,5 GG
30 ETHOSCOPE_076 2024-10-02 10-44-41 1547.5 0 1,3,5 GG
31 ETHOSCOPE_082 2024-10-02 10-44-59 1540.5 0 1,3,5 GG
32 ETHOSCOPE_083 2024-10-02 10-45-19 1530.1 0 1,3,5 GG
33 ETHOSCOPE_140 2024-10-02 10-45-13 1531.6 0 1,3,5 GG
34 ETHOSCOPE_167 2024-10-02 10-45-37 1521.3 0 1,3,5 GG
35 ETHOSCOPE_169 2024-10-02 10-45-31 1522.3 0 1,3,5 GG
36 ETHOSCOPE_181 2024-10-02 10-44-47 1544.2 0 1,3,5 GG
37 ETHOSCOPE_225 2024-10-02 10-44-53 1540.4 0 1,3,5 GG
38 ETHOSCOPE_282 2024-10-02 10-45-25 1525.4 0 1,3,5 GG
39 ETHOSCOPE_067 2024-10-15 10-51-44 590.9 0 1,3,5 GG
40 ETHOSCOPE_076 2024-10-15 10-50-12 545.5 0 1,3,5 GG
41 ETHOSCOPE_082 2024-10-15 10-50-33 566.9 0 1,3,5 GG
42 ETHOSCOPE_083 2024-10-15 10-58-55 110.3 0 1,3,5 GG
43 ETHOSCOPE_113 2024-10-15 10-51-36 623.4 0 2,4,6 GG
44 ETHOSCOPE_139 2024-10-15 10-51-15 592.9 0 1,3,5 RB
45 ETHOSCOPE_140 2024-10-15 10-50-45 567.2 0 1,3,5 RB
46 ETHOSCOPE_145 2024-10-15 10-51-09 597.4 0 1,3,5 RB
47 ETHOSCOPE_169 2024-10-15 10-51-28 601.0 0 1,3,5 GG
48 ETHOSCOPE_181 2024-10-15 10-50-19 545.5 0 1,3,5 RB
49 ETHOSCOPE_225 2024-10-15 10-50-25 546.5 0 1,3,5 RB
50 ETHOSCOPE_076 2024-10-21 11-07-54 346.2 0 1,3,5 RB
51 ETHOSCOPE_139 2024-10-21 11-07-55 385.6 0 1,3,5 RB
52 ETHOSCOPE_169 2024-10-21 11-09-30 293.2 0 1,3,5 RB
53 ETHOSCOPE_268 2024-10-21 11-09-59 271.2 0 1,3,5 RB
54 ETHOSCOPE_139 2025-07-15 16-31-52 84.8 0 RB
55 ETHOSCOPE_076 2024-09-17 13-10-59 84.1 0 RB
56 ETHOSCOPE_082 2024-09-17 13-10-54 154.7 0 RB
57 ETHOSCOPE_140 2024-09-17 13-10-45 183.2 0 RB
58 ETHOSCOPE_181 2024-09-17 13-11-03 107.1 0 RB
59 ETHOSCOPE_225 2024-09-17 13-10-51 134.6 0 RB
60 ETHOSCOPE_076 2024-09-18 12-34-16 0 RB unusable
61 ETHOSCOPE_082 2024-09-18 12-34-12 133.4 0 RB
62 ETHOSCOPE_140 2024-09-18 12-34-04 130.1 0 RB
63 ETHOSCOPE_181 2024-09-18 12-34-20 94.3 0 RB
64 ETHOSCOPE_225 2024-09-18 12-34-08 113.4 0 RB
65 ETHOSCOPE_076 2024-10-01 13-27-24 94.8 0 RB
66 ETHOSCOPE_082 2024-10-01 13-27-35 131.8 0 RB
67 ETHOSCOPE_083 2024-10-01 13-27-06 227.1 0 RB
68 ETHOSCOPE_113 2024-10-01 13-26-57 293.6 0 RB
69 ETHOSCOPE_140 2024-10-01 13-27-44 147.5 0 RB
70 ETHOSCOPE_167 2024-10-01 13-27-03 301.2 0 RB
71 ETHOSCOPE_169 2024-10-01 13-27-24 251.9 0 RB
72 ETHOSCOPE_181 2024-10-01 13-27-27 101.8 0 RB
73 ETHOSCOPE_225 2024-10-01 13-27-32 111.2 0 RB
74 ETHOSCOPE_282 2024-10-01 13-27-14 236.0 0 RB
75 ETHOSCOPE_076 2024-10-02 14-23-32 63.6 0 RB
76 ETHOSCOPE_082 2024-10-02 14-23-44 71.4 0 RB
77 ETHOSCOPE_083 2024-10-02 14-23-54 75.7 0 RB
78 ETHOSCOPE_140 2024-10-02 14-23-51 73.5 0 RB
79 ETHOSCOPE_167 2024-10-02 14-24-05 84.3 0 RB
80 ETHOSCOPE_169 2024-10-02 14-24-02 79.5 0 RB
81 ETHOSCOPE_181 2024-10-02 14-23-36 67.0 0 RB
82 ETHOSCOPE_225 2024-10-02 14-23-40 69.1 0 RB
83 ETHOSCOPE_282 2024-10-02 14-23-58 78.3 0 RB
84 ETHOSCOPE_076 2024-10-04 16-11-56 0 RB unusable
85 ETHOSCOPE_181 2024-10-04 16-12-10 0 RB unusable
86 ETHOSCOPE_225 2024-10-04 16-12-21 0 RB unusable
87 ETHOSCOPE_067 2024-10-15 13-16-18 206.9 0 RB
88 ETHOSCOPE_082 2024-10-15 13-15-36 172.8 0 RB
89 ETHOSCOPE_083 2024-10-15 13-17-37 90.5 0 RB
90 ETHOSCOPE_113 2024-10-15 13-16-24 212.2 0 RB
91 ETHOSCOPE_139 2024-10-15 13-16-07 203.4 0 RB
92 ETHOSCOPE_140 2024-10-15 13-15-50 176.2 0 RB
93 ETHOSCOPE_145 2024-10-15 13-16-01 201.9 0 RB
94 ETHOSCOPE_169 2024-10-15 13-16-13 202.8 0 RB
95 ETHOSCOPE_181 2024-10-15 13-15-23 166.0 0 RB
96 ETHOSCOPE_225 2024-10-15 13-15-30 171.3 0 RB
97 ETHOSCOPE_076 2024-10-21 13-25-18 442.6 0 1,3,5 RB
98 ETHOSCOPE_082 2024-10-21 13-28-01 296.7 0 1,3,5 RB
99 ETHOSCOPE_083 2024-10-21 13-30-11 183.4 0 1,3,5 RB
100 ETHOSCOPE_139 2024-10-21 13-29-41 220.4 0 1,3,5 RB
101 ETHOSCOPE_140 2024-10-21 13-28-03 301.4 0 1,3,5 RB
102 ETHOSCOPE_145 2024-10-21 13-28-17 299.3 0 1,3,5 RB
103 ETHOSCOPE_169 2024-10-21 13-28-31 295.5 0 1,3,5 RB
104 ETHOSCOPE_225 2024-10-21 13-30-10 166.3 0 1,3,5 RB
105 ETHOSCOPE_268 2024-10-21 13-29-14 257.1 0 1,3,5 RB
106 ETHOSCOPE_076 2025-07-15 16-31-34 96.0 0 RB
107 ETHOSCOPE_145 2025-07-15 16-31-41 90.5 0 RB
108 ETHOSCOPE_076 2024-09-17 10-32-10 1871.3 0 1,3,5 RB
109 ETHOSCOPE_082 2024-10-04 16-12-30 0 GG unusable
110 ETHOSCOPE_086 2024-10-04 16-18-12 0 GG unusable
111 ETHOSCOPE_140 2024-10-04 16-18-22 0 GG unusable

View file

@ -26,6 +26,11 @@ VIDEO_INFO_TSV = DATA_VOLUME / "all_video_info_merged.tsv"
# A small CSV listing every video file we know about (built locally).
INVENTORY_CSV = DATA_METADATA / "video_inventory.csv"
# Hand-annotated barrier-opening times (output of the picker app). One
# row per annotated session video (training and testing alike); columns:
# machine_name, session_date, session_time, opening_s, trim_first_s,
# bad_rois, analyst, notes.
BARRIER_OPENING_CSV = DATA_METADATA / "barrier_opening.csv"
# Where the ethoscope source tree is checked out (used by track_videos.py
# and auto_detect_targets.py — host-side scripts that import ethoscope
# from a local clone rather than from pip). Default assumes the standard

View file

@ -8,12 +8,48 @@ The TSV is the single source of truth for what data exists and how it
maps to flies and conditions.
"""
import re
import sqlite3
from pathlib import Path
import pandas as pd
from config import VIDEO_INFO_TSV
from config import BARRIER_OPENING_CSV, VIDEO_INFO_TSV
# DB filenames start with `YYYY-MM-DD_HH-MM-SS_<uuid>_...` — pull the
# session date/time out so we can join against barrier_opening.csv.
_DB_TIMESTAMP_RE = re.compile(r"(\d{4}-\d{2}-\d{2})_(\d{2}-\d{2}-\d{2})_")
def _session_key(db_path: str) -> tuple[str, str] | None:
"""Extract (session_date, session_time) from a tracking DB filename."""
if not isinstance(db_path, str) or not db_path:
return None
m = _DB_TIMESTAMP_RE.search(Path(db_path).name)
return (m.group(1), m.group(2)) if m else None
def _load_barrier_lookup(csv_path: Path) -> dict[tuple[str, str, str], dict]:
    """Build (machine, session_date, session_time) → opening/bad_rois lookup.

    Each value is a dict with the keys:

    - ``opening_s``: float, NaN when the cell is blank.
    - ``trim_first_s``: float, 0.0 when the cell is blank.
    - ``bad_rois``: set of int parsed from a comma-separated cell
      (empty set when blank).
    - ``unusable``: True when ``opening_s`` is blank.

    If the same (machine, date, time) key appears on several rows, the
    last row in the file wins.

    Returns an empty dict if the CSV is missing — callers should treat
    that as "no per-session annotations available" rather than an error.
    """
    if not Path(csv_path).exists():
        return {}

    # Read the key columns and ``bad_rois`` as text. Left to pandas' type
    # inference, a file whose non-blank ``bad_rois`` cells are all single
    # numbers (e.g. ``3``) is parsed as a numeric column; every value would
    # then fail the string check below and the bad-ROI filter would be
    # silently disabled.
    df = pd.read_csv(
        csv_path,
        dtype={
            "machine_name": str,
            "session_date": str,
            "session_time": str,
            "bad_rois": str,
        },
    )

    lookup: dict[tuple[str, str, str], dict] = {}
    for r in df.itertuples(index=False):
        bad: set[int] = set()
        # Blank cells arrive as NaN (a float), so the isinstance check also
        # filters out "no bad ROIs".
        if isinstance(r.bad_rois, str) and r.bad_rois.strip():
            bad = {int(x) for x in r.bad_rois.split(",") if x.strip()}
        has_opening = bool(pd.notna(r.opening_s))
        lookup[(r.machine_name, r.session_date, r.session_time)] = {
            "opening_s": float(r.opening_s) if has_opening else float("nan"),
            "trim_first_s": float(r.trim_first_s) if pd.notna(r.trim_first_s) else 0.0,
            "bad_rois": bad,
            # A session with no opening time is the picker's "unusable" flag.
            "unusable": not has_opening,
        }
    return lookup
# Reason: prefer the explicit Jupyter-widget tqdm when available (it
# updates reliably in JupyterLab, where text \r-style bars sometimes
@ -66,6 +102,7 @@ def _open_ro(db_path: str, cache: dict) -> sqlite3.Connection | None:
def load_roi_data(
meta: pd.DataFrame | None = None,
progress: bool = True,
apply_barrier_filter: bool = True,
) -> pd.DataFrame:
"""Load ROI tracking data joined with experimental metadata.
@ -75,6 +112,14 @@ def load_roi_data(
(``"training"`` or ``"testing"``). Rows with empty DB paths (unusable
videos, or videos that didn't pass the completeness gate) are skipped.
Both training and testing reads are filtered against
``barrier_opening.csv`` (the picker annotates both video types):
flies whose ROI never released (listed in ``bad_rois``) and entire
sessions flagged unusable are dropped. The session's ``opening_s``
is stamped onto its samples so downstream code can compute
``t_from_opening = t - opening_s``. Sessions missing from the CSV
are still loaded, but with ``opening_s = NaN``.
Args:
meta: optional DataFrame with the same schema as
``all_video_info_merged.tsv``. Pass a filtered slice to load a
@ -82,11 +127,17 @@ def load_roi_data(
Defaults to the full TSV.
progress: show a tqdm progress bar (one tick per fly/ROI row).
Defaults to True. Set False for silent batch jobs.
apply_barrier_filter: if True (default), drop session data for
flies whose barrier never opened and stamp ``opening_s``
onto every sample. Set False to load raw data without any
barrier-derived filtering or columns.
Returns:
DataFrame with columns ``id, t, x, y, w, h, phi, is_inferred,
has_interacted, session, <metadata>`` one row per tracking
sample. Empty if nothing could be loaded.
has_interacted, session, ROI, opening_s, <metadata>`` one row
per tracking sample. ``opening_s`` is NaN for sessions not
covered by ``barrier_opening.csv``. Empty if nothing could be
loaded.
"""
if meta is None:
meta = pd.read_csv(VIDEO_INFO_TSV, sep="\t")
@ -97,8 +148,12 @@ def load_roi_data(
if "include" in meta.columns:
meta = meta[meta["include"].astype(bool)]
barrier_lookup = _load_barrier_lookup(BARRIER_OPENING_CSV) if apply_barrier_filter else {}
db_cache: dict = {}
chunks: list[pd.DataFrame] = []
n_skipped_bad_roi = 0
n_skipped_unusable = 0
n_rows = len(meta)
if progress:
@ -125,7 +180,28 @@ def load_roi_data(
for row in meta.itertuples(index=False):
for session in ("training", "testing"):
pbar.set_postfix_str(f"{row.machine_name} ROI {int(row.roi)} {session}")
conn = _open_ro(getattr(row, f"{session}_db_path"), db_cache)
db_path = getattr(row, f"{session}_db_path")
# The picker annotates barrier_opening per video, and both
# the training and testing videos have their own entries.
# Apply the same per-session filter to both.
opening_s = float("nan")
if barrier_lookup:
key = _session_key(db_path)
if key is not None:
bo = barrier_lookup.get((row.machine_name, key[0], key[1]))
if bo is not None:
if bo["unusable"]:
n_skipped_unusable += 1
pbar.update(1)
continue
if int(row.roi) in bo["bad_rois"]:
n_skipped_bad_roi += 1
pbar.update(1)
continue
opening_s = bo["opening_s"]
conn = _open_ro(db_path, db_cache)
if conn is None:
pbar.update(1)
continue
@ -141,6 +217,7 @@ def load_roi_data(
continue
df["session"] = session
df["ROI"] = int(row.roi)
df["opening_s"] = opening_s
for col in _META_COLS:
df[col] = getattr(row, col)
chunks.append(df)
@ -148,6 +225,13 @@ def load_roi_data(
pbar.close()
if apply_barrier_filter and (n_skipped_bad_roi or n_skipped_unusable):
print(
f"Barrier filter: dropped {n_skipped_bad_roi} ROI loads (barrier "
f"never opened) and {n_skipped_unusable} unusable sessions.",
flush=True,
)
for conn in db_cache.values():
if conn is not None:
conn.close()