Show experimental metadata above the video in the picker

Each video row now carries a `metadata` dict aggregated from the merged TSV: species, memory (STM/LTM), training_length_hr, consolidation_length_hr, age, training/testing date-time, and trained/naive fly counts. The UI renders these as a row of key:value pills above the video, with the session role (training/testing) colour-coded so the analyst can see at a glance what they're picking. The merged TSV currently has duplicate rows per (date, machine, ROI); the aggregator de-dups on those keys so counts aren't doubled. (The duplication itself should be cleaned up upstream.) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-01 12:54:40 +01:00 · 2026-05-01 12:54:40 +01:00 · 4ed988a617
commit 4ed988a617
parent 1a7542def2
2 changed files with 100 additions and 0 deletions
--- a/scripts/barrier_picker_app/app.py
+++ b/scripts/barrier_picker_app/app.py
@ -72,9 +72,60 @@ class QueueItem:
    mp4_path: str
    duration_s: float | None
    done: bool
+    metadata: dict   # experimental fields aggregated from the merged TSV


 # ─── Queue building ─────────────────────────────────────────────────────
+_META_FIELDS = (  # columns `_aggregate_metadata` copies from the first row
+    "species", "training_length_hr", "consolidation_length_hr",
+    "memory", "age", "training_date_time", "testing_date_time",
+)
+
+
+def _aggregate_metadata(rows: pd.DataFrame, db_filename: str) -> dict:
+    """Summarise the experimental metadata of one video from its TSV rows.
+
+    Returns {} for empty `rows`; otherwise each `_META_FIELDS` value from
+    the first row (None if missing), `n_trained`/`n_naive` tallies of the
+    `male` column when present, and `session_role`: which of the training
+    and testing db paths end with `db_filename` ("?" when neither does).
+    """
+    if rows.empty:
+        return {}
+    # Reason: the merged xlsx/TSV currently has duplicate rows per
+    # (date, machine, ROI). De-dup on those keys so the male counts and
+    # any per-ROI fields aren't doubled.
+    if {"date", "machine_name", "roi"}.issubset(rows.columns):
+        rows = rows.drop_duplicates(subset=["date", "machine_name", "roi"])
+    r0 = rows.iloc[0]
+    meta = {}
+    for f in _META_FIELDS:
+        v = r0.get(f)
+        # Reason: numpy ints/bools are not JSON-serialisable, so coerce
+        # scalars to builtins; whole floats collapse to int for display.
+        if pd.isna(v):
+            v = None
+        elif pd.api.types.is_bool(v):
+            v = bool(v)
+        elif pd.api.types.is_integer(v):
+            v = int(v)
+        elif pd.api.types.is_float(v):
+            v = int(v) if float(v).is_integer() else float(v)
+        meta[f] = v
+    if "male" in rows.columns:
+        m = rows["male"].dropna()
+        meta["n_trained"] = int((m == "trained").sum())
+        meta["n_naive"] = int((m == "naive").sum())
+    # Reason: tolerate a missing path column rather than raise KeyError.
+    roles = [
+        role for role in ("training", "testing")
+        if f"{role}_db_path" in rows.columns
+        and rows[f"{role}_db_path"].astype(str).str.endswith(db_filename).any()
+    ]
+    meta["session_role"] = "+".join(roles) or "?"
+    return meta
+
+
 def _build_queue() -> list[QueueItem]:
    """Build the ordered queue of pickable videos."""
    if not TSV_PATH.exists():
@ -120,6 +171,15 @@ def _build_queue() -> list[QueueItem]:
            inv_row = inv_by_key.get(key)
            if inv_row is None or not Path(inv_row["mp4_path"]).exists():
                continue
+            # Reason: gather all TSV rows that reference this video — there
+            # are typically 6 ROI-rows per session, sometimes also rows
+            # using it as both training AND testing.
+            db_filename = db_path.name
+            related = tsv[
+                tsv["training_db_path"].astype(str).str.endswith(db_filename)
+                | tsv["testing_db_path"].astype(str).str.endswith(db_filename)
+            ]
+            metadata = _aggregate_metadata(related, db_filename)
            items.append(QueueItem(
                idx=len(items),
                machine_name=row.machine_name,
@ -128,6 +188,7 @@ def _build_queue() -> list[QueueItem]:
                mp4_path=inv_row["mp4_path"],
                duration_s=inv_row["duration_s"],
                done=key in done_keys,
+                metadata=metadata,
            ))
    return items

@ -155,6 +216,7 @@ async def get_queue() -> JSONResponse:
            "session_time": q.session_time,
            "duration_s": q.duration_s,
            "done": q.done,
+            "metadata": q.metadata,
        }
        for q in queue
    ])