Show experimental metadata above the video in the picker

Each video row now carries a `metadata` dict aggregated from the
merged TSV: species, memory (STM/LTM), training_length_hr,
consolidation_length_hr, age, training/testing date-time, and
trained/naive fly counts. The UI renders these as a row of key:value
pills above the video, with the session role (training/testing)
colour-coded so the analyst can see at a glance what they're picking.

The merged TSV currently has duplicate rows per (date, machine, ROI);
the aggregator de-dups on those keys so counts aren't doubled. (The
duplication itself should be cleaned up upstream.)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-01 12:54:40 +01:00
parent 1a7542def2
commit 4ed988a617
2 changed files with 100 additions and 0 deletions

View file

@ -72,9 +72,60 @@ class QueueItem:
mp4_path: str
duration_s: float | None
done: bool
metadata: dict # experimental fields aggregated from the merged TSV
# ─── Queue building ─────────────────────────────────────────────────────
# Per-video fields copied from the first (de-duplicated) TSV row of a video.
_META_FIELDS = (
    "species", "training_length_hr", "consolidation_length_hr",
    "memory", "age", "training_date_time", "testing_date_time",
)


def _to_json_scalar(value):
    """Normalise one TSV cell into a value the JSON encoder can handle.

    Missing values (anything ``pd.isna`` reports as NA) become ``None``.
    NumPy numeric/bool scalars are unwrapped to their Python equivalents,
    because a row pulled out of a DataFrame can carry e.g. ``np.int64``,
    which ``json.dumps`` refuses to encode. Whole-number floats are
    collapsed to ``int`` so that ``24.0`` is shown as ``24``. Every other
    value (strings included) is returned unchanged.
    """
    import numpy as np  # local import: numpy is a hard dependency of pandas

    if pd.isna(value):
        return None
    if isinstance(value, (np.number, np.bool_)):
        value = value.item()
    if isinstance(value, float) and value.is_integer():
        return int(value)
    return value


def _aggregate_metadata(rows: pd.DataFrame, db_filename: str) -> dict:
    """Pull the experimental metadata for one video from its TSV rows.

    Most fields are expected to be uniform across the ROIs of a video, so
    the first-row value is taken as representative. `male` is a per-fly
    label, so it is summarised as counts instead. `session_role` flags
    whether this video was the training or the testing session for the
    flies in it.

    Args:
        rows: TSV rows that reference this video.
        db_filename: File name of the video's db; a row references the
            video when its training/testing db path ends with this name.

    Returns:
        ``{}`` when ``rows`` is empty. Otherwise a dict with one entry per
        name in ``_META_FIELDS`` (``None`` when missing), ``n_trained`` and
        ``n_naive`` (only when a ``male`` column exists), and
        ``session_role``: ``"training"``, ``"testing"``,
        ``"training+testing"`` or ``"?"``. All values are plain Python
        objects so the dict can be serialised to JSON as-is.
    """
    if rows.empty:
        return {}

    # Reason: the merged TSV currently has duplicate rows per
    # (date, machine, ROI). De-dup on those keys so the male counts and
    # any per-ROI fields aren't doubled.
    dedup_keys = ["date", "machine_name", "roi"]
    if set(dedup_keys).issubset(rows.columns):
        rows = rows.drop_duplicates(subset=dedup_keys)

    first_row = rows.iloc[0]
    meta = {
        field: _to_json_scalar(first_row.get(field))
        for field in _META_FIELDS
    }

    # Per-ROI tally.
    if "male" in rows.columns:
        labels = rows["male"].dropna()
        meta["n_trained"] = int((labels == "trained").sum())
        meta["n_naive"] = int((labels == "naive").sum())

    def _references_video(column: str) -> bool:
        # A missing path column is treated as "no row references the
        # video", in line with the optional-column guards above.
        if column not in rows.columns:
            return False
        paths = rows[column].astype(str)
        return bool(paths.str.endswith(db_filename).any())

    # Was this the training session, the testing session, or both?
    is_training = _references_video("training_db_path")
    is_testing = _references_video("testing_db_path")
    if is_training and is_testing:
        meta["session_role"] = "training+testing"
    elif is_training:
        meta["session_role"] = "training"
    elif is_testing:
        meta["session_role"] = "testing"
    else:
        meta["session_role"] = "?"
    return meta
def _build_queue() -> list[QueueItem]:
"""Build the ordered queue of pickable videos."""
if not TSV_PATH.exists():
@ -120,6 +171,15 @@ def _build_queue() -> list[QueueItem]:
inv_row = inv_by_key.get(key)
if inv_row is None or not Path(inv_row["mp4_path"]).exists():
continue
# Reason: gather all TSV rows that reference this video — there
# are typically 6 ROI-rows per session, sometimes also rows
# using it as both training AND testing.
db_filename = db_path.name
related = tsv[
tsv["training_db_path"].astype(str).str.endswith(db_filename)
| tsv["testing_db_path"].astype(str).str.endswith(db_filename)
]
metadata = _aggregate_metadata(related, db_filename)
items.append(QueueItem(
idx=len(items),
machine_name=row.machine_name,
@ -128,6 +188,7 @@ def _build_queue() -> list[QueueItem]:
mp4_path=inv_row["mp4_path"],
duration_s=inv_row["duration_s"],
done=key in done_keys,
metadata=metadata,
))
return items
@ -155,6 +216,7 @@ async def get_queue() -> JSONResponse:
"session_time": q.session_time,
"duration_s": q.duration_s,
"done": q.done,
"metadata": q.metadata,
}
for q in queue
])