Make data paths visible and explicit in flies_analysis notebooks
Define METADATA_TSV and TRACKED_DBS up front in cell 1, assert they exist before doing anything else, and pass the loaded metadata to load_roi_data() explicitly. Surfaces path problems immediately with a readable message instead of failing deep inside the loader. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
231c7a437f
commit
723d1f3682
2 changed files with 4 additions and 4 deletions
|
|
@ -14,7 +14,7 @@
|
|||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "import sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport sqlite3\nimport glob\nimport re\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# Two locations to know about:\n# - DATA_DIR : where the project's bulky data lives (mounted into the container)\n# - REPO_ROOT : where the code repository is checked out (your home directory)\n# Path.home() expands to the current user's home, so this works for any\n# user running the container (no hard-coded usernames).\nDATA_DIR = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\n# Pull every other path constant from scripts/config.py so this notebook\n# stays in sync with the rest of the codebase. (Reason: avoids drift when\n# paths change — config.py is the single source of truth.)\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_RAW, DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n"
|
||||
"source": "import sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport sqlite3\nimport glob\nimport re\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# ─── Where the data lives ────────────────────────────────────────────────\n# DATA_DIR holds everything bulky/regenerable: the metadata TSV and the\n# tracking SQLite DBs. It's mounted into the container at this fixed path.\n# REPO_ROOT is your checkout of the cupido repo, in your home directory.\n# Path.home() expands to /home/<your-username>, so this works for any\n# user (no hard-coded usernames).\nDATA_DIR = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\nMETADATA_TSV = DATA_DIR / \"all_video_info_merged.tsv\"\nTRACKED_DBS = DATA_DIR / \"tracked\"\n\n# Sanity-check the data location up front so any failure here points at\n# the obvious thing — rather than crashing inside load_roi_data later.\nassert METADATA_TSV.exists(), f\"Metadata TSV not found at {METADATA_TSV}\"\nassert TRACKED_DBS.is_dir(), f\"Tracked-DB directory not found at {TRACKED_DBS}\"\n\n# Pull the in-repo path constants (DATA_RAW, DATA_METADATA, DATA_PROCESSED,\n# FIGURES) from scripts/config.py — single source of truth.\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_RAW, DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
|
@ -28,7 +28,7 @@
|
|||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "# Load tracking data via the unified loader (driven by all_video_info_merged.tsv).\nfrom load_roi_data import load_roi_data\n\ndata = load_roi_data()\ntrained_data = data[data['male'] == 'trained'].copy()\nuntrained_data = data[data['male'] == 'naive'].copy()\n\nprint(f\"all data: {data.shape}\")\nprint(f\"trained: {trained_data.shape}\")\nprint(f\"naive: {untrained_data.shape}\")\n"
|
||||
"source": "# Load the metadata explicitly from METADATA_TSV (no hidden defaults), then\n# hand it to load_roi_data which opens each referenced tracking DB.\nfrom load_roi_data import load_roi_data\n\nmeta = pd.read_csv(METADATA_TSV, sep=\"\\t\")\nprint(f\"metadata rows: {len(meta)}\")\n\ndata = load_roi_data(meta)\ntrained_data = data[data['male'] == 'trained'].copy()\nuntrained_data = data[data['male'] == 'naive'].copy()\n\nprint(f\"all data: {data.shape}\")\nprint(f\"trained: {trained_data.shape}\")\nprint(f\"naive: {untrained_data.shape}\")\n"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "import sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# Two locations to know about:\n# - DATA_DIR : where the project's bulky data lives (mounted into the container)\n# - REPO_ROOT : where the code repository is checked out (your home directory)\n# Path.home() expands to the current user's home, so this works for any\n# user running the container (no hard-coded usernames).\nDATA_DIR = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\n# Pull every other path constant from scripts/config.py so this notebook\n# stays in sync with the rest of the codebase. (Reason: avoids drift when\n# paths change — config.py is the single source of truth.)\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_RAW, DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n\nprint(f\"Data directory: {DATA_DIR}\")\nprint(f\"Repo root: {REPO_ROOT}\")\nprint(f\"Pandas version: {pd.__version__}\")\nprint(f\"NumPy version: {np.__version__}\")\n"
|
||||
"source": "import sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# ─── Where the data lives ────────────────────────────────────────────────\n# DATA_DIR holds everything bulky/regenerable: the metadata TSV and the\n# tracking SQLite DBs. It's mounted into the container at this fixed path.\n# REPO_ROOT is your checkout of the cupido repo, in your home directory.\n# Path.home() expands to /home/<your-username>, so this works for any\n# user (no hard-coded usernames).\nDATA_DIR = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\nMETADATA_TSV = DATA_DIR / \"all_video_info_merged.tsv\"\nTRACKED_DBS = DATA_DIR / \"tracked\"\n\n# Sanity-check the data location up front so any failure here points at\n# the obvious thing — rather than crashing inside load_roi_data later.\nassert METADATA_TSV.exists(), f\"Metadata TSV not found at {METADATA_TSV}\"\nassert TRACKED_DBS.is_dir(), f\"Tracked-DB directory not found at {TRACKED_DBS}\"\n\n# Pull the in-repo path constants (DATA_RAW, DATA_METADATA, DATA_PROCESSED,\n# FIGURES) from scripts/config.py — single source of truth.\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_RAW, DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n\nprint(f\"Data directory: {DATA_DIR}\")\nprint(f\"Repo root: {REPO_ROOT}\")\nprint(f\"Metadata TSV: {METADATA_TSV}\")\nprint(f\"Pandas version: {pd.__version__}\")\nprint(f\"NumPy version: {np.__version__}\")\n"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
|
@ -28,7 +28,7 @@
|
|||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "# Load tracking data via the unified loader (driven by all_video_info_merged.tsv).\nfrom load_roi_data import load_roi_data\n\ndata = load_roi_data()\ntrained_data = data[data['male'] == 'trained'].copy()\nuntrained_data = data[data['male'] == 'naive'].copy()\n\nprint(f\"all data shape: {data.shape}\")\nprint(f\"Trained data: {trained_data.shape}\")\nprint(f\"Naive data: {untrained_data.shape}\")\nprint(f\"Columns: {list(trained_data.columns)}\")\n"
|
||||
"source": "# Load the metadata explicitly from METADATA_TSV (no hidden defaults), then\n# hand it to load_roi_data which opens each referenced tracking DB.\nfrom load_roi_data import load_roi_data\n\nmeta = pd.read_csv(METADATA_TSV, sep=\"\\t\")\nprint(f\"metadata rows: {len(meta)}\")\n\ndata = load_roi_data(meta)\ntrained_data = data[data['male'] == 'trained'].copy()\nuntrained_data = data[data['male'] == 'naive'].copy()\n\nprint(f\"all data shape: {data.shape}\")\nprint(f\"Trained data: {trained_data.shape}\")\nprint(f\"Naive data: {untrained_data.shape}\")\nprint(f\"Columns: {list(trained_data.columns)}\")\n"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue