Simplify path setup in flies_analysis notebooks

Replace the cryptic Path("..").resolve() walk-up with explicit DATA_DIR
and REPO_ROOT constants, then import the rest of the path constants
(DATA_RAW, DATA_METADATA, DATA_PROCESSED, FIGURES) directly from
scripts/config.py — single source of truth, easier to read for students.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-01 08:50:11 +01:00
parent f176224150
commit 5934dce21e
2 changed files with 6 additions and 36 deletions

View file

@ -14,7 +14,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "import pandas as pd\nimport numpy as np\nimport sqlite3\nimport glob\nimport re\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\nfrom pathlib import Path\nimport sys\n\n# Set up paths relative to notebook location\nPROJECT_ROOT = Path(\"..\").resolve()\nDATA_RAW = PROJECT_ROOT / \"data\" / \"raw\"\nDATA_METADATA = PROJECT_ROOT / \"data\" / \"metadata\"\nDATA_PROCESSED = PROJECT_ROOT / \"data\" / \"processed\"\nFIGURES = PROJECT_ROOT / \"figures\"\n\nsys.path.insert(0, str(PROJECT_ROOT / \"scripts\"))\n\n# Set plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")"
"source": "import os\nimport sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport sqlite3\nimport glob\nimport re\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# Two locations to know about:\n# - DATA_DIR : where the project's bulky data lives (data volume)\n# - REPO_ROOT : where the code repository lives (this notebook is inside it)\n# The REPO_ROOT default is one workstation's checkout. On another machine,\n# set the CUPIDO_REPO_ROOT environment variable before starting Jupyter\n# (or edit the default) so the notebook can find scripts/.\nDATA_DIR = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path(os.environ.get(\n    \"CUPIDO_REPO_ROOT\",\n    \"/home/gg/ownCloud/Work/Projects/coding/cupido/tracking\",\n))\n\n# Pull every other path constant from scripts/config.py so this notebook\n# stays in sync with the rest of the codebase. (Reason: avoids drift when\n# paths change — config.py is the single source of truth.)\n# NOTE(review): DATA_DIR above is not handed to config.py here; confirm it\n# matches the data location config.py resolves.\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_RAW, DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n"
},
{
"cell_type": "markdown",
@ -28,22 +28,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load tracking data via the unified loader (driven by all_video_info_merged.tsv).\n",
"# Reason: replaces the old data/raw + 2025_07_15_metadata_fixed.csv path with\n",
"# the TSV-based loader that covers the entire batch (2025-07-15 + 2024).\n",
"sys.path.insert(0, str(PROJECT_ROOT / 'scripts'))\n",
"from load_roi_data import load_roi_data\n",
"\n",
"data = load_roi_data()\n",
"# Backwards-compat slices for the rest of the notebook.\n",
"trained_data = data[data['male'] == 'trained'].copy()\n",
"untrained_data = data[data['male'] == 'naive'].copy()\n",
"\n",
"print(f\"all data: {data.shape}\")\n",
"print(f\"trained: {trained_data.shape}\")\n",
"print(f\"naive: {untrained_data.shape}\")\n"
]
"source": "# Load tracking data via the unified loader (driven by all_video_info_merged.tsv).\nfrom load_roi_data import load_roi_data\n\ndata = load_roi_data()\n\n# Independent copies of the rows whose 'male' column is 'trained' / 'naive'.\ntrained_data = data.loc[data['male'].eq('trained')].copy()\nuntrained_data = data.loc[data['male'].eq('naive')].copy()\n\nprint(f\"all data: {data.shape}\")\nprint(f\"trained: {trained_data.shape}\")\nprint(f\"naive: {untrained_data.shape}\")\n"
},
{
"cell_type": "markdown",
@ -234,4 +219,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

View file

@ -14,7 +14,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\nfrom pathlib import Path\nimport sys\nimport os\n\n# Set up paths relative to notebook location\nPROJECT_ROOT = Path(\"..\").resolve()\nDATA_RAW = PROJECT_ROOT / \"data\" / \"raw\"\nDATA_METADATA = PROJECT_ROOT / \"data\" / \"metadata\"\nDATA_PROCESSED = PROJECT_ROOT / \"data\" / \"processed\"\nFIGURES = PROJECT_ROOT / \"figures\"\n\n# Add scripts to path for imports\nsys.path.insert(0, str(PROJECT_ROOT / \"scripts\"))\n\n# Set plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n\nprint(f\"Project root: {PROJECT_ROOT}\")\nprint(f\"Pandas version: {pd.__version__}\")\nprint(f\"NumPy version: {np.__version__}\")"
"source": "import os\nimport sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# Two locations to know about:\n# - DATA_DIR : where the project's bulky data lives (data volume)\n# - REPO_ROOT : where the code repository lives (this notebook is inside it)\n# The REPO_ROOT default is one workstation's checkout. On another machine,\n# set the CUPIDO_REPO_ROOT environment variable before starting Jupyter\n# (or edit the default) so the notebook can find scripts/.\nDATA_DIR = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path(os.environ.get(\n    \"CUPIDO_REPO_ROOT\",\n    \"/home/gg/ownCloud/Work/Projects/coding/cupido/tracking\",\n))\n\n# Pull every other path constant from scripts/config.py so this notebook\n# stays in sync with the rest of the codebase. (Reason: avoids drift when\n# paths change — config.py is the single source of truth.)\n# NOTE(review): DATA_DIR above is not handed to config.py here; confirm it\n# matches the data location config.py resolves.\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_RAW, DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n\nprint(f\"Data directory: {DATA_DIR}\")\nprint(f\"Repo root: {REPO_ROOT}\")\nprint(f\"Pandas version: {pd.__version__}\")\nprint(f\"NumPy version: {np.__version__}\")\n"
},
{
"cell_type": "markdown",
@ -28,22 +28,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load tracking data via the unified loader (driven by all_video_info_merged.tsv).\n",
"# Reason: replaces reads of trained_roi_data.csv / untrained_roi_data.csv with\n",
"# the live loader so the notebook always sees the current batch.\n",
"sys.path.insert(0, str(PROJECT_ROOT / 'scripts'))\n",
"from load_roi_data import load_roi_data\n",
"\n",
"data = load_roi_data()\n",
"trained_data = data[data['male'] == 'trained'].copy()\n",
"untrained_data = data[data['male'] == 'naive'].copy()\n",
"\n",
"print(f\"all data shape: {data.shape}\")\n",
"print(f\"Trained data: {trained_data.shape}\")\n",
"print(f\"Naive data: {untrained_data.shape}\")\n",
"print(f\"Columns: {list(trained_data.columns)}\")\n"
]
"source": "# Load tracking data via the unified loader (driven by all_video_info_merged.tsv).\nfrom load_roi_data import load_roi_data\n\ndata = load_roi_data()\n\n# Independent copies of the rows whose 'male' column is 'trained' / 'naive'.\ntrained_data = data.loc[data['male'].eq('trained')].copy()\nuntrained_data = data.loc[data['male'].eq('naive')].copy()\n\nprint(f\"all data shape: {data.shape}\")\nprint(f\"Trained data: {trained_data.shape}\")\nprint(f\"Naive data: {untrained_data.shape}\")\nprint(f\"Columns: {list(trained_data.columns)}\")\n"
},
{
"cell_type": "markdown",
@ -433,4 +418,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}