diff --git a/.gitignore b/.gitignore
index b094370..21d3991 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,9 @@ data/processed/*.csv
 
 # Tracking DBs and target JSONs live outside the repo at /mnt/data/projects/cupido/
 data/metadata/video_inventory.csv
+# Per-user writable copy of the metadata TSV (see notebook setup cells).
+# The shared master is at /mnt/data/projects/cupido/all_video_info_merged.tsv.
+data/metadata/all_video_info_merged.tsv
 data/logs/*.log
 
 # Generated figures (reproducible from scripts)
diff --git a/data/processed/README.md b/data/processed/README.md
index 75d359a..dbe1b52 100644
--- a/data/processed/README.md
+++ b/data/processed/README.md
@@ -42,13 +42,14 @@ without deleting the row. `load_roi_data` honors this flag automatically.
 
 The shared TSV at `/mnt/data/projects/cupido/all_video_info_merged.tsv`
 is **read-only** (the data volume is mounted `:ro` in the container) so
-each user keeps their own edits in a personal copy at
-`~/cupido_metadata.tsv`. Notebooks pick up that personal copy
-automatically if it exists; otherwise they fall back to the shared
-master. To start your personal copy, run once in a terminal:
+each user keeps their own edits in a personal copy at
+`data/metadata/all_video_info_merged.tsv` inside the repo. That file is
+gitignored. Notebooks pick up the personal copy automatically if it
+exists; otherwise they fall back to the shared master. To start your
+personal copy, run once in a terminal:
 
 ```bash
-cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido_metadata.tsv
+cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido/data/metadata/
 ```
 
 ## Column Reference (`distances.csv`)
diff --git a/notebooks/flies_analysis.ipynb b/notebooks/flies_analysis.ipynb
index 882f499..64f9462 100644
--- a/notebooks/flies_analysis.ipynb
+++ b/notebooks/flies_analysis.ipynb
@@ -14,7 +14,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "import sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport sqlite3\nimport glob\nimport re\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# ─── Where the data lives ────────────────────────────────────────────────\n# DATA_DIR holds everything bulky/regenerable: the metadata TSV and the\n# tracking SQLite DBs. It's mounted into the container at this fixed path.\n# REPO_ROOT is your checkout of the cupido repo, in your home directory.\n# Path.home() expands to /home/<your-username>, so this works for any\n# user (no hard-coded usernames).\nDATA_DIR  = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\nTRACKED_DBS = DATA_DIR / \"tracked\"\n\n# ─── The metadata TSV — shared master vs. your personal copy ─────────────\n# DATA_DIR is mounted read-only inside the container, so the shared TSV\n# at SHARED_TSV cannot be edited. Fine for read-only analysis. But if\n# you want to flip `include` flags (or otherwise customize the metadata\n# for your own analysis), copy it to your home folder ONCE:\n#\n#     $ cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido_metadata.tsv\n#\n# After that, the auto-select line below will pick up your personal copy\n# automatically. 
Other users are unaffected.\nSHARED_TSV   = DATA_DIR / \"all_video_info_merged.tsv\"\nPERSONAL_TSV = Path.home() / \"cupido_metadata.tsv\"\nMETADATA_TSV = PERSONAL_TSV if PERSONAL_TSV.exists() else SHARED_TSV\n\n# Sanity-check the data location up front so any failure here points at\n# the obvious thing — rather than crashing inside load_roi_data later.\nassert METADATA_TSV.exists(), f\"Metadata TSV not found at {METADATA_TSV}\"\nassert TRACKED_DBS.is_dir(),  f\"Tracked-DB directory not found at {TRACKED_DBS}\"\n\n# Pull the in-repo path constants (DATA_METADATA, DATA_PROCESSED, FIGURES)\n# from scripts/config.py — single source of truth.\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n"
+   "source": "import sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport sqlite3\nimport glob\nimport re\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# ─── Where the data lives ────────────────────────────────────────────────\n# DATA_DIR holds everything bulky/regenerable: the metadata TSV and the\n# tracking SQLite DBs. It's mounted into the container at this fixed path.\n# REPO_ROOT is your checkout of the cupido repo, in your home directory.\n# Path.home() expands to /home/<your-username>, so this works for any\n# user (no hard-coded usernames).\nDATA_DIR  = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\nTRACKED_DBS = DATA_DIR / \"tracked\"\n\n# ─── The metadata TSV — shared master vs. your personal copy ─────────────\n# DATA_DIR is mounted read-only inside the container, so the shared TSV\n# at SHARED_TSV cannot be edited. Fine for read-only analysis. But if\n# you want to flip `include` flags (or otherwise customize the metadata\n# for your own analysis), copy it to your repo's data/metadata/ ONCE:\n#\n#     $ cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido/data/metadata/\n#\n# That location is gitignored, so your edits won't pollute the repo and\n# other users are unaffected. 
The auto-select line below picks up your\n# personal copy automatically once it's there.\nSHARED_TSV   = DATA_DIR / \"all_video_info_merged.tsv\"\nPERSONAL_TSV = REPO_ROOT / \"data\" / \"metadata\" / \"all_video_info_merged.tsv\"\nMETADATA_TSV = PERSONAL_TSV if PERSONAL_TSV.exists() else SHARED_TSV\n\n# Sanity-check the data location up front so any failure here points at\n# the obvious thing — rather than crashing inside load_roi_data later.\nassert METADATA_TSV.exists(), f\"Metadata TSV not found at {METADATA_TSV}\"\nassert TRACKED_DBS.is_dir(),  f\"Tracked-DB directory not found at {TRACKED_DBS}\"\n\n# Pull the in-repo path constants (DATA_METADATA, DATA_PROCESSED, FIGURES)\n# from scripts/config.py — single source of truth.\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n"
   },
   {
    "cell_type": "markdown",
diff --git a/notebooks/flies_analysis_simple.ipynb b/notebooks/flies_analysis_simple.ipynb
index 6bd7918..7a39272 100644
--- a/notebooks/flies_analysis_simple.ipynb
+++ b/notebooks/flies_analysis_simple.ipynb
@@ -10,7 +10,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "import sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# ─── Where the data lives ────────────────────────────────────────────────\n# DATA_DIR holds everything bulky/regenerable: the metadata TSV and the\n# tracking SQLite DBs. It's mounted into the container at this fixed path.\n# REPO_ROOT is your checkout of the cupido repo, in your home directory.\n# Path.home() expands to /home/<your-username>, so this works for any\n# user (no hard-coded usernames).\nDATA_DIR  = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\nTRACKED_DBS = DATA_DIR / \"tracked\"\n\n# ─── The metadata TSV — shared master vs. your personal copy ─────────────\n# DATA_DIR is mounted read-only inside the container, so the shared TSV\n# at SHARED_TSV cannot be edited. Fine for read-only analysis. But if\n# you want to flip `include` flags (or otherwise customize the metadata\n# for your own analysis), copy it to your home folder ONCE:\n#\n#     $ cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido_metadata.tsv\n#\n# After that, the auto-select line below will pick up your personal copy\n# automatically. 
Other users are unaffected.\nSHARED_TSV   = DATA_DIR / \"all_video_info_merged.tsv\"\nPERSONAL_TSV = Path.home() / \"cupido_metadata.tsv\"\nMETADATA_TSV = PERSONAL_TSV if PERSONAL_TSV.exists() else SHARED_TSV\n\n# Sanity-check the data location up front so any failure here points at\n# the obvious thing — rather than crashing inside load_roi_data later.\nassert METADATA_TSV.exists(), f\"Metadata TSV not found at {METADATA_TSV}\"\nassert TRACKED_DBS.is_dir(),  f\"Tracked-DB directory not found at {TRACKED_DBS}\"\n\n# Pull the in-repo path constants (DATA_METADATA, DATA_PROCESSED, FIGURES)\n# from scripts/config.py — single source of truth.\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n\nprint(f\"Data directory: {DATA_DIR}\")\nprint(f\"Repo root:      {REPO_ROOT}\")\nprint(f\"Metadata TSV:   {METADATA_TSV}  ({'personal' if METADATA_TSV == PERSONAL_TSV else 'shared (read-only)'})\")\nprint(f\"Pandas version: {pd.__version__}\")\nprint(f\"NumPy version:  {np.__version__}\")\n"
+   "source": "import sys\nfrom pathlib import Path\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.spatial.distance import euclidean\nfrom scipy import stats\n\n# ─── Where the data lives ────────────────────────────────────────────────\n# DATA_DIR holds everything bulky/regenerable: the metadata TSV and the\n# tracking SQLite DBs. It's mounted into the container at this fixed path.\n# REPO_ROOT is your checkout of the cupido repo, in your home directory.\n# Path.home() expands to /home/<your-username>, so this works for any\n# user (no hard-coded usernames).\nDATA_DIR  = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\nTRACKED_DBS = DATA_DIR / \"tracked\"\n\n# ─── The metadata TSV — shared master vs. your personal copy ─────────────\n# DATA_DIR is mounted read-only inside the container, so the shared TSV\n# at SHARED_TSV cannot be edited. Fine for read-only analysis. But if\n# you want to flip `include` flags (or otherwise customize the metadata\n# for your own analysis), copy it to your repo's data/metadata/ ONCE:\n#\n#     $ cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido/data/metadata/\n#\n# That location is gitignored, so your edits won't pollute the repo and\n# other users are unaffected. 
The auto-select line below picks up your\n# personal copy automatically once it's there.\nSHARED_TSV   = DATA_DIR / \"all_video_info_merged.tsv\"\nPERSONAL_TSV = REPO_ROOT / \"data\" / \"metadata\" / \"all_video_info_merged.tsv\"\nMETADATA_TSV = PERSONAL_TSV if PERSONAL_TSV.exists() else SHARED_TSV\n\n# Sanity-check the data location up front so any failure here points at\n# the obvious thing — rather than crashing inside load_roi_data later.\nassert METADATA_TSV.exists(), f\"Metadata TSV not found at {METADATA_TSV}\"\nassert TRACKED_DBS.is_dir(),  f\"Tracked-DB directory not found at {TRACKED_DBS}\"\n\n# Pull the in-repo path constants (DATA_METADATA, DATA_PROCESSED, FIGURES)\n# from scripts/config.py — single source of truth.\nsys.path.insert(0, str(REPO_ROOT / \"scripts\"))\nfrom config import DATA_METADATA, DATA_PROCESSED, FIGURES\n\n# Plotting style\nplt.style.use('seaborn-v0_8')\nsns.set_palette(\"husl\")\n\nprint(f\"Data directory: {DATA_DIR}\")\nprint(f\"Repo root:      {REPO_ROOT}\")\nprint(f\"Metadata TSV:   {METADATA_TSV}  ({'personal' if METADATA_TSV == PERSONAL_TSV else 'shared (read-only)'})\")\nprint(f\"Pandas version: {pd.__version__}\")\nprint(f\"NumPy version:  {np.__version__}\")\n"
   },
   {
    "cell_type": "markdown",
diff --git a/notebooks/getting_started/00_welcome.ipynb b/notebooks/getting_started/00_welcome.ipynb
index ebf62aa..db1e120 100644
--- a/notebooks/getting_started/00_welcome.ipynb
+++ b/notebooks/getting_started/00_welcome.ipynb
@@ -161,7 +161,7 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": "You should see roughly 113 tracking DBs and 130 target JSONs. If those\nnumbers are zero, the storage volume isn't mounted — ask Giorgio.\n\n> **Note**: the data volume is **read-only** inside the JupyterLab\n> container. You can read everything but not modify or delete it. That's\n> a deliberate safety measure — we don't want analysis code accidentally\n> corrupting the source data.\n\n### Personalising the metadata TSV\n\nBecause the volume is read-only, the shared metadata file\n`all_video_info_merged.tsv` cannot be edited in place. If you want to\nmark a row as \"skip this fly\" — e.g. by flipping its `include` column to\n`False` because the video is too noisy — copy the file to your home\nfolder **once**:\n\n```bash\ncp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido_metadata.tsv\n```\n\nThe notebooks check for `~/cupido_metadata.tsv` first and fall back to\nthe shared master if your personal copy doesn't exist. Each user keeps\ntheir own edits; nobody steps on anyone else's analysis.\n"
+   "source": "You should see roughly 113 tracking DBs and 130 target JSONs. If those\nnumbers are zero, the storage volume isn't mounted — ask Giorgio.\n\n> **Note**: the data volume is **read-only** inside the JupyterLab\n> container. You can read everything but not modify or delete it. That's\n> a deliberate safety measure — we don't want analysis code accidentally\n> corrupting the source data.\n\n### Personalising the metadata TSV\n\nBecause the volume is read-only, the shared metadata file\n`all_video_info_merged.tsv` cannot be edited in place. If you want to\nmark a row as \"skip this fly\" — e.g. by flipping its `include` column to\n`False` because the video is too noisy — copy the file into the repo's\n`data/metadata/` folder **once**:\n\n```bash\ncp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido/data/metadata/\n```\n\nThat location is gitignored, so your edits stay local. The notebooks\ncheck for `~/cupido/data/metadata/all_video_info_merged.tsv` first and\nfall back to the shared master if your personal copy doesn't exist.\nEach user keeps their own edits; nobody steps on anyone else's analysis.\n"
   },
   {
    "cell_type": "markdown",
diff --git a/notebooks/getting_started/01_python_pandas_basics.ipynb b/notebooks/getting_started/01_python_pandas_basics.ipynb
index 0922b47..5a5362e 100644
--- a/notebooks/getting_started/01_python_pandas_basics.ipynb
+++ b/notebooks/getting_started/01_python_pandas_basics.ipynb
@@ -257,7 +257,7 @@
    "metadata": {},
    "execution_count": null,
    "outputs": [],
-   "source": "import pandas as pd\nfrom pathlib import Path\n\n# All the project's bulky data lives under /mnt/data/projects/cupido/.\n# Defining one DATA_DIR variable and building sub-paths from it is much\n# easier to read (and to update) than hard-coding long strings everywhere.\nDATA_DIR = Path(\"/mnt/data/projects/cupido\")\n\n# Pick the metadata TSV: prefer your personal copy if you have one,\n# otherwise fall back to the shared (read-only) master. To make a\n# personal copy you can edit, run ONCE in a terminal:\n#     cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido_metadata.tsv\nSHARED_TSV   = DATA_DIR / \"all_video_info_merged.tsv\"\nPERSONAL_TSV = Path.home() / \"cupido_metadata.tsv\"\ntsv_path     = PERSONAL_TSV if PERSONAL_TSV.exists() else SHARED_TSV\n\n# Read the project's metadata TSV (Tab-Separated Values).\ndf = pd.read_csv(tsv_path, sep=\"\\t\")\n\n# How big is it?\nprint(f\"Reading from: {tsv_path}\")\nprint(f\"Rows: {len(df)}\")\nprint(f\"Columns: {df.shape[1]}\")\n"
+   "source": "import pandas as pd\nfrom pathlib import Path\n\n# Two locations to know about:\n#   - DATA_DIR  : where the project's bulky data lives (mounted read-only)\n#   - REPO_ROOT : where the code repo is checked out (~/cupido, in your home directory)\nDATA_DIR  = Path(\"/mnt/data/projects/cupido\")\nREPO_ROOT = Path.home() / \"cupido\"\n\n# Pick the metadata TSV: prefer your personal copy (in the repo's\n# data/metadata/ folder, gitignored) if you have one, otherwise fall\n# back to the shared (read-only) master on the data volume. To make a\n# personal copy you can edit, run ONCE in a terminal:\n#     cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido/data/metadata/\nSHARED_TSV   = DATA_DIR / \"all_video_info_merged.tsv\"\nPERSONAL_TSV = REPO_ROOT / \"data\" / \"metadata\" / \"all_video_info_merged.tsv\"\ntsv_path     = PERSONAL_TSV if PERSONAL_TSV.exists() else SHARED_TSV\n\n# Read the project's metadata TSV (Tab-Separated Values).\ndf = pd.read_csv(tsv_path, sep=\"\\t\")\n\n# How big is it?\nprint(f\"Reading from: {tsv_path}\")\nprint(f\"Rows: {len(df)}\")\nprint(f\"Columns: {df.shape[1]}\")\n"
   },
   {
    "cell_type": "markdown",
diff --git a/notebooks/getting_started/03_compare_trained_vs_naive.ipynb b/notebooks/getting_started/03_compare_trained_vs_naive.ipynb
index abe72be..7d06d60 100644
--- a/notebooks/getting_started/03_compare_trained_vs_naive.ipynb
+++ b/notebooks/getting_started/03_compare_trained_vs_naive.ipynb
@@ -66,7 +66,7 @@
    "metadata": {},
    "execution_count": null,
    "outputs": [],
-   "source": "# Pick the metadata TSV: prefer your personal copy if you have one,\n# otherwise fall back to the shared (read-only) master.\n#\n# To make a personal copy that you can edit (e.g. flip `include` flags\n# for noisy rows), run this ONCE in a terminal:\n#     cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido_metadata.tsv\nSHARED_TSV   = DATA_DIR / \"all_video_info_merged.tsv\"\nPERSONAL_TSV = Path.home() / \"cupido_metadata.tsv\"\ntsv_path     = PERSONAL_TSV if PERSONAL_TSV.exists() else SHARED_TSV\n\n# Load the metadata TSV first — it's small and fast.\nmeta = pd.read_csv(tsv_path, sep=\"\\t\")\nprint(f\"loaded {tsv_path}  ({'personal' if tsv_path == PERSONAL_TSV else 'shared (read-only)'})\")\nprint(f\"metadata rows: {len(meta)}\")\n"
+   "source": "# Pick the metadata TSV: prefer your personal copy (a writable copy in\n# your repo's data/metadata/ folder) if you have one, otherwise fall\n# back to the shared (read-only) master on the data volume.\n#\n# To make a personal copy that you can edit (e.g. flip `include` flags\n# for noisy rows), run this ONCE in a terminal:\n#     cp /mnt/data/projects/cupido/all_video_info_merged.tsv ~/cupido/data/metadata/\nSHARED_TSV   = DATA_DIR / \"all_video_info_merged.tsv\"\nPERSONAL_TSV = REPO_ROOT / \"data\" / \"metadata\" / \"all_video_info_merged.tsv\"\ntsv_path     = PERSONAL_TSV if PERSONAL_TSV.exists() else SHARED_TSV\n\n# Load the metadata TSV first — it's small and fast.\nmeta = pd.read_csv(tsv_path, sep=\"\\t\")\nprint(f\"loaded {tsv_path}  ({'personal' if tsv_path == PERSONAL_TSV else 'shared (read-only)'})\")\nprint(f\"metadata rows: {len(meta)}\")\n"
   },
   {
    "cell_type": "markdown",