Move metadata xlsx/TSV to /mnt/data/projects/cupido/
Consolidates everything bulky (tracking DBs, target-point JSONs, metadata spreadsheet) under a single DATA_VOLUME root outside the ownCloud-synced repo. Notebooks now use a visible DATA_DIR = Path(...) idiom rather than walking up the filesystem with PROJECT_ROOT.parent, which is easier for students with no Python background to follow.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
ec56e51bf9
commit
f176224150
8 changed files with 102 additions and 160 deletions
|
|
@ -2,21 +2,26 @@
|
|||
|
||||
from pathlib import Path
|
||||
|
||||
# Where this code repository lives (the directory containing scripts/, notebooks/, ...).
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||
DATA_RAW = PROJECT_ROOT / "data" / "raw"
|
||||
DATA_METADATA = PROJECT_ROOT / "data" / "metadata"
|
||||
DATA_PROCESSED = PROJECT_ROOT / "data" / "processed"
|
||||
FIGURES = PROJECT_ROOT / "figures"
|
||||
|
||||
# Offline-tracking pipeline paths
|
||||
VIDEOS_ROOT = Path("/mnt/ethoscope_data/videos")
|
||||
VIDEO_INFO_XLSX = PROJECT_ROOT.parent / "all_video_info_merged.xlsx"
|
||||
INVENTORY_CSV = DATA_METADATA / "video_inventory.csv"
|
||||
# Reason: kept on the local data volume alongside the tracking DBs (out of
|
||||
# ownCloud sync). See TRACKING_OUTPUT_DIR comment below.
|
||||
TARGETS_DIR = Path("/mnt/data/projects/cupido/targets")
|
||||
# Reason: tracking DBs are large binary files that don't belong in
|
||||
# ownCloud-synced storage (sync conflicts + bandwidth). They live on the
|
||||
# local data volume instead. Regenerable from videos + target JSONs.
|
||||
TRACKING_OUTPUT_DIR = Path("/mnt/data/projects/cupido/tracked")
|
||||
LOGS_DIR = PROJECT_ROOT / "data" / "logs"
|
||||
|
||||
# Where the source videos live (read-only NFS mount).
|
||||
VIDEOS_ROOT = Path("/mnt/ethoscope_data/videos")
|
||||
|
||||
# Where the project's bulky data lives — outside the ownCloud-synced repo so
|
||||
# it doesn't churn the cloud sync. This single root holds everything that's
|
||||
# big or regenerable: tracking DBs, target-point JSONs, and the metadata
|
||||
# spreadsheet (xlsx + TSV).
|
||||
DATA_VOLUME = Path("/mnt/data/projects/cupido")
|
||||
TARGETS_DIR = DATA_VOLUME / "targets"
|
||||
TRACKING_OUTPUT_DIR = DATA_VOLUME / "tracked"
|
||||
VIDEO_INFO_XLSX = DATA_VOLUME / "all_video_info_merged.xlsx"
|
||||
VIDEO_INFO_TSV = DATA_VOLUME / "all_video_info_merged.tsv"
|
||||
|
||||
# A small CSV listing every video file we know about (built locally).
|
||||
INVENTORY_CSV = DATA_METADATA / "video_inventory.csv"
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ from pathlib import Path
|
|||
|
||||
import pandas as pd
|
||||
|
||||
from config import INVENTORY_CSV, TRACKING_OUTPUT_DIR, VIDEO_INFO_XLSX
|
||||
from config import INVENTORY_CSV, TRACKING_OUTPUT_DIR, VIDEO_INFO_TSV, VIDEO_INFO_XLSX
|
||||
|
||||
|
||||
_TIME_RE = re.compile(r"^(\d{8})_(\d{1,2})(\d{2})?(AM|PM)$", re.IGNORECASE)
|
||||
|
|
@ -138,7 +138,7 @@ def main() -> None:
|
|||
parser.add_argument(
|
||||
"--out",
|
||||
type=Path,
|
||||
default=VIDEO_INFO_XLSX.with_suffix(".tsv"),
|
||||
default=VIDEO_INFO_TSV,
|
||||
help="output TSV path (default: alongside the xlsx)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ from pathlib import Path
|
|||
|
||||
import pandas as pd
|
||||
|
||||
from config import VIDEO_INFO_XLSX
|
||||
from config import VIDEO_INFO_TSV
|
||||
|
||||
|
||||
# Metadata columns to copy onto every tracking sample. These are the xlsx
|
||||
|
|
@ -68,7 +68,7 @@ def load_roi_data(meta: pd.DataFrame | None = None) -> pd.DataFrame:
|
|||
sample. Empty if nothing could be loaded.
|
||||
"""
|
||||
if meta is None:
|
||||
meta = pd.read_csv(VIDEO_INFO_XLSX.with_suffix(".tsv"), sep="\t")
|
||||
meta = pd.read_csv(VIDEO_INFO_TSV, sep="\t")
|
||||
|
||||
db_cache: dict = {}
|
||||
chunks: list[pd.DataFrame] = []
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue