"""Live progress + ETA for the offline tracker batch.

Counts ground-truth (DBs on disk) rather than parsing log lines, so it works
whether the batch is running fresh or was resumed after a crash. Errors are
parsed out of the most recently modified *.log file in data/logs/.

Usage:
    python monitor_tracking.py              # one snapshot, exit
    python monitor_tracking.py --watch      # refresh every 10 s
    python monitor_tracking.py --watch 30   # refresh every 30 s
"""

from __future__ import annotations

import argparse
import json
import re
import time
from collections import deque
from datetime import datetime, timedelta
from pathlib import Path

from config import LOGS_DIR, TARGETS_DIR, TRACKING_OUTPUT_DIR

# A colon, optional whitespace, then the word "error" (case-sensitive).
_ERROR_AFTER_COLON = re.compile(r":\s*error\b")


def count_target_jsons() -> tuple[int, int, list[str]]:
    """Return (n_pickable, n_unusable, unusable_video_stems).

    Every ``*.json`` file in ``TARGETS_DIR`` is inspected. A file with a
    truthy ``"unusable"`` key counts as unusable; otherwise a truthy
    ``"reference_points"`` key makes it pickable. Files that cannot be read,
    do not parse as JSON, or whose top-level value is not an object are
    skipped without being counted.
    """
    pickable = 0
    unusable_stems: list[str] = []
    for path in TARGETS_DIR.glob("*.json"):
        try:
            # Passing bytes lets json detect UTF-8/16/32 (including a BOM)
            # instead of depending on the locale's default text encoding.
            data = json.loads(path.read_bytes())
        except (OSError, ValueError, RecursionError):
            continue
        if not isinstance(data, dict):
            # Valid JSON but not an object (e.g. a list): nothing to look up.
            continue
        if data.get("unusable"):
            unusable_stems.append(path.stem)
        elif data.get("reference_points"):
            pickable += 1
    return pickable, len(unusable_stems), unusable_stems


def _stat_tracking_dbs() -> list[tuple[float, Path]]:
    """Return (mtime, path) for every ``*_tracking.db`` that can be stat'ed."""
    stamped: list[tuple[float, Path]] = []
    for db in TRACKING_OUTPUT_DIR.glob("*_tracking.db"):
        try:
            stamped.append((db.stat().st_mtime, db))
        except OSError:
            # The file disappeared (or became unreadable) between the glob
            # and the stat; leave it out rather than crash the monitor.
            continue
    return stamped


def count_tracked_dbs() -> tuple[int, datetime | None, str | None]:
    """Return (n_dbs, mtime_of_newest, name_of_newest).

    The name is the newest file's stem. When there are no tracking DBs the
    result is ``(0, None, None)``.
    """
    stamped = _stat_tracking_dbs()
    if not stamped:
        return 0, None, None
    newest_mtime, newest = max(stamped, key=lambda pair: pair[0])
    return len(stamped), datetime.fromtimestamp(newest_mtime), newest.stem


def parse_recent_errors(log_dir: Path, tail_lines: int = 5000) -> list[str]:
    """Scan the most recent *.log file for lines reporting errors.

    Args:
        log_dir: Directory searched (non-recursively) for ``*.log`` files.
        tail_lines: How many lines from the end of the newest log to inspect.
            Zero or a negative value inspects nothing.

    Returns:
        The matching lines in file order, with trailing whitespace removed.
        Empty when the directory is missing, holds no logs, or the newest log
        cannot be opened.
    """
    if tail_lines <= 0 or not log_dir.is_dir():
        return []

    stamped: list[tuple[float, Path]] = []
    for log in log_dir.glob("*.log"):
        try:
            stamped.append((log.stat().st_mtime, log))
        except OSError:
            continue  # rotated away between the glob and the stat
    if not stamped:
        return []
    latest = max(stamped, key=lambda pair: pair[0])[1]

    try:
        # errors="replace" keeps a single undecodable byte from discarding
        # the whole scan; the bounded deque keeps only the tail in memory.
        with latest.open(encoding="utf-8", errors="replace") as f:
            tail = deque(f, maxlen=tail_lines)
    except OSError:
        return []

    return [
        line.rstrip()
        for line in tail
        if _ERROR_AFTER_COLON.search(line) or " error: " in line.lower()
    ]


def db_completion_history() -> list[float]:
    """Return mtimes of all tracking DBs, sorted ascending. Used for rate."""
    return sorted(mtime for mtime, _ in _stat_tracking_dbs())


def fmt_duration(seconds: float) -> str:
    """Format a duration as ``"N s"``, ``"N min"`` or ``"H h M min"``.

    Negative input is treated as zero so a timestamp slightly in the future
    does not render as a negative duration.
    """
    seconds = max(0.0, seconds)
    if seconds < 60:
        return f"{int(seconds)} s"
    if seconds < 3600:
        return f"{int(seconds // 60)} min"
    h = int(seconds // 3600)
    m = int((seconds % 3600) // 60)
    return f"{h} h {m} min"


def _truncate(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, with "..." only if it was cut."""
    return text if len(text) <= limit else f"{text[:limit]}..."


def snapshot() -> str:
    """Build the multi-line progress report as a single string.

    The report lists target-JSON counts, DBs on disk versus pickable targets,
    the number of error lines in the newest log, a completion rate and ETA
    derived from DB modification times, the newest DB, and up to the last
    five error lines.
    """
    pickable, unusable, _ = count_target_jsons()
    tracked, last_mtime, last_name = count_tracked_dbs()
    history = db_completion_history()
    errors = parse_recent_errors(LOGS_DIR)

    lines = [f"tracking progress @ {datetime.now():%Y-%m-%d %H:%M:%S}"]
    lines.append(f" pickable JSONs: {pickable}")
    lines.append(f" unusable JSONs: {unusable} (skipped by tracker)")
    pct = (tracked / pickable * 100) if pickable else 0
    lines.append(f" DBs on disk: {tracked} / {pickable} ({pct:.0f}%)")
    lines.append(f" errors in log: {len(errors)}")

    # Rate from the last 10 completions, when available.
    if len(history) >= 2:
        window = history[-10:]
        span = window[-1] - window[0]
        if span > 0:
            # N timestamps bound N - 1 completion intervals.
            intervals = len(window) - 1
            rate_per_hour = intervals / span * 3600
            lines.append(
                f" rate (last {intervals}): {rate_per_hour:.1f} videos/hour"
            )
            remaining = max(0, pickable - tracked)
            if rate_per_hour > 0 and remaining > 0:
                eta_sec = remaining * 3600 / rate_per_hour
                eta_at = datetime.now() + timedelta(seconds=eta_sec)
                lines.append(
                    f" ETA remaining: {fmt_duration(eta_sec)} "
                    f"(done by {eta_at:%H:%M %a})"
                )

    if last_mtime is not None and last_name is not None:
        ago = (datetime.now() - last_mtime).total_seconds()
        lines.append(
            f" most recent DB: {_truncate(last_name, 60)} "
            f"({fmt_duration(ago)} ago)"
        )

    if errors:
        lines.append("")
        lines.append(f" recent errors ({min(5, len(errors))} of {len(errors)}):")
        for e in errors[-5:]:
            lines.append(f" {e[:120]}")

    return "\n".join(lines)


def _refresh_seconds(value: str) -> int:
    """Parse the ``--watch`` interval, requiring a whole number >= 1."""
    try:
        seconds = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid int value: {value!r}"
        ) from None
    if seconds < 1:
        # 0 would redraw in a busy loop; a negative value makes time.sleep raise.
        raise argparse.ArgumentTypeError("refresh interval must be at least 1 second")
    return seconds


def main() -> None:
    """Print one snapshot, or redraw it periodically when ``--watch`` is given."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the usage examples in the module docstring laid out as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--watch",
        nargs="?",
        type=_refresh_seconds,
        const=10,
        default=None,
        help="refresh every N seconds (default 10 if flag given without value)",
    )
    args = parser.parse_args()

    if args.watch is None:
        print(snapshot())
        return

    try:
        while True:
            # Clear screen and reprint
            print("\033[2J\033[H", end="")
            print(snapshot())
            print(f"\n(refreshing every {args.watch}s — Ctrl-C to exit)")
            time.sleep(args.watch)
    except KeyboardInterrupt:
        print()


if __name__ == "__main__":
    main()