Initial commit: organized project structure for student handoff
Reorganized flat 41-file directory into structured layout with: - scripts/ for Python analysis code with shared config.py - notebooks/ for Jupyter analysis notebooks - data/ split into raw/, metadata/, processed/ - docs/ with analysis summary, experimental design, and bimodal hypothesis tutorial - tasks/ with todo checklist and lessons learned - Comprehensive README, PLANNING.md, and .gitignore Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
e7e4db264d
27 changed files with 3105 additions and 0 deletions
90
scripts/statistical_tests.py
Normal file
90
scripts/statistical_tests.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
|
||||
from config import DATA_PROCESSED, DATA_METADATA
|
||||
|
||||
# Read the two processed distance tables and the barrier-opening metadata.
trained_distances, untrained_distances = (
    pd.read_csv(DATA_PROCESSED / csv_name)
    for csv_name in ('trained_distances.csv', 'untrained_distances.csv')
)
barrier_data = pd.read_csv(DATA_METADATA / '2025_07_15_barrier_opening.csv')

# Scale opening_time by 1000 to get milliseconds (presumably recorded in
# seconds -- confirm against the metadata file), then build a
# machine -> opening time lookup.
barrier_data['opening_time_ms'] = barrier_data['opening_time'].mul(1000)
opening_times = dict(
    zip(barrier_data['machine'], barrier_data['opening_time_ms'])
)
|
||||
|
||||
|
||||
def align_to_opening_time(df, opening_times):
    """Align distance data to each machine's barrier opening time.

    Args:
        df (pd.DataFrame): Distance data with ``machine_name`` and ``t``
            columns.
        opening_times (dict): Mapping of machine ID to opening time, in the
            same units as ``t`` (the caller in this script passes
            milliseconds).

    Returns:
        pd.DataFrame: Copy of ``df`` with an ``aligned_time`` column added,
        equal to ``t`` minus the opening time of the row's machine. Rows
        whose machine has no entry in ``opening_times`` (or whose aligned
        time is otherwise NaN) are dropped. ``df`` itself is not modified.
    """
    df_aligned = df.copy()

    # Look up every row's opening time in a single vectorized pass instead of
    # building one boolean mask per machine; machines missing from the
    # mapping come back as NaN and are removed by the dropna below.
    row_opening_time = df_aligned['machine_name'].map(opening_times)
    df_aligned['aligned_time'] = (
        df_aligned['t'] - row_opening_time
    ).astype(float)

    return df_aligned.dropna(subset=['aligned_time'])
|
||||
|
||||
|
||||
# Put both groups on a common clock where 0 marks the barrier opening.
trained_aligned = align_to_opening_time(trained_distances, opening_times)
untrained_aligned = align_to_opening_time(untrained_distances, opening_times)

# Keep only the rows that actually have a distance value.
trained_clean = trained_aligned[trained_aligned['distance'].notna()]
untrained_clean = untrained_aligned[untrained_aligned['distance'].notna()]

# Distances before (aligned_time < 0) and after (aligned_time > 0) opening.
# Rows at exactly aligned_time == 0 fall into neither period.
trained_pre = trained_clean.loc[trained_clean['aligned_time'] < 0, 'distance']
trained_post = trained_clean.loc[trained_clean['aligned_time'] > 0, 'distance']
untrained_pre = untrained_clean.loc[untrained_clean['aligned_time'] < 0, 'distance']
untrained_post = untrained_clean.loc[untrained_clean['aligned_time'] > 0, 'distance']
|
||||
|
||||
def _cohens_d(sample_a, sample_b):
    """Return Cohen's d for ``sample_a`` relative to ``sample_b``.

    The difference of means (``sample_a`` minus ``sample_b``) is divided by
    the pooled standard deviation, where each sample's ``.var()`` is weighted
    by its degrees of freedom (``len - 1``).

    Args:
        sample_a (pd.Series): First sample.
        sample_b (pd.Series): Second sample.

    Returns:
        float: Standardized mean difference; positive when ``sample_a`` has
        the larger mean.
    """
    n_a, n_b = len(sample_a), len(sample_b)
    pooled_var = (
        ((n_a - 1) * sample_a.var() + (n_b - 1) * sample_b.var())
        / (n_a + n_b - 2)
    )
    return (sample_a.mean() - sample_b.mean()) / np.sqrt(pooled_var)


# NOTE(review): every row is treated as an independent observation in the
# t-tests below; confirm this is appropriate for repeated measurements.

print("=== STATISTICAL TESTS ===")

# Pre-opening period comparison (trained vs untrained)
t_stat_pre, p_val_pre = stats.ttest_ind(trained_pre, untrained_pre)
cohens_d_pre = _cohens_d(trained_pre, untrained_pre)

print("Pre-opening period:")
print(f" Trained mean: {trained_pre.mean():.2f}, Untrained mean: {untrained_pre.mean():.2f}")
print(f" T-statistic: {t_stat_pre:.4f}, P-value: {p_val_pre:.2e}")
print(f" Cohen's d: {cohens_d_pre:.4f}")

# Post-opening period comparison (trained vs untrained)
t_stat_post, p_val_post = stats.ttest_ind(trained_post, untrained_post)
cohens_d_post = _cohens_d(trained_post, untrained_post)

print("\nPost-opening period:")
print(f" Trained mean: {trained_post.mean():.2f}, Untrained mean: {untrained_post.mean():.2f}")
print(f" T-statistic: {t_stat_post:.4f}, P-value: {p_val_post:.2e}")
print(f" Cohen's d: {cohens_d_post:.4f}")

# Within-group comparisons (pre vs post).
# Samples are passed as (post, pre) so the t-statistic carries the same sign
# as the reported mean change and Cohen's d, both of which are post - pre.
t_stat_trained, p_val_trained = stats.ttest_ind(trained_post, trained_pre)
cohens_d_trained = _cohens_d(trained_post, trained_pre)

t_stat_untrained, p_val_untrained = stats.ttest_ind(untrained_post, untrained_pre)
cohens_d_untrained = _cohens_d(untrained_post, untrained_pre)

print("\nWithin-group changes:")
print(" Trained flies - Pre vs Post:")
print(f" Mean change: {trained_post.mean() - trained_pre.mean():.2f}")
print(f" T-statistic: {t_stat_trained:.4f}, P-value: {p_val_trained:.2e}")
print(f" Cohen's d: {cohens_d_trained:.4f}")

print(" Untrained flies - Pre vs Post:")
print(f" Mean change: {untrained_post.mean() - untrained_pre.mean():.2f}")
print(f" T-statistic: {t_stat_untrained:.4f}, P-value: {p_val_untrained:.2e}")
print(f" Cohen's d: {cohens_d_untrained:.4f}")
|
||||
Loading…
Add table
Add a link
Reference in a new issue