Advanced Statistical Testing

Add significance indicators and model fits to plots

Overview

The stats module provides advanced statistical testing and visualization tools for Plotly figures:

  • Cluster-based permutation tests for time series data
  • Box plot significance indicators with automatic pairwise testing
  • Statistical model fits with confidence intervals

Cluster-Based Permutation Tests

Identify significant time windows in multi-trial time series data (EEG, behavioral, etc.):

import numpy as np
import plotly.graph_objects as go
from mdu.plotly.stats import add_cluster_permut_sig_to_plotly
from mdu.plotly.template import set_template

set_template()

# Simulate EEG-like data: 20 trials, 100 time points
n_trials, n_time = 20, 100
time = np.linspace(0, 1, n_time)

# Group A: baseline activity
curves_a = np.random.randn(n_trials, n_time) * 0.5

# Group B: enhanced activity during 0.4-0.6s (simulated effect)
curves_b = np.random.randn(n_trials, n_time) * 0.5
curves_b[:, 40:60] += 2.0  # Add signal in middle period

# Create plot with mean ± SEM
# Note: `mdu.plotly.multiline.multiline_plot` can create this plot far more conveniently; it is built manually here to demonstrate how the cluster permutation test integrates with an existing figure.
fig = go.Figure()
mean_a = curves_a.mean(axis=0)
sem_a = curves_a.std(axis=0) / np.sqrt(n_trials)
fig = (
    fig.add_scatter(x=time, y=mean_a, name="Control", line=dict(color="blue"))
    .add_scatter(x=time, y=mean_a + sem_a, line=dict(width=0), showlegend=False)
    .add_scatter(
        x=time,
        y=mean_a - sem_a,
        fill="tonexty",
        line=dict(width=0),
        fillcolor="rgba(0,0,255,0.2)",
        showlegend=False,
    )
)

mean_b = curves_b.mean(axis=0)
sem_b = curves_b.std(axis=0) / np.sqrt(n_trials)
fig = (
    fig.add_scatter(x=time, y=mean_b, name="Treatment", line=dict(color="red"))
    .add_scatter(x=time, y=mean_b + sem_b, line=dict(width=0), showlegend=False)
    .add_scatter(
        x=time,
        y=mean_b - sem_b,
        fill="tonexty",
        line=dict(width=0),
        fillcolor="rgba(255,0,0,0.2)",
        showlegend=False,
    )
)

# Add cluster permutation test
fig = add_cluster_permut_sig_to_plotly(
    curves_a=curves_a,
    curves_b=curves_b,
    fig=fig,
    xaxes_vals=time,
    pval=0.05,
    nperm=1000,
    mode="line",  # Options: 'line', 'spark', 'p_bg', 'p_colorbar'
)

fig = fig.update_layout(
    title="Cluster Permutation Test Example",
    xaxis_title="Time (s)",
    yaxis_title="Amplitude (μV)",
)
fig.show()
stat_fun(H1): min=0.00011853515119853252 max=247.0990606559303
Running initial clustering …
Found 1 cluster

Visualization Modes

# Mode 1: Simple line indicator (default)
fig = add_cluster_permut_sig_to_plotly(curves_a, curves_b, fig, xaxes_vals=time, mode='line')
fig.show()
stat_fun(H1): min=0.00011853515119853252 max=247.0990606559303
Running initial clustering …
Found 1 cluster
# Mode 2: Sparklines showing F-statistic
fig = add_cluster_permut_sig_to_plotly(curves_a, curves_b, fig, xaxes_vals=time, mode='spark')
fig.show()
stat_fun(H1): min=0.00011853515119853252 max=247.0990606559303
Running initial clustering …
Found 1 cluster
# Mode 3: Background coloring for significant periods
fig = add_cluster_permut_sig_to_plotly(curves_a, curves_b, fig, xaxes_vals=time, mode='p_bg')
fig.show()
stat_fun(H1): min=0.00011853515119853252 max=247.0990606559303
Running initial clustering …
Found 1 cluster
# Mode 4: Colorbar with p-values
fig = add_cluster_permut_sig_to_plotly(curves_a, curves_b, fig, xaxes_vals=time, mode='p_colorbar')
fig.show()
stat_fun(H1): min=0.00011853515119853252 max=247.0990606559303
Running initial clustering …
Found 1 cluster

Box Plot Significance Indicators

Automatically add significance indicators between box or violin plots:

import plotly.express as px
from mdu.plotly.stats import add_box_significance_indicator
from scipy import stats

# Create sample data
df = px.data.tips()

# Create box plot
fig = px.box(df, x='day', y='total_bill', color='time',
             title='Total Bill by Day and Time')

# Add significance indicators between groups that share the same color (legend group)
fig = add_box_significance_indicator(
    fig,
    stat_func=stats.ttest_ind,
    same_legendgroup_only=True,  # Only compare same colors
    only_significant=True
)

fig.show()

Testing Specific Pairs

# Test only specific day pairs
fig = add_box_significance_indicator(
    fig,
    xval_pairs=[('Thur', 'Fri'), ('Fri', 'Sat'), ('Sat', 'Sun')],
    same_legendgroup_only=True
)
fig.show()

# Test specific color combinations
fig = add_box_significance_indicator(
    fig,
    color_pairs=[('Dinner', 'Lunch')],
    same_legendgroup_only=False
)
fig.show()

Non-parametric Tests

# Use Mann-Whitney U test instead of t-test
from scipy.stats import mannwhitneyu

fig = add_box_significance_indicator(
    fig,
    stat_func=mannwhitneyu,
    p_quantiles=(0.05, 0.01, 0.001)  # Three significance levels
)
fig.show()

Showing All Comparisons

# Show both significant and non-significant (ns) comparisons
fig = add_box_significance_indicator(
    fig,
    only_significant=False  # Will add 'ns' for non-significant
)
fig.show()

Statistical Model Fits

Add regression lines with confidence intervals:

import numpy as np
import plotly.express as px
import statsmodels.api as sm
from mdu.plotly.stats import add_statsmodel_fit

# Create sample data
np.random.seed(42)
x = np.linspace(0, 10, 50)
y = 2 * x + 5 + np.random.normal(0, 3, 50)

# Create scatter plot
fig = px.scatter(x=x, y=y, title='OLS Regression with Confidence Intervals')

# Add OLS fit with 95% CI
fig = add_statsmodel_fit(
    fig,
    x=x,
    y=y,
    fitfunc=sm.OLS,
    show_ci=True,
    show_obs_ci=False,
    ci_alpha=0.05
)

fig.show()

With Prediction Intervals

# Show both the confidence interval (for the fitted mean) and the prediction interval (for new observations)
fig = add_statsmodel_fit(
    fig, x=x, y=y,
    show_ci=True,
    show_obs_ci=True,  # Prediction interval for new observations
    ci_alpha=0.05
)
fig.show()

Custom Styling

# Customize appearance
fig = add_statsmodel_fit(
    fig, x=x, y=y,
    line_kwargs={'line': {'color': 'red', 'width': 3}},
    ci_kwargs={'fill': 'toself', 'fillcolor': 'rgba(255,0,0,0.1)',
               'line_color': 'rgba(255,0,0,0)'},
    obs_ci_kwargs={'line': {'dash': 'dot', 'color': 'darkred'}}
)
fig.show()

Multiple Subplots

from plotly.subplots import make_subplots

# Create 2x2 subplot grid
fig = make_subplots(rows=2, cols=2)

# Add data and fits to different subplots
for i, (row, col) in enumerate([(1,1), (1,2), (2,1), (2,2)]):
    # Generate data
    x_sub = np.linspace(0, 10, 30)
    y_sub = (i+1) * x_sub + np.random.normal(0, 2, 30)
    
    # Add scatter
    fig.add_scatter(x=x_sub, y=y_sub, mode='markers',
                    name=f'Data {i+1}', row=row, col=col)
    
    # Add fit
    fig = add_statsmodel_fit(fig, x=x_sub, y=y_sub,
                             row=row, col=col, show_ci=True)

fig.update_layout(height=600, showlegend=False)
fig.show()

Advanced: GLM Fits

# Generalized Linear Model for count data
import statsmodels.api as sm

# Simulate count data (e.g., Poisson)
x = np.linspace(0, 5, 50)
lambda_true = np.exp(0.5 + 0.3 * x)
y_count = np.random.poisson(lambda_true)

fig = px.scatter(x=x, y=y_count, title='Poisson GLM')

# Fit GLM with Poisson family
fig = add_statsmodel_fit(
    fig, x=x, y=y_count,
    fitfunc=lambda y, X: sm.GLM(y, X, family=sm.families.Poisson()),
    show_ci=True
)
fig.show()

See Also