"""
Non-parametric statistical tests — pure Python implementations.
Tests
-----
- Mann-Whitney U (two independent samples)
- Wilcoxon Signed-Rank (one sample / paired samples)
- Kruskal-Wallis H (k independent samples)
"""
from typing import List, Optional
from ..core.result import HypoResult
from ..core.validators import validate_alpha, validate_alternative, validate_data, validate_groups
from ..math.basic import abs_value, sqrt
from ..math.distributions import ChiSquare, Normal
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _rank_data(data: List[float]) -> List[float]:
"""
Assign ranks to a flat list of values, using average ranks for ties.
Returns a list of ranks in the same original order as *data*.
"""
n = len(data)
indexed = sorted(enumerate(data), key=lambda x: x[1])
ranks = [0.0] * n
i = 0
while i < n:
j = i
while j < n - 1 and indexed[j][1] == indexed[j + 1][1]:
j += 1
avg_rank = (i + j + 2) / 2.0 # 1-based average rank
for k in range(i, j + 1):
ranks[indexed[k][0]] = avg_rank
i = j + 1
return ranks
def _tie_correction(data: List[float]) -> float:
"""
Compute the sum of (t^3 - t) for each group of tied values.
Used to correct variance in non-parametric tests.
"""
from collections import Counter
counts = Counter(data)
return sum(t**3 - t for t in counts.values() if t > 1)
# ---------------------------------------------------------------------------
# Mann-Whitney U Test
# ---------------------------------------------------------------------------
[docs]
def mann_whitney_u(
group1: List[float],
group2: List[float],
alpha: float = 0.05,
alternative: str = "two-sided",
) -> HypoResult:
"""
Mann-Whitney U test for two independent samples.
A non-parametric alternative to the two-sample t-test that tests whether
one group tends to have larger values than the other.
Uses a normal approximation (with tie correction) for the p-value.
Args:
group1: First group sample values
group2: Second group sample values
alpha: Significance level (default 0.05)
alternative: 'two-sided', 'greater' (group1 > group2), or 'less'
Returns:
HypoResult with statistic=U1, effect_size=rank-biserial r
Examples:
>>> result = mann_whitney_u([1, 2, 3, 4], [5, 6, 7, 8])
>>> print(result.summary())
"""
group1 = validate_data(group1, 2, "group1")
group2 = validate_data(group2, 2, "group2")
validate_alpha(alpha)
validate_alternative(alternative)
n1, n2 = len(group1), len(group2)
N = n1 + n2
# Combine groups and rank
combined_vals = group1 + group2
combined_groups = [1] * n1 + [2] * n2
all_ranks = _rank_data(combined_vals)
# Sum of ranks for group 1
R1 = sum(rank for rank, grp in zip(all_ranks, combined_groups) if grp == 1)
# U statistics
U1 = R1 - n1 * (n1 + 1) / 2.0
U2 = n1 * n2 - U1
# Mean under H0
mean_U = n1 * n2 / 2.0
# Variance with tie correction
tie_sum = _tie_correction(combined_vals)
var_U = (n1 * n2 / 12.0) * (N + 1 - tie_sum / (N * (N - 1))) if N > 1 else 0
if var_U <= 0:
var_U = n1 * n2 * (N + 1) / 12.0 # fallback without tie correction
std_U = sqrt(var_U)
# Z statistic: U1 measures tendency of group1 to exceed group2
Z = (U1 - mean_U) / std_U
# p-value
norm = Normal()
if alternative == "two-sided":
p_value = 2 * (1 - norm.cdf(abs_value(Z)))
elif alternative == "greater":
p_value = 1 - norm.cdf(Z)
else: # less
p_value = norm.cdf(Z)
# Continuity correction clamp
p_value = max(0.0, min(1.0, p_value))
# Effect size: rank-biserial correlation r = (U1 - U2) / (n1 * n2)
effect_size = (U1 - U2) / (n1 * n2)
R2 = sum(all_ranks) - R1
data_summary = {
"n1": n1,
"n2": n2,
"rank_sum_group1": R1,
"rank_sum_group2": R2,
"mean_rank_group1": R1 / n1,
"mean_rank_group2": R2 / n2,
"U1": U1,
"U2": U2,
"z_statistic": Z,
}
significance = "significant" if p_value < alpha else "not significant"
interpretation = (
f"The Mann-Whitney U test is {significance} "
f"(U = {U1:.1f}, Z = {Z:.4f}, p = {p_value:.4f}). "
+ (
"Group 1 values tend to be larger than Group 2."
if U1 > mean_U and p_value < alpha
else "No significant directional difference detected."
)
)
return HypoResult(
test_name="Mann-Whitney U Test",
statistic=U1,
p_value=p_value,
effect_size=effect_size,
effect_size_name="rank-biserial r",
confidence_interval=None,
degrees_of_freedom=None,
sample_sizes=(n1, n2),
alpha=alpha,
alternative=alternative,
interpretation=interpretation,
data_summary=data_summary,
)
# ---------------------------------------------------------------------------
# Wilcoxon Signed-Rank Test
# ---------------------------------------------------------------------------
[docs]
def wilcoxon_signed_rank(
x: List[float],
y: Optional[List[float]] = None,
mu: float = 0.0,
alpha: float = 0.05,
alternative: str = "two-sided",
) -> HypoResult:
"""
Wilcoxon signed-rank test.
Tests whether the median of differences (or of a single sample relative
to *mu*) differs from zero. Non-parametric alternative to the one-sample
or paired t-test.
Args:
x: Data values (or differences if *y* is not supplied)
y: Optional second paired sample; if provided, differences = x - y
mu: Hypothesised median under H0 (default 0)
alpha: Significance level
alternative: 'two-sided', 'greater', or 'less'
Returns:
HypoResult with statistic=W+ (sum of positive ranks)
Examples:
>>> result = wilcoxon_signed_rank([1, 2, 3, 4, 5], mu=2)
>>> result = wilcoxon_signed_rank(before, after) # paired
"""
x = validate_data(x, 2, "x")
validate_alpha(alpha)
validate_alternative(alternative)
if y is not None:
y = validate_data(y, 2, "y")
if len(x) != len(y):
from ..core.exceptions import DataFormatError
raise DataFormatError(
f"x and y must have the same length for a paired test, "
f"got {len(x)} and {len(y)}"
)
differences = [xi - yi for xi, yi in zip(x, y)]
else:
differences = [xi - mu for xi in x]
# Drop zero differences
nonzero = [d for d in differences if d != 0.0]
n = len(nonzero)
if n == 0:
raise ValueError("All differences are zero — test cannot be performed")
# Rank absolute differences
abs_diffs = [abs_value(d) for d in nonzero]
ranks = _rank_data(abs_diffs)
# W+ = sum of ranks for positive differences
W_plus = sum(r for r, d in zip(ranks, nonzero) if d > 0)
W_minus = sum(r for r, d in zip(ranks, nonzero) if d < 0)
# Expected value and variance under H0
expected_W = n * (n + 1) / 4.0
tie_sum = _tie_correction(abs_diffs)
var_W = (n * (n + 1) * (2 * n + 1) / 24.0) - (tie_sum / 48.0)
if var_W <= 0:
var_W = n * (n + 1) * (2 * n + 1) / 24.0
std_W = sqrt(var_W)
# Z statistic (using W+)
Z = (W_plus - expected_W) / std_W
# p-value
norm = Normal()
if alternative == "two-sided":
p_value = 2 * (1 - norm.cdf(abs_value(Z)))
elif alternative == "greater":
p_value = 1 - norm.cdf(Z)
else: # less
p_value = norm.cdf(Z)
p_value = max(0.0, min(1.0, p_value))
# Effect size: r = Z / sqrt(n)
effect_size = Z / sqrt(n)
n_zeros = len(differences) - n
data_summary = {
"n_pairs": len(differences),
"n_nonzero": n,
"n_zero_diffs": n_zeros,
"W_plus": W_plus,
"W_minus": W_minus,
"expected_W": expected_W,
"z_statistic": Z,
}
significance = "significant" if p_value < alpha else "not significant"
interpretation = (
f"The Wilcoxon signed-rank test is {significance} "
f"(W+ = {W_plus:.1f}, Z = {Z:.4f}, p = {p_value:.4f})."
)
return HypoResult(
test_name="Wilcoxon Signed-Rank Test",
statistic=W_plus,
p_value=p_value,
effect_size=effect_size,
effect_size_name="rank-biserial r",
confidence_interval=None,
degrees_of_freedom=None,
sample_sizes=n,
alpha=alpha,
alternative=alternative,
interpretation=interpretation,
data_summary=data_summary,
)
# ---------------------------------------------------------------------------
# Kruskal-Wallis H Test
# ---------------------------------------------------------------------------
[docs]
def kruskal_wallis(
*groups: List[float],
alpha: float = 0.05,
) -> HypoResult:
"""
Kruskal-Wallis H test for k independent groups.
Non-parametric one-way ANOVA. Tests whether the population medians of all
groups are equal. P-value is obtained from the chi-square distribution
with k-1 degrees of freedom.
Args:
*groups: Two or more group samples (each a list of floats)
alpha: Significance level
Returns:
HypoResult with statistic=H, effect_size=eta-squared
Examples:
>>> result = kruskal_wallis([1,2,3], [4,5,6], [7,8,9])
>>> print(result.summary())
"""
groups = validate_groups(*groups, min_size=2, min_groups=2)
validate_alpha(alpha)
k = len(groups)
group_sizes = [len(g) for g in groups]
N = sum(group_sizes)
# Combine all observations and rank
combined_vals: List[float] = []
group_labels: List[int] = []
for idx, g in enumerate(groups):
combined_vals.extend(g)
group_labels.extend([idx] * len(g))
all_ranks = _rank_data(combined_vals)
# Sum of ranks per group
rank_sums = [0.0] * k
for rank, label in zip(all_ranks, group_labels):
rank_sums[label] += rank
# H statistic
h_inner = sum(rank_sums[i] ** 2 / group_sizes[i] for i in range(k))
H = (12.0 / (N * (N + 1))) * h_inner - 3 * (N + 1)
# Tie correction
tie_sum = _tie_correction(combined_vals)
tie_factor = 1.0 - tie_sum / (N**3 - N) if N > 1 else 1.0
if tie_factor > 0:
H = H / tie_factor
# p-value from chi-square with k-1 df
df = k - 1
chi2_dist = ChiSquare(df)
p_value = 1 - chi2_dist.cdf(H) if H >= 0 else 1.0
p_value = max(0.0, min(1.0, p_value))
# Effect size: eta-squared = (H - k + 1) / (N - k)
if N > k:
eta_sq = (H - k + 1) / (N - k)
eta_sq = max(0.0, min(1.0, eta_sq))
else:
eta_sq = None
mean_ranks = [rank_sums[i] / group_sizes[i] for i in range(k)]
data_summary = {
"n_groups": k,
"group_sizes": group_sizes,
"rank_sums": rank_sums,
"mean_ranks": mean_ranks,
"N_total": N,
"H_uncorrected": (12.0 / (N * (N + 1))) * h_inner - 3 * (N + 1),
}
significance = "significant" if p_value < alpha else "not significant"
interpretation = (
f"The Kruskal-Wallis test is {significance} "
f"(H({df}) = {H:.4f}, p = {p_value:.4f}). "
+ (
"At least one group differs significantly from the others."
if p_value < alpha
else "No significant difference in central tendency across groups."
)
)
return HypoResult(
test_name="Kruskal-Wallis H Test",
statistic=H,
p_value=p_value,
effect_size=eta_sq,
effect_size_name="eta-squared",
confidence_interval=None,
degrees_of_freedom=df,
sample_sizes=tuple(group_sizes),
alpha=alpha,
alternative="two-sided",
interpretation=interpretation,
data_summary=data_summary,
)