# Source code for hypotestx.tests.nonparametric

"""
Non-parametric statistical tests — pure Python implementations.

Tests
-----
- Mann-Whitney U  (two independent samples)
- Wilcoxon Signed-Rank  (one sample / paired samples)
- Kruskal-Wallis H  (k independent samples)
"""

from typing import List, Optional

from ..core.result import HypoResult
from ..core.validators import validate_alpha, validate_alternative, validate_data, validate_groups
from ..math.basic import abs_value, sqrt
from ..math.distributions import ChiSquare, Normal

# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _rank_data(data: List[float]) -> List[float]:
    """
    Assign ranks to a flat list of values, using average ranks for ties.

    Returns a list of ranks in the same original order as *data*.
    """
    n = len(data)
    indexed = sorted(enumerate(data), key=lambda x: x[1])

    ranks = [0.0] * n
    i = 0
    while i < n:
        j = i
        while j < n - 1 and indexed[j][1] == indexed[j + 1][1]:
            j += 1
        avg_rank = (i + j + 2) / 2.0  # 1-based average rank
        for k in range(i, j + 1):
            ranks[indexed[k][0]] = avg_rank
        i = j + 1

    return ranks


def _tie_correction(data: List[float]) -> float:
    """
    Compute the sum of (t^3 - t) for each group of tied values.
    Used to correct variance in non-parametric tests.
    """
    from collections import Counter

    counts = Counter(data)
    return sum(t**3 - t for t in counts.values() if t > 1)


# ---------------------------------------------------------------------------
# Mann-Whitney U Test
# ---------------------------------------------------------------------------


def mann_whitney_u(
    group1: List[float],
    group2: List[float],
    alpha: float = 0.05,
    alternative: str = "two-sided",
) -> HypoResult:
    """
    Mann-Whitney U test for two independent samples.

    A non-parametric alternative to the two-sample t-test that tests whether
    one group tends to have larger values than the other.  The p-value is
    obtained from a normal approximation of U1 using a tie-corrected
    variance; no continuity correction is applied.

    Args:
        group1: First group sample values
        group2: Second group sample values
        alpha: Significance level (default 0.05)
        alternative: 'two-sided', 'greater' (group1 > group2), or 'less'

    Returns:
        HypoResult with statistic=U1 and effect_size=rank-biserial r,
        computed as (U1 - U2) / (n1 * n2).  ``data_summary`` carries the
        sample sizes, rank sums, mean ranks, U1, U2 and the Z statistic.

    Raises:
        Whatever ``validate_data``, ``validate_alpha`` or
        ``validate_alternative`` raise for invalid arguments (these
        validators live in ``core.validators``).

    Examples:
        >>> result = mann_whitney_u([1, 2, 3, 4], [5, 6, 7, 8])
        >>> print(result.summary())
    """
    group1 = validate_data(group1, 2, "group1")
    group2 = validate_data(group2, 2, "group2")
    validate_alpha(alpha)
    validate_alternative(alternative)

    n1, n2 = len(group1), len(group2)
    N = n1 + n2

    # Rank the pooled sample; group1 occupies the first n1 positions.
    combined_vals = group1 + group2
    all_ranks = _rank_data(combined_vals)

    # Rank sums per group
    R1 = sum(all_ranks[:n1])
    R2 = sum(all_ranks) - R1

    # U statistics
    U1 = R1 - n1 * (n1 + 1) / 2.0
    U2 = n1 * n2 - U1

    # Mean of U under H0
    mean_U = n1 * n2 / 2.0

    # Variance of U under H0, reduced for ties
    tie_sum = _tie_correction(combined_vals)
    var_U = (n1 * n2 / 12.0) * (N + 1 - tie_sum / (N * (N - 1))) if N > 1 else 0
    if var_U <= 0:
        # The tie term removed all variance (e.g. every value identical);
        # use the uncorrected variance so the division below is defined.
        var_U = n1 * n2 * (N + 1) / 12.0
    std_U = sqrt(var_U)

    # Z statistic: positive when group1 tends to exceed group2
    Z = (U1 - mean_U) / std_U

    # p-value from the standard normal distribution
    norm = Normal()
    if alternative == "two-sided":
        p_value = 2 * (1 - norm.cdf(abs_value(Z)))
    elif alternative == "greater":
        p_value = 1 - norm.cdf(Z)
    else:  # less
        p_value = norm.cdf(Z)

    # Guard against floating-point drift outside [0, 1]
    p_value = max(0.0, min(1.0, p_value))

    # Effect size: rank-biserial correlation r = (U1 - U2) / (n1 * n2)
    effect_size = (U1 - U2) / (n1 * n2)

    data_summary = {
        "n1": n1,
        "n2": n2,
        "rank_sum_group1": R1,
        "rank_sum_group2": R2,
        "mean_rank_group1": R1 / n1,
        "mean_rank_group2": R2 / n2,
        "U1": U1,
        "U2": U2,
        "z_statistic": Z,
    }

    is_significant = p_value < alpha
    significance = "significant" if is_significant else "not significant"

    # Describe the direction of a significant result.  A significant result
    # with U1 below its H0 mean means group 1 tends to be *smaller*; it must
    # not be reported as "no significant difference".
    if is_significant and U1 > mean_U:
        direction = "Group 1 values tend to be larger than Group 2."
    elif is_significant and U1 < mean_U:
        direction = "Group 1 values tend to be smaller than Group 2."
    else:
        direction = "No significant directional difference detected."

    interpretation = (
        f"The Mann-Whitney U test is {significance} "
        f"(U = {U1:.1f}, Z = {Z:.4f}, p = {p_value:.4f}). " + direction
    )

    return HypoResult(
        test_name="Mann-Whitney U Test",
        statistic=U1,
        p_value=p_value,
        effect_size=effect_size,
        effect_size_name="rank-biserial r",
        confidence_interval=None,
        degrees_of_freedom=None,
        sample_sizes=(n1, n2),
        alpha=alpha,
        alternative=alternative,
        interpretation=interpretation,
        data_summary=data_summary,
    )


# ---------------------------------------------------------------------------
# Wilcoxon Signed-Rank Test
# ---------------------------------------------------------------------------


def wilcoxon_signed_rank(
    x: List[float],
    y: Optional[List[float]] = None,
    mu: float = 0.0,
    alpha: float = 0.05,
    alternative: str = "two-sided",
) -> HypoResult:
    """
    Wilcoxon signed-rank test.

    Tests whether the median of differences (or of a single sample relative
    to *mu*) differs from zero.  Non-parametric alternative to the one-sample
    or paired t-test.  Zero differences are dropped, the remaining absolute
    differences are ranked (average ranks for ties), and the p-value comes
    from a normal approximation of W+ with a tie-corrected variance.

    Args:
        x: Data values (or differences if *y* is not supplied)
        y: Optional second paired sample; if provided, differences = x - y
        mu: Hypothesised median under H0 (default 0).  Only used when *y*
            is not supplied; it is ignored for the paired form.
        alpha: Significance level
        alternative: 'two-sided', 'greater', or 'less'

    Returns:
        HypoResult with statistic=W+ (sum of positive ranks) and
        effect_size=matched-pairs rank-biserial r, computed as
        (W+ - W-) / (n * (n + 1) / 2) where n is the number of non-zero
        differences.  ``sample_sizes`` is that same n.

    Raises:
        DataFormatError: if *y* is supplied and its length differs from *x*.
        ValueError: if every difference is zero.

    Examples:
        >>> result = wilcoxon_signed_rank([1, 2, 3, 4, 5], mu=2)
        >>> result = wilcoxon_signed_rank(before, after)   # paired
    """
    x = validate_data(x, 2, "x")
    validate_alpha(alpha)
    validate_alternative(alternative)

    if y is not None:
        y = validate_data(y, 2, "y")
        if len(x) != len(y):
            from ..core.exceptions import DataFormatError

            raise DataFormatError(
                f"x and y must have the same length for a paired test, "
                f"got {len(x)} and {len(y)}"
            )
        differences = [xi - yi for xi, yi in zip(x, y)]
    else:
        differences = [xi - mu for xi in x]

    # Drop zero differences: they carry no sign information
    nonzero = [d for d in differences if d != 0.0]
    n = len(nonzero)

    if n == 0:
        raise ValueError("All differences are zero — test cannot be performed")

    # Rank absolute differences
    abs_diffs = [abs_value(d) for d in nonzero]
    ranks = _rank_data(abs_diffs)

    # W+ / W- = sum of ranks belonging to positive / negative differences
    W_plus = sum(r for r, d in zip(ranks, nonzero) if d > 0)
    W_minus = sum(r for r, d in zip(ranks, nonzero) if d < 0)

    # Expected value and tie-corrected variance of W+ under H0
    expected_W = n * (n + 1) / 4.0
    tie_sum = _tie_correction(abs_diffs)
    var_W = (n * (n + 1) * (2 * n + 1) / 24.0) - (tie_sum / 48.0)
    if var_W <= 0:
        # Fall back to the uncorrected variance so the division is defined
        var_W = n * (n + 1) * (2 * n + 1) / 24.0
    std_W = sqrt(var_W)

    # Z statistic (using W+)
    Z = (W_plus - expected_W) / std_W

    # p-value from the standard normal distribution
    norm = Normal()
    if alternative == "two-sided":
        p_value = 2 * (1 - norm.cdf(abs_value(Z)))
    elif alternative == "greater":
        p_value = 1 - norm.cdf(Z)
    else:  # less
        p_value = norm.cdf(Z)

    # Guard against floating-point drift outside [0, 1]
    p_value = max(0.0, min(1.0, p_value))

    # Effect size: matched-pairs rank-biserial correlation.  The ranks of the
    # n non-zero differences sum to n(n+1)/2, so this is the share of rank
    # mass favouring positive differences minus the share favouring negative
    # ones.  This matches the "rank-biserial r" label reported below; the
    # alternative r = Z / sqrt(n) can still be derived from
    # data_summary["z_statistic"] and data_summary["n_nonzero"].
    effect_size = (W_plus - W_minus) / (n * (n + 1) / 2.0)

    n_zeros = len(differences) - n
    data_summary = {
        "n_pairs": len(differences),
        "n_nonzero": n,
        "n_zero_diffs": n_zeros,
        "W_plus": W_plus,
        "W_minus": W_minus,
        "expected_W": expected_W,
        "z_statistic": Z,
    }

    significance = "significant" if p_value < alpha else "not significant"
    interpretation = (
        f"The Wilcoxon signed-rank test is {significance} "
        f"(W+ = {W_plus:.1f}, Z = {Z:.4f}, p = {p_value:.4f})."
    )

    return HypoResult(
        test_name="Wilcoxon Signed-Rank Test",
        statistic=W_plus,
        p_value=p_value,
        effect_size=effect_size,
        effect_size_name="rank-biserial r",
        confidence_interval=None,
        degrees_of_freedom=None,
        sample_sizes=n,
        alpha=alpha,
        alternative=alternative,
        interpretation=interpretation,
        data_summary=data_summary,
    )


# ---------------------------------------------------------------------------
# Kruskal-Wallis H Test
# ---------------------------------------------------------------------------


def kruskal_wallis(
    *groups: List[float],
    alpha: float = 0.05,
) -> HypoResult:
    """
    Kruskal-Wallis H test for k independent groups.

    Non-parametric one-way ANOVA.  Tests whether the population medians of
    all groups are equal.  All observations are pooled and ranked, H is
    computed from the per-group rank sums and divided by a tie factor when
    that factor is positive, and the p-value is taken from the chi-square
    distribution with k-1 degrees of freedom.

    Args:
        *groups: Two or more group samples (each a list of floats)
        alpha: Significance level

    Returns:
        HypoResult with statistic=H (tie-corrected) and
        effect_size=eta-squared, computed as (H - k + 1) / (N - k) and
        clamped to [0, 1]; the effect size is None when N <= k.

    Examples:
        >>> result = kruskal_wallis([1,2,3], [4,5,6], [7,8,9])
        >>> print(result.summary())
    """
    groups = validate_groups(*groups, min_size=2, min_groups=2)
    validate_alpha(alpha)

    group_sizes = [len(sample) for sample in groups]
    k = len(groups)
    N = sum(group_sizes)

    # Pool every observation, group after group, and rank the pooled sample
    pooled: List[float] = [value for sample in groups for value in sample]
    all_ranks = _rank_data(pooled)

    # Each group owns a contiguous slice of the pooled ranks; add them up
    rank_sums: List[float] = []
    cursor = 0
    for size in group_sizes:
        subtotal = 0.0
        for rank in all_ranks[cursor : cursor + size]:
            subtotal += rank
        rank_sums.append(subtotal)
        cursor += size

    # H statistic before any tie adjustment
    h_inner = sum(
        subtotal**2 / size for subtotal, size in zip(rank_sums, group_sizes)
    )
    h_uncorrected = (12.0 / (N * (N + 1))) * h_inner - 3 * (N + 1)

    # Tie correction: rescale only when the factor is strictly positive
    tie_sum = _tie_correction(pooled)
    if N > 1:
        tie_factor = 1.0 - tie_sum / (N**3 - N)
    else:
        tie_factor = 1.0
    H = h_uncorrected / tie_factor if tie_factor > 0 else h_uncorrected

    # p-value from chi-square with k-1 df; a negative H is treated as p = 1
    df = k - 1
    chi2_dist = ChiSquare(df)
    if H >= 0:
        p_value = 1 - chi2_dist.cdf(H)
    else:
        p_value = 1.0
    p_value = max(0.0, min(1.0, p_value))

    # Effect size: eta-squared = (H - k + 1) / (N - k), clamped to [0, 1]
    eta_sq = None
    if N > k:
        eta_sq = max(0.0, min(1.0, (H - k + 1) / (N - k)))

    mean_ranks = [subtotal / size for subtotal, size in zip(rank_sums, group_sizes)]

    data_summary = {
        "n_groups": k,
        "group_sizes": group_sizes,
        "rank_sums": rank_sums,
        "mean_ranks": mean_ranks,
        "N_total": N,
        "H_uncorrected": h_uncorrected,
    }

    if p_value < alpha:
        significance = "significant"
        conclusion = "At least one group differs significantly from the others."
    else:
        significance = "not significant"
        conclusion = "No significant difference in central tendency across groups."

    interpretation = (
        f"The Kruskal-Wallis test is {significance} "
        f"(H({df}) = {H:.4f}, p = {p_value:.4f}). " + conclusion
    )

    return HypoResult(
        test_name="Kruskal-Wallis H Test",
        statistic=H,
        p_value=p_value,
        effect_size=eta_sq,
        effect_size_name="eta-squared",
        confidence_interval=None,
        degrees_of_freedom=df,
        sample_sizes=tuple(group_sizes),
        alpha=alpha,
        alternative="two-sided",
        interpretation=interpretation,
        data_summary=data_summary,
    )