# Source code for hypotestx.explore.visualize

"""
Visualization helpers for HypoTestX.

All plotting functions gracefully degrade when matplotlib is not installed:
they raise ``ImportError`` with a helpful install message.

Install the optional visualization dependencies with::

    pip install hypotestx[visualization]   # matplotlib + plotly
    # or just
    pip install matplotlib

Public API
----------
plot_result(result)                   -> matplotlib Figure
plot_distributions(groups, labels)    -> matplotlib Figure
plot_p_value(p_value, alpha, df)      -> matplotlib Figure
generate_report(result, path, fmt)    -> HTML / plain-text / PDF report (optionally saved to path)
"""

from __future__ import annotations

from typing import Any, List, Optional, Sequence

__all__ = [
    "plot_result",
    "plot_distributions",
    "plot_p_value",
    "generate_report",
]


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _require_matplotlib():
    """Return (plt, patches) or raise a descriptive ImportError."""
    try:
        import matplotlib.patches as mpatches
        import matplotlib.pyplot as plt

        return plt, mpatches
    except ImportError as exc:
        raise ImportError(
            "Matplotlib is required for plotting. "
            "Install it with:  pip install matplotlib  "
            "or  pip install hypotestx[visualization]"
        ) from exc


def _stderr_bar_chart(ax, group_labels, means, stds, title=""):
    """Draw a simple bar chart with ±1 SD error bars."""
    x = list(range(len(group_labels)))
    ax.bar(x, means, width=0.5, color="#4C72B0", edgecolor="white", alpha=0.85)
    ax.errorbar(x, means, yerr=stds, fmt="none", color="black", capsize=5, linewidth=1.5)
    ax.set_xticks(x)
    ax.set_xticklabels(group_labels)
    ax.set_ylabel("Mean ± SD")
    if title:
        ax.set_title(title)


def _normal_pdf(x_vals, mu, sigma):
    """Compute normal PDF values, returning zeros if sigma == 0."""
    import math

    if sigma == 0:
        return [0.0] * len(x_vals)
    return [
        (1 / (sigma * math.sqrt(2 * math.pi))) * math.exp(-0.5 * ((x - mu) / sigma) ** 2)
        for x in x_vals
    ]


# ---------------------------------------------------------------------------
# plot_p_value
# ---------------------------------------------------------------------------


def plot_p_value(
    p_value: float,
    alpha: float = 0.05,
    degrees_of_freedom: Optional[float] = None,
    test_statistic: Optional[float] = None,
    alternative: str = "two-sided",
    title: str = "",
) -> Any:
    """
    Draw a standard-normal density with the rejection region(s) shaded.

    The plotted curve is always the standard normal PDF and the critical
    values come from ``_normal_ppf``; the observed statistic, when given,
    is marked with a dashed vertical line.

    Parameters
    ----------
    p_value : float
        Observed p-value; used for the default title and the
        "significant" / "not significant" verdict.
    alpha : float
        Significance level (default 0.05).
    degrees_of_freedom : float, optional
        When given and greater than 2, the x-axis range is widened by
        ``sqrt(df / (df - 2))``. The curve itself stays the normal
        approximation.
    test_statistic : float, optional
        If provided, marks the observed statistic on the x-axis.
    alternative : str
        ``"two-sided"``, ``"greater"``, or ``"less"``. Any other value
        shades no rejection region.
    title : str
        Plot title. A default summarising p and alpha is used when empty.

    Returns
    -------
    matplotlib.figure.Figure

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    """
    plt, _unused_patches = _require_matplotlib()
    import math

    figure, axis = plt.subplots(figsize=(8, 4))

    num_steps = 400
    lower, upper = -4.0, 4.0
    if degrees_of_freedom is not None:
        # Visual approximation only: stretch the axis by the t-distribution's
        # standard deviation, which is finite only for df > 2.
        widen = 1.0
        if degrees_of_freedom > 2:
            widen = math.sqrt(degrees_of_freedom / (degrees_of_freedom - 2))
        lower, upper = -4 * widen, 4 * widen

    spacing = (upper - lower) / num_steps
    grid = [lower + k * spacing for k in range(num_steps + 1)]
    density = _normal_pdf(grid, 0, 1)

    curve_colour = "#2d6a9f"
    reject_colour = "#d62728"
    axis.plot(grid, density, color=curve_colour, linewidth=2)
    axis.fill_between(grid, density, 0, alpha=0.08, color=curve_colour)

    # Probability mass placed in each shaded tail.
    is_two_sided = alternative == "two-sided"
    tail_mass = alpha / 2 if is_two_sided else alpha

    if alternative in ("two-sided", "less"):
        left_cut = _normal_ppf(tail_mass)
        left_x = []
        for value in grid:
            if value <= left_cut:
                left_x.append(value)
        axis.fill_between(
            left_x,
            _normal_pdf(left_x, 0, 1),
            0,
            alpha=0.45,
            color=reject_colour,
            label="Rejection region",
        )

    if alternative in ("two-sided", "greater"):
        right_cut = _normal_ppf(1 - tail_mass)
        right_x = []
        for value in grid:
            if value >= right_cut:
                right_x.append(value)
        right_style = {"alpha": 0.45, "color": reject_colour}
        # The left tail already owns the legend entry in the two-sided case.
        if alternative == "greater":
            right_style["label"] = "Rejection region"
        axis.fill_between(right_x, _normal_pdf(right_x, 0, 1), 0, **right_style)

    if test_statistic is not None:
        axis.axvline(
            x=test_statistic,
            color="#e67e22",
            linewidth=2,
            linestyle="--",
            label=f"Test statistic = {test_statistic:.3f}",
        )

    if p_value < alpha:
        sig_label = "significant"
    else:
        sig_label = "not significant"

    axis.set_xlabel("Standard units")
    axis.set_ylabel("Density")
    heading = title if title else f"p = {p_value:.4f} (alpha = {alpha}) → {sig_label}"
    axis.set_title(heading)
    axis.legend(loc="upper right", fontsize=9)
    figure.tight_layout()
    return figure
def _normal_ppf(p: float) -> float: """Approximate normal inverse CDF (rational approximation).""" import math # Rational approximation (Beasley-Springer-Moro) if p <= 0.0: return float("-inf") if p >= 1.0: return float("inf") if p < 0.5: t = math.sqrt(-2.0 * math.log(p)) else: t = math.sqrt(-2.0 * math.log(1.0 - p)) c0, c1, c2 = 2.515517, 0.802853, 0.010328 d1, d2, d3 = 1.432788, 0.189269, 0.001308 numerator = c0 + c1 * t + c2 * t * t denominator = 1.0 + d1 * t + d2 * t * t + d3 * t * t * t result = t - numerator / denominator return result if p < 0.5 else -result # --------------------------------------------------------------------------- # plot_distributions # ---------------------------------------------------------------------------
def plot_distributions(
    groups: List[Sequence[float]],
    labels: Optional[List[str]] = None,
    title: str = "",
    kind: str = "box",
) -> Any:
    """
    Plot the distribution of one or more groups side-by-side.

    Parameters
    ----------
    groups : list of sequences
        Each element is a numeric sequence (one per group). Any sized
        iterable works, including NumPy arrays.
    labels : list of str, optional
        Group labels.  Defaults to ``["Group 1", "Group 2", ...]``.
    title : str
        Plot title.  Defaults to ``"Group Distribution Comparison"``.
    kind : str
        ``"box"`` (default), ``"violin"``, or ``"bar"``.  Any other value
        is treated as ``"box"``.  For ``"bar"`` the whiskers show the
        population standard deviation (divisor ``n``).

    Returns
    -------
    matplotlib.figure.Figure

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    """
    plt, _ = _require_matplotlib()

    if labels is None:
        labels = [f"Group {i + 1}" for i in range(len(groups))]

    fig, ax = plt.subplots(figsize=(max(6, len(groups) * 1.8), 5))

    try:
        if kind == "violin":
            parts = ax.violinplot(groups, showmedians=True)
            for pc in parts.get("bodies", []):
                pc.set_facecolor("#4C72B0")
                pc.set_alpha(0.7)
            ax.set_xticks(range(1, len(groups) + 1))
            ax.set_xticklabels(labels)

        elif kind == "bar":
            import math

            # Test emptiness with len(): the truth value of a NumPy array
            # or pandas Series is ambiguous and raises ValueError.
            means = [sum(g) / len(g) if len(g) else 0.0 for g in groups]
            stds = [
                math.sqrt(sum((v - m) ** 2 for v in g) / len(g)) if len(g) > 1 else 0.0
                for g, m in zip(groups, means)
            ]
            _stderr_bar_chart(ax, labels, means, stds)

        else:  # box (default)
            import matplotlib as _mpl

            # Matplotlib 3.9 renamed boxplot's ``labels`` to ``tick_labels``.
            _mpl_ver = tuple(int(x) for x in _mpl.__version__.split(".")[:2])
            _bp_kw = "tick_labels" if _mpl_ver >= (3, 9) else "labels"
            ax.boxplot(
                groups,
                **{_bp_kw: labels},
                patch_artist=True,
                boxprops=dict(facecolor="#4C72B0", alpha=0.7),
                medianprops=dict(color="white", linewidth=2),
            )

        ax.set_title(title or "Group Distribution Comparison")
        ax.set_ylabel("Value")
        fig.tight_layout()
    except Exception:
        # Do not leave a half-drawn figure registered with pyplot.
        plt.close(fig)
        raise
    return fig
# --------------------------------------------------------------------------- # plot_result # ---------------------------------------------------------------------------
def plot_result(result: Any, kind: str = "auto") -> Any:
    """
    Generate a figure summarising a ``HypoResult``.

    Chart selection:

    - ``"auto"``    - two-panel comparison chart when ``data_summary``
      holds ``group1_mean`` and ``group2_mean``, otherwise the p-value curve
    - ``"bar"``     - same comparison chart; falls back to the p-value
      curve when the group means are not available
    - ``"p_value"`` - p-value distribution curve
    - any other value (including ``"box"``, for which the result carries
      no raw data) - p-value distribution curve

    Parameters
    ----------
    result : HypoResult
        Object exposing ``test_name``, ``p_value``, ``alpha``,
        ``statistic``, ``degrees_of_freedom`` and ``data_summary``;
        ``alternative`` is optional and defaults to ``"two-sided"``.
    kind : str
        ``"auto"``, ``"p_value"``, ``"bar"``, ``"box"``.

    Returns
    -------
    matplotlib.figure.Figure

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    """
    plt, _ = _require_matplotlib()

    p_value = result.p_value
    alpha = result.alpha
    stat = result.statistic
    df_stat = result.degrees_of_freedom
    alt = getattr(result, "alternative", "two-sided")
    d_summary = result.data_summary or {}

    # Only a scalar df is forwarded; anything else (None, tuple, ...) is dropped.
    df_val = df_stat if isinstance(df_stat, (int, float)) else None

    # ── chart selection ──────────────────────────────────────────────────
    has_group_means = "group1_mean" in d_summary and "group2_mean" in d_summary
    if kind in ("auto", "bar"):
        kind = "comparison_bar" if has_group_means else "p_value"

    # ── comparison bar (two-group t-test) ────────────────────────────────
    if kind == "comparison_bar":
        means = [d_summary.get("group1_mean", 0), d_summary.get("group2_mean", 0)]
        stds = [d_summary.get("group1_std", 0), d_summary.get("group2_std", 0)]

        # Show sample sizes only when both are actually recorded; a default
        # of 1 would mislabel the groups as "(n=1)".
        n1 = d_summary.get("group1_size")
        n2 = d_summary.get("group2_size")
        if n1 and n2:
            labels = [f"Group 1 (n={n1})", f"Group 2 (n={n2})"]
        else:
            labels = ["Group 1", "Group 2"]

        fig, axes = plt.subplots(1, 2, figsize=(11, 5))
        _stderr_bar_chart(axes[0], labels, means, stds, title="Group Means ± SD")

        # p-value panel
        _draw_p_panel(axes[1], p_value, alpha, stat, df_val, alt)

        sig = "Significant" if p_value < alpha else "Not significant"
        fig.suptitle(
            f"{result.test_name} | {sig} (p = {p_value:.4f})",
            fontsize=12,
            fontweight="bold",
        )
        # Leave headroom at the top for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.94])
        return fig

    # ── p-value only ─────────────────────────────────────────────────────
    return plot_p_value(
        p_value,
        alpha=alpha,
        degrees_of_freedom=df_val,
        test_statistic=stat,
        alternative=alt,
        title=f"{result.test_name} (p = {p_value:.4f})",
    )
def _draw_p_panel(ax, p_value, alpha, test_stat, df_val, alternative):
    """Draw the p-value distribution panel on an existing Axes object.

    Plots the standard normal density on [-4, 4], shades the rejection
    region(s) for *alternative* at level *alpha*, and marks *test_stat*
    with a dashed line when it is not None. *df_val* is accepted for
    signature compatibility but is not used by the drawing code.
    """
    n_pts = 300
    xs = [-4.0 + 8.0 * i / n_pts for i in range(n_pts + 1)]
    ys = _normal_pdf(xs, 0, 1)
    ax.plot(xs, ys, color="#2d6a9f", linewidth=2)
    ax.fill_between(xs, ys, 0, alpha=0.08, color="#2d6a9f")

    if alternative in ("two-sided", "less"):
        crit = _normal_ppf(alpha / 2 if alternative == "two-sided" else alpha)
        xs_r = [x for x in xs if x <= crit]
        ax.fill_between(xs_r, _normal_pdf(xs_r, 0, 1), 0, alpha=0.45, color="#d62728")

    if alternative in ("two-sided", "greater"):
        crit = _normal_ppf(1 - (alpha / 2 if alternative == "two-sided" else alpha))
        xs_r = [x for x in xs if x >= crit]
        ax.fill_between(xs_r, _normal_pdf(xs_r, 0, 1), 0, alpha=0.45, color="#d62728")

    if test_stat is not None:
        ax.axvline(x=test_stat, color="#e67e22", linewidth=2, linestyle="--")

    sig = "significant" if p_value < alpha else "not significant"
    # Separate the number from the verdict; without a separator the title
    # rendered as e.g. "p = 0.0312significant".
    ax.set_title(f"p = {p_value:.4f} → {sig}")
    ax.set_xlabel("Standard units")
    ax.set_ylabel("Density")


# ---------------------------------------------------------------------------
# generate_report
# ---------------------------------------------------------------------------


def generate_report(
    result: Any,
    path: Optional[str] = None,
    fmt: str = "html",
) -> str:
    """
    Generate a self-contained HTML, plain-text, or PDF report and
    optionally save it to *path*.

    Parameters
    ----------
    result : HypoResult
    path : str, optional
        File path to save the report.  If None, nothing is written.
    fmt : str
        ``"html"`` (default), ``"text"``, or ``"pdf"``.  Any other value is
        treated as ``"html"``.

    Returns
    -------
    str
        The report content for ``"html"`` and ``"text"``.  For ``"pdf"``
        a short placeholder of the form ``"<PDF: N bytes>"`` is returned;
        the PDF bytes themselves are only written to *path*.

    Raises
    ------
    ImportError
        For ``fmt="pdf"`` when ``weasyprint`` is not installed.

    Notes
    -----
    * PDF export requires the optional ``weasyprint`` package.
      Install with: ``pip install weasyprint``
    * For ``fmt="html"`` a plot is embedded as a base-64 PNG when it can
      be produced; otherwise the report is generated without an image.
    * All result-derived text is HTML-escaped before being embedded.
    """
    if fmt == "text":
        from ..reporting.generator import text_report

        content = text_report(result, verbose=True)
        if path:
            with open(path, "w", encoding="utf-8") as fh:
                fh.write(content)
        return content

    if fmt == "pdf":
        html_content = generate_report(result, path=None, fmt="html")
        try:
            import weasyprint  # type: ignore
        except ImportError as exc:
            raise ImportError(
                "PDF export requires weasyprint. " "Install with: pip install weasyprint"
            ) from exc
        pdf_bytes = weasyprint.HTML(string=html_content).write_pdf()
        if path:
            with open(path, "wb") as fh:
                fh.write(pdf_bytes)
        return f"<PDF: {len(pdf_bytes)} bytes>"

    # Default: HTML
    from html import escape

    from ..reporting.generator import apa_report

    # Try to embed a base64-encoded plot
    img_tag = ""
    fig = None
    try:
        import base64
        import io

        fig = plot_result(result)
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=100, bbox_inches="tight")
        img_b64 = base64.b64encode(buf.getvalue()).decode("ascii")
        img_tag = (
            f'<img src="data:image/png;base64,{img_b64}" '
            f'style="max-width:100%;margin:1em 0;" alt="test result plot"/>'
        )
    except Exception:
        # Plotting is optional (matplotlib may be missing or the result may
        # not be plottable); the report is still produced without an image.
        img_tag = ""
    finally:
        # Release the figure even when savefig failed part-way.
        if fig is not None:
            import matplotlib.pyplot as plt

            plt.close(fig)

    apa = apa_report(result)
    significance = "Significant" if result.is_significant else "Not Significant"
    sig_color = "#27ae60" if result.is_significant else "#e74c3c"

    # Build simple key-value stats table
    rows = [
        ("Test", result.test_name),
        ("Statistic", f"{result.statistic:.4f}"),
        ("p-value", f"{result.p_value:.6f}"),
        ("Significant", significance),
        ("Alpha", str(result.alpha)),
        ("Alternative", getattr(result, "alternative", "two-sided")),
    ]
    if result.degrees_of_freedom is not None:
        rows.append(("df", str(result.degrees_of_freedom)))
    if result.effect_size is not None:
        rows.append(
            (
                result.effect_size_name or "Effect size",
                f"{result.effect_size:.4f} ({result.effect_magnitude})",
            )
        )
    if result.confidence_interval is not None:
        # Round rather than truncate: int() turned 99.9 into 99 and could
        # drop a whole percent through floating-point error.
        ci_level = round((1 - result.alpha) * 100, 6)
        ci = result.confidence_interval
        rows.append((f"{ci_level:g}% CI", f"[{ci[0]:.4f}, {ci[1]:.4f}]"))

    # Escape everything that originates from the result so characters such
    # as "<" or "&" (e.g. "p < .05") cannot corrupt the markup.
    table_rows_html = "\n".join(
        f"<tr><th>{escape(str(k))}</th><td>{escape(str(v))}</td></tr>" for k, v in rows
    )
    safe_name = escape(str(result.test_name))
    safe_apa = escape(str(apa))
    interpretation = result.interpretation
    interpretation_html = (
        "<h2>Interpretation</h2><p>" + escape(str(interpretation)) + "</p>"
        if interpretation
        else ""
    )

    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>{safe_name} — HypoTestX Report</title>
<style>
  body {{font-family: system-ui, sans-serif; max-width: 860px; margin: 2em auto; padding: 0 1em; color: #222;}}
  h1 {{color: #2d6a9f; border-bottom: 2px solid #2d6a9f; padding-bottom:.3em;}}
  h2 {{color: #444; font-size: 1.1em; margin-top:1.8em;}}
  .badge {{display:inline-block; padding:.25em .7em; border-radius:4px; color:#fff; font-weight:bold; background:{sig_color};}}
  table {{border-collapse: collapse; width:100%; margin:.5em 0;}}
  th {{text-align:left; width:40%; background:#f0f4f8; padding:.4em .7em; border:1px solid #dde;}}
  td {{padding:.4em .7em; border:1px solid #dde;}}
  pre {{background:#f8f8f8; padding:1em; overflow-x:auto; font-size:.88em; border-left:4px solid #2d6a9f;}}
  footer{{font-size:.8em; color:#888; margin-top:2em;}}
</style>
</head>
<body>
<h1>{safe_name}</h1>
<p><span class="badge">{significance}</span></p>

<h2>Test Statistics</h2>
<table>{table_rows_html}</table>

{img_tag}

<h2>APA Citation</h2>
<pre>{safe_apa}</pre>

{interpretation_html}

<footer>Generated by HypoTestX — https://github.com/Ankit-Anand123/hypotestx</footer>
</body>
</html>
"""

    if path:
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(html_content)
    return html_content