# Source code for hypotestx.explore.visualize

"""
Visualization helpers for HypoTestX.

All plotting functions gracefully degrade when matplotlib is not installed:
they raise ``ImportError`` with a helpful install message.

Install the optional visualization dependencies with::

    pip install hypotestx[visualization]   # matplotlib + plotly
    # or just
    pip install matplotlib

Public API
----------
plot_result(result)                   -> matplotlib Figure
plot_distributions(groups, labels)    -> matplotlib Figure
plot_p_value(p_value, alpha, df)      -> matplotlib Figure
generate_report(result, path, fmt)    -> report string (HTML / text / PDF); saved when path is given
"""

from __future__ import annotations

from typing import Any, List, Optional, Sequence

__all__ = [
    "plot_result",
    "plot_distributions",
    "plot_p_value",
    "generate_report",
]


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _require_matplotlib():
    """Return (plt, patches) or raise a descriptive ImportError."""
    try:
        import matplotlib.patches as mpatches
        import matplotlib.pyplot as plt

        return plt, mpatches
    except ImportError as exc:
        raise ImportError(
            "Matplotlib is required for plotting. "
            "Install it with:  pip install matplotlib  "
            "or  pip install hypotestx[visualization]"
        ) from exc


def _stderr_bar_chart(ax, group_labels, means, stds, title=""):
    """Draw a simple bar chart with ±1 SD error bars."""
    x = list(range(len(group_labels)))
    ax.bar(x, means, width=0.5, color="#4C72B0", edgecolor="white", alpha=0.85)
    ax.errorbar(x, means, yerr=stds, fmt="none", color="black", capsize=5, linewidth=1.5)
    ax.set_xticks(x)
    ax.set_xticklabels(group_labels)
    ax.set_ylabel("Mean ± SD")
    if title:
        ax.set_title(title)


def _normal_pdf(x_vals, mu, sigma):
    """Compute normal PDF values, returning zeros if sigma == 0."""
    import math

    if sigma == 0:
        return [0.0] * len(x_vals)
    return [
        (1 / (sigma * math.sqrt(2 * math.pi))) * math.exp(-0.5 * ((x - mu) / sigma) ** 2)
        for x in x_vals
    ]


# ---------------------------------------------------------------------------
# plot_p_value
# ---------------------------------------------------------------------------



# [docs]
def plot_p_value(
    p_value: float,
    alpha: float = 0.05,
    degrees_of_freedom: Optional[float] = None,
    test_statistic: Optional[float] = None,
    alternative: str = "two-sided",
    title: str = "",
) -> Any:
    """
    Visualise a test outcome on a standard-normal density curve.

    Shades the rejection region(s) implied by *alpha* and *alternative* and,
    when given, marks the observed test statistic with a dashed vertical
    line.  The p-value itself only appears in the default title.

    Parameters
    ----------
    p_value : float
        Observed p-value; compared with *alpha* to word the default title
        as "significant" or "not significant".
    alpha : float
        Significance level (default 0.05).
    degrees_of_freedom : float, optional
        When greater than 2, the x-axis is widened by
        ``sqrt(df / (df - 2))``.  The curve and the critical values are
        still those of the standard normal; this is a visual approximation,
        not an exact t-distribution.
    test_statistic : float, optional
        If provided, marks the observed statistic on the x-axis.
    alternative : str
        ``"two-sided"``, ``"greater"``, or ``"less"``.  Any other value
        draws the curve without a rejection region.
    title : str
        Plot title.  Defaults to a summary of *p_value* and *alpha*.

    Returns
    -------
    matplotlib.figure.Figure

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    """
    plt, _ = _require_matplotlib()
    import math

    fig, ax = plt.subplots(figsize=(8, 4))
    n_pts = 400

    # Widen the axis for small df so that heavy-tailed statistics still fit.
    half_width = 4.0
    if degrees_of_freedom is not None and degrees_of_freedom > 2:
        half_width *= math.sqrt(degrees_of_freedom / (degrees_of_freedom - 2))

    step = 2 * half_width / n_pts
    xs = [-half_width + i * step for i in range(n_pts + 1)]
    ys = _normal_pdf(xs, 0, 1)

    ax.plot(xs, ys, color="#2d6a9f", linewidth=2)
    ax.fill_between(xs, ys, 0, alpha=0.08, color="#2d6a9f")

    # Collect the grid points of each rejection tail.
    tail_alpha = alpha / 2 if alternative == "two-sided" else alpha
    tails = []
    if alternative in ("two-sided", "less"):
        crit_low = _normal_ppf(tail_alpha)
        tails.append([x for x in xs if x <= crit_low])
    if alternative in ("two-sided", "greater"):
        crit_high = _normal_ppf(1 - tail_alpha)
        tails.append([x for x in xs if x >= crit_high])

    for idx, xs_rej in enumerate(tails):
        ax.fill_between(
            xs_rej,
            _normal_pdf(xs_rej, 0, 1),
            0,
            alpha=0.45,
            color="#d62728",
            # Only the first tail gets a legend entry so a two-sided plot
            # does not list "Rejection region" twice.
            label="Rejection region" if idx == 0 else "_nolegend_",
        )

    # Mark observed statistic
    if test_statistic is not None:
        ax.axvline(
            x=test_statistic,
            color="#e67e22",
            linewidth=2,
            linestyle="--",
            label=f"Test statistic = {test_statistic:.3f}",
        )

    sig_label = "significant" if p_value < alpha else "not significant"
    ax.set_xlabel("Standard units")
    ax.set_ylabel("Density")
    ax.set_title(title or f"p = {p_value:.4f}  (alpha = {alpha})  →  {sig_label}")

    # Skip the legend when nothing is labelled (unrecognised alternative and
    # no test statistic); matplotlib would otherwise emit a warning.
    handles, _labels = ax.get_legend_handles_labels()
    if handles:
        ax.legend(loc="upper right", fontsize=9)

    fig.tight_layout()
    return fig



def _normal_ppf(p: float) -> float:
    """Approximate normal inverse CDF (rational approximation)."""
    import math

    # Rational approximation (Beasley-Springer-Moro)
    if p <= 0.0:
        return float("-inf")
    if p >= 1.0:
        return float("inf")
    if p < 0.5:
        t = math.sqrt(-2.0 * math.log(p))
    else:
        t = math.sqrt(-2.0 * math.log(1.0 - p))
    c0, c1, c2 = 2.515517, 0.802853, 0.010328
    d1, d2, d3 = 1.432788, 0.189269, 0.001308
    numerator = c0 + c1 * t + c2 * t * t
    denominator = 1.0 + d1 * t + d2 * t * t + d3 * t * t * t
    result = t - numerator / denominator
    return result if p < 0.5 else -result


# ---------------------------------------------------------------------------
# plot_distributions
# ---------------------------------------------------------------------------



# [docs]
def plot_distributions(
    groups: List[Sequence[float]],
    labels: Optional[List[str]] = None,
    title: str = "",
    kind: str = "box",
) -> Any:
    """
    Plot the distribution of one or more groups side-by-side.

    Parameters
    ----------
    groups : list of sequences
        Each element is a numeric sequence (one per group).  For
        ``kind="bar"`` the elements only need ``len()`` and iteration, so
        lists, tuples, NumPy arrays and similar containers are accepted.
    labels : list of str, optional
        Group labels.  Defaults to ``["Group 1", "Group 2", ...]``.
    title : str
        Plot title.  Defaults to ``"Group Distribution Comparison"``.
    kind : str
        ``"box"`` (default), ``"violin"``, or ``"bar"``.  Any other value
        is treated as ``"box"``.  ``"bar"`` shows each group's mean with a
        population standard deviation (divisor ``n``) as error bar; an
        empty group is drawn with mean 0 and no error bar.

    Returns
    -------
    matplotlib.figure.Figure

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    """
    plt, _ = _require_matplotlib()

    if labels is None:
        labels = [f"Group {i + 1}" for i in range(len(groups))]

    fig, ax = plt.subplots(figsize=(max(6, len(groups) * 1.8), 5))

    try:
        if kind == "violin":
            parts = ax.violinplot(groups, showmedians=True)
            for pc in parts.get("bodies", []):
                pc.set_facecolor("#4C72B0")
                pc.set_alpha(0.7)
            # violinplot places the bodies at positions 1..n
            ax.set_xticks(range(1, len(groups) + 1))
            ax.set_xticklabels(labels)
        elif kind == "bar":
            import math

            means = []
            stds = []
            for g in groups:
                # Use len() rather than truthiness: ``if g`` raises for
                # array-like containers holding more than one element.
                n = len(g)
                mean = sum(g) / n if n else 0.0
                variance = sum((v - mean) ** 2 for v in g) / n if n > 1 else 0.0
                means.append(mean)
                stds.append(math.sqrt(variance))
            _stderr_bar_chart(ax, labels, means, stds)
        else:  # box (default)
            import matplotlib as _mpl

            # The boxplot label keyword is chosen by matplotlib version:
            # "tick_labels" from 3.9 on, "labels" before that.
            _mpl_ver = tuple(int(x) for x in _mpl.__version__.split(".")[:2])
            _bp_kw = "tick_labels" if _mpl_ver >= (3, 9) else "labels"
            ax.boxplot(
                groups,
                **{_bp_kw: labels},
                patch_artist=True,
                boxprops=dict(facecolor="#4C72B0", alpha=0.7),
                medianprops=dict(color="white", linewidth=2),
            )

        ax.set_title(title or "Group Distribution Comparison")
        ax.set_ylabel("Value")
        fig.tight_layout()
    except Exception:
        # Do not leave a half-drawn figure registered with pyplot.
        plt.close(fig)
        raise
    return fig



# ---------------------------------------------------------------------------
# plot_result
# ---------------------------------------------------------------------------



# [docs]
def plot_result(result: Any, kind: str = "auto") -> Any:
    """
    Generate a figure summarising a ``HypoResult``.

    Parameters
    ----------
    result : HypoResult
        Object providing ``test_name``, ``p_value``, ``alpha``,
        ``statistic``, ``degrees_of_freedom`` and ``data_summary``.
        ``alternative`` is optional and defaults to ``"two-sided"``.
    kind : str
        - ``"auto"`` (default): two-panel comparison figure when
          ``data_summary`` contains both ``group1_mean`` and
          ``group2_mean``, otherwise the p-value curve.
        - ``"bar"`` / ``"comparison_bar"``: two-panel figure with group
          means ± SD on the left and the p-value curve on the right.
          Missing summary values are drawn as 0.
        - ``"p_value"`` or any other value (including ``"box"``):
          the p-value curve only.

    Returns
    -------
    matplotlib.figure.Figure

    Raises
    ------
    ImportError
        If matplotlib is not installed.
    """
    plt, _ = _require_matplotlib()

    p_value = result.p_value
    alpha = result.alpha
    stat = result.statistic
    df_stat = result.degrees_of_freedom
    alt = getattr(result, "alternative", "two-sided")
    d_summary = result.data_summary or {}
    # Only a scalar df is forwarded; anything else is dropped.
    df_val = df_stat if isinstance(df_stat, (int, float)) else None

    # ── auto strategy ────────────────────────────────────────────────────
    if kind == "auto":
        has_group_means = "group1_mean" in d_summary and "group2_mean" in d_summary
        kind = "comparison_bar" if has_group_means else "p_value"

    # ── comparison bar (two-group t-test) ────────────────────────────────
    if kind in ("bar", "comparison_bar"):
        means = [d_summary.get("group1_mean", 0), d_summary.get("group2_mean", 0)]
        stds = [d_summary.get("group1_std", 0), d_summary.get("group2_std", 0)]

        # Show sample sizes only when both are actually known; a made-up
        # default would label the bars with a wrong n.
        n1 = d_summary.get("group1_size")
        n2 = d_summary.get("group2_size")
        if n1 and n2:
            labels = [f"Group 1 (n={n1})", f"Group 2 (n={n2})"]
        else:
            labels = ["Group 1", "Group 2"]

        fig, axes = plt.subplots(1, 2, figsize=(11, 5))
        try:
            _stderr_bar_chart(axes[0], labels, means, stds, title="Group Means ± SD")

            # p-value panel
            _draw_p_panel(axes[1], p_value, alpha, stat, df_val, alt)

            sig = "Significant" if p_value < alpha else "Not significant"
            fig.suptitle(
                f"{result.test_name}  |  {sig}  (p = {p_value:.4f})",
                fontsize=12,
                fontweight="bold",
            )
            # Leave headroom at the top for the suptitle.
            fig.tight_layout(rect=[0, 0, 1, 0.94])
        except Exception:
            # Do not leave a half-drawn figure registered with pyplot.
            plt.close(fig)
            raise
        return fig

    # ── p-value only ─────────────────────────────────────────────────────
    return plot_p_value(
        p_value,
        alpha=alpha,
        degrees_of_freedom=df_val,
        test_statistic=stat,
        alternative=alt,
        title=f"{result.test_name}  (p = {p_value:.4f})",
    )



def _draw_p_panel(ax, p_value, alpha, test_stat, df_val, alternative):
    """Paint the p-value panel onto an already-created Axes.

    Draws a standard-normal density on [-4, 4], shades the rejection
    tail(s) selected by *alternative* (``"two-sided"``, ``"less"`` or
    ``"greater"``; anything else shades nothing), marks *test_stat* with a
    dashed line when it is not None, and sets title and axis labels.

    *df_val* is accepted for signature compatibility but is not used.
    """
    curve_colour = "#2d6a9f"
    reject_colour = "#d62728"

    n_pts = 300
    grid = [-4.0 + 8.0 * i / n_pts for i in range(n_pts + 1)]
    density = _normal_pdf(grid, 0, 1)

    ax.plot(grid, density, color=curve_colour, linewidth=2)
    ax.fill_between(grid, density, 0, alpha=0.08, color=curve_colour)

    def shade(points):
        # Fill the area under the curve above the given grid points.
        ax.fill_between(points, _normal_pdf(points, 0, 1), 0, alpha=0.45, color=reject_colour)

    two_sided = alternative == "two-sided"
    tail_mass = alpha / 2 if two_sided else alpha

    if two_sided or alternative == "less":
        lower_cut = _normal_ppf(tail_mass)
        shade([x for x in grid if x <= lower_cut])
    if two_sided or alternative == "greater":
        upper_cut = _normal_ppf(1 - tail_mass)
        shade([x for x in grid if x >= upper_cut])

    if test_stat is not None:
        ax.axvline(x=test_stat, color="#e67e22", linewidth=2, linestyle="--")

    if p_value < alpha:
        verdict = "significant"
    else:
        verdict = "not significant"
    ax.set_title(f"p = {p_value:.4f}  →  {verdict}")
    ax.set_xlabel("Standard units")
    ax.set_ylabel("Density")


# ---------------------------------------------------------------------------
# generate_report
# ---------------------------------------------------------------------------


def _report_plot_img_tag(result: Any) -> str:
    """Return an ``<img>`` tag embedding the result plot as a base-64 PNG.

    Plotting is optional for the report: an empty string is returned when
    the figure cannot be produced or rendered (for example because
    matplotlib is not installed).
    """
    import base64
    import io

    try:
        fig = plot_result(result)
    except Exception:
        return ""  # plotting is optional

    try:
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=100, bbox_inches="tight")
        img_b64 = base64.b64encode(buf.getvalue()).decode("ascii")
    except Exception:
        return ""  # plotting is optional
    finally:
        # Always release the figure, even when rendering failed.
        import matplotlib.pyplot as plt

        plt.close(fig)

    return (
        f'<img src="data:image/png;base64,{img_b64}" '
        f'style="max-width:100%;margin:1em 0;" alt="test result plot"/>'
    )


def generate_report(
    result: Any,
    path: Optional[str] = None,
    fmt: str = "html",
) -> str:
    """
    Generate a report for *result* and optionally save it to *path*.

    Parameters
    ----------
    result : HypoResult
    path : str, optional
        File path to save the report.  If None, nothing is written.
    fmt : str
        ``"html"`` (default), ``"text"`` or ``"pdf"``.  Any other value is
        treated as ``"html"``.

    Returns
    -------
    str
        The report content for ``"html"`` and ``"text"``.  For ``"pdf"``
        a short placeholder of the form ``"<PDF: N bytes>"`` is returned;
        the PDF bytes themselves are only written to *path*.

    Raises
    ------
    ImportError
        For ``fmt="pdf"`` when ``weasyprint`` is not installed.

    Notes
    -----
    * PDF export requires the optional ``weasyprint`` package.
      Install with:  ``pip install weasyprint``
    * For ``fmt="html"`` the result plot is embedded as a base-64 PNG when
      it can be rendered; otherwise the report contains no image.
    * All values taken from *result* are HTML-escaped before being
      inserted into the page.
    """
    if fmt == "text":
        from ..reporting.generator import text_report

        content = text_report(result, verbose=True)
        if path:
            with open(path, "w", encoding="utf-8") as fh:
                fh.write(content)
        return content

    if fmt == "pdf":
        # Check the optional dependency first so a missing install fails
        # before any rendering work is done.
        try:
            import weasyprint  # type: ignore
        except ImportError as exc:
            raise ImportError(
                "PDF export requires weasyprint. " "Install with:  pip install weasyprint"
            ) from exc
        html_content = generate_report(result, path=None, fmt="html")
        pdf_bytes = weasyprint.HTML(string=html_content).write_pdf()
        if path:
            with open(path, "wb") as fh:
                fh.write(pdf_bytes)
        return f"<PDF: {len(pdf_bytes)} bytes>"

    # Default: HTML
    from html import escape

    from ..reporting.generator import apa_report

    img_tag = _report_plot_img_tag(result)

    apa = escape(str(apa_report(result)))
    test_name = escape(str(result.test_name))
    significance = "Significant" if result.is_significant else "Not Significant"
    sig_color = "#27ae60" if result.is_significant else "#e74c3c"

    # Build simple key-value stats table
    rows = [
        ("Test", result.test_name),
        ("Statistic", f"{result.statistic:.4f}"),
        ("p-value", f"{result.p_value:.6f}"),
        ("Significant", significance),
        ("Alpha", str(result.alpha)),
        ("Alternative", result.alternative),
    ]
    if result.degrees_of_freedom is not None:
        rows.append(("df", str(result.degrees_of_freedom)))
    if result.effect_size is not None:
        rows.append(
            (
                result.effect_size_name or "Effect size",
                f"{result.effect_size:.4f} ({result.effect_magnitude})",
            )
        )
    if result.confidence_interval is not None:
        # ":g" rounds instead of truncating, so alpha=0.07 yields "93% CI"
        # and alpha=0.001 yields "99.9% CI".
        ci_level = f"{(1 - result.alpha) * 100:g}"
        ci = result.confidence_interval
        rows.append((f"{ci_level}% CI", f"[{ci[0]:.4f}, {ci[1]:.4f}]"))

    table_rows_html = "\n".join(
        f"<tr><th>{escape(str(k))}</th><td>{escape(str(v))}</td></tr>" for k, v in rows
    )

    interpretation_html = ""
    if result.interpretation:
        interpretation_html = (
            "<h2>Interpretation</h2><p>" + escape(str(result.interpretation)) + "</p>"
        )

    html_content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>{test_name} — HypoTestX Report</title>
<style>
  body {{font-family: system-ui, sans-serif; max-width: 860px; margin: 2em auto;
         padding: 0 1em; color: #222;}}
  h1   {{color: #2d6a9f; border-bottom: 2px solid #2d6a9f; padding-bottom:.3em;}}
  h2   {{color: #444; font-size: 1.1em; margin-top:1.8em;}}
  .badge {{display:inline-block; padding:.25em .7em; border-radius:4px;
            color:#fff; font-weight:bold; background:{sig_color};}}
  table {{border-collapse: collapse; width:100%; margin:.5em 0;}}
  th    {{text-align:left; width:40%; background:#f0f4f8;
           padding:.4em .7em; border:1px solid #dde;}}
  td    {{padding:.4em .7em; border:1px solid #dde;}}
  pre   {{background:#f8f8f8; padding:1em; overflow-x:auto; font-size:.88em;
           border-left:4px solid #2d6a9f;}}
  footer{{font-size:.8em; color:#888; margin-top:2em;}}
</style>
</head>
<body>
<h1>{test_name}</h1>
<p><span class="badge">{significance}</span></p>

<h2>Test Statistics</h2>
<table>{table_rows_html}</table>

{img_tag}

<h2>APA Citation</h2>
<pre>{apa}</pre>

{interpretation_html}

<footer>Generated by HypoTestX — https://github.com/Ankit-Anand123/hypotestx</footer>
</body>
</html>
"""
    if path:
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(html_content)
    return html_content