Source code for hypotestx.core.llm.backends.ollama

"""
Ollama backend — free, local, open-source LLM inference.

Install:  https://ollama.com  (Windows/Mac/Linux)
Run:      ollama serve          (starts on http://localhost:11434)
Pull:     ollama pull llama3.2  (or mistral, gemma2, phi4, etc.)

Usage:
    result = hx.analyze(df, "...", backend="ollama")
    result = hx.analyze(df, "...", backend="ollama", model="mistral")
    result = hx.analyze(df, "...", backend=OllamaBackend(model="gemma2"))

Recommended free models (small but capable):
    llama3.2     ~2 GB  fastest
    mistral      ~4 GB  good quality
    gemma2       ~5 GB  very accurate
    phi4         ~9 GB  best reasoning
"""

from __future__ import annotations

import json
import urllib.error
import urllib.request
from typing import Dict, List, Optional

from ..base import LLMBackend

# Model name used when the caller does not pass ``model`` to OllamaBackend.
_DEFAULT_MODEL = "llama3.2"
# Server base URL used when the caller does not pass ``host`` to OllamaBackend.
_DEFAULT_HOST = "http://localhost:11434"
# Path appended to the host for chat requests.
_CHAT_ENDPOINT = "/api/chat"
# Path appended to the host to probe the server and to list local models.
_TAGS_ENDPOINT = "/api/tags"



[docs]
class OllamaBackend(LLMBackend):
    """
    Ollama backend — fully local, zero API cost.

    Talks to an Ollama server over HTTP using only the standard library.

    Args:
        model:   Ollama model name (default: ``llama3.2``).
        host:    Base URL of the Ollama server (default: ``http://localhost:11434``).
                 Trailing slashes are stripped.
        timeout: Timeout in seconds for chat requests (default: 120).
                 Reachability probes and model listing use a fixed 5 seconds.
        options: Extra Ollama model options dict, e.g. ``{"temperature": 0}``.
                 ``None`` selects ``{"temperature": 0}``; any dict that is
                 passed (including an empty one) is sent as given.
    """

    name = "ollama"

    def __init__(
        self,
        model: str = _DEFAULT_MODEL,
        host: str = _DEFAULT_HOST,
        timeout: int = 120,
        options: Optional[Dict] = None,
    ):
        self.model = model
        self.host = host.rstrip("/")
        self.timeout = timeout
        # Test against None (not truthiness) so an explicit ``{}`` is honoured,
        # and copy so later mutation of the caller's dict does not leak in.
        self.options = {"temperature": 0} if options is None else dict(options)

    # ------------------------------------------------------------------ #
    # LLMBackend interface                                                 #
    # ------------------------------------------------------------------ #

    def chat(self, messages: List[Dict[str, str]]) -> str:
        """
        Send a chat request to the local Ollama server.

        Args:
            messages: Chat messages, serialised as-is into the request body.

        Returns:
            The ``message.content`` string of the server's JSON response.

        Raises:
            RuntimeError: If the server cannot be reached, answers with an
                HTTP error status, or returns a body that is not the
                expected JSON shape.
        """
        self._check_server()

        payload = json.dumps(
            {
                "model": self.model,
                "messages": messages,
                "stream": False,
                "options": self.options,
            }
        ).encode("utf-8")

        url = self.host + _CHAT_ENDPOINT
        req = urllib.request.Request(
            url,
            data=payload,
            headers={"Content-Type": "application/json"},
        )

        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                raw = resp.read()
        except urllib.error.HTTPError as exc:
            # HTTPError subclasses URLError, so it must be handled first:
            # the server *did* answer, and "could not reach" would mislead.
            detail = self._http_error_detail(exc)
            raise RuntimeError(
                f"[Ollama] Request to {url} failed with HTTP {exc.code}: {detail}"
            ) from exc
        except urllib.error.URLError as exc:
            raise RuntimeError(
                f"[Ollama] Could not reach {url}. " "Make sure Ollama is running: `ollama serve`"
            ) from exc

        try:
            return json.loads(raw.decode("utf-8"))["message"]["content"]
        except (ValueError, KeyError, TypeError) as exc:
            # ValueError covers both invalid JSON and invalid UTF-8.
            raise RuntimeError(
                f"[Ollama] Unexpected response from {url}: {raw[:200]!r}"
            ) from exc

    # ------------------------------------------------------------------ #
    # Helpers                                                              #
    # ------------------------------------------------------------------ #

    @staticmethod
    def _http_error_detail(exc: urllib.error.HTTPError) -> str:
        """Return the most specific description available for an HTTP error."""
        try:
            body = exc.read().decode("utf-8", errors="replace")
        except (OSError, AttributeError, ValueError):
            # No readable body attached to the error; use the status reason.
            return str(exc.reason)
        try:
            parsed = json.loads(body)
        except ValueError:
            parsed = None
        # Prefer a JSON ``error`` field when the body carries one.
        if isinstance(parsed, dict) and parsed.get("error"):
            return str(parsed["error"])
        return body.strip() or str(exc.reason)

    def _check_server(self) -> None:
        """
        Raise a helpful error if Ollama is not reachable.

        Raises:
            RuntimeError: If requesting the tags endpoint fails for any reason.
        """
        try:
            # ``with`` closes the probe response instead of leaking the socket.
            with urllib.request.urlopen(self.host + _TAGS_ENDPOINT, timeout=5):
                pass
        except Exception as exc:
            # Deliberately broad: any failure of the probe gets the same
            # setup instructions; the original cause stays chained.
            raise RuntimeError(
                "[Ollama] Server not reachable at "
                f"{self.host}.\n"
                "  1. Install Ollama: https://ollama.com\n"
                "  2. Start it:       ollama serve\n"
                f"  3. Pull a model:   ollama pull {self.model}"
            ) from exc

    def available_models(self) -> List[str]:
        """
        Return list of locally available model names.

        Best-effort: any failure (network, decoding, unexpected payload)
        yields an empty list rather than an exception.
        """
        try:
            url = self.host + _TAGS_ENDPOINT
            with urllib.request.urlopen(url, timeout=5) as resp:
                data = json.loads(resp.read().decode("utf-8"))
            return [m["name"] for m in data.get("models", [])]
        except Exception:
            return []

    def auto_select_model(self) -> str:
        """
        Pick the best locally available model.
        Preference order: phi4, gemma2, mistral, llama3.2, (anything else).

        A preference matches when it is a substring of an available model
        name. If no models can be listed, the configured model is returned.
        """
        available = self.available_models()
        if not available:
            return self.model  # let the request fail with a clear error
        preference = ["phi4", "gemma2", "mistral", "llama3.2"]
        for pref in preference:
            for m in available:
                if pref in m:
                    return m
        return available[0]

    def __repr__(self) -> str:
        return f"<OllamaBackend model='{self.model}' host='{self.host}'>"