# Source code for hypotestx.core.llm.backends.ollama
"""
Ollama backend — free, local, open-source LLM inference.
Install: https://ollama.com (Windows/Mac/Linux)
Run: ollama serve (starts on http://localhost:11434)
Pull: ollama pull llama3.2 (or mistral, gemma2, phi4, etc.)
Usage:
result = hx.analyze(df, "...", backend="ollama")
result = hx.analyze(df, "...", backend="ollama", model="mistral")
result = hx.analyze(df, "...", backend=OllamaBackend(model="gemma2"))
Recommended free models (small but capable):
llama3.2 ~2 GB fastest
mistral ~4 GB good quality
gemma2 ~5 GB very accurate
phi4 ~9 GB best reasoning
"""
from __future__ import annotations
import json
import urllib.error
import urllib.request
from typing import Dict, List, Optional
from ..base import LLMBackend
# Model name used when the caller does not pass ``model`` to OllamaBackend.
_DEFAULT_MODEL = "llama3.2"
# Base URL used when the caller does not pass ``host`` to OllamaBackend.
_DEFAULT_HOST = "http://localhost:11434"
# Path appended to the host for chat requests (see OllamaBackend.chat).
_CHAT_ENDPOINT = "/api/chat"
# Path appended to the host to probe the server and to list local models.
_TAGS_ENDPOINT = "/api/tags"
[docs]
class OllamaBackend(LLMBackend):
    """
    Ollama backend — fully local, zero API cost.

    Talks to an Ollama server over HTTP using only the standard library.

    Args:
        model: Ollama model name (default: ``llama3.2``).
        host: Base URL of the Ollama server (default: ``http://localhost:11434``).
            A trailing ``/`` is stripped.
        timeout: Request timeout in seconds for chat requests (default: 120).
        options: Extra Ollama model options dict, e.g. ``{"temperature": 0}``.
            When omitted (or empty), ``{"temperature": 0}`` is used.
    """

    name = "ollama"

    def __init__(
        self,
        model: str = _DEFAULT_MODEL,
        host: str = _DEFAULT_HOST,
        timeout: int = 120,
        options: Optional[Dict] = None,
    ):
        self.model = model
        # Endpoints start with "/", so drop any trailing slash from the host.
        self.host = host.rstrip("/")
        self.timeout = timeout
        # NOTE: a falsy ``options`` (None or {}) falls back to temperature 0.
        self.options = options or {"temperature": 0}

    # ------------------------------------------------------------------ #
    # LLMBackend interface                                                #
    # ------------------------------------------------------------------ #
    def chat(self, messages: List[Dict[str, str]]) -> str:
        """
        Send a chat request to the local Ollama server.

        Args:
            messages: Chat messages, forwarded unchanged as the ``messages``
                field of the request payload.

        Returns:
            The ``["message"]["content"]`` string of the JSON response.

        Raises:
            RuntimeError: If the server is not reachable, or if it answers
                with an HTTP error status (the server's error text is
                included in the message).
        """
        self._check_server()
        payload = json.dumps(
            {
                "model": self.model,
                "messages": messages,
                "stream": False,
                "options": self.options,
            }
        ).encode("utf-8")
        url = self.host + _CHAT_ENDPOINT
        req = urllib.request.Request(
            url,
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                data = json.loads(resp.read().decode("utf-8"))
        except urllib.error.HTTPError as exc:
            # HTTPError subclasses URLError, so it must be handled first;
            # otherwise a server-side error (the server *was* reached) would
            # be misreported as "could not reach".
            detail = self._describe_http_error(exc)
            hint = ""
            if exc.code == 404:
                # Ollama typically answers 404 when the model is not installed.
                hint = f" If the model is missing, run: `ollama pull {self.model}`"
            raise RuntimeError(
                f"[Ollama] Request to {url} failed with HTTP {exc.code}: {detail}.{hint}"
            ) from exc
        except urllib.error.URLError as exc:
            raise RuntimeError(
                f"[Ollama] Could not reach {url}. "
                "Make sure Ollama is running: `ollama serve`"
            ) from exc
        return data["message"]["content"]

    # ------------------------------------------------------------------ #
    # Helpers                                                             #
    # ------------------------------------------------------------------ #
    @staticmethod
    def _describe_http_error(exc: urllib.error.HTTPError) -> str:
        """Return the server's error text for *exc*, else its HTTP reason."""
        try:
            body = exc.read().decode("utf-8", errors="replace").strip()
        except Exception:
            # The error body is optional; never let reading it mask the error.
            return str(exc.reason)
        try:
            parsed = json.loads(body)
        except ValueError:
            return body or str(exc.reason)
        if isinstance(parsed, dict) and parsed.get("error"):
            return str(parsed["error"])
        return body or str(exc.reason)

    def _check_server(self) -> None:
        """
        Raise a helpful error if Ollama is not reachable.

        Probes the tags endpoint with a 5-second timeout.

        Raises:
            RuntimeError: If the probe request fails for any reason; the
                original exception is chained as the cause.
        """
        try:
            # Use a context manager so the probe response is always closed.
            with urllib.request.urlopen(self.host + _TAGS_ENDPOINT, timeout=5):
                pass
        except Exception as exc:
            raise RuntimeError(
                "[Ollama] Server not reachable at "
                f"{self.host}.\n"
                " 1. Install Ollama: https://ollama.com\n"
                " 2. Start it: ollama serve\n"
                f" 3. Pull a model: ollama pull {self.model}"
            ) from exc

    def available_models(self) -> List[str]:
        """
        Return list of locally available model names.

        Best-effort: any failure (server down, unexpected response shape)
        yields an empty list instead of raising.
        """
        try:
            url = self.host + _TAGS_ENDPOINT
            with urllib.request.urlopen(url, timeout=5) as resp:
                data = json.loads(resp.read().decode("utf-8"))
            return [entry["name"] for entry in data.get("models", [])]
        except Exception:
            return []

    def auto_select_model(self) -> str:
        """
        Pick the best locally available model.

        Preference order: phi4, gemma2, mistral, llama3.2, (anything else).
        Matching is by substring, so tagged names such as ``mistral:latest``
        match ``mistral``. Returns ``self.model`` when no models are listed.
        """
        installed = self.available_models()
        if not installed:
            return self.model  # let the request fail with a clear error
        for preferred in ("phi4", "gemma2", "mistral", "llama3.2"):
            for candidate in installed:
                if preferred in candidate:
                    return candidate
        return installed[0]

    def __repr__(self) -> str:
        return f"<OllamaBackend model='{self.model}' host='{self.host}'>"