# 2026-moving-helper/app/llm.py

"""LLM client module — all network egress is concentrated here.

Uses ``httpx`` (already in requirements) to call OpenAI-compatible endpoints.
No ``openai`` SDK dependency.  Sync functions are fine: FastAPI runs sync
handlers in a threadpool.

Public API:
- ``is_configured(cfg)`` — returns True when the client can make calls.
- ``test_connection(cfg)`` — minimal request to verify credentials.
- ``expand_query(cfg, query)`` — query-term expansion (step 3 consumer).
- ``analyze_image(...)`` — **reserved stub, not implemented**.

All calls go through ``_call_chat_completion()`` so tests can mock a single
boundary.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

import httpx

from app.settings_store import LLMConfig

# Timeout, in seconds, handed to the ``httpx.Client`` that
# ``_call_chat_completion`` creates for each request.
_TIMEOUT_SECONDS = 30


@dataclass
class LLMResult:
    """Outcome of one LLM operation, reported as a value rather than raised.

    ``test_connection`` and ``analyze_image`` return this wrapper on both
    their success and failure paths.
    """

    # True when the operation succeeded, False otherwise.
    success: bool
    # Human-readable status or error text.
    message: str
    # Optional payload; ``test_connection`` stores the parsed response here.
    data: Any = None


def is_configured(cfg: LLMConfig) -> bool:
    """Tell whether ``cfg`` is usable for making LLM calls.

    The config counts as usable only if it is enabled and both ``model`` and
    ``api_key`` are truthy.  Fields are checked in that order and checking
    stops at the first one that fails.
    """
    if not cfg.enabled:
        return False
    if not cfg.model:
        return False
    return bool(cfg.api_key)


def test_connection(cfg: LLMConfig) -> LLMResult:
    """Probe the configured endpoint with the smallest possible chat request.

    A one-word prompt with ``max_tokens=1`` keeps the check cheap.  Nothing
    is raised for request failures: each one is converted into a failed
    ``LLMResult`` whose message describes what went wrong.  On success the
    parsed response body is attached as ``data``.
    """
    if not is_configured(cfg):
        return LLMResult(
            success=False,
            message="LLM 未配置或未启用（缺少 model 或 api_key）。",
        )

    # Each handler only picks the failure text; the failed result itself is
    # built once, after the try statement.
    try:
        body = _call_chat_completion(
            cfg,
            messages=[{"role": "user", "content": "Hi"}],
            max_tokens=1,
        )
        return LLMResult(
            success=True,
            message=f"连接成功（模型：{cfg.model}）。",
            data=body,
        )
    except httpx.HTTPStatusError as exc:
        # The server answered, but with a non-2xx status.
        reason = (
            f"连接失败（HTTP {exc.response.status_code}）。"
            "请检查 base_url、model 和 api_key。"
        )
    except httpx.ConnectError:
        reason = "无法连接到服务器。请检查 base_url 是否正确。"
    except httpx.TimeoutException:
        reason = "连接超时。请检查网络或 base_url 是否可达。"
    except Exception as exc:  # noqa: BLE001 — graceful degradation
        reason = f"未知错误：{exc}"

    return LLMResult(success=False, message=reason)


def expand_query(cfg: LLMConfig, query: str) -> list[str]:
    """Expand a search query into related search terms via the LLM.

    The model is asked for 3-5 synonyms or related words, one per line.
    Intended to be consumed by the step-3 search flow.

    Args:
        cfg: LLM settings; checked with ``is_configured`` before any call.
        query: The user's original search term.

    Returns:
        A list whose first element is always ``query``, followed by the
        non-empty, whitespace-stripped lines of the model's reply in their
        original order.  Terms equal to ``query`` or to an earlier term are
        dropped, so the list never contains duplicates.  Falls back to
        ``[query]`` when the LLM is not configured, the call fails, or the
        reply holds no usable text (graceful degradation — never raises).
    """
    if not is_configured(cfg):
        return [query]

    try:
        response = _call_chat_completion(
            cfg,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "你是一个搜索词扩展助手。用户给你一个搜索词，"
                        "你返回 3-5 个同义词或相关词，每行一个。"
                        "不要编号、不要解释、不要标点。"
                    ),
                },
                {"role": "user", "content": query},
            ],
            max_tokens=100,
        )
        choices = response.get("choices") or []
        if not choices:
            return [query]

        # ``message`` and ``content`` may be missing or null in the reply;
        # treat both cases as an empty answer instead of relying on the
        # blanket handler below.
        message = choices[0].get("message") or {}
        content = message.get("content") or ""

        # Keep first occurrences only: the model may repeat a term or echo
        # the query, and duplicate terms are useless to the caller.
        terms = [query]
        seen = {query}
        for line in content.splitlines():
            term = line.strip()
            if term and term not in seen:
                seen.add(term)
                terms.append(term)
        return terms
    except Exception:  # noqa: BLE001 — graceful degradation
        return [query]


def analyze_image(cfg: LLMConfig, image_data: bytes, prompt: str) -> LLMResult:
    """Placeholder for image analysis through an LLM vision API.

    .. note:: **Not implemented yet.** The arguments are accepted but
        ignored and no request is made; every call yields a failed
        ``LLMResult``.  The signature is already fixed so callers can code
        against it ahead of the real implementation.
    """
    # TODO: Implement in future round for image analysis.
    not_implemented = LLMResult(success=False, message="图片分析功能尚未实现。")
    return not_implemented


# ------------------------------------------------------------------
# Internal boundary — all network calls go through this single function
# ------------------------------------------------------------------


def _call_chat_completion(
    cfg: LLMConfig,
    *,
    messages: list[dict[str, str]],
    max_tokens: int = 1,
) -> dict:
    """POST a chat request to the OpenAI-compatible ``/chat/completions`` route.

    The URL is ``cfg.base_url`` (trailing slashes removed) plus
    ``/chat/completions``; ``cfg.api_key`` is sent as a Bearer token.

    Returns the decoded JSON body when the server answers with a 2xx status.
    A non-2xx answer raises ``httpx.HTTPStatusError``; network-level problems
    surface as other ``httpx`` exceptions.  Nothing is caught here — callers
    decide how to degrade.
    """
    endpoint = f"{cfg.base_url.rstrip('/')}/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {cfg.api_key}",
        "Content-Type": "application/json",
    }
    request_body: dict[str, Any] = {
        "model": cfg.model,
        "messages": messages,
        "max_tokens": max_tokens,
    }

    # A short-lived client per call; the context manager closes it afterwards.
    with httpx.Client(timeout=_TIMEOUT_SECONDS) as http:
        reply = http.post(endpoint, json=request_body, headers=request_headers)
        reply.raise_for_status()
        return reply.json()