Add AI search query expansion

Add LLM settings integration
Add app_settings migration, settings UI, and OpenAI-compatible httpx LLM client with mocked tests. Preserve API keys on blank form submissions, require a fresh key when base_url changes, and keep AI search settings untouched for step 3. Update docs/design LLM integration and step 3 AI search notes, including prompt contract and extra-hints planning.
2026-06-01 21:28:29 +02:00 · 2026-06-01 20:06:22 +02:00 · 2026-06-01 16:02:43 +02:00 · 2026-06-01 13:10:59 +02:00 · 2026-05-04 11:03:44 +02:00 · 2026-05-04 10:49:27 +02:00
32 changed files with 4617 additions and 112 deletions
@@ -1,9 +1,28 @@
 # Local environment and secrets
 .env
 .venv/
 # Runtime data and local review notes
 data/
 review-notes/
 Review-Notes/
 # Python cache/test/build output
 __pycache__/
 .pytest_cache/
 .mypy_cache/
 .ruff_cache/
 .coverage
 htmlcov/
 build/
 dist/
 *.egg-info/
 *.pyc
-.env
+
-data/*.db
+# Local tool state
 .codex
 .claude/settings.local.json
 backups/
 # macOS generated files
 .DS_Store
@@ -14,4 +33,3 @@ data/*.db
 **/.Spotlight-V100
 .Trashes
 **/.Trashes
 .codex
@@ -10,6 +10,8 @@ WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY alembic.ini .
 COPY migrations ./migrations
 COPY app ./app
 RUN mkdir -p /app/data
@@ -0,0 +1,148 @@
 # A generic, single database configuration.
 [alembic]
 # path to migration scripts.
 # this is typically a path given in POSIX (e.g. forward slashes)
 # format, relative to the token %(here)s which refers to the location of this
 # ini file
 script_location = %(here)s/migrations
 # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
 # Uncomment the line below if you want the files to be prepended with date and time
 # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
 # for all available tokens
 # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
 # sys.path path, will be prepended to sys.path if present.
 # defaults to the current working directory.  for multiple paths, the path separator
 # is defined by "path_separator" below.
 prepend_sys_path = .
 # timezone to use when rendering the date within the migration file
 # as well as the filename.
 # If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
 # Any required deps can be installed by adding `alembic[tz]` to the pip requirements
 # string value is passed to ZoneInfo()
 # leave blank for localtime
 # timezone =
 # max length of characters to apply to the "slug" field
 # truncate_slug_length = 40
 # set to 'true' to run the environment during
 # the 'revision' command, regardless of autogenerate
 # revision_environment = false
 # set to 'true' to allow .pyc and .pyo files without
 # a source .py file to be detected as revisions in the
 # versions/ directory
 # sourceless = false
 # version location specification; This defaults
 # to <script_location>/versions.  When using multiple version
 # directories, initial revisions must be specified with --version-path.
 # The path separator used here should be the separator specified by "path_separator"
 # below.
 # version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
 # path_separator; This indicates what character is used to split lists of file
 # paths, including version_locations and prepend_sys_path within configparser
 # files such as alembic.ini.
 # The default rendered in new alembic.ini files is "os", which uses os.pathsep
 # to provide os-dependent path splitting.
 #
 # Note that in order to support legacy alembic.ini files, this default does NOT
 # take place if path_separator is not present in alembic.ini.  If this
 # option is omitted entirely, fallback logic is as follows:
 #
 # 1. Parsing of the version_locations option falls back to using the legacy
 #    "version_path_separator" key, which if absent then falls back to the legacy
 #    behavior of splitting on spaces and/or commas.
 # 2. Parsing of the prepend_sys_path option falls back to the legacy
 #    behavior of splitting on spaces, commas, or colons.
 #
 # Valid values for path_separator are:
 #
 # path_separator = :
 # path_separator = ;
 # path_separator = space
 # path_separator = newline
 #
 # Use os.pathsep. Default configuration used for new projects.
 path_separator = os
 # set to 'true' to search source files recursively
 # in each "version_locations" directory
 # new in Alembic version 1.10
 # recursive_version_locations = false
 # the output encoding used when revision files
 # are written from script.py.mako
 # output_encoding = utf-8
 # database URL.  This is consumed by the user-maintained env.py script only.
 # other means of configuring database URLs may be customized within the env.py
 # file.
 # sqlalchemy.url is set dynamically in migrations/env.py from app.config
 # Do NOT set it here.
 [post_write_hooks]
 # post_write_hooks defines scripts or Python functions that are run
 # on newly generated revision scripts.  See the documentation for further
 # detail and examples
 # format using "black" - use the console_scripts runner, against the "black" entrypoint
 # hooks = black
 # black.type = console_scripts
 # black.entrypoint = black
 # black.options = -l 79 REVISION_SCRIPT_FILENAME
 # lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
 # hooks = ruff
 # ruff.type = module
 # ruff.module = ruff
 # ruff.options = check --fix REVISION_SCRIPT_FILENAME
 # Alternatively, use the exec runner to execute a binary found on your PATH
 # hooks = ruff
 # ruff.type = exec
 # ruff.executable = ruff
 # ruff.options = check --fix REVISION_SCRIPT_FILENAME
 # Logging configuration.  This is also consumed by the user-maintained
 # env.py script only.
 [loggers]
 keys = root,sqlalchemy,alembic
 [handlers]
 keys = console
 [formatters]
 keys = generic
 [logger_root]
 level = WARNING
 handlers = console
 qualname =
 [logger_sqlalchemy]
 level = WARNING
 handlers =
 qualname = sqlalchemy.engine
 [logger_alembic]
 level = INFO
 handlers =
 qualname = alembic
 [handler_console]
 class = StreamHandler
 args = (sys.stderr,)
 level = NOTSET
 formatter = generic
 [formatter_generic]
 format = %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %H:%M:%S
@@ -1,6 +1,6 @@
 from typing import Generator
-from sqlalchemy import create_engine, event, text
+from sqlalchemy import create_engine, event
 from sqlalchemy.engine import make_url
 from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
@@ -62,47 +62,14 @@ def get_db() -> Generator[Session, None, None]:
 def init_db(database_url: str | None = None) -> None:
-    from app import models
+    from app import models  # noqa: F401 — register models on Base.metadata
    if engine is None or database_url is not None:
        configure_database(database_url)
-    Base.metadata.create_all(bind=engine)
+    from app.migrate import verify_schema_is_current
-    _sync_sqlite_image_columns()
+
    resolved_url = str(engine.url)
    verify_schema_is_current(resolved_url)
 def _sync_sqlite_image_columns() -> None:
    """Add any missing image columns to the ``boxes``, ``items`` and ``subitems`` tables.

    Returns immediately when no engine is configured or when the engine's
    dialect is not SQLite.
    """
    if engine is None:
        return
    if engine.dialect.name != "sqlite":
        return
    # All three tables carry the same four image columns.
    shared_columns = {
        "image_blob": "BLOB",
        "image_mime_type": "VARCHAR(50)",
        "image_width": "INTEGER",
        "image_height": "INTEGER",
    }
    with engine.begin() as connection:
        for table_name in ("boxes", "items", "subitems"):
            table_info = connection.execute(text(f"PRAGMA table_info({table_name})"))
            # row[1] of each PRAGMA row is compared against the column names.
            present = {row[1] for row in table_info}
            missing = [
                (column_name, column_type)
                for column_name, column_type in shared_columns.items()
                if column_name not in present
            ]
            for column_name, column_type in missing:
                connection.execute(
                    text(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}")
                )
@@ -0,0 +1,272 @@
 """LLM client module — all network egress is concentrated here.
 Uses ``httpx`` (already in requirements) to call OpenAI-compatible endpoints.
 No ``openai`` SDK dependency.  Sync functions are fine: FastAPI runs sync
 handlers in a threadpool.
 Public API:
 - ``is_configured(cfg)`` — returns True when the client can make calls.
 - ``test_connection(cfg)`` — minimal request to verify credentials.
 - ``expand_query(cfg, query)`` — query-term expansion (step 3 consumer).
  Returns ``ExpansionResult`` with ``terms`` and optional ``error``.
 - ``analyze_image(...)`` — **reserved stub, not implemented**.
 All calls go through ``_call_chat_completion()`` so tests can mock a single
 boundary.
 """
 from __future__ import annotations
 import json
 import re
 from dataclasses import dataclass
 from typing import Any
 import httpx
 from app.settings_store import LLMConfig
 # Sensible defaults
 _TIMEOUT_SECONDS = 30
 # ── Prompt for query expansion (Step 3) ──────────────────────────────────
 _EXPAND_QUERY_SYSTEM_PROMPT = (
    "你是搬家物品搜索助手。用户在搜索自己打包的箱子与物品（家居/搬家场景）。"
    "给定一个搜索词，列出用户可能用来命名同一类物品的相关词："
    "近义词、常见别称、上位类别、具体品类。"
    "规则：用与查询相同的语言；"
    "只给与该物品紧密相关、有助于在清单里找到它的词；"
    "不要解释、不要造无关词；最多 8 个；"
    "只输出一个 JSON 字符串数组，例如 "
    '`["炒锅","平底锅","汤锅","厨具"]`。'
 )
@dataclass
 class LLMResult:
    """Uniform result wrapper for LLM calls.

    Returned by ``test_connection`` and by the ``analyze_image`` stub.
    """
    # True when the call succeeded, False otherwise.
    success: bool
    # Human-readable status or error text.
    message: str
    # Optional payload; ``test_connection`` stores the parsed response body here.
    data: Any = None
@dataclass
 class ExpansionResult:
    """Structured result from ``expand_query``.

    ``terms`` is always a list (may be empty).
    ``error`` is ``None`` on success (including legitimate empty results);
    on failure (timeout, network error, HTTP error) it contains a
    human-friendly error message.
    """
    # Expanded search terms; empty on failure or when nothing usable came back.
    terms: list[str]
    # Human-friendly failure message, or None when the call did not fail.
    error: str | None = None
 def is_configured(cfg: LLMConfig) -> bool:
    """Report whether ``cfg`` is usable for LLM calls.

    The config counts as configured only when it is enabled and both
    ``model`` and ``api_key`` are truthy.
    """
    if not cfg.enabled:
        return False
    if not cfg.model:
        return False
    return bool(cfg.api_key)
 def test_connection(cfg: LLMConfig) -> LLMResult:
    """Verify ``cfg`` by sending one minimal chat-completion request.

    A one-token "Hi" prompt keeps the cost low.  The outcome is always
    reported as an ``LLMResult``; no exception escapes this function.
    """
    if not is_configured(cfg):
        return LLMResult(
            success=False,
            message="LLM 未配置或未启用（缺少 model 或 api_key）。",
        )
    failure_message: str
    try:
        body = _call_chat_completion(
            cfg,
            messages=[{"role": "user", "content": "Hi"}],
            max_tokens=1,
        )
        return LLMResult(
            success=True,
            message=f"连接成功（模型：{cfg.model}）。",
            data=body,
        )
    except httpx.HTTPStatusError as exc:
        # The endpoint answered, but with a non-2xx status.
        status = exc.response.status_code
        failure_message = f"连接失败（HTTP {status}）。请检查 base_url、model 和 api_key。"
    except httpx.ConnectError:
        failure_message = "无法连接到服务器。请检查 base_url 是否正确。"
    except httpx.TimeoutException:
        failure_message = "连接超时。请检查网络或 base_url 是否可达。"
    except Exception as exc:  # noqa: BLE001 — graceful degradation
        failure_message = f"未知错误：{exc}"
    return LLMResult(success=False, message=failure_message)
 def expand_query(
    cfg: LLMConfig,
    query: str,
    extra_hints: str = "",
 ) -> ExpansionResult:
    """Expand a search query into multiple synonymous terms via LLM.

    Args:
        cfg: LLM configuration; when it is not configured, no request is made.
        query: The user's search text, sent as the user message.
        extra_hints: Optional extra instructions appended to the system prompt
            on a new line (ignored when blank).

    Returns:
        An ``ExpansionResult``.  On success ``terms`` contains the expanded
        terms (possibly empty) and ``error`` is ``None``.  On failure (network
        error, timeout, HTTP error) ``terms`` is ``[]`` and ``error`` contains
        a human-friendly message.  A response body that lacks a usable
        ``choices[0].message.content`` string is treated as an empty expansion
        rather than raising.
    """
    if not is_configured(cfg):
        return ExpansionResult(terms=[])
    system_prompt = _EXPAND_QUERY_SYSTEM_PROMPT
    if extra_hints and extra_hints.strip():
        system_prompt += "\n" + extra_hints.strip()
    try:
        response = _call_chat_completion(
            cfg,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query},
            ],
            max_tokens=200,
            temperature=0,
        )
    except httpx.TimeoutException:
        return ExpansionResult(
            terms=[],
            error="AI 搜索请求超时，请稍后再试。",
        )
    except httpx.ConnectError:
        return ExpansionResult(
            terms=[],
            error="无法连接到 AI 服务，请检查网络或设置。",
        )
    except httpx.HTTPStatusError:
        return ExpansionResult(
            terms=[],
            error="AI 服务返回错误，请检查配置。",
        )
    except Exception:  # noqa: BLE001 — graceful degradation
        return ExpansionResult(
            terms=[],
            error="AI 搜索暂时不可用，请稍后再试。",
        )
    # Walk the response defensively: OpenAI-compatible servers may return a
    # non-dict body, an empty ``choices`` list, or ``null`` for ``message`` /
    # ``content``.  None of these should raise out of this function.
    content = None
    if isinstance(response, dict):
        choices = response.get("choices")
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            message = choices[0].get("message")
            if isinstance(message, dict):
                content = message.get("content")
    if not isinstance(content, str):
        return ExpansionResult(terms=[])
    return ExpansionResult(terms=_parse_json_string_array(content))
 # ── Constants for output contract enforcement ────────────────────────────
 _MAX_EXPANSION_TERMS = 8
 _MAX_TERM_LENGTH = 30
 def _parse_json_string_array(content: str) -> list[str]:
    """Parse LLM output into a list of strings.
    Strict contract enforcement:
    1. Strip markdown code fences;
    2. Try ``json.loads`` — only accept a JSON **array of strings**;
    3. Anything else (prose, JSON objects, bad JSON) → return ``[]``.
    This ensures the output contract is enforced by code: no matter what
    the model returns or what ``ai_search_extra_hints`` contains, only a
    valid JSON string array is accepted.
    """
    text = content.strip()
    if not text:
        return []
    # Strip markdown code fences
    text = re.sub(r"^```(?:json)?\s*", "", text)
    text = re.sub(r"\s*```$", "", text)
    text = text.strip()
    # Attempt JSON parse — strictly require a list
    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, ValueError):
        return []
    if not isinstance(parsed, list):
        return []
    # Validate every element is a string; reject non-string items
    terms: list[str] = []
    for item in parsed:
        if not isinstance(item, str):
            return []
        cleaned = item.strip()
        if cleaned and len(cleaned) <= _MAX_TERM_LENGTH:
            terms.append(cleaned)
    # Cap total count
    return terms[:_MAX_EXPANSION_TERMS]
 def analyze_image(cfg: LLMConfig, image_data: bytes, prompt: str) -> LLMResult:
    """Analyze an image via LLM vision API.

    .. note:: **Reserved stub — not implemented.**  The arguments are
        currently ignored and a failed ``LLMResult`` is always returned.
        The signature is fixed so callers can depend on it.
    """
    # TODO: Implement in future round for image analysis.
    not_implemented = LLMResult(success=False, message="图片分析功能尚未实现。")
    return not_implemented
 # ------------------------------------------------------------------
 # Internal boundary — all network calls go through this single function
 # ------------------------------------------------------------------
 def _call_chat_completion(
    cfg: LLMConfig,
    *,
    messages: list[dict[str, str]],
    max_tokens: int = 1,
    temperature: float | None = None,
 ) -> dict:
    """POST to the OpenAI-compatible ``/chat/completions`` endpoint.

    The URL is ``cfg.base_url`` (trailing slashes removed) plus
    ``/chat/completions``.  ``temperature`` is included in the payload only
    when it is not ``None``.

    Returns the parsed JSON response body on a 2xx status.
    Raises ``httpx.HTTPStatusError`` on non-2xx, or other ``httpx`` exceptions
    on network failures — callers handle these for graceful degradation.
    """
    endpoint = cfg.base_url.rstrip("/")
    endpoint += "/chat/completions"
    request_body: dict[str, Any] = dict(
        model=cfg.model,
        messages=messages,
        max_tokens=max_tokens,
    )
    if temperature is not None:
        request_body["temperature"] = temperature
    auth_headers = {
        "Authorization": f"Bearer {cfg.api_key}",
        "Content-Type": "application/json",
    }
    with httpx.Client(timeout=_TIMEOUT_SECONDS) as session:
        reply = session.post(endpoint, json=request_body, headers=auth_headers)
        reply.raise_for_status()
        return reply.json()
@@ -5,12 +5,15 @@ from fastapi import Depends, FastAPI, File, Form, HTTPException, Request, Upload
 from fastapi.responses import FileResponse, RedirectResponse, Response
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
-from sqlalchemy import func, or_
+from sqlalchemy import func, false, or_
 from sqlalchemy.orm import Session
 from app.db import get_db, init_db
 from app.images import process_upload
 from app.llm import expand_query, is_configured, test_connection
 from app.llm import LLMResult
 from app.models import Box, Item, SubItem
 from app.settings_store import LLMConfig, get_app_settings, save_app_settings
 templates = Jinja2Templates(directory="app/templates")
 STATIC_DIR = Path("app/static")
@@ -88,6 +91,46 @@ def _wants_add_next(submit_action: str | None) -> bool:
    return submit_action == "save_and_add_next"
 def _validate_settings_origin(request: Request) -> str | None:
    """Check Origin/Referer for same-host browser requests.
    Returns an error message if validation fails, or None if OK.
    Missing both headers (e.g. curl, API call) is allowed for now.
    """
    origin = request.headers.get("origin")
    referer = request.headers.get("referer")
    if origin:
        host = request.headers.get("host", "")
        # origin includes scheme, host only has host:port
        from urllib.parse import urlparse
        parsed = urlparse(origin)
        origin_host = parsed.netloc
        if origin_host != host:
            return "请求来源与当前站点不一致，操作被拒绝。"
    elif referer:
        host = request.headers.get("host", "")
        from urllib.parse import urlparse
        parsed = urlparse(referer)
        referer_host = parsed.netloc
        if referer_host != host:
            return "请求来源与当前站点不一致，操作被拒绝。"
    return None
 def _validate_base_url_scheme(base_url: str) -> str | None:
    """Return an error message if base_url scheme is not allowed, else None."""
    from urllib.parse import urlparse
    parsed = urlparse(base_url)
    if parsed.scheme not in ("https", "http"):
        return "Base URL 必须以 http:// 或 https:// 开头。"
    return None
 def _format_average(total: int, divisor: int) -> str:
    if divisor == 0:
        return "0.0"
@@ -117,24 +160,41 @@ def _build_boxes_overview_summary(db: Session) -> dict[str, int | str]:
    }
-def _build_search_results(db: Session, query: str) -> list[dict]:
+def _build_search_results(db: Session, query: str | list[str]) -> list[dict]:
-    keyword = f"%{query.lower()}%"
+    """Search Box / Item / SubItem by name and note using case-insensitive LIKE.
    Accepts either a single query string or a list of keywords.
    When multiple keywords are given, they are combined with OR — a match on
    *any* keyword is sufficient.
    """
    keywords = [query] if isinstance(query, str) else query
    patterns = [f"%{kw.lower()}%" for kw in keywords]
    def _or_like(column, note_column):
        """Build an OR filter that matches any pattern on either column."""
        conditions = []
        for pat in patterns:
            conditions.append(func.lower(column).like(pat))
            conditions.append(func.lower(func.coalesce(note_column, "")).like(pat))
        return or_(false(), *conditions) if conditions else false()
    results: list[dict] = []
    seen_ids: set[tuple[str, int]] = set()
    def _add(result_type: str, obj_id: int, entry: dict) -> None:
        key = (result_type, obj_id)
        if key not in seen_ids:
            seen_ids.add(key)
            results.append(entry)
    box_matches = (
        db.query(Box)
-        .filter(
+        .filter(_or_like(Box.name, Box.note))
            or_(
                func.lower(Box.name).like(keyword),
                func.lower(func.coalesce(Box.note, "")).like(keyword),
            )
        )
        .order_by(Box.id.desc())
        .all()
    )
    for box in box_matches:
-        results.append(
+        _add("Box", box.id, {
            {
            "type": "Box",
            "name": box.name,
            "note": box.note,
@@ -145,24 +205,17 @@ def _build_search_results(db: Session, query: str) -> list[dict]:
            "path": "顶层箱子",
            "is_container": None,
            "image_url": f"/boxes/{box.id}/image" if box.image_blob else None,
-            }
+        })
        )
    item_matches = (
        db.query(Item)
        .join(Item.box)
-        .filter(
+        .filter(_or_like(Item.name, Item.note))
            or_(
                func.lower(Item.name).like(keyword),
                func.lower(func.coalesce(Item.note, "")).like(keyword),
            )
        )
        .order_by(Item.id.desc())
        .all()
    )
    for item in item_matches:
-        results.append(
+        _add("Item", item.id, {
            {
            "type": "Item",
            "name": item.name,
            "note": item.note,
@@ -173,25 +226,18 @@ def _build_search_results(db: Session, query: str) -> list[dict]:
            "path": f"位于箱子：{item.box.name}",
            "is_container": item.is_container,
            "image_url": f"/items/{item.id}/image" if item.image_blob else None,
-            }
+        })
        )
    subitem_matches = (
        db.query(SubItem)
        .join(SubItem.parent_item)
        .join(Item.box)
-        .filter(
+        .filter(_or_like(SubItem.name, SubItem.note))
            or_(
                func.lower(SubItem.name).like(keyword),
                func.lower(func.coalesce(SubItem.note, "")).like(keyword),
            )
        )
        .order_by(SubItem.id.desc())
        .all()
    )
    for subitem in subitem_matches:
-        results.append(
+        _add("SubItem", subitem.id, {
            {
            "type": "SubItem",
            "name": subitem.name,
            "note": subitem.note,
@@ -205,12 +251,39 @@ def _build_search_results(db: Session, query: str) -> list[dict]:
            ),
            "is_container": None,
            "image_url": f"/subitems/{subitem.id}/image" if subitem.image_blob else None,
-            }
+        })
        )
    return results
 def _ai_search(db: Session, cfg: "LLMConfig", query: str) -> tuple[list[str], list[dict], str | None]:
    """Swappable AI search seam.

    Returns ``(expanded_terms, results, error_message)``:

    - Expansion failed (timeout, network error, HTTP error): no terms, the
      plain LIKE results for ``query``, and the friendly error message.
    - Expansion legitimately empty: no terms, the plain LIKE results, and
      ``None`` as the error.
    - Expansion succeeded: the expanded terms, results matching the original
      query OR any expanded term, and ``None`` as the error.
    """
    expansion = expand_query(cfg, query, extra_hints=cfg.ai_search_extra_hints)
    if expansion.error or not expansion.terms:
        # Failure and empty expansion share the same plain-search fallback;
        # only a real failure carries a message.
        fallback_error = expansion.error if expansion.error else None
        return [], _build_search_results(db, query), fallback_error
    # Original query first, then the expanded terms, duplicates dropped in order.
    search_terms = list(dict.fromkeys([query] + expansion.terms))
    return expansion.terms, _build_search_results(db, search_terms), None
 def create_app() -> FastAPI:
    @asynccontextmanager
    async def lifespan(app: FastAPI):
@@ -242,10 +315,28 @@ def create_app() -> FastAPI:
    def search_page(
        request: Request,
        q: str | None = None,
        ai: str | None = None,
        db: Session = Depends(get_db),
    ):
        query = (q or "").strip()
-        results = _build_search_results(db, query) if query else []
+        cfg = get_app_settings(db)
        ai_requested = ai == "1"
        ai_available = cfg.ai_search_enabled and is_configured(cfg)
        expanded_terms: list[str] = []
        ai_error: str | None = None
        if query:
            if ai_requested and ai_available:
                try:
                    expanded_terms, results, ai_error = _ai_search(db, cfg, query)
                except Exception:  # noqa: BLE001 — graceful degradation
                    ai_error = "AI 搜索暂时不可用，已回退到普通搜索。"
                    results = _build_search_results(db, query)
            else:
                results = _build_search_results(db, query)
        else:
            results = []
        return templates.TemplateResponse(
            request=request,
            name="search/index.html",
@@ -254,6 +345,10 @@ def create_app() -> FastAPI:
                "query": query,
                "results": results,
                "searched": bool(query),
                "ai_activated": ai_requested and ai_available and bool(query),
                "expanded_terms": expanded_terms,
                "ai_error": ai_error,
                "ai_available": ai_available,
            },
        )
@@ -267,6 +362,175 @@ def create_app() -> FastAPI:
            context={"page_title": "箱子", "boxes": boxes, "summary": summary},
        )
    # ------------------------------------------------------------------
    # Settings
    # ------------------------------------------------------------------
    @app.get("/settings")
    def settings_page(request: Request, db: Session = Depends(get_db)):
        """Render the settings form populated from the stored app settings."""
        stored = get_app_settings(db)
        page_context = {
            "page_title": "设置",
            "config": stored,
            "api_key_configured": bool(stored.api_key),
            "test_result": None,
        }
        return templates.TemplateResponse(
            request=request,
            name="settings/form.html",
            context=page_context,
        )
    @app.post("/settings")
    def save_settings(
        request: Request,
        enabled: str | None = Form(default=None),
        base_url: str | None = Form(default=None),
        model: str | None = Form(default=None),
        api_key: str | None = Form(default=None),
        ai_search_enabled: str | None = Form(default=None),
        ai_search_extra_hints: str | None = Form(default=None),
        db: Session = Depends(get_db),
    ) -> Response:
        """Validate and persist the settings form, then redirect to ``/settings``.

        Checkbox fields (``enabled``, ``ai_search_enabled``) count as true only
        when their submitted value is ``"on"``.  A missing ``base_url`` falls
        back to ``https://api.openai.com/v1``.

        Raises:
            HTTPException: 403 when the Origin/Referer check fails; 400 when
                the base URL scheme is not http/https.

        Returns:
            A 303 redirect to ``/settings`` after saving, or the re-rendered
            form with an error when ``base_url`` differs from the stored one
            and no API key was submitted (nothing is saved in that case).
        """
        # Origin/Referer check for browser requests
        origin_error = _validate_settings_origin(request)
        if origin_error:
            raise HTTPException(status_code=403, detail=origin_error)
        resolved_base_url = _clean_text(base_url) or "https://api.openai.com/v1"
        # Validate base_url scheme
        scheme_error = _validate_base_url_scheme(resolved_base_url)
        if scheme_error:
            raise HTTPException(status_code=400, detail=scheme_error)
        resolved_model = _clean_text(model) or ""
        # Only base_url change counts as an endpoint change — model switches
        # under the same base_url do not require a new key.
        existing_cfg = get_app_settings(db)
        # presumably _clean_text returns None for a blank/missing field — TODO confirm
        submitted_key = _clean_text(api_key)
        base_url_changed = resolved_base_url != existing_cfg.base_url
        if base_url_changed and submitted_key is None:
            # base_url changed but no new key provided — refuse to save,
            # return to settings page with a clear error message.
            # NOTE(review): unlike the /settings/test handler, the stored
            # api_key is placed into the re-rendered config here — confirm the
            # template never echoes config.api_key.
            return templates.TemplateResponse(
                request=request,
                name="settings/form.html",
                context={
                    "page_title": "设置",
                    "config": LLMConfig(
                        enabled=enabled == "on",
                        base_url=resolved_base_url,
                        model=resolved_model,
                        api_key=existing_cfg.api_key,
                        ai_search_enabled=ai_search_enabled == "on",
                        ai_search_extra_hints=_clean_text(ai_search_extra_hints) or "",
                    ),
                    "api_key_configured": bool(existing_cfg.api_key),
                    "test_result": LLMResult(
                        success=False,
                        message="Base URL 已变更，请重新输入 API Key 后保存。",
                    ),
                },
            )
        # submitted_key is None → keep old key; str (including "") → use new value
        # (assumes save_app_settings treats api_key=None as "keep stored key" — TODO confirm)
        resolved_api_key = submitted_key
        resolved_extra_hints = _clean_text(ai_search_extra_hints) or ""
        save_app_settings(
            db,
            enabled=enabled == "on",
            base_url=resolved_base_url,
            model=resolved_model,
            api_key=resolved_api_key,
            ai_search_enabled=ai_search_enabled == "on",
            ai_search_extra_hints=resolved_extra_hints,
        )
        # 303 makes the browser follow up with a GET (post/redirect/get).
        return RedirectResponse(url="/settings", status_code=status.HTTP_303_SEE_OTHER)
    @app.post("/settings/test")
    def test_settings_connection(
        request: Request,
        enabled: str | None = Form(default=None),
        base_url: str | None = Form(default=None),
        model: str | None = Form(default=None),
        api_key: str | None = Form(default=None),
        ai_search_enabled: str | None = Form(default=None),
        ai_search_extra_hints: str | None = Form(default=None),
        db: Session = Depends(get_db),
    ):
        """Test the submitted LLM settings without saving them.

        Builds a temporary ``LLMConfig`` from the form values, runs
        ``test_connection`` against it, and re-renders the settings form with
        the result.  No call to ``save_app_settings`` is made here.

        The stored API key is reused only when the submitted ``base_url``
        equals the stored one and no key was submitted; with a different
        ``base_url`` and no key, the test is refused and an error is shown.

        Raises:
            HTTPException: 403 when the Origin/Referer check fails; 400 when
                the base URL scheme is not http/https.
        """
        # Origin/Referer check for browser requests
        origin_error = _validate_settings_origin(request)
        if origin_error:
            raise HTTPException(status_code=403, detail=origin_error)
        resolved_base_url = _clean_text(base_url) or "https://api.openai.com/v1"
        # Validate base_url scheme
        scheme_error = _validate_base_url_scheme(resolved_base_url)
        if scheme_error:
            raise HTTPException(status_code=400, detail=scheme_error)
        resolved_model = _clean_text(model) or ""
        # Only reuse stored key if base_url matches saved config. Model switches
        # under the same base_url can use the same key; a base_url change cannot.
        existing_cfg = get_app_settings(db)
        # presumably _clean_text returns None for a blank/missing field — TODO confirm
        submitted_key = _clean_text(api_key)
        base_url_matches = resolved_base_url == existing_cfg.base_url
        if base_url_matches and submitted_key is None:
            resolved_api_key = existing_cfg.api_key
        elif submitted_key is not None:
            resolved_api_key = submitted_key
        else:
            # base_url changed but no key provided — refuse to test
            # The stored key is deliberately not copied into this config.
            return templates.TemplateResponse(
                request=request,
                name="settings/form.html",
                context={
                    "page_title": "设置",
                    "config": LLMConfig(
                        enabled=enabled == "on",
                        base_url=resolved_base_url,
                        model=resolved_model,
                        api_key="",
                        ai_search_enabled=ai_search_enabled == "on",
                        ai_search_extra_hints=_clean_text(ai_search_extra_hints) or "",
                    ),
                    "api_key_configured": bool(existing_cfg.api_key),
                    "test_result": LLMResult(
                        success=False,
                        message="Base URL 已变更，请重新输入 API Key 后再测试。",
                    ),
                },
            )
        # Temporary config used only for this test request.
        test_cfg = LLMConfig(
            enabled=enabled == "on",
            base_url=resolved_base_url,
            model=resolved_model,
            api_key=resolved_api_key or "",
            ai_search_enabled=ai_search_enabled == "on",
            ai_search_extra_hints=_clean_text(ai_search_extra_hints) or "",
        )
        # test_connection reports failure when the config is not enabled, so an
        # unchecked "enabled" box yields a "not configured" result here.
        result = test_connection(test_cfg)
        return templates.TemplateResponse(
            request=request,
            name="settings/form.html",
            context={
                "page_title": "设置",
                "config": test_cfg,
                # Reflects the key used for this test, which may be unsaved.
                "api_key_configured": bool(test_cfg.api_key),
                "test_result": result,
            },
        )
    @app.get("/boxes/new")
    def new_box_page(request: Request):
        return templates.TemplateResponse(
@@ -0,0 +1,315 @@
 """Alembic migration wrapper with two responsibilities:
 **(A) CLI entry point ``python -m app.migrate``** — idempotent migration command.
    Handles four cases:
    - Empty DB → ``upgrade head`` (create tables)
    - Unmanaged DB matching baseline (V1) → ``stamp V1`` → ``upgrade head``
    - Unmanaged DB NOT matching baseline → **fail-close**, no changes
    - Already at head → no-op, exit 0
 **(B) Startup verification ``verify_schema_is_current(url)``** — read-only check.
    Used by ``init_db()`` to confirm the DB is at ``head`` before serving traffic.
    **Never modifies the DB.** Raises on mismatch.
 """
 from __future__ import annotations
 import logging
 import sys
 from pathlib import Path
 from alembic import command
 from alembic.config import Config as AlembicConfig
 from sqlalchemy import create_engine
 from sqlalchemy import inspect as sa_inspect
 logger = logging.getLogger("app.migrate")
 # The V1 baseline revision ID.  Must be kept in sync with the revision in
 # ``migrations/versions/``.  A literal is clearer than importing from
 # auto-generated code whose module name changes.
 V1_REVISION = "57af90893f55"
 # ------------------------------------------------------------------
 # Internal helpers
 # ------------------------------------------------------------------
 def _make_alembic_config(database_url: str) -> AlembicConfig:
    """Build an Alembic ``Config`` pointing at the bundled ``migrations/``.

    Args:
        database_url: SQLAlchemy URL stored as the ``sqlalchemy.url`` main
            option.

    Returns:
        A ``Config`` loaded from ``alembic.ini`` in the project root (the
        parent of this package's directory), with ``script_location`` set to
        the sibling ``migrations`` directory.
    """
    project_root = Path(__file__).resolve().parent.parent
    migrations_dir = project_root / "migrations"
    alembic_ini = project_root / "alembic.ini"
    cfg = AlembicConfig(str(alembic_ini))
    # set_main_option() hands the value to ConfigParser.set(), which applies
    # %-interpolation.  A literal "%" (e.g. a percent-encoded password in the
    # URL, or a "%" in the checkout path) must be doubled or the call raises.
    cfg.set_main_option("script_location", str(migrations_dir).replace("%", "%%"))
    cfg.set_main_option("sqlalchemy.url", database_url.replace("%", "%%"))
    return cfg
 def _detect_db_state(database_url: str) -> str:
    """Classify the database as ``"managed"``, ``"unmanaged"`` or ``"empty"``.

    - **empty**:     the inspector reports no tables at all.
    - **managed**:   an ``alembic_version`` table is present.
    - **unmanaged**: at least one table exists, but not ``alembic_version``.

    A short-lived engine is created for the inspection and always disposed.
    """
    engine = create_engine(database_url)
    try:
        found = set(sa_inspect(engine).get_table_names())
    finally:
        engine.dispose()
    if not found:
        return "empty"
    return "managed" if "alembic_version" in found else "unmanaged"
 def _get_current_revision(database_url: str) -> str | None:
    """Read ``version_num`` from ``alembic_version``.

    Returns ``None`` when the ``alembic_version`` table does not exist;
    otherwise returns the scalar result of selecting ``version_num``.  The
    temporary engine is disposed on every exit path.
    """
    from sqlalchemy import text

    engine = create_engine(database_url)
    try:
        if "alembic_version" in set(sa_inspect(engine).get_table_names()):
            with engine.begin() as conn:
                return conn.execute(text("SELECT version_num FROM alembic_version")).scalar()
        return None
    finally:
        engine.dispose()
 def _fk_ondelete(fk: dict) -> str:
    """Return a foreign key's reflected ``ON DELETE`` rule, or ``""`` if none.

    SQLAlchemy reports the rule under ``fk["options"]["ondelete"]``; a
    top-level ``"ondelete"`` key is accepted as a fallback.  The result is
    upper-cased (SQL keywords are case-insensitive) and never ``None`` so the
    signature tuples stay sortable.
    """
    options = fk.get("options") or {}
    rule = options.get("ondelete") or fk.get("ondelete")
    return str(rule).upper() if rule else ""
 def _table_signature(inspector, tbl: str) -> dict:
    """Reflect one table into sorted, directly comparable tuples.

    Returns a dict with three keys:

    - ``"columns"``: ``(name, nullable, str(type), primary_key)``
    - ``"fks"``:     ``(constrained_columns, referred_table,
      referred_columns, ondelete)``
    - ``"indexes"``: ``(name, column_names, unique)``

    Both the reference schema and the inspected database go through this
    helper, so the two sides are always built the same way.
    """
    return {
        "columns": sorted(
            (c["name"], c.get("nullable", True), str(c["type"]), c.get("primary_key", False))
            for c in inspector.get_columns(tbl)
        ),
        "fks": sorted(
            (
                tuple(fk["constrained_columns"]),
                fk["referred_table"],
                tuple(fk["referred_columns"]),
                _fk_ondelete(fk),
            )
            for fk in inspector.get_foreign_keys(tbl)
        ),
        "indexes": sorted(
            (idx["name"], tuple(idx["column_names"]), idx.get("unique", False))
            for idx in inspector.get_indexes(tbl)
        ),
    }
 def _build_reference_schema() -> dict:
    """Build a full reference schema from the V1 baseline migration.

    The baseline is applied to a throwaway SQLite file (removed afterwards)
    and the resulting tables are reflected.

    Returns:
        A dict with ``"tables"`` (set of table names) plus ``"columns"``,
        ``"fks"`` and ``"indexes"``, each mapping a table name to the sorted
        tuples produced by ``_table_signature``.
    """
    import os
    import tempfile
    # delete=False + close(): only the path is needed; Alembic and SQLAlchemy
    # open the file themselves.
    tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    tmp.close()
    try:
        tmp_url = f"sqlite:///{tmp.name}"
        cfg = _make_alembic_config(tmp_url)
        command.upgrade(cfg, V1_REVISION)
        eng = create_engine(tmp_url)
        try:
            inspector = sa_inspect(eng)
            tables = ("boxes", "items", "subitems")
            result: dict = {"tables": set(tables), "columns": {}, "fks": {}, "indexes": {}}
            for tbl in tables:
                signature = _table_signature(inspector, tbl)
                for part in ("columns", "fks", "indexes"):
                    result[part][tbl] = signature[part]
            return result
        finally:
            eng.dispose()
    finally:
        os.unlink(tmp.name)
 def _schema_matches_baseline(database_url: str) -> bool:
    """Check whether an unmanaged DB's schema matches the V1 baseline.

    Compares the set of table names, then for every baseline table the column
    definitions (name, nullable, type, PK), foreign keys (constrained/referred
    columns, ON DELETE rule) and indexes (name, columns, unique).

    The comparison is exact: apart from upper-casing the ON DELETE rule, no
    normalization is applied, so any difference in the reflected type strings
    is reported as a mismatch.

    Returns:
        ``True`` if everything matches; ``False`` at the first difference,
        which is logged at INFO level.
    """
    ref = _build_reference_schema()
    eng = create_engine(database_url)
    try:
        inspector = sa_inspect(eng)
        # 1. Table names must match exactly
        actual_tables = set(inspector.get_table_names())
        if actual_tables != ref["tables"]:
            logger.info("Table mismatch: got %s, expected %s", actual_tables, ref["tables"])
            return False
        # 2. Per-table details, checked in the order columns -> FKs -> indexes
        checks = (("columns", "Column"), ("fks", "FK"), ("indexes", "Index"))
        for tbl in sorted(ref["tables"]):
            actual = _table_signature(inspector, tbl)
            for part, label in checks:
                expected = ref[part][tbl]
                if actual[part] != expected:
                    logger.info("%s mismatch on %s: got %s, expected %s", label, tbl, actual[part], expected)
                    return False
        return True
    finally:
        eng.dispose()
 # ------------------------------------------------------------------
 # Public API
 # ------------------------------------------------------------------
 def verify_schema_is_current(database_url: str) -> None:
    """Startup guard: raise unless the database is at the ``head`` revision.

    The checks run in this order, each raising ``RuntimeError`` with a hint to
    run ``python -m app.migrate``:

    1. For SQLite URLs naming a file (not ``:memory:``), the file must exist.
       This is tested before any engine is created so that no empty file
       appears as a side effect.
    2. The database must contain tables and an ``alembic_version`` table.
    3. The stored revision must equal the head of the migration scripts.

    On success a single INFO line is logged and ``None`` is returned.
    """
    from alembic.script import ScriptDirectory
    from sqlalchemy.engine import make_url

    parsed = make_url(database_url)
    if parsed.drivername.startswith("sqlite"):
        db_path = parsed.database
        file_backed = bool(db_path) and db_path != ":memory:"
        if file_backed and not Path(db_path).exists():
            raise RuntimeError(
                f"Database file does not exist: {db_path}. "
                "Run `python -m app.migrate` to create the schema first."
            )

    # States that can never be "current", mapped to their refusal message.
    refusals = {
        "empty": (
            "Database is empty — no tables found. "
            "Run `python -m app.migrate` to create the schema first."
        ),
        "unmanaged": (
            "Database exists but has no alembic_version table (not under Alembic control). "
            "Run `python -m app.migrate` to adopt it first."
        ),
    }
    state = _detect_db_state(database_url)
    if state in refusals:
        raise RuntimeError(refusals[state])

    # Managed database: compare the stored revision with the scripts' head.
    current = _get_current_revision(database_url)
    scripts = ScriptDirectory.from_config(_make_alembic_config(database_url))
    head_rev = scripts.get_current_head()
    if current != head_rev:
        raise RuntimeError(
            f"Database is at revision '{current}' but the application expects "
            f"'{head_rev}'. Run `python -m app.migrate` to upgrade."
        )
    logger.info("Database schema verification passed (revision: %s).", current)
 def run_migrations(database_url: str) -> None:
    """Bring the database to ``head`` — the body of the CLI entry point.

    Behaviour by detected state:

    - ``empty``: ``upgrade head``.
    - ``managed``: ``upgrade head``.
    - ``unmanaged`` and matching the V1 baseline: ``stamp`` ``V1_REVISION``,
      then ``upgrade head``.
    - ``unmanaged`` and not matching: log an error and raise ``SystemExit``
      without stamping or upgrading.
    """
    cfg = _make_alembic_config(database_url)
    state = _detect_db_state(database_url)

    if state == "empty":
        logger.info("Empty database detected — creating schema from scratch.")
        command.upgrade(cfg, "head")
        return

    if state != "unmanaged":  # managed
        logger.info("Database already under Alembic control — upgrading to head.")
        command.upgrade(cfg, "head")
        return

    # Unmanaged: adopt only when the existing schema is exactly the baseline.
    if not _schema_matches_baseline(database_url):
        logger.error(
            "Unmanaged database schema does NOT match V1 baseline. "
            "Refusing to migrate to avoid data loss."
        )
        raise SystemExit(
            "Migration aborted: database schema does not match the "
            "expected V1 baseline. Inspect the database manually."
        )

    logger.info(
        "Unmanaged database matches V1 baseline — stamping %s and upgrading.",
        V1_REVISION,
    )
    command.stamp(cfg, V1_REVISION)
    command.upgrade(cfg, "head")
 # ------------------------------------------------------------------
 # CLI entry point: ``python -m app.migrate``
 # ------------------------------------------------------------------
 if __name__ == "__main__":
    # CLI entry point: configure logging, read the database URL from the
    # application settings, and run the migrations against it.
    logging.basicConfig(
        level=logging.INFO,
        format="%(levelname)s [%(name)s] %(message)s",
    )
    from sqlalchemy.engine import make_url
    from app.config import get_settings
    settings = get_settings()
    url = settings.database_url
    # Log a rendering with any password masked; the real URL is still what
    # gets passed to run_migrations().
    logger.info(
        "Running migrations against %s",
        make_url(url).render_as_string(hide_password=True),
    )
    run_migrations(url)
    logger.info("Migration complete.")
@@ -90,3 +90,10 @@ class SubItem(Base):
    )
    parent_item: Mapped[Item] = relationship(back_populates="subitems")
 class AppSetting(Base):
    """One key/value row of the ``app_settings`` table."""
    __tablename__ = "app_settings"
    # Setting name; it is the primary key, so each key has at most one row.
    key: Mapped[str] = mapped_column(Text, primary_key=True)
    # Setting value stored as text; the column is nullable.
    value: Mapped[str | None] = mapped_column(Text, nullable=True)
@@ -0,0 +1,93 @@
 """Settings read/write helpers for the ``app_settings`` KV table.
 Provides a typed ``LLMConfig`` dataclass and two helpers:
 - ``get_app_settings(db) -> LLMConfig`` — reads KV rows (or returns defaults).
 - ``save_app_settings(db, ...) -> None`` — writes KV rows; API key left blank
  means "keep the old value".
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from sqlalchemy.orm import Session
 from app.models import AppSetting
@dataclass
 class LLMConfig:
    """All settings consumed by the LLM client and settings UI.

    Every field has a default, so ``LLMConfig()`` is the configuration
    returned when no rows are stored.
    """
    # Stored under the key "llm_enabled".
    enabled: bool = False
    # Stored under "llm_base_url".
    base_url: str = "https://api.openai.com/v1"
    # Stored under "llm_model"; empty by default.
    model: str = ""
    # Stored under "llm_api_key"; empty by default.
    api_key: str = ""
    # Stored under "ai_search_enabled".
    ai_search_enabled: bool = False
    # Stored under "ai_search_extra_hints"; empty by default.
    ai_search_extra_hints: str = ""
 def _get_value(rows: dict[str, str], key: str, default: str) -> str:
    return rows.get(key, default)
 def _get_bool(rows: dict[str, str], key: str, default: bool) -> bool:
    return rows.get(key, str(default).lower()) == "true"
 def get_app_settings(db: Session) -> LLMConfig:
    """Load every ``AppSetting`` row and assemble an ``LLMConfig`` from them.

    Rows whose value is ``None`` are ignored, so the corresponding field falls
    back to its default exactly as if the row were absent.
    """
    stored: dict[str, str] = {
        setting.key: setting.value
        for setting in db.query(AppSetting).all()
        if setting.value is not None
    }
    enabled = _get_bool(stored, "llm_enabled", False)
    base_url = _get_value(stored, "llm_base_url", "https://api.openai.com/v1")
    model = _get_value(stored, "llm_model", "")
    api_key = _get_value(stored, "llm_api_key", "")
    ai_search_enabled = _get_bool(stored, "ai_search_enabled", False)
    ai_search_extra_hints = _get_value(stored, "ai_search_extra_hints", "")
    return LLMConfig(
        enabled=enabled,
        base_url=base_url,
        model=model,
        api_key=api_key,
        ai_search_enabled=ai_search_enabled,
        ai_search_extra_hints=ai_search_extra_hints,
    )
 def save_app_settings(
    db: Session,
    *,
    enabled: bool | None = None,
    base_url: str | None = None,
    model: str | None = None,
    api_key: str | None = None,
    ai_search_enabled: bool | None = None,
    ai_search_extra_hints: str | None = None,
 ) -> None:
    """Upsert the given settings into ``app_settings`` and commit.

    Any argument left as ``None`` is skipped, so its stored row is untouched;
    this is how a blank API-key form field keeps the existing key.  Booleans
    are stored as the lower-case strings ``"true"`` / ``"false"``.  The commit
    is issued even when nothing was written.
    """
    def _flag(value: bool | None) -> str | None:
        # Serialize a boolean the way the reader expects; keep None as "skip".
        return None if value is None else str(value).lower()

    pending = (
        ("llm_enabled", _flag(enabled)),
        ("llm_base_url", base_url),
        ("llm_model", model),
        ("llm_api_key", api_key),
        ("ai_search_enabled", _flag(ai_search_enabled)),
        ("ai_search_extra_hints", ai_search_extra_hints),
    )
    for key, value in pending:
        if value is None:
            continue
        row = db.get(AppSetting, key)
        if row is None:
            db.add(AppSetting(key=key, value=value))
        else:
            row.value = value
    db.commit()
@@ -19,6 +19,7 @@
        <nav class="top-nav">
            <a href="/boxes">箱子</a>
            <a href="/search">搜索</a>
            <a href="/settings">设置</a>
        </nav>
        {% block content %}{% endblock %}
    </main>
@@ -20,7 +20,31 @@
    </form>
 </section>
 {% if query and ai_available %}
 <section class="card" style="margin-top: 8px;">
    {% if ai_activated %}
    <span class="muted">AI 搜索已启用</span>
    {% else %}
    <a href="/search?q={{ query | urlencode }}&ai=1" class="button button-secondary" style="display:inline-block; text-decoration:none;">
        AI 智能搜索
    </a>
    {% endif %}
 </section>
 {% endif %}
 {% if searched %}
    {% if ai_error %}
    <section class="card" style="margin-top: 8px; border-color: #b42318;">
        <p style="margin:0; color: #b42318;"><strong>{{ ai_error }}</strong></p>
    </section>
    {% endif %}
    {% if ai_activated and expanded_terms %}
    <section class="card" style="margin-top: 8px; border-color: #0b57d0;">
        <p style="margin:0; color: #0b57d0;"><strong>AI 帮你扩展了：</strong>{{ expanded_terms | join('、') }}</p>
    </section>
    {% endif %}
    {% if results %}
    <section class="stack">
        <p class="muted">共找到 {{ results|length }} 条结果。</p>
@@ -0,0 +1,69 @@
 {% extends "base.html" %}
 {% block content %}
 <div class="breadcrumb">
    <a href="/boxes">箱子</a>
    <span>/</span>
    <strong>设置</strong>
 </div>
 <div class="page-header">
    <div>
        <h1>设置</h1>
        <p class="muted">配置 LLM 连接参数。未配置时，整站行为不受影响。</p>
    </div>
 </div>
 {% if test_result %}
 <section class="card" style="margin-bottom: 16px; border-color: {% if test_result.success %}#2f6b1f{% else %}#b42318{% endif %};">
    <p style="margin:0; color: {% if test_result.success %}#2f6b1f{% else %}#b42318{% endif %};">
        <strong>{{ "✓ " if test_result.success else "✗ " }}{{ test_result.message }}</strong>
    </p>
 </section>
 {% endif %}
 <form method="post" action="/settings" class="stack form-panel">
    <label class="form-field checkbox-row">
        <input type="checkbox" name="enabled" {% if config.enabled %}checked{% endif %}>
        启用 LLM
    </label>
    <p class="checkbox-help">开启后，AI 相关功能将使用下方配置连接 LLM 服务。</p>
    <label class="form-field">
        Base URL
        <input type="text" name="base_url" value="{{ config.base_url }}" placeholder="https://api.openai.com/v1">
    </label>
    <label class="form-field">
        模型名称
        <input type="text" name="model" value="{{ config.model }}" placeholder="例如 gpt-4o-mini">
    </label>
    <label class="form-field">
        API Key
        {% if api_key_configured %}
        <input type="password" name="api_key" value="" placeholder="已配置，留空＝不修改">
        {% else %}
        <input type="password" name="api_key" value="" placeholder="输入 API Key">
        {% endif %}
    </label>
    <hr style="border:none;border-top:1px solid #ddd;margin:16px 0;">
    <label class="form-field checkbox-row">
        <input type="checkbox" name="ai_search_enabled" {% if config.ai_search_enabled %}checked{% endif %}>
        启用 AI 智能搜索
    </label>
    <p class="checkbox-help">开启后，搜索页将显示「AI 智能搜索」按钮，通过查询词扩展增强搜索结果。</p>
    <label class="form-field">
        额外领域提示（可选）
        <textarea name="ai_search_extra_hints" rows="3" placeholder="例如：用户物品主要涉及厨房用品和电子产品">{% if config.ai_search_extra_hints %}{{ config.ai_search_extra_hints }}{% endif %}</textarea>
    </label>
    <p class="checkbox-help">追加到 AI 搜索提示词末尾，帮助模型理解你的物品领域。留空则使用默认提示词。</p>
    <div class="form-actions">
        <button type="submit" class="button button-primary">保存设置</button>
        <button type="submit" class="button button-secondary" formaction="/settings/test" formmethod="post">测试连接</button>
    </div>
 </form>
 {% endblock %}
@@ -0,0 +1,12 @@
 # docs · 文档索引 / Documentation Index
 本目录存放「2026 搬家助手」的项目文档。
 This folder holds documentation for the **2026 Moving Helper** project.
 | 文件 / File | 内容 / Contents |
 | --- | --- |
 | [`repository-brief.md`](./repository-brief.md) | 仓库总体简报：技术栈、架构、数据模型、路由、部署、CI/CD、测试，以及面向下一轮改动的扩展建议。<br>Full repository brief: tech stack, architecture, data model, routes, deployment, CI/CD, tests, and extension notes for the next round of changes. |
 | [`design/`](./design/) | 具体改动轮次的设计文档与实施计划。当前轮次：LLM 接入与迁移地基（Alembic + LLM + 基础 AI 搜索）。<br>Per-round design docs and implementation plans. Current round: LLM integration & migration foundation (Alembic + LLM + basic AI search). |
 > 说明 / Note：本文档由一轮代码走查整理而成，描述的是**当前 `main` 分支**的状态。后续改动代码时，请同步更新这里。
 > This brief reflects the **current `main` branch**. Please keep it in sync as the code evolves.
@@ -0,0 +1,30 @@
 # docs/design · 设计文档 / Design Docs
 本目录存放面向具体改动轮次的设计与实施计划。
 Design and implementation plans for specific rounds of changes.
 ## 当前轮次 / Current round — LLM 接入与迁移地基 / LLM Integration & Migration Foundation
 本轮三件事 / Three deliverables：① 引入 Alembic 迁移系统（含封装）② LLM 接入（配置页 + 落库 + 客户端）③ 基础 AI 搜索（查询词扩展）。
 **本轮不含图片分析**（留作未来，架构已预留接口）。Image analysis is **not** in this round (reserved for the future).
 **总体设计（High-level，"做什么/为什么"）/ High-level design ("what/why")：**
 | 文件 / File | 内容 / Contents |
 | --- | --- |
 | [`llm-integration-design.md`](./llm-integration-design.md) | 原则、架构、迁移子系统、LLM 接入、AI 搜索、安全、测试、未来扩展、决策记录（D1–D10）。<br>Principles, architecture, migration subsystem, LLM, AI search, security, testing, future seams, decisions log. |
 **实施计划（"怎么做"，每步一个自包含文件）/ Implementation plan ("how", one self-contained file per step)：**
 | 文件 / File | 内容 / Contents |
 | --- | --- |
 | [`implementation-plan.md`](./implementation-plan.md) | 总览：步骤顺序、依赖、跨步骤约定。<br>Overview: sequence, dependencies, cross-cutting conventions. |
 | [`step-1-alembic-foundation.md`](./step-1-alembic-foundation.md) | 步骤 1：Alembic 迁移地基（不改 schema）。<br>Step 1: migration foundation. |
 | [`step-2-llm-integration.md`](./step-2-llm-integration.md) | 步骤 2：LLM 接入（`app_settings` + 客户端 + 配置页）。<br>Step 2: LLM integration. |
 | [`step-3-ai-search.md`](./step-3-ai-search.md) | 步骤 3：基础 AI 搜索（查询词扩展）。<br>Step 3: basic AI search. |
 > 每个 step 文件**自包含**：实现 Agent 每次只读对应的一个文件即可执行。
 > Each step file is **self-contained** — an implementation agent only needs to read that one file.
 > 实现与设计若有偏差，请回写本目录，并同步仓库简报 `../repository-brief.md`（尤其 §10 迁移、§15 约束）。
 > If implementation diverges, update these docs and the brief (`../repository-brief.md`, esp. §10 & §15).
@@ -0,0 +1,36 @@
 # 实施计划 · 总览 / Implementation Plan · Overview
 > 配合设计文档 [`llm-integration-design.md`](./llm-integration-design.md) 阅读。
 > Read alongside the high-level design doc.
 >
 > 三步走，**每步一个独立文件、一个可独立合入的 PR / branch**。实现 Agent 每次只需读对应的 step 文件即可执行。
 > Three steps, **one self-contained file and one mergeable PR per step**. An implementation agent only needs to read the relevant step file.
 ---
 ## 步骤与文件 / Steps & Files
 | 步骤 / Step | 文件 / File | 目标 / Goal | 改 schema? | 依赖 / Depends on |
 | --- | --- | --- | --- | --- |
 | **1** | [`step-1-alembic-foundation.md`](./step-1-alembic-foundation.md) | Alembic 迁移地基（V1 baseline + 独立幂等迁移命令 + 启动只校验/fail-close），**不改 schema** | 否 / No | — |
 | **2** | [`step-2-llm-integration.md`](./step-2-llm-integration.md) | LLM 接入：`app_settings` 表 + 客户端 + 配置页 | 是 / Yes (V2) | 步骤 1 / Step 1 |
 | **3** | [`step-3-ai-search.md`](./step-3-ai-search.md) | 基础 AI 搜索：常驻按钮 + 查询词扩展 | 否 / No | 步骤 2 / Step 2 |
 **顺序 / Sequence：** 严格按 1 → 2 → 3，前一步绿了再进下一步。
 Strictly 1 → 2 → 3; advance only when the previous step is green.
 ---
 ## 跨步骤约定 / Cross-cutting Conventions（每步都适用 / apply to every step）
 - **提交 / Commits：** 每步独立 branch + PR；遵循仓库约定——**不主动 push/commit，除非业主明确要求**。
  One branch/PR per step; **do not push/commit unless explicitly asked**.
 - **CI 不联网 / Network-free CI：** 任何 LLM 调用在测试中必须 mock。
  All LLM calls must be mocked in tests.
 - **降级优先 / Degradation first：** 每个 AI 接入点先想清楚"未配置 / 调用失败"时的表现；AI 是加分项，不是依赖。
  Always design the "unconfigured / failed" path first; AI is additive, never required.
 - **依赖最小 / Minimal deps：** 复用已有 `httpx`；本轮唯一新增依赖是 `alembic`。不要引入 `openai` SDK。
  Reuse `httpx`; the only new dependency this round is `alembic`. Do not add the `openai` SDK.
 - **保持形态 / Keep the shape：** FastAPI + Jinja2 SSR + SQLite，无前端构建链；新页面沿用现有模板/样式。
 - **文档同步 / Keep docs in sync：** 实现与设计若有偏差，回写本目录对应文件与仓库简报 `../repository-brief.md`（§10 迁移、§15 约束）。
  If implementation diverges, update the step file, the design doc, and the brief (§10/§15).
@@ -0,0 +1,356 @@
 # 设计文档 · LLM 接入与迁移地基 / Design · LLM Integration & Migration Foundation
 > 中英双语。这是「下一轮改动」的总体设计（high-level design），实施步骤见 [`implementation-plan.md`](./implementation-plan.md)。
 > Bilingual. High-level design for the next round of changes; step-by-step plan in [`implementation-plan.md`](./implementation-plan.md).
 >
 > 状态 / Status：**已定稿，待实现 / Agreed, pending implementation**
 > 基线 / Base：`main` @ `b9b6583`
 ---
 ## 0. 本轮范围 / Scope of This Round
 **本轮只做三件事 / This round delivers exactly three things：**
 1. **引入 Alembic 数据库迁移系统**（含一层封装，让应用不直接接触 Alembic 细节）。
   Introduce **Alembic** as the migration system (with a thin wrapper so the app never touches Alembic directly).
 2. **LLM 接入**：一个配置页 + 配置落库 + 一个可复用的 LLM 客户端。
   **LLM integration**: a config page + DB-persisted config + a reusable LLM client.
 3. **最基础的 AI 搜索**：搜索页常驻一个「AI 智能搜索」动作，用查询词扩展增强结果。
   **Basic AI search**: a persistent "AI search" action on the search page, powered by query-term expansion.
 **本轮明确不做（留作未来）/ Explicitly out of scope this round（future）：**
 - 图片内容分析（`image_description` 列、视觉模型调用、手动/批量/夜间生成）。
  Image content analysis (`image_description` columns, vision calls, manual/batch/nightly generation).
 - 向量嵌入 + 相似度语义搜索（AI 搜索的"高阶版"）。
  Vector embeddings + similarity semantic search (the "advanced" AI search).
 - 多图、OCR、鉴权、标签系统等（见仓库简报 §15 / see brief §15）。
 > 架构会为上述未来项**预留接口**（§9），但本轮不实现。
 > The architecture **leaves seams** for the above (§9) without implementing them now.
 ---
 ## 1. 设计原则 / Guiding Principles
 - **AI 是加分项，不是依赖 / AI is additive, never required.**
  未配置或调用失败时，整站行为与今天**完全一致**。AI 只在"能用且开启"时才介入。
  When unconfigured or on failure, the app behaves **exactly as today**. AI engages only when configured and enabled.
 - **单一 schema 事实来源 / One source of truth for schema.**
  Alembic 接管建表与变更；退休手写的 `_sync_sqlite_image_columns()`。
  Alembic owns schema creation and changes; retire the hand-rolled `_sync_sqlite_image_columns()`.
 - **依赖最小化 / Minimal dependencies.**
  复用已在 `requirements.txt` 的 `httpx` 调 OpenAI 兼容接口；本轮**唯一新增依赖是 `alembic`**。
  Reuse the existing `httpx` for OpenAI-compatible calls; the **only new dependency is `alembic`**.
 - **保持现有形态 / Keep the current shape.**
  仍是 FastAPI + Jinja2 SSR + SQLite，无前端构建链；新页面沿用现有模板风格。
  Still FastAPI + Jinja2 SSR + SQLite, no frontend build; new pages follow existing template style.
 - **测试不联网、数据隔离 / Tests stay offline and isolated.**
  LLM 客户端做成单一可 mock 边界；迁移在测试中真实执行（临时 SQLite）。
  The LLM client is a single mockable boundary; migrations actually run in tests (throwaway SQLite).
 - **可信内网安全姿态 / Trusted-LAN posture.**
  无鉴权（仅内网/VPN 访问）；API Key 明文落库为业主在其威胁模型下的明确选择（§7）。
  No auth (LAN/VPN only); plaintext API key in DB is the owner's explicit choice under their threat model (§7).
 ---
 ## 2. 总体架构 / Architecture Overview
 ```text
                       ┌─────────────────────────────────────────────┐
   HTTP (SSR)          │                app/main.py                  │
  ───────────────────► │  路由 / routes  +  请求编排 / orchestration   │
                       └───┬───────────────┬───────────────┬─────────┘
                           │               │               │
                  ┌────────▼──────┐ ┌──────▼───────┐ ┌──────▼────────┐
                  │ app/llm.py    │ │ app_settings │ │ 搜索逻辑       │
                  │ LLM 客户端     │ │ 读写 helper   │ │ AI 检索 seam  │
                  │ (httpx)       │ │ (KV in DB)   │ │ (可替换)       │
                  └───────────────┘ └──────┬───────┘ └───────────────┘
                                           │
                       ┌───────────────────▼─────────────────────────┐
                       │                app/migrate.py                │
                       │ 启动 / boot:  verify_schema_is_current() 只读  │
                       │   └─ 与 head 不一致 → fail-close，拒绝启动      │
                       │ 命令 / CLI `python -m app.migrate`（幂等）:    │
                       │   └─ 空库建库 / 认领老库 / upgrade（见 §3）     │
                       └───────────────────┬─────────────────────────┘
                                           │ command.upgrade / stamp（仅迁移命令 / migration command only）
                       ┌───────────────────▼─────────────────────────┐
                       │     Alembic (alembic.ini + migrations/)      │
                       │     V1 baseline → V2(app_settings) → …       │
                       └───────────────────┬─────────────────────────┘
                                           │
                                     ┌─────▼─────┐
                                     │  SQLite   │
                                     └───────────┘
 ```
 新增模块 / New modules：`app/migrate.py`（Alembic 封装）、`app/llm.py`（LLM 客户端）、`migrations/`（Alembic 工程）、`app/templates/settings/`（配置页）。
 改动模块 / Touched：`app/db.py`、`app/main.py`、`app/models.py`、`app/templates/base.html`、`Dockerfile`、`requirements.txt`、`tests/`。
 ---
 ## 3. 迁移子系统 / Migration Subsystem (Alembic)
 ### 3.1 为什么 / Why
 配置表与未来的新列（如 `tag`、`image_description`）都需要可重复、可审阅的迁移；现有手写列同步只能补图片列，无法长期支撑。
 A config table and future columns need repeatable, reviewable migrations; the hand-rolled column sync only patches image columns and won't scale.
 ### 3.2 收敛不变量 / The Convergence Invariant
 **所有数据库最终都收敛到同一个 `head`。`V1 baseline` 必须严格等于"今天的真实 schema"（三张表 + 现有图片列），不多一列。**
 All databases converge to the same `head`. The `V1 baseline` must equal **today's actual schema exactly** (the three tables + existing image columns) — nothing more.
 ```text
 迁移链 / chain:  V1(baseline = 现状)  ──►  V2(app_settings)  ──►  …未来…  ──► head
 老的生产库 / existing prod DB:  stamp 到 V1（只写版本号，不建表，不碰数据） ──► upgrade ──► head
 全新/空库 / fresh DB:           跑 V1（真正建三张表） ───────────────────────► upgrade ──► head
                                                                              ↑ 终点一致 / same end state
 ```
 > `stamp` 只向 `alembic_version` 写一条版本记录，**不执行任何 DDL、不修改数据**。这是安全认领已有库的关键。
 > `stamp` only writes a row into `alembic_version`; it runs **no DDL and touches no data**. This is the key to safely adopting an existing DB.
 ### 3.3 运行时机：校验与迁移分离 / Migrations Run Separately from Startup
 **关键决策：迁移不在应用启动时发生。** 启动只做**只读校验**，迁移由一个独立、显式的命令/步骤执行。
 **Key decision: migrations do not happen at app startup.** Startup only **verifies** (read-only); migrating is an explicit, separate step.
 - **启动校验（fail-close）/ Startup check (fail-closed)：** `app/db.py::init_db()` 调 `app/migrate.py::verify_schema_is_current(url)`，比较 DB 当前 revision 与 `head`：
  - 一致 → 正常启动 / match → start normally。
  - 不一致（含空库、未认领的老库）→ **fail-close**：输出清晰日志、拒绝提供服务、提示先跑迁移步骤；**不执行任何 DDL、不碰数据**。
    Mismatch (incl. empty or un-adopted DBs) → **fail closed**: clear log, refuse to serve, no DDL, no data change.
 - **迁移命令 / The migration command：** 独立、显式、**幂等**的 `python -m app.migrate`（逻辑在 `app/migrate.py`）。已在 `head` 则空操作并退出 0，便于每次部署都安全重跑。
  A separate, explicit, **idempotent** `python -m app.migrate`. No-op (exit 0) when already at `head`, so it is safe to re-run on every deploy.
 - 退休手写列同步 / Retire the hand-rolled sync：`_sync_sqlite_image_columns()` 删除，schema 由 Alembic 单一接管。
  `_sync_sqlite_image_columns()` is removed; Alembic is the sole owner of schema.
 为什么 / Why：避免"启动副作用式迁移"、避免多实例并发迁移竞态；当 code 与 DB 不一致时，**宁可不启动也不带病运行**。
 Avoids surprise startup migrations and concurrent-migration races; on a code/DB mismatch it refuses to run rather than run wrong.
 ### 3.4 迁移命令的三种情况 / The Migration Command's Three Cases
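 对尚未纳入 Alembic 管理（无 `alembic_version` 表）的库，`python -m app.migrate` 用 SQLAlchemy inspector 判定，分三种；已纳入管理的库则直接 `upgrade head`（已在 `head` 时为空操作）：
 For a DB not yet under Alembic control (no `alembic_version` table), `python -m app.migrate` inspects the DB and branches three ways; an already-managed DB simply gets `upgrade head` (a no-op when already at `head`):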
 | 库的状态 / DB state | 动作 / Action |
 | --- | --- |
 | **空库 / empty** | `upgrade head`（建库并升到最新 / create & upgrade to head） |
 | **老库且与 baseline 一致 / existing, matches baseline（2a）** | `stamp V1` → `upgrade head`（认领后升级 / adopt then upgrade） |
 | **老库但与 baseline 不一致 / existing, mismatched（2b）** | **fail-close，不做任何改动 / fail closed, no changes** |
 > **一致性比对的基准是 baseline(V1)，不是 head。** 未认领的老库结构停在 V1（不含 `app_settings` 等后续内容），若拿 head 去比会把合法老库误判为不一致。
 > The match is compared against the **baseline (V1)**, not `head` — an un-adopted DB sits at V1 and would wrongly look "mismatched" if compared against head.
 >
 > ⚠️ SQLite 的 autogenerate 比对存在假阳性（类型亲和、索引命名等），可能让 2b 误 fail。实现上需用**容忍性比对**或允许**人工确认覆盖**（见 §3.6 验证）。
 > SQLite autogenerate has false positives; 2b should use a tolerant comparison or allow a documented manual override (see §3.6).
 ### 3.5 部署形态：Compose db-migration 闸门 / Deployment Shape: a Compose Gate（未来 / future）
 意图：用一个一次性 `db-migration` 服务跑迁移命令，**成功才放行 App**。本轮可先只交付命令本身，Compose 接线随后。
 Intent: a one-shot `db-migration` service runs the command and **the app starts only on its success**. The command ships this round; the Compose wiring can follow.
 ```yaml
 services:
  db-migration:
    image: <same image>
    command: python -m app.migrate     # 成功 exit 0；2b/失败 exit ≠0
  web:
    depends_on:
      db-migration:
        condition: service_completed_successfully
 ```
 迁移失败（含 2b 不一致）→ App 永不启动。
 A failed migration (incl. a 2b mismatch) → the app never starts.
 ### 3.6 Alembic 配置要点 / Alembic config notes
 - `migrations/env.py`：`target_metadata = Base.metadata`；DB URL 从 `get_settings().database_url` 动态读取（不写死在 `alembic.ini`）；对 SQLite 设 `render_as_batch=True`（便于未来改列/删列走 batch 模式）。
  `target_metadata = Base.metadata`; URL read dynamically from settings; `render_as_batch=True` for SQLite.
 - **V1 baseline 的生成与验证 / Authoring & verifying V1：** 用当前 models 对**空库** autogenerate 得到完整建表脚本；再对**生产库副本**跑 `alembic check`，**应显示无差异**——即印证"schema 符合预期、可安全盖章"。
  Autogenerate against an empty DB for the full create script; then run `alembic check` against a copy of the prod DB — it **should report no diff**, confirming it's safe to stamp.
 - 镜像 / Image：`Dockerfile` 需 `COPY` `alembic.ini` 与 `migrations/`，否则容器内无迁移脚本。
 - CI（可选 / optional）：加一步 `alembic check`，防止改了 model 却忘记生成迁移。
  Add an `alembic check` step to catch model/migration drift.
 ---
 ## 4. LLM 接入 / LLM Integration
 ### 4.1 配置存储：键值表 / Config storage: a KV table
 新增表 `app_settings(key TEXT PRIMARY KEY, value TEXT)`（由 V2 迁移创建）。
 New table `app_settings(key TEXT PRIMARY KEY, value TEXT)` (created by the V2 migration).
 **为什么用 KV 而非定型列 / Why KV instead of typed columns：** 后续还会陆续加配置项；给*已有表*加列有迁移成本，而 KV 加配置项＝加一行，永不迁移。类型与校验在 Python 侧处理。
 More settings are coming; adding columns to an *existing* table costs a migration, whereas a KV row never does. Typing/validation live in Python.
 本轮使用的 key / Keys used this round：
 | key | 含义 / Meaning | 默认 / Default |
 | --- | --- | --- |
 | `llm_enabled` | LLM 总开关 / master toggle | `false` |
 | `llm_base_url` | OpenAI 兼容端点 / endpoint | `https://api.openai.com/v1` |
 | `llm_model` | 模型名 / model name | （空 / empty） |
 | `llm_api_key` | API Key（明文 / plaintext，见 §7） | （空 / empty） |
 | `ai_search_enabled` | AI 搜索功能开关 / AI-search feature toggle | `false` |
 | `ai_search_extra_hints` | AI 搜索：可选「额外领域提示」，追加到默认系统提示词（step 3 引入）/ optional extra domain hints appended to the default prompt | （空 / empty） |
 > 读写封装 / Access helpers：`get_app_settings(db) -> LLMConfig`（dataclass 视图）与 `save_app_settings(db, ...)`，供路由与 `app/llm.py` 复用。
 > Helpers `get_app_settings(db) -> LLMConfig` and `save_app_settings(db, ...)`, reused by routes and `app/llm.py`.
 ### 4.2 LLM 客户端 / The client (`app/llm.py`)
 OpenAI 兼容的薄客户端，基于 `httpx`，**无新依赖** / A thin OpenAI-compatible client over `httpx`, **no new dependency**：
 - `is_configured(cfg) -> bool`：开关开启且 `model`/`api_key` 齐全。
 - `test_connection(cfg) -> Result`：发一个最小请求验证 `base_url`/`model`/`api_key`，供配置页"测试连接"用。
 - `expand_query(cfg, query, extra_hints="") -> ExpansionResult`：把查询词扩成一批近义/相关词；`terms` 为扩展词列表（不含原词），`error` 用于区分超时/网络/HTTP 等真实调用失败（提示词与输出契约见 §5.2）。
 - `analyze_image(...)`：**本轮不实现**，仅在文档中预留为未来接口（图片分析轮次）。Reserved for a future round, not implemented now.
 要点 / Notes：
 - 统一超时与错误处理；失败不抛到用户面前，按"优雅降级"返回可识别的失败信号。
  Unified timeout + error handling; failures degrade gracefully rather than surfacing as 500s.
 - 同步实现即可——FastAPI 把同步 `def` 路由丢线程池执行，阻塞式 httpx 调用可接受。
  A synchronous implementation is fine — FastAPI runs sync handlers in a threadpool.
 - **唯一对外/网络边界**，测试中整体 mock，CI 保持无网络。
  The **single network boundary**, fully mocked in tests.
 ### 4.3 配置页 / Config page
 | 路由 / Route | 作用 / Purpose |
 | --- | --- |
 | `GET /settings` | 渲染配置表单（Key 脱敏显示）/ render form (key masked) |
 | `POST /settings` | 保存配置到 `app_settings` / persist to `app_settings` |
 | `POST /settings/test` | 用当前/待保存配置测试连接 / test connection |
 - 模板 `app/templates/settings/form.html`，沿用现有卡片/表单样式；`base.html` 顶部导航加一个「设置」入口。
  Template under `settings/`, reusing existing styles; add a "设置/Settings" link in `base.html` nav.
 - **Key 脱敏 / Key masking**：页面不回显明文，显示「已配置，留空＝不修改」，提交留空则保留原值。
  Never echo the plaintext key; show "configured, leave blank to keep", and keep the old value if left blank.
 ### 4.4 降级 / Degradation
 `llm_enabled` 关或未配置时：配置页照常可用；AI 搜索按钮隐藏或提示去配置；其余功能与现状一致。
 When disabled/unconfigured: the settings page still works; the AI-search button is hidden or hints to configure; everything else is unchanged.
 ---
 ## 5. AI 搜索 / AI Search
 ### 5.1 行为 / Behavior
 - **常驻动作 / Persistent action：** 搜索页**始终**提供「AI 智能搜索」，**不以"零结果"为前提**——即便普通搜索已出结果，用户不满意时也能点。
  The "AI search" action is **always** present on the search page, **not gated on zero results** — usable even when normal results exist.
 - **流程 / Flow：** 普通 `LIKE` 照常先出结果 → 用户触发 AI → `expand_query` 返回 `ExpansionResult`（扩展词 `terms` 不含原词；调用失败写入 `error`）→ `ai_search` 用「原词 + 扩展词」对 `name`/`note` 做 OR `LIKE` 重搜 → 展示，并用横幅标注「AI 帮你扩展了：…」。
  Normal `LIKE` first → user triggers AI → `expand_query` returns an `ExpansionResult` (`terms` exclude the original query; failures are represented by `error`) → `ai_search` OR-`LIKE`s over name/note with the original + expanded terms → render with a banner listing the expansion.
 - **只把查询词发出去 / Only the query leaves**，不外泄物品清单；token 恒定、不随上千件物品增长。
  Only the query is sent; the inventory is not. Token cost is constant and does not grow with thousands of items.
 ### 5.2 提示词与输出契约 / Prompt & Output Contract
 `expand_query` 的**质量**取决于提示词，**集成稳定性**取决于输出契约——两者都在代码侧掌控（决策 C）。
 Quality hinges on the prompt; integration stability hinges on the output contract — both are code-controlled (decision C).
 - **基础系统提示词写死在 `app/llm.py`（用户改不坏）/ Base system prompt hardcoded：** 框定搬家/家居场景，要求"列出用户可能用来命名同一物品的相关词（近义、别称、上位类别、具体品类）"；语言跟随查询；最多约 8 个；不解释、不造无关词。
  Frames the moving/household domain, asks for related naming terms, follows the query's language, caps the count, no prose.
 - **可选「额外领域提示」/ Optional extra hints：** KV `ai_search_extra_hints`（设置页一个多行输入，默认空）。非空时**追加**到基础提示词之后，供业主微调倾向（如"厨房用品多，偏向厨具类"）。**它只能补充，不能改写输出格式。**
  An optional free-text setting appended to the base prompt; it can only add guidance, never alter the output format.
 - **输出契约（代码强制，与提示词解耦）/ Output contract (code-enforced)：** 要求模型只返回 **JSON 字符串数组**；解析时去掉 ` ```json ` 围栏 → `json.loads` → 只接受字符串数组 → 过滤空串/过长词 → 最多 8 个。散文、坏 JSON、JSON object、非字符串数组都视为**合法空扩展**（`terms=[]`, `error=None`）；网络错误、HTTP 错误、超时等真实调用失败写入 `ExpansionResult.error`。`expand_query` 的 `terms` 只包含扩展词；**原词由 `ai_search` 并入并去重**。
  Require a JSON string array; strip code fences, `json.loads`, accept only string arrays, filter empty/overlong terms, and cap to 8 terms. Prose, bad JSON, JSON objects, and non-string arrays are successful empty expansions (`terms=[]`, `error=None`); network/HTTP/timeout failures are represented by `ExpansionResult.error`. `expand_query.terms` contains only expanded terms; `ai_search` adds the original term and dedupes.
 - **客户端参数 / Client params：** 低 temperature、较小 max_tokens、设超时。Low temperature, small max_tokens, a timeout.
 - **措辞留松 / Wording left loose：** 默认提示词的具体字句可在 step-3 实测中迭代，不在文档里冻死。
  Exact default wording can be iterated during step-3 testing.
 ### 5.3 实现接口 / Implementation seam
 - 路由层扩展现有 `GET /search`：增加 `ai=1` 触发位（如 `GET /search?q=锅&ai=1`），保持单页、可收藏、SSR 友好。
  Extend the existing `GET /search` with an `ai=1` trigger (e.g. `/search?q=…&ai=1`), staying single-page and bookmarkable.
 - 内部定义可替换的检索 seam，例如 `ai_search(db, query) -> (expanded_terms, results, error_message)`：
  Define a replaceable retrieval seam, e.g. `ai_search(db, query) -> (expanded_terms, results, error_message)`:
  - **本轮 / now：** 内部＝查询词扩展 + 本地 `LIKE`。
  - **未来 / later：** 换成向量嵌入 + 相似度检索，**路由与模板不变**。
    Swap to embeddings + similarity later **without changing the route or template**.
 - 本轮检索范围＝`name` + `note`（`image_description` 本轮不存在）。
  Search scope this round = `name` + `note` (no `image_description` yet).
 ### 5.4 降级 / Degradation
 AI 关闭/未配置 → 不显示按钮（或提示去 `/settings`）；调用失败 → 友好提示并回退到普通结果。
 AI off/unconfigured → no button (or a hint to `/settings`); on failure → a friendly message, fall back to normal results.
 合法空扩展（模型返回 `[]` 或输出无法通过严格 JSON 字符串数组契约）不视为调用失败：回退普通结果，不显示故障提示。
 A legitimate empty expansion (model returns `[]` or output fails the strict JSON-string-array contract) is not treated as a call failure: fall back to normal results without an error banner.
 ---
 ## 6. 数据模型与路由变更 / Data Model & Route Changes
 **数据模型 / Data model（本轮）：**
 - 新增 `AppSetting`（表 `app_settings`，KV）。由 V2 迁移建表。
  Add `AppSetting` (`app_settings`, KV), created by the V2 migration.
 - `boxes` / `items` / `subitems` **本轮不变**。Unchanged this round.
 **新增/改动路由 / Routes added/changed：**
 - `GET /settings`、`POST /settings`、`POST /settings/test`（新）。
 - `GET /search?q=&ai=1`（扩展现有）。
 - `base.html` 导航新增「设置」。
 ---
 ## 7. 安全姿态 / Security Posture
 - **无鉴权 / No auth**：仅经可信内网 / VPN + nginx HTTPS 访问，业主已确认风险可接受。
  LAN/VPN + nginx HTTPS only; owner accepts the risk.
 - **API Key 明文落库 / Plaintext API key in DB**：业主明确选择。理由：备份经 `rclone` 至业主自有 OneDrive，链路可信；若攻击者已能读到服务器文件，则任何落盘位置都不安全。
  Owner's explicit choice; backups go via `rclone` to the owner's own OneDrive, and a server-file-read attacker defeats any at-rest location anyway.
 - **UI 不回显明文 Key / UI never echoes the key**（§4.3）——这是表单卫生，不是加密。
 - **外发数据 / Data egress**：AI 搜索只发送*查询词*；图片分析（未来）才会外发图片。
  AI search sends only the *query*; image egress only arrives with the future image-analysis feature.
 ---
 ## 8. 测试策略 / Testing Strategy
 - **迁移在测试中真实执行 / Migrations run in tests：** fixture 先在临时 SQLite 上跑迁移命令（建库 → `upgrade head`），再 `create_app()`（启动校验随之通过）。schema 来自迁移本身——单一事实来源 + 迁移覆盖。
  The fixture runs the migration command on a tmp SQLite first, then `create_app()` (whose startup check then passes).
 - **认领逻辑测试 / Adoption test（2a）：** 构造"有 `boxes` 数据但无 `alembic_version`"的库 → 跑迁移命令 → 断言数据保留、版本到达 head。
  Build a "has `boxes` data, no `alembic_version`" DB → run the migration command → assert data preserved and version at head.
 - **fail-close 测试 / Fail-closed tests：** ① DB 未到 head 时 `create_app()` 启动应 fail-close；② 2b 不一致时迁移命令应 fail-close 且不改动。
  ① `create_app()` fails closed when the DB is not at head; ② the migration command fails closed (and changes nothing) on a 2b mismatch.
 - **LLM 全程 mock / Mock the LLM：** 打桩 `expand_query` / `test_connection`（或底层 httpx），CI 不联网。
 - **新增用例 / New cases：** 配置增删改 + Key 脱敏；测试连接（mock）；AI 搜索扩展命中；各降级路径（未配置/失败）。
 ---
 ## 9. 未来扩展（本轮不做，但已预留）/ Future Extensions (seams reserved)
 | 未来项 / Future item | 预留点 / Seam already in place |
 | --- | --- |
 | 图片内容分析 / Image analysis | `app/llm.py` 预留 `analyze_image`；迁移系统可加 `image_description` 列；搜索范围可纳入该列。<br>`analyze_image` reserved; migrations can add `image_description`; search can include it. |
 | 向量语义搜索 / Vector semantic search | `ai_search(...)` seam 可整体替换；批处理可与图片描述补算共用。<br>The `ai_search` seam is swappable; batch jobs can be shared. |
 | 夜间批处理 / Nightly batch | 分析逻辑写成批量友好函数，cron 仅是薄包装（仿 backup cron）。<br>Batch-friendly functions; cron is a thin wrapper like the backup cron. |
 | 文本/视觉模型分离 / Split models | `app_settings` 加一个 key 即可，无需迁移。<br>Add one KV key, no migration. |
 ---
 ## 10. 决策记录 / Decisions Log
 | # | 决策 / Decision | 理由 / Rationale |
 | --- | --- | --- |
 | D1 | 先引入 Alembic 再做功能 / Alembic before features | 配置表与未来列都依赖可靠迁移；退休手写列同步。 |
 | D2 | V1 baseline 严格等于现状，新东西放 V2+ / baseline = current schema only | 使 `stamp` 认领老库为真、安全。 |
 | D3 | 迁移与启动分离：启动只校验 + fail-close，迁移走独立幂等命令 `python -m app.migrate`（未来接 Compose `db-migration` 闸门）/ migrations separated from startup | 避免启动副作用式迁移与并发竞态；schema 不一致宁可不启动也不带病运行；迁移成功才放行 App。 |
 | D4 | 配置用 KV 表 / KV settings table | 后续配置项多，避免反复给已有表加列。 |
 | D5 | API Key 明文落库 / plaintext key | 业主威胁模型下可接受；备份至自有 OneDrive。 |
 | D6 | 复用 httpx，手搓 OpenAI 调用 / reuse httpx | 不引入 `openai` SDK，依赖最小。 |
 | D7 | AI 搜索常驻、不依赖零结果 / persistent AI search | 用户对已有结果不满意时也能用。 |
 | D8 | AI 搜索 v1＝查询词扩展 / query-term expansion | 上千件物品下可扩展、不外泄清单、token 恒定。 |
 | D9 | 检索做成可替换 seam / pluggable retrieval | 未来换嵌入式语义搜索时上层不动。 |
 | D10 | 图片分析不在本轮 / image analysis deferred | 业主本轮三件事不含它；架构预留接口。 |
 | D11 | AI 搜索提示词：默认写死 + 可选「额外领域提示」；输出契约由代码强制 / hardcoded default prompt + optional extra-hints, code-enforced JSON contract | 保证解析稳定（用户改不坏），又给业主一点不改代码即可微调的空间。 |
@@ -0,0 +1,119 @@
 # 步骤 1 · Alembic 迁移地基 / Step 1 · Migration Foundation
 > **可独立执行 / Self-contained.** 完整背景见设计文档 [`llm-integration-design.md`](./llm-integration-design.md) §3；跨步骤约定见 [`implementation-plan.md`](./implementation-plan.md)。
 > **前置 / Prerequisite：** 无（第一步）/ none.
 > **产出 / Output：** 一个可独立合入的 PR；**不改任何业务 schema**。A mergeable PR with **zero business-schema change**.
 ---
 ## 目标 / Goal
 引入 Alembic 并**安全接管现有生产库**，schema 一点不改，所有现有测试保持绿。**迁移与应用启动分离**：启动只做只读校验 + fail-close，实际迁移由独立、幂等命令 `python -m app.migrate` 执行。
 Introduce Alembic and **safely adopt the existing prod DB** with zero schema change; all tests stay green. **Migration is separated from startup**: boot only verifies (read-only) and fails closed; the actual migrating is done by a separate idempotent command `python -m app.migrate`.
 ---
 ## 必要背景 / Essential Context（仅凭本文件即可执行 / enough to execute from this file）
 - **当前没有 Alembic。** 唯一的"迁移"是 `app/db.py::_sync_sqlite_image_columns()`（启动时缺图片列就 `ALTER TABLE ADD COLUMN`）。
  No Alembic today; the only "migration" is the hand-rolled image-column sync in `app/db.py`.
 - `app/db.py::init_db()` 在 FastAPI lifespan 启动时被 `create_app()` 调用，现在执行 `Base.metadata.create_all()` + `_sync_sqlite_image_columns()`。**本步把它改成只读校验**（不再在启动时建表/迁移）。相关符号：`Base`、`engine`、`SessionLocal`、`configure_database()`。
  `init_db()` runs at lifespan startup and currently does `create_all()` + the image-column sync. **This step turns it into a read-only check** (no table creation/migration at boot).
 - `tests/conftest.py` 的 `client` fixture：`configure_database(tmp_url)` → `create_app()`（触发 `init_db`）。每个测试用临时 SQLite，互不污染。
 - models 在 `app/models.py`：`Box` / `Item` / `SubItem` 三张表；每张含 `image_blob`(BLOB) / `image_mime_type` / `image_width` / `image_height`，以及 `created_at` / `updated_at`。
 - DB URL 来自 `app/config.py::get_settings().database_url`（默认 `sqlite:///./data/app.db`）。
 - **生产库**是当年 `create_all` 建的、**已装上千件数据、没有 `alembic_version` 表**。
 ### 铁律 / The Invariant（不可违背 / non-negotiable）
 - 所有数据库最终收敛到同一个 `head`。All DBs converge to the same `head`.
 - **V1 baseline 必须严格等于"今天的真实 schema"**（三张表 + 现有图片列 + 索引），**不多一列**。新东西放后续 revision。
  The V1 baseline must equal **today's actual schema exactly** — nothing more.
 - 以下动作**由迁移命令执行，不在应用启动时** / done by the **migration command**, not at boot：
  - 老库且与 baseline 一致：`stamp V1`（只写版本号，**不建表、不碰数据**）→ `upgrade head`。
    Existing DB matching baseline: `stamp V1` (no DDL, no data change) → `upgrade head`.
  - 老库但与 baseline 不一致：**fail-close，不做任何改动**。Mismatched existing DB → fail closed.
  - 新库：跑 `V1`（真正建表）→ `upgrade head`。Fresh DB: run `V1` → `upgrade head`.
 ---
 ## 任务 / Tasks
 - [ ] `requirements.txt` 增加 `alembic`（钉一个明确版本 / pin a version）。
 - [ ] 初始化 Alembic 工程：`alembic.ini` + `migrations/`（含 `env.py`、`versions/`）。
 - [ ] 配置 `migrations/env.py`：
  - `target_metadata = app.db.Base.metadata`（确保导入 `app.models` 以注册三张表）。
  - `sqlalchemy.url` **从 `app.config.get_settings().database_url` 动态读取**，不写死在 `alembic.ini`。
  - 对 SQLite 设 `render_as_batch=True`（为未来改列/删列预留 batch 能力）。
 - [ ] 生成 **V1 baseline 迁移**＝当前 models 的完整建表（`boxes`/`items`/`subitems`，含图片列与索引）。做法：对**空库** `--autogenerate`。
  Author V1 by autogenerating against an **empty** DB.
 - [ ] **验证 baseline**：对一份**生产库副本**跑 `alembic check`，确认**无差异**（印证可安全 `stamp`；SQLite 偶有类型亲和/索引命名假差异，人眼复核）。
  Verify with `alembic check` against a **copy of the prod DB** → expect no diff.
 - [ ] 新增 `app/migrate.py`，承担两个职责 / two responsibilities：
  - **(A) 迁移命令入口 `python -m app.migrate`（幂等 / idempotent）**：编程方式构造 Alembic `Config`（`script_location` → 打包进镜像的 `migrations/`，`sqlalchemy.url` = 解析出的 URL），用 SQLAlchemy inspector 分情况：
    - 空库 / empty → `command.upgrade(cfg, "head")`
    - 老库且与 **baseline(V1)** 一致 → `command.stamp(cfg, "<V1 rev>")` → `command.upgrade(cfg, "head")`
    - 老库但与 baseline 不一致 → **fail-close**：非零退出 + 清晰日志 + **不做任何改动**
    - 已在 `head` → 空操作、退出 0
    - `<V1 rev>` 指 **baseline 这个具体 revision**（`down_revision=None` 的那条），不是 `head`。
    - "与 baseline 一致"的判定**对照 baseline(V1) 的预期 schema**（不是 head）；SQLite 假差异需容忍或允许人工确认覆盖。
  - **(B) 启动校验 `verify_schema_is_current(url)`（只读 / read-only）**：比较 DB 当前 revision 与 `head`；不一致返回失败/抛错，**绝不改动 DB**。
 - [ ] 改 `app/db.py::init_db()`：改为调 `verify_schema_is_current(resolved_url)` —— **一致才放行；不一致 fail-close**（清晰日志，提示先跑 `python -m app.migrate`）。不再在启动时建表/迁移。**删除** `_sync_sqlite_image_columns()`。保留 `configure_database()` / engine 装配。
  `init_db()` now only verifies and **fails closed** on mismatch (pointing the user to `python -m app.migrate`); remove `_sync_sqlite_image_columns()`.
 - [ ] `tests/conftest.py`：fixture 改为**先跑迁移命令**把临时库带到 `head`，再 `create_app()`（这样启动校验通过）。
  Fixture runs the migration first, then `create_app()`.
 - [ ] `Dockerfile`：加 `COPY alembic.ini .` 与 `COPY migrations ./migrations`（否则容器内无迁移脚本）。
 - [ ] CI（可选 / optional）：`.github/workflows/test.yml` 加一步 `alembic check`，防止 model 与迁移漂移。
 - [ ] Compose `db-migration` 闸门（可后续 / can be deferred）：加一个一次性服务跑 `python -m app.migrate`，`web` 经 `depends_on: condition: service_completed_successfully` 等它成功（见设计 §3.5）。
  Add a one-shot `db-migration` service gating `web` (design §3.5); may be deferred.
 ---
 ## 涉及文件 / Files
 `requirements.txt`、`alembic.ini`(新)、`migrations/**`(新)、`app/migrate.py`(新)、`app/db.py`、`tests/conftest.py`、`Dockerfile`、（可选）`.github/workflows/test.yml`、（可后续）`docker-compose.yml`。
 ---
 ## 测试 / Tests
 - [ ] 现有 ~83 个测试全绿（fixture 先跑迁移、再起 App，启动校验通过）。
  All existing ~83 tests pass (fixture migrates first, then starts the app).
 - [ ] **认领老库（2a）**：构造"有 `boxes` 数据、无 `alembic_version`"的库（可先用 `create_all` 造）→ 跑迁移命令 → 断言数据保留、版本到达 `head`，且不因重复建表而报错。
  Adoption (2a): migrate an un-stamped populated DB → data preserved, version at `head`, and no "table already exists" error.
 - [ ] **全新库**：指向空库的 URL → 跑迁移命令 → 三张表存在、版本到 `head`。
  Fresh DB: a URL pointing to an empty database → migrate → tables exist, version at `head`.
 - [ ] **fail-close（启动）**：DB 未到 `head` 时 `create_app()` / `init_db()` 启动应 fail-close（抛错/拒绝服务）、不改动 DB。
  Startup fails closed when the DB is not at `head`; DB unchanged.
 - [ ] **fail-close（2b）**：构造与 baseline 不一致的老库 → 跑迁移命令 → 断言非零退出、DB 不变。
  Migration command fails closed on a 2b mismatch; DB unchanged.
 ---
 ## 验收 / Acceptance
 - 迁移命令：空库建到 `head`；老库一致则认领并到 `head`；老库不一致则 **fail-close 不改动**；已在 `head` 则幂等空操作。
  Migration command: empty→head; matching existing→adopt+head; mismatch→fail closed; already-at-head→no-op.
 - 启动校验：DB 未到 `head` 时**拒绝启动**并输出清晰日志；到 `head` 才正常起。
  Startup refuses to boot (clear log) unless the DB is at `head`.
 - 模拟老库认领后**数据无损**。Adopted existing-like DB keeps data intact.
 - 全部测试绿；schema 与本步骤前**逐列一致**（本步不改业务 schema）。
  All tests green; schema identical to before (no business-schema change).
 ---
 ## 风险与缓解 / Risks & Mitigations
 - **baseline 与现状有偏差 → `stamp` 失真。** 缓解：`alembic check` 对生产副本校验 + 人眼复核 SQLite 假差异。
  Baseline drift → `alembic check` against a prod copy + manual eyeball.
 - **2b 一致性比对假阳性 → 合法老库被误 fail-close。** 缓解：比对基准用 baseline(V1) 而非 head；容忍已知 SQLite 噪声，或提供"人工确认覆盖"的开关。
  2b false positives wrongly fail a legit DB → compare against baseline (not head); tolerate known SQLite noise or offer a manual-confirm override.
 - **容器内找不到迁移脚本。** 缓解：确认 `Dockerfile` 已 `COPY` `alembic.ini` 与 `migrations/`；`script_location` 使用绝对路径，或使用相对镜像 WORKDIR(`/app`) 的路径，并确认其能正确解析。
  Migrations missing in image → ensure they're `COPY`-ed and that `script_location` (absolute, or relative to the image WORKDIR `/app`) resolves correctly.
 ---
 ## 相关约定 / Conventions（详见 implementation-plan.md）
 - 不主动 push/commit，除非业主要求。Don't push/commit unless asked.
 - 实现与设计若有偏差 → 回写设计文档 §3 与仓库简报 `../repository-brief.md` §10。
@@ -0,0 +1,102 @@
 # 步骤 2 · LLM 接入 / Step 2 · LLM Integration
 > **可独立执行 / Self-contained.** 完整背景见设计文档 [`llm-integration-design.md`](./llm-integration-design.md) §4；跨步骤约定见 [`implementation-plan.md`](./implementation-plan.md)。
 > **前置 / Prerequisite：** [步骤 1](./step-1-alembic-foundation.md) 已合入（Alembic 已就位——**schema 变更一律通过新建迁移完成，并经迁移命令 `python -m app.migrate` / `db-migration` 步骤生效，非应用启动时**）。Step 1 merged; Alembic is in place — **schema changes go through a new migration, applied by the migration command, not at app startup**.
 > **产出 / Output：** 一个可独立合入的 PR。
 ---
 ## 目标 / Goal
 提供一个配置页：能填写并测试 OpenAI 兼容的 `base_url`/`model`/`api_key`，配置落库到 `app_settings`；并提供一个可复用、可 mock 的 LLM 客户端。**未配置时整站行为不变。**
 A settings page to enter & test the LLM config, persisted to `app_settings`, plus a reusable, mockable LLM client. **App behavior is unchanged when unconfigured.**
 ---
 ## 必要背景 / Essential Context
 - 路由全部在 `app/main.py::create_app()`；模板在 `app/templates/`，基础模板 `base.html` 顶部有导航（现有「箱子」「搜索」两个链接）。
  All routes live in `create_app()`; templates under `app/templates/`; nav lives in `base.html`.
 - DB 会话依赖：`Depends(get_db)`（`app/db.py`）。models 在 `app/models.py`，`Base` 在 `app/db.py`。
 - **同步 handler 即可**：FastAPI 把同步 `def` 路由丢线程池执行，阻塞式 `httpx` 调用可接受。
  Sync handlers are fine — FastAPI runs them in a threadpool, so blocking `httpx` is acceptable.
 - `httpx` 已在 `requirements.txt`，**不要新增依赖**（不引入 `openai` SDK）。
  `httpx` is already a dependency; **add no new deps**.
 ### 关键决策 / Key Decisions
 - **配置存储用键值表**，不是定型列：`app_settings(key TEXT PRIMARY KEY, value TEXT)`。原因：后续配置项会变多，KV 加项＝加一行、永不迁移；类型/校验在 Python 侧。
  KV table, not typed columns — future settings = new rows, never a migration.
 - **API Key 明文落库**（业主在其威胁模型下的明确选择），但**配置页绝不回显明文**：显示「已配置，留空＝不修改」，提交留空则保留原值。
  Plaintext key in DB (owner's explicit choice), but the **UI never echoes it** — show "configured, leave blank to keep".
 - **优雅降级**：`llm_enabled` 关或缺 `model`/`api_key` 时，`is_configured()` 为假；调用失败不抛 500，返回可识别的失败信号。
  Graceful degradation throughout.
 ### 本轮使用的 key / Keys this round
 | key | 含义 / Meaning | 默认 / Default |
 | --- | --- | --- |
 | `llm_enabled` | LLM 总开关 / master toggle | `false` |
 | `llm_base_url` | OpenAI 兼容端点 / endpoint | `https://api.openai.com/v1` |
 | `llm_model` | 模型名 / model name | （空 / empty） |
 | `llm_api_key` | API Key（明文 / plaintext） | （空 / empty） |
 | `ai_search_enabled` | AI 搜索功能开关（步骤 3 用）/ AI-search toggle | `false` |
 ---
 ## 任务 / Tasks
 - [ ] **新建 V2 迁移**（用 Alembic，遵循步骤 1 的工作流）：创建 `app_settings(key TEXT PRIMARY KEY, value TEXT)`。
  New V2 Alembic migration creating `app_settings`.
 - [ ] `app/models.py`：新增 `AppSetting` 模型（映射 `app_settings`）。
 - [ ] 配置读写 helper（建议放 `app/settings_store.py` 或 `app/config.py` 旁）：
  - `get_app_settings(db) -> LLMConfig`（dataclass：`enabled`/`base_url`/`model`/`api_key`/`ai_search_enabled`，含默认值）。
  - `save_app_settings(db, ...)`：写回 KV；Key 留空则不覆盖原值。
 - [ ] 新增 `app/llm.py`（基于 `httpx`）：
  - [ ] `is_configured(cfg) -> bool`
  - [ ] `test_connection(cfg) -> Result`（发最小请求验证 `base_url`/`model`/`api_key`）。
  - [ ] `expand_query(cfg, query) -> ExpansionResult`（查询词扩展；**步骤 3 会校准提示词与输出契约**；`terms` 为扩展词列表，`error` 用于区分超时/网络/HTTP 等真实调用失败）。
  - [ ] 统一超时 + 错误处理；失败优雅降级。
  - [ ] **（预留，不实现）** `analyze_image(...)`：仅留 TODO/签名占位 + 注释指向"未来图片分析轮次"。Reserved, not implemented.
  - [ ] 把所有网络调用收敛到**单一函数边界**，便于测试整体 mock。
 - [ ] 路由（`app/main.py`）：
  - [ ] `GET /settings`：渲染配置表单（Key 脱敏）。
  - [ ] `POST /settings`：保存到 `app_settings`（303 重定向，沿用现有 POST 风格）。
  - [ ] `POST /settings/test`：用当前/待保存配置测试连接，回显结果。
 - [ ] 模板：`app/templates/settings/form.html`（沿用现有卡片/表单样式）；`base.html` 导航加「设置」入口。
 - [ ] 测试（LLM 全程 mock，CI 不联网）：
  - [ ] 保存/读取配置；**Key 脱敏**（响应 HTML 不含明文；提交留空不覆盖原 Key）。
  - [ ] `POST /settings/test` 成功/失败两条分支（mock `test_connection` 或底层 httpx）。
  - [ ] 未配置时 `is_configured()` 为假；配置页在 `llm_enabled=false` 下仍可正常打开保存。
 ---
 ## 涉及文件 / Files
 `migrations/versions/**`(V2)、`app/models.py`、`app/llm.py`(新)、`app/settings_store.py`(新，或并入既有模块)、`app/main.py`、`app/templates/settings/form.html`(新)、`app/templates/base.html`、`tests/`。
 ---
 ## 验收 / Acceptance
 - 在 `/settings` 填入配置 → 保存 → 重启应用后仍在（已落库）。Config persists across restarts.
 - 「测试连接」对真实 OpenAI 端点可用（手动验证）；自动化测试中走 mock。
 - 配置页 HTML **不含明文 Key**；留空提交保留原值。
 - `llm_enabled=false` 或缺 Key 时，全站行为与步骤 1 后一致（无回归）。
 ---
 ## 风险与缓解 / Risks & Mitigations
 - **把网络调用散落各处 → 难 mock、CI 易联网。** 缓解：所有外呼集中在 `app/llm.py` 单一边界。
  Scattered network calls → keep all egress in `app/llm.py`.
 - **Key 不慎回显。** 缓解：模板永不输出 `api_key` 值，仅输出"是否已配置"。
  Accidental key echo → template never prints the key value.
 ---
 ## 相关约定 / Conventions（详见 implementation-plan.md）
 - 不主动 push/commit，除非业主要求。
 - 无新依赖（用 `httpx`）。CI 不联网（mock LLM）。
 - 实现与设计若有偏差 → 回写设计文档 §4 与仓库简报 §15。
@@ -0,0 +1,103 @@
 # 步骤 3 · 基础 AI 搜索 / Step 3 · Basic AI Search
 > **可独立执行 / Self-contained.** 完整背景见设计文档 [`llm-integration-design.md`](./llm-integration-design.md) §5；跨步骤约定见 [`implementation-plan.md`](./implementation-plan.md)。
 > **前置 / Prerequisite：** [步骤 2](./step-2-llm-integration.md) 已合入（`app/llm.py::expand_query`、`app_settings` 配置、`ai_search_enabled` 开关均已就绪）。Step 2 merged.
 > **产出 / Output：** 一个可独立合入的 PR；**不改 schema**。
 ---
 ## 目标 / Goal
 在搜索页提供一个**常驻**的「AI 智能搜索」动作：点击后用查询词扩展增强搜索结果。**不以"零结果"为前提**——即便普通搜索已出结果，用户不满意时也能用。
 A **persistent** "AI search" action on the search page that broadens results via query-term expansion. **Not gated on zero results** — usable even when normal results exist.
 ---
 ## 必要背景 / Essential Context
 - 现有搜索：`app/main.py::_build_search_results(db, query)` 对 `Box`/`Item`/`SubItem` 的 `name` 与 `note` 做大小写不敏感 `LIKE`，返回结果列表；路由 `GET /search`（函数 `search_page`，参数 `q`）渲染 `app/templates/search/index.html`。
  Existing search: `_build_search_results(db, query)` does case-insensitive `LIKE` over name/note; route `GET /search` renders `search/index.html`.
 - 步骤 2 已提供：`app/llm.py::expand_query` 的基础能力、配置读取 `get_app_settings(db)`、开关 `ai_search_enabled` 与 `is_configured(cfg)`、设置页 `app/templates/settings/form.html`；本步将 `expand_query` 校准为返回结构化 `ExpansionResult(terms, error)`。
 - 本步**新增**配置项 `ai_search_extra_hints`（可选「额外领域提示」）并在设置页加一个多行输入——这是本步**唯一**触及设置页之处。
  This step adds the `ai_search_extra_hints` setting + a textarea on the settings page (the only settings-page change here).
 - 本轮检索范围＝`name` + `note`（`image_description` 本轮不存在，属未来图片分析轮次）。
  Search scope = `name` + `note` (no `image_description` this round).
 ### 关键决策 / Key Decisions
 - **常驻、不依赖零结果。** 普通 `LIKE` 照常先出结果；AI 动作始终可用（开启且已配置时）。
  Persistent and not gated on zero results.
 - **流程：** 触发 AI → `expand_query` 返回 `ExpansionResult`（扩展词 `terms` 不含原词，调用失败写入 `error`）→ `ai_search` 合并「原词 + 扩展词」并对 `name`/`note` 做 OR `LIKE` 重搜 → 展示，并用横幅标注「AI 帮你扩展了：…」。**只把查询词发出去**，不外泄物品清单。
  Trigger → `expand_query` returns an `ExpansionResult` (`terms` exclude the original query; failures are represented by `error`) → `ai_search` OR-`LIKE`s over the original + expanded terms → render with a banner of the expansion. Only the query leaves.
 - **可替换的检索 seam。** 把 AI 检索抽成一个函数（如 `ai_search(db, query) -> (expanded_terms, results, error_message)`），本轮内部＝查询词扩展 + 本地 `LIKE`；**未来换成向量嵌入 + 相似度时，路由与模板不变**。
  Wrap AI retrieval behind a swappable seam so embeddings can replace it later without touching route/template.
 - **提示词（决策 C，详见设计 §5.2）。** 基础系统提示词**写死在 `app/llm.py`**；设置页可选的 `ai_search_extra_hints` 非空时**追加**到其后；**输出契约由代码强制**（只接受 JSON 字符串数组；散文/坏 JSON/非字符串数组解析为合法空扩展；网络/超时/HTTP 失败写入 `ExpansionResult.error`），用户改 hints 也改不坏解析。
  Base prompt hardcoded; optional extra hints appended; output contract enforced in code: only a JSON string array is accepted; prose/bad JSON/non-string arrays become a successful empty expansion; network/timeout/HTTP failures are represented by `ExpansionResult.error`.
 - **优雅降级。** AI 关闭/未配置 → 不显示按钮（或提示去 `/settings`）；调用失败 → 友好提示 + 回退普通结果。
 ---
 ## 任务 / Tasks
 - [ ] **落地/校准 `expand_query` 的提示词（按设计 §5.2）**：
  - 基础系统提示词写死在 `app/llm.py`（搬家/家居场景、列相关命名词、跟随查询语言、≤ ~8 个、不解释、不造无关词）。默认提示词起点（**可迭代** / a starting point, tune during testing）：
    > 你是搬家物品搜索助手。用户在搜索自己打包的箱子与物品（家居/搬家场景）。给定一个搜索词，列出用户可能用来命名同一类物品的相关词：近义词、常见别称、上位类别、具体品类。规则：用与查询相同的语言；只给与该物品紧密相关、有助于在清单里找到它的词；不要解释、不要造无关词；最多 8 个；只输出一个 JSON 字符串数组，例如 `["炒锅","平底锅","汤锅","厨具"]`。
  - 读取 `ai_search_extra_hints`，非空则**追加**到基础提示词之后（只补充，不改格式）。
  - **返回契约**：`expand_query(cfg, query, extra_hints="") -> ExpansionResult`，其中 `terms` 是扩展词列表（**不含原词**），`error` 在成功时为 `None`。
  - **输出契约**：要求模型只回 JSON 字符串数组；解析去 ` ```json ` 围栏 → `json.loads` → 只接受字符串数组 → 过滤空串/过长词 → 最多 8 个；散文、坏 JSON、JSON object、非字符串数组都返回 `terms=[]` 且 `error=None`（合法空扩展）；网络错误、HTTP 错误、超时等调用失败返回 `terms=[]` 且 `error=<友好错误>`；不向上抛 500。
 - [ ] **新增配置项 `ai_search_extra_hints`**：KV 默认空；纳入 `get_app_settings` / `save_app_settings`；设置页 `app/templates/settings/form.html` 加一个多行输入（沿用 step 2 风格）。
 - [ ] 实现检索 seam：在 `app/main.py`（或抽一个小搜索模块 `app/search.py`）加 `ai_search(db, query) -> (expanded_terms, results, error_message)`：
  - 调 `expand_query(cfg, query, extra_hints=…)`（`extra_hints` 取自配置项 `ai_search_extra_hints`）得到 `ExpansionResult`；
  - 若 `result.error` 非空：回退普通搜索，并把友好错误传给模板；
  - 若 `result.terms` 为空且无错误：视为合法空扩展，回退普通搜索，不显示故障提示；
  - 用「原词 + 扩展词」对 `name`/`note` 做 OR `LIKE`（**复用现有 `_build_search_results` 的匹配逻辑**，避免重复实现），去重。
  - 注意：现有 `_build_search_results(db, query)` 只接收单个查询词；建议把它泛化为接收一组关键词（对多个词做 OR），让 AI 搜索与普通搜索共用同一套匹配逻辑，避免分叉。
    Note: `_build_search_results` currently takes a single query — generalize it to accept multiple keywords so AI and normal search share one matching path.
 - [ ] 扩展 `GET /search`：支持 `ai=1` 触发位（如 `GET /search?q=锅&ai=1`），保持单页、可收藏、SSR 友好。
  - `ai=1` 且 AI 开启且 `is_configured()` → 走 `ai_search`，把 `expanded_terms` 传给模板做横幅。
  - 否则走原有普通搜索。
 - [ ] 模板 `app/templates/search/index.html`：
  - 常驻「AI 智能搜索」按钮，链接到 `?q=<当前词>&ai=1`；
  - AI 关闭/未配置时隐藏按钮（或显示去 `/settings` 的提示）；
  - `ai=1` 结果页顶部显示横幅「AI 帮你扩展了：term1、term2…」。
 - [ ] 降级：`ai_search` 内部调用失败时捕获，渲染友好提示并回退到普通 `LIKE` 结果。
 - [ ] 测试（mock `expand_query`，CI 不联网）：
  - [ ] 扩展词驱动命中：原词 `LIKE` 搜不到、扩展后能搜到。
  - [ ] 已有结果时点 AI 仍可用，且结果集被扩大（含原结果）。
  - [ ] 按钮可见性随 `ai_search_enabled` + `is_configured()` 门控。
  - [ ] 调用失败（超时/网络/HTTP）→ 回退普通结果、显示友好提示、页面不报错。
  - [ ] `expand_query` 输出解析：模型回合法 JSON 数组 → 正确解析；回散文/坏 JSON/非字符串数组 → `terms=[]` 且 `error=None`；超时/网络/HTTP 失败 → `terms=[]` 且 `error` 非空；均不抛错。
    Output parsing: valid JSON array → parsed; prose/bad JSON/non-string arrays → `terms=[]`, `error=None`; timeout/network/HTTP failures → `terms=[]`, non-empty `error`; no raise.
  - [ ] `ai_search_extra_hints` 非空时确被追加进请求（可对构造的请求体断言）。
    Extra hints, when set, are appended to the request.
 ---
 ## 涉及文件 / Files
 `app/llm.py`、`app/main.py`、（可选 `app/search.py`）、`app/templates/search/index.html`、`app/templates/settings/form.html`、配置读写 helper（step 2 的 settings store）、`tests/`。
 ---
 ## 验收 / Acceptance
 - 搜索页在 AI 开启时**始终**可见「AI 智能搜索」；点击后结果按扩展词扩大，并标注扩展词。
 - 未配置/失败时优雅降级，普通搜索完全不受影响。
 - 检索逻辑收敛在 `ai_search` seam，未来可整体替换为向量语义搜索而不动路由/模板。
 ---
 ## 风险与缓解 / Risks & Mitigations
 - **扩展词过多/过散 → 结果噪声大。** 缓解：限制扩展词数量；横幅透明展示扩展词，让用户理解结果来源。
  Too many/too-loose terms → cap the expansion count and show it transparently.
 - **AI 调用慢/失败拖累搜索页。** 缓解：仅在 `ai=1` 时才调用（普通搜索零开销）；设超时；失败回退。
  Slow/failed calls → only call on `ai=1`, set a timeout, fall back.
 ---
 ## 相关约定 / Conventions（详见 implementation-plan.md）
 - 不主动 push/commit，除非业主要求。
 - CI 不联网（mock `expand_query`）。
 - 实现与设计若有偏差 → 回写设计文档 §5 与仓库简报 §15。
@@ -0,0 +1,300 @@
 # 仓库简报 / Repository Brief — 2026 搬家助手 (Moving Helper)
 > 面向「下一轮改动」前的快速理解文档。中英双语对照。
 > A bilingual orientation doc to read before the next round of changes.
 >
 > 对应版本 / Snapshot: `main` @ `b9b6583`（撰写时 / at time of writing）
 ---
 ## 1. 一句话定位 / In One Sentence
 **中文：** 一个轻量的、面向**可信家庭内网**的搬家装箱记录工具：记录「有哪些箱子、每个箱子里有什么物品、容器型物品里又装了什么」，支持单图、全局搜索，并以 Docker 长期运行。
 **EN:** A lightweight, **trusted-home-LAN** moving inventory tool: track *which boxes exist, what items each box holds, and what sub-items sit inside container-type items*. Supports one image per record, global search, and runs long-term via Docker.
 设计取向 / Design stance：小而稳、易于几个月后回来继续扩展；**不是**企业平台、**不**追求复杂运维。
 Small, stable, easy to pick back up months later; **not** an enterprise platform.
 ---
 ## 2. 技术栈 / Tech Stack
 | 层 / Layer | 选型 / Choice | 版本 / Version |
 | --- | --- | --- |
 | Web 框架 / Framework | FastAPI | 0.116.1 |
 | ASGI 服务器 / Server | Uvicorn (`[standard]`) | 0.35.0 |
 | 模板 / Templating | Jinja2（服务端渲染 SSR） | 3.1.6 |
 | ORM | SQLAlchemy 2.x（`Mapped` / `mapped_column` 风格） | 2.0.43 |
 | 数据库 / DB | SQLite（文件库 / file-based） | — |
 | 表单 / Forms | python-multipart | 0.0.20 |
 | 图片处理 / Images | Pillow（HEIC 时可选 `pillow_heif` / `sips` 兜底） | 11.2.1 |
 | HTTP 客户端 / Client | requests（仅 Notion 导入用） | 2.32.3 |
 | 测试 / Tests | pytest + Starlette `TestClient`(httpx) | 8.4.1 / 0.28.1 |
 | 部署 / Deploy | Docker / Docker Compose + nginx 反代 | — |
 **没有前端构建链 / No frontend build chain**：纯 SSR + 一个 `style.css` + `base.html` 里的少量原生 JS。无 npm / Node / 打包器。
 Pure SSR + one `style.css` + a little vanilla JS inline in `base.html`. No npm/Node/bundler.
 ---
 ## 3. 目录结构 / Project Layout
 ```text
 .
 ├── app/
 │   ├── __init__.py
 │   ├── config.py            # 环境变量配置 (Settings dataclass)
 │   ├── db.py                # SQLAlchemy engine/session、init_db（启动时只读校验 schema 是否在 head，不执行 DDL）
 │   ├── images.py            # 图片处理管线 (Pillow + HEIC 兜底)
 │   ├── main.py              # 所有路由 + 应用工厂 create_app()  ← 核心
 │   ├── models.py            # Box / Item / SubItem 三个 ORM 模型
 │   ├── notion_import.py     # 一次性 Notion 导入的解析/写入逻辑
 │   ├── static/              # style.css, manifest, service-worker.js, PWA 图标
 │   └── templates/           # Jinja2 模板 (boxes/ items/ subitems/ search/ + base.html)
 ├── scripts/
 │   ├── install.sh           # 一键安装：渲染 nginx/backup/compose + cron
 │   ├── deploy.sh            # 仓库内的轻量更新脚本 (git pull + compose up)
 │   ├── backup_db.sh         # 备份脚本模板 (占位符由 install.sh 渲染)
 │   ├── import_notion.py     # Notion 导入 CLI 入口
 │   └── nginx/moving-helper.nginx.template
 ├── tests/                   # pytest：test_app.py (~74) + test_notion_import.py (9)
 ├── data/                    # 运行期 SQLite (data/app.db)，已 gitignore
 ├── .github/workflows/       # test.yml (CI) + docker-image.yml (CD)
 ├── Dockerfile, docker-compose.yml, .dockerignore
 ├── .env.example             # 部署配置示例（被 shell 脚本 source）
 ├── pytest.ini, requirements.txt, README.md
 └── docs/                    # ← 本文档所在
 ```
 **关键入口 / Key entry point：** `app/main.py` 里的 `create_app()` 注册了**全部**路由。整个后端逻辑几乎都在这一个文件里。
 Almost all backend logic lives in the single `create_app()` factory in `app/main.py`.
 ---
 ## 4. 数据模型 / Data Model
 固定**三级层次**，不是无限树 / A fixed **3-level hierarchy**, not an arbitrary tree：
 ```text
 Box  (顶层容器 / top container：纸箱、行李箱…)
 └── Item  (箱子里的物品 / an item in the box)
    └── SubItem  (仅容器型 Item 才有 / only under container items)
 ```
 定义于 `app/models.py`：
 | 模型 / Model | 表 / Table | 关键字段 / Key fields | 关系 / Relations |
 | --- | --- | --- | --- |
 | `Box` | `boxes` | `name`, `note`, `room`, `status` + 图片字段 + 时间戳 | `items` → 多个 Item（`cascade="all, delete-orphan"`） |
 | `Item` | `items` | `box_id`(FK), `name`, `note`, `quantity`, `is_container` + 图片字段 + 时间戳 | 属于一个 `Box`；`subitems` → 多个 SubItem（级联删除） |
 | `SubItem` | `subitems` | `parent_item_id`(FK), `name`, `note`, `quantity` + 图片字段 + 时间戳 | 属于一个 `Item` |
 **核心规则 / Core rules：**
 - 只有 `Item.is_container == True` 的物品才允许拥有 `SubItem`；非容器去建子物品会返回 **400**（`_require_container_item`）。
  Only container items may hold sub-items; otherwise the API returns **400**.
 - 在更新 Item 时，若取消勾选 `is_container`，会**清空其所有 SubItem**（`item.subitems.clear()`，`main.py:454`）。
  Un-checking `is_container` on update **clears all sub-items**.
 - 删除级联：删 Box → 删其 Item → 删其 SubItem，由 ORM `cascade` + 外键 `ondelete="CASCADE"` 双重保障（SQLite 还启用了 `PRAGMA foreign_keys=ON`）。
  Delete cascades top-down, enforced both by ORM cascade and FK `ondelete`.
 **图片字段（每个模型都有同一组）/ Image fields (same set on every model)：**
 `image_blob` (BLOB)、`image_mime_type`、`image_width`、`image_height`。
 → 图片直接以二进制存进 SQLite，**不落地为文件**。Images are stored **inline as BLOBs in SQLite**, not as files.
 时间戳 / Timestamps：`created_at`、`updated_at`（UTC，`updated_at` 带 `onupdate`）。
 ---
 ## 5. 路由总览 / Route Map
 全部在 `app/main.py`。POST 后统一 **303 See Other** 重定向（避免重复提交）。
 All in `app/main.py`. POSTs redirect with **303 See Other**.
 | 方法 路径 / Method Path | 作用 / Purpose |
 | --- | --- |
 | `GET /` | 302 跳转到 `/boxes` |
 | `GET /manifest.webmanifest`, `GET /service-worker.js` | PWA 资源（从根路径返回） |
 | `GET /search?q=` | 全局搜索页 |
 | `GET /boxes` | 箱子列表 + 概览统计 |
 | `GET /boxes/new` · `POST /boxes` | 新建箱子表单 / 提交 |
 | `GET /boxes/{id}` | 箱子详情（含其 Item 列表） |
 | `GET /boxes/{id}/edit` · `POST /boxes/{id}/update` | 编辑 / 保存 |
 | `POST /boxes/{id}/delete` | 删除箱子 |
 | `GET /boxes/{id}/image` · `POST /boxes/{id}/image/delete` | 取图 / 删图 |
 | `GET /boxes/{id}/items/new` · `POST /boxes/{id}/items` | 在箱子下新建物品 |
 | `GET /items/{id}` | 物品详情（容器型则含 SubItem 列表） |
 | `GET /items/{id}/edit` · `POST /items/{id}/update` · `POST /items/{id}/delete` | 编辑 / 保存 / 删除 |
 | `GET /items/{id}/image` · `POST /items/{id}/image/delete` | 取图 / 删图 |
 | `GET /items/{id}/subitems/new` · `POST /items/{id}/subitems` | 在容器型物品下新建子物品 |
 | `GET /subitems/{id}/edit` · `POST /subitems/{id}/update` · `POST /subitems/{id}/delete` | 编辑 / 保存 / 删除 |
 | `GET /subitems/{id}/image` · `POST /subitems/{id}/image/delete` | 取图 / 删图 |
 **重定向行为细节 / Redirect nuances**（`main.py` 创建逻辑）：
 - 创建物品时，若点「保存并添加下一个」(`submit_action=save_and_add_next`) → 回到新建表单；若是容器型 → 跳到物品详情；否则 → 回箱子详情。
  On create: *save & add next* → back to new form; container → item detail; else → box detail.
 - 子物品的「保存并添加下一个」同理回到子物品新建表单。
  Sub-item *save & add next* returns to its new-form too.
 > 没有 JSON API / no JSON API：全部返回 HTML（图片路由返回二进制）。FastAPI 的自动 `/docs` 仍可用，但业务路由均是表单驱动的 SSR。
 ---
 ## 6. 图片处理管线 / Image Pipeline (`app/images.py`)
 每次上传都会经过 `process_upload()` → `_prepare_image()` 统一处理：
 Every upload is normalized through `process_upload()` → `_prepare_image()`:
 1. 读取字节，空内容 → 400；非法图片 → 400。Read bytes; empty/invalid → 400.
 2. **按 EXIF 方向矫正**（`ImageOps.exif_transpose`），再处理。Apply EXIF orientation first.
 3. 去元数据并转 RGB（`RGBA/LA` 贴白底、`P` 转 RGB）。Strip metadata, flatten to RGB.
 4. **最长边缩放到 ≤ 1600px**（`thumbnail`）。Downscale longest side to ≤ 1600px.
 5. 存为 **JPEG，质量 80，`optimize=True`**。Save as JPEG q80.
 6. 写入 `image_blob` + 记录 mime / 宽 / 高。Store blob + dimensions.
 **HEIC/HEIF 兜底 / fallback：** 先尝试 `pillow_heif`（若已安装）；否则在 macOS 上用 `sips` 转 JPEG；都不行则返回中文错误提示让用户先转格式。
 Tries `pillow_heif`, then macOS `sips`, else a clear error asking to convert first.
 > 注意 / Note：`pillow_heif` **不在** `requirements.txt` 里，所以默认环境 HEIC 依赖系统 `sips`（仅 macOS）。Linux 容器里上传 HEIC 会得到「请先转换」的提示。
 每个对象**最多一张图**，支持上传 / 替换 / 删除，不支持多图。
 One image per object; upload/replace/delete; no multi-image.
 ---
 ## 7. 全局搜索 / Global Search (`_build_search_results`, `main.py`)
 - `GET /search?q=关键词`，对 `Box / Item / SubItem` 的 `name` 和 `note` 做 **SQLite `LOWER(...) LIKE %q%`** 模糊匹配（大小写不敏感）。
  Case-insensitive `LIKE` over `name` + `note` across all three types.
 - 结果带：类型标签、归属路径（Item 显示所属 Box，SubItem 显示所属 Item + Box）、若有图则带缩略图链接。
  Results include type, location path, and a thumbnail link if an image exists.
 - 无外部搜索引擎、无全文索引。No external search engine / full-text index.
 `/boxes` 概览页另有统计（`_build_boxes_overview_summary`）：箱子数、物品数（含/不含子物品）、每箱平均物品数、每容器型 Item 平均子物品数。
 ---
 ## 8. PWA 支持 / PWA Support
 最小可安装 PWA，**不改 SSR 结构** / minimal installable PWA without changing SSR：
 - 根路径提供 `manifest.webmanifest`（正确 mime）和 `service-worker.js`。
 - `base.html` 注入 theme-color、apple-touch-icon、安装相关 meta，并注册 service worker。
 - 图标：180（apple-touch）、192、512、512-maskable，位于 `app/static/icons/`。
 **当前 service worker 仅做 `skipWaiting` + `clients.claim()`，没有任何缓存/离线能力。**
 The service worker only claims clients — **no caching, no offline** yet.
 `base.html` 里的原生 JS 还实现了：可点击卡片（`.clickable-card[data-href]`，含键盘 Enter/Space 支持）、表单内回车跳到下一个字段。
 ---
 ## 9. 配置与环境变量 / Configuration
 **应用运行时 / App runtime**（`app/config.py`，`Settings` dataclass）：
 | 变量 / Var | 默认 / Default | 说明 |
 | --- | --- | --- |
 | `DATABASE_URL` | `sqlite:///./data/app.db` | 数据库连接串 |
 | `HOST` | `0.0.0.0` | （定义了但 uvicorn 在 CMD 里写死） |
 | `PORT` | `10000` | 同上 |
 **部署时 / Deploy-time**（`.env`，被 shell 脚本 `source`，**非**应用直接读取）：
 `HOST_DOMAIN`、`SSL_PATH`、`APP_DIR`、`BACKUP_DIR`、`BACKUP_REMOTE`、`APP_PORT`、`DATA_DIR`、`DATABASE_URL`、`COMPOSE_PROJECT_NAME`。详见 `.env.example`。
 约定 / Conventions：容器内固定监听 `0.0.0.0:10000`；`APP_PORT` 只控制宿主机暴露端口；SQLite 固定写 `/app/data/app.db`（容器内）。
 ---
 ## 10. 数据库初始化与迁移 / DB Init & Migrations (`app/migrate.py` + `app/db.py`)
 - **Alembic 接管 schema**：迁移系统由 Alembic 管理（`alembic.ini` + `migrations/`），V1 baseline 等于当前三表 schema。
  Alembic owns schema creation and changes (`alembic.ini` + `migrations/`); V1 baseline equals the current three-table schema.
 - **迁移与启动分离 / Migrations separated from startup**：
  - `init_db()`（`app/db.py`）在 FastAPI lifespan 启动时调用 `verify_schema_is_current()`，只做**只读校验**——检查 DB 是否在 `head`，不一致则 **fail-close**（拒绝启动、不执行任何 DDL）。
    `init_db()` calls `verify_schema_is_current()` at startup — read-only check, fails closed on mismatch, no DDL.
  - 实际迁移由独立幂等命令 `python -m app.migrate`（`app/migrate.py`）执行：空库建表、老库认领（stamp V1 → upgrade head）、已在 head 则空操作。老库 schema 不匹配则 fail-close 不改动。
    Actual migration via standalone idempotent command `python -m app.migrate`: fresh DB → create, matching existing → adopt, already-at-head → no-op, mismatch → fail closed.
 - SQLite 连接开启 `PRAGMA foreign_keys=ON`。
 - 手写列同步 `_sync_sqlite_image_columns()` 已退休删除。
  The hand-rolled `_sync_sqlite_image_columns()` has been retired and removed.
 ---
 ## 11. 部署 / Deployment
 **Docker**（`Dockerfile`）：`python:3.12-slim` → 装依赖 → 拷贝 `alembic.ini`、`migrations/`、`app/` → `uvicorn app.main:app --host 0.0.0.0 --port 10000`。
 **Compose**（`docker-compose.yml`）：
 - 镜像固定 `code.wanderingbadger.dev/tliu93/2026-moving-helper:latest`，同时保留 `build:` 用于本地构建。
 - `user: "1000:1000"`，仅 `127.0.0.1:${APP_PORT}:10000` 暴露，`${DATA_DIR}:/app/data` 持久化，`restart: unless-stopped`。
 **一键安装**（`scripts/install.sh`，需 root/sudo 写 nginx）：读 `.env` → 拷 compose/.env/渲染后的 backup 脚本到 `APP_DIR` → 渲染并启用 nginx 站点 → `nginx -t` + reload → `docker compose pull && up -d` → 写每日 `02:10` 备份 cron。无 `.env` 直接退出。
 **轻量更新**（`scripts/deploy.sh`）：`git pull --ff-only` → `docker compose pull web` → `up -d` → 打印状态/日志。
 **nginx 模板**（`scripts/nginx/...`）：80→443 跳转、443 启用 SSL、反代到 `127.0.0.1:${APP_PORT}`、`client_max_body_size 0`。证书由用户自备于 `SSL_PATH`（`fullchain.pem` / `privkey.key`）。
 **备份**（`scripts/backup_db.sh`，模板带占位符由 install 渲染）：用 `sqlite3 .backup` 取事务一致快照（不停容器），文件名带时间戳，**最多保留 5 个**，`BACKUP_REMOTE` 非空时 `rclone sync` 到远端。
 ---
 ## 12. CI / CD（`.github/workflows/`）
 - **`test.yml`（CI）**：任意分支 `push` 触发 → Python 3.12 → 装依赖 → `pytest`。无需外部服务/DB。
 - **`docker-image.yml`（CD）**：`v*` tag 触发；**先校验该 tag 提交可从 `origin/main` 到达**，再 buildx 构建 `linux/amd64` + `linux/arm64`，推 `:${tag}` 和 `:latest`。
 - 需在仓库 Secrets 配 `REGISTRY_USERNAME` / `REGISTRY_TOKEN`（Gitea container registry）。
 发布流程 / Release：`git tag vX.Y.Z && git push origin main --tags`。
 ---
 ## 13. 测试 / Tests
 - `tests/conftest.py`：每个测试用 `tmp_path` 建独立 SQLite，`configure_database(...)` 切换，再 `create_app()` —— **不污染** `data/app.db`，无需 Docker。
  Each test gets an isolated tmp SQLite; never touches dev data.
 - `tests/test_app.py`（约 74 个）：Box/Item/SubItem CRUD、级联删除、404、图片上传/替换/删除/错误路径、EXIF 矫正、图片路由、搜索（name/note/路径/缩略图）、重定向行为、页面结构与 UX 文案、概览统计。
 - `tests/test_notion_import.py`（9 个）：page id 提取、heading/bullet 解析、容器判定、超层级警告、媒体跳过、dry-run 不写库、apply 写库结构。
 运行 / Run：`python -m pytest`。
 ---
 ## 14. Notion 一次性导入 / One-time Notion Import
 `app/notion_import.py` + `scripts/import_notion.py`（交互式 CLI，`--dry-run` / `--apply`）。
 结构映射 / Mapping：`heading_2` → `Box`；其下一级 bullet → `Item`；二级 bullet → `SubItem`（此时父 bullet 自动判为容器型）。更深层级只警告不导入；**不导入任何图片/媒体**。
 定位 / Positioning：**一次性 migration 工具，非长期同步**；建议导入前先备份 `data/app.db`。`NOTION_VERSION = "2026-03-11"`。
 ---
 ## 15. 已知约束 & 下一轮改动建议 / Constraints & Notes for the Next Round
 **当前明确「未实现」/ Explicitly out of scope（见 README）：** 离线缓存/同步、多图、OCR、AI 识别、图片标签/分类、登录鉴权、标签系统、前后端分离、复杂 UI。
 **改动前值得注意的点 / Things to watch before changing things：**
 1. **无鉴权 / No auth.** 设计前提是「可信内网 + nginx HTTPS」。任何要暴露到公网的改动都需先加访问控制。
 2. **迁移须走 Alembic / Schema changes go through Alembic（§10）.** 加新字段到已有库不会自动建列：需新增 Alembic revision 并执行 `python -m app.migrate`；应用启动只做只读校验，库不在 `head` 时会 fail-close 拒绝启动。早先的 `_sync_sqlite_image_columns` 手写列同步已退休，不要再沿用该思路。
 3. **图片存在 SQLite 里 / Images live in the DB.** 好处是备份/迁移只需一个文件；代价是库体积随图增长、备份成本上升。若要支持多图或大图归档，应考虑改为对象存储/文件系统 + 路径引用。
 4. **逻辑高度集中在 `main.py` / Logic concentrated in `main.py`.** 路由、表单解析、查询、统计、搜索都在一个文件。新增大功能时可考虑拆分 router/service 模块，但要保留 `create_app()` 工厂以维持测试隔离。
 5. **Service Worker 是空壳 / SW is a stub.** README 写的「PWA」目前不含离线能力；要做离线需真正实现缓存策略。
 6. **固定 3 级层次 / Fixed 3 levels.** `Box → Item → SubItem` 写死在模型、路由、模板、Notion 解析多处；若要变成可嵌套树，是一次跨层改动。
 7. **HEIC 在 Linux 容器里不可用 / HEIC fails in Linux containers**（`pillow_heif` 未列入依赖，`sips` 仅 macOS）。若用户多用 iPhone 原图，考虑把 `pillow_heif` 加进 `requirements.txt`。
 8. **UI 全中文、SSR、单 `style.css`.** 前端改动直接编辑 `app/templates/*` 与 `app/static/style.css`，无构建步骤。
 ---
 ## 16. 本地快速启动 / Quick Local Start
 ```bash
 python3 -m venv .venv && source .venv/bin/activate
 pip install -r requirements.txt
 uvicorn app.main:app --reload --host 0.0.0.0 --port 10000
 # 打开 / open http://localhost:10000  (默认数据库 / default DB: ./data/app.db)
 python -m pytest        # 跑测试 / run tests
 ```
@@ -0,0 +1 @@
 Generic single-database configuration.
@@ -0,0 +1,80 @@
 from logging.config import fileConfig
 from sqlalchemy import pool
 from sqlalchemy import engine_from_config
 from alembic import context
 # Import Base and models so Alembic can see all tables for autogenerate.
 from app.db import Base
 import app.models  # noqa: F401 — imported for its side effect: registers the ORM models on Base.metadata
 # Alembic Config object for the current run; gives access to the ini options.
 config = context.config
 # Dynamically set sqlalchemy.url from app config (not hardcoded in alembic.ini).
 # When called programmatically via app.migrate.run_migrations(), the URL is
 # already set on the Config object — respect it.  Fall back to get_settings()
 # only when invoked from the ``alembic`` CLI.
 from app.config import get_settings
 if not config.get_main_option("sqlalchemy.url"):
    settings = get_settings()
    config.set_main_option("sqlalchemy.url", settings.database_url)
 # Configure Python logging from the ini file, but only when a config file
 # name is actually set on the Config object.
 if config.config_file_name is not None:
    fileConfig(config.config_file_name)
 # Metadata handed to context.configure() by both migration runners below.
 target_metadata = Base.metadata
 def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode, i.e. as script output.
    The context is configured from the ``sqlalchemy.url`` option alone, so
    no Engine is created here. Calls to ``context.execute()`` made by the
    revisions emit the given SQL string to the script output.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "render_as_batch": True,
    }
    context.configure(**offline_options)
    with context.begin_transaction():
        context.run_migrations()
 def run_migrations_online() -> None:
    """Run migrations in 'online' mode against a live connection.
    An Engine is built from the ``sqlalchemy.``-prefixed options of the
    main ini section (with ``NullPool`` as its pool class), one connection
    is opened from it, and the migrations run inside a transaction on that
    connection.
    """
    section_options = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        section_options,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    with engine.connect() as connection:
        online_options = {
            "connection": connection,
            "target_metadata": target_metadata,
            "render_as_batch": True,
        }
        context.configure(**online_options)
        with context.begin_transaction():
            context.run_migrations()
 # Pick the runner that matches the mode Alembic reports:
 # offline emits SQL as script output, online uses a live connection.
 if context.is_offline_mode():
    run_migrations_offline()
 else:
    run_migrations_online()
@@ -0,0 +1,28 @@
 """${message}
 Revision ID: ${up_revision}
 Revises: ${down_revision | comma,n}
 Create Date: ${create_date}
 """
 from typing import Sequence, Union
 from alembic import op
 import sqlalchemy as sa
 ${imports if imports else ""}
 ## Template comment (Mako "##" lines are not rendered into the output):
 ## the ${...} expressions below are filled in when a revision file is
 ## generated; an empty upgrades/downgrades body falls back to "pass".
 # revision identifiers, used by Alembic.
 revision: str = ${repr(up_revision)}
 down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
 branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
 depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
 def upgrade() -> None:
    """Upgrade schema."""
    ${upgrades if upgrades else "pass"}
 def downgrade() -> None:
    """Downgrade schema."""
    ${downgrades if downgrades else "pass"}
@@ -0,0 +1,96 @@
 """V1 baseline
 Revision ID: 57af90893f55
 Revises: 
 Create Date: 2026-06-01 13:49:15.867487
 """
 from typing import Sequence, Union
 from alembic import op
 import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision: str = '57af90893f55'
 down_revision: Union[str, Sequence[str], None] = None
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Create the V1 baseline schema: ``boxes``, ``items`` and ``subitems``.
    Tables are created parent-first because ``items.box_id`` references
    ``boxes.id`` and ``subitems.parent_item_id`` references ``items.id``
    (both with ``ON DELETE CASCADE``). Each table also gets a non-unique
    index on its ``id`` column.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Top-level containers; the image_* columns hold the optional single image.
    op.create_table('boxes',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('name', sa.String(length=100), nullable=False),
    sa.Column('note', sa.Text(), nullable=True),
    sa.Column('room', sa.String(length=100), nullable=True),
    sa.Column('status', sa.String(length=50), nullable=True),
    sa.Column('image_blob', sa.LargeBinary(), nullable=True),
    sa.Column('image_mime_type', sa.String(length=50), nullable=True),
    sa.Column('image_width', sa.Integer(), nullable=True),
    sa.Column('image_height', sa.Integer(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
    sa.PrimaryKeyConstraint('id')
    )
    with op.batch_alter_table('boxes', schema=None) as batch_op:
        batch_op.create_index(batch_op.f('ix_boxes_id'), ['id'], unique=False)
    # Items belong to exactly one box; deleting the box cascades to its items.
    op.create_table('items',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('box_id', sa.Integer(), nullable=False),
    sa.Column('name', sa.String(length=100), nullable=False),
    sa.Column('note', sa.Text(), nullable=True),
    sa.Column('quantity', sa.Integer(), nullable=True),
    sa.Column('is_container', sa.Boolean(), nullable=False),
    sa.Column('image_blob', sa.LargeBinary(), nullable=True),
    sa.Column('image_mime_type', sa.String(length=50), nullable=True),
    sa.Column('image_width', sa.Integer(), nullable=True),
    sa.Column('image_height', sa.Integer(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
    sa.ForeignKeyConstraint(['box_id'], ['boxes.id'], ondelete='CASCADE'),
    sa.PrimaryKeyConstraint('id')
    )
    with op.batch_alter_table('items', schema=None) as batch_op:
        batch_op.create_index(batch_op.f('ix_items_id'), ['id'], unique=False)
    # Sub-items belong to exactly one item; deleting the item cascades to them.
    op.create_table('subitems',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('parent_item_id', sa.Integer(), nullable=False),
    sa.Column('name', sa.String(length=100), nullable=False),
    sa.Column('note', sa.Text(), nullable=True),
    sa.Column('quantity', sa.Integer(), nullable=True),
    sa.Column('image_blob', sa.LargeBinary(), nullable=True),
    sa.Column('image_mime_type', sa.String(length=50), nullable=True),
    sa.Column('image_width', sa.Integer(), nullable=True),
    sa.Column('image_height', sa.Integer(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
    sa.ForeignKeyConstraint(['parent_item_id'], ['items.id'], ondelete='CASCADE'),
    sa.PrimaryKeyConstraint('id')
    )
    with op.batch_alter_table('subitems', schema=None) as batch_op:
        batch_op.create_index(batch_op.f('ix_subitems_id'), ['id'], unique=False)
    # ### end Alembic commands ###
 def downgrade() -> None:
    """Drop the V1 baseline tables and their ``id`` indexes.
    Tables are removed child-first (``subitems``, ``items``, ``boxes``), the
    reverse of the order in which ``upgrade()`` creates them. For each table
    the ``ix_<table>_id`` index is dropped before the table itself.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    for table_name in ('subitems', 'items', 'boxes'):
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.drop_index(batch_op.f(f'ix_{table_name}_id'))
        op.drop_table(table_name)
    # ### end Alembic commands ###
@@ -0,0 +1,32 @@
 """V2 app_settings
 Revision ID: a1b2c3d4e5f6
 Revises: 57af90893f55
 Create Date: 2026-06-01 14:00:00.000000
 """
 from typing import Sequence, Union
 from alembic import op
 import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision: str = 'a1b2c3d4e5f6'
 down_revision: Union[str, Sequence[str], None] = '57af90893f55'
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Create the ``app_settings`` key/value table.
    ``key`` is a non-null text column and the primary key; ``value`` is a
    nullable text column.
    """
    settings_columns = [
        sa.Column('key', sa.Text(), nullable=False),
        sa.Column('value', sa.Text(), nullable=True),
        sa.PrimaryKeyConstraint('key'),
    ]
    op.create_table('app_settings', *settings_columns)
 def downgrade() -> None:
    """Remove the ``app_settings`` table created by ``upgrade()``."""
    settings_table = 'app_settings'
    op.drop_table(settings_table)
@@ -1,4 +1,6 @@
 [pytest]
 # Add the directory containing this file to sys.path, so the tests'
 # `from app...` imports resolve.
 pythonpath = .
 # Collect tests from tests/ only; never recurse into app/ or .venv/.
 testpaths = tests
 norecursedirs = app .venv
 # Ignore one specific DeprecationWarning raised from fastapi.routing
 # (asyncio.iscoroutinefunction deprecation); all other warnings still surface.
 filterwarnings =
    ignore:'asyncio\.iscoroutinefunction' is deprecated and slated for removal in Python 3\.16; use inspect\.iscoroutinefunction\(\) instead:DeprecationWarning:fastapi\.routing
@@ -2,6 +2,7 @@ fastapi==0.116.1
 uvicorn[standard]==0.35.0
 jinja2==3.1.6
 sqlalchemy==2.0.43
 alembic==1.16.5
 python-multipart==0.0.20
 pillow==11.2.1
 requests==2.32.3
@@ -73,9 +73,8 @@ for backup_file in $(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'app-*.db' | s
 done
 if [ -n "${BACKUP_REMOTE:-}" ]; then
-  remote_target=${BACKUP_REMOTE%/}/$(basename "$FINAL_BACKUP")
+  rclone sync "$BACKUP_DIR" "${BACKUP_REMOTE%/}"
-  rclone copyto "$FINAL_BACKUP" "$remote_target"
+  echo "Backup uploaded to remote: $BACKUP_REMOTE"
  echo "Backup uploaded to remote: $remote_target"
 else
  echo "BACKUP_REMOTE is empty; skipping remote upload"
 fi
@@ -6,6 +6,7 @@ from sqlalchemy.orm import Session
 from app.db import SessionLocal, configure_database
 from app.main import create_app
 from app.migrate import run_migrations
@pytest.fixture
@@ -13,6 +14,9 @@ def client(tmp_path: Path):
    test_db_path = tmp_path / "test.db"
    database_url = f"sqlite:///{test_db_path}"
    # Run migration first so DB is at head before app starts.
    run_migrations(database_url)
    configure_database(database_url)
    app = create_app()
@@ -0,0 +1,707 @@
 """Tests for AI search (Step 3).
 All LLM calls are mocked — CI never touches the network.
 Coverage areas:
 - expand_query JSON output parsing (valid, fenced, prose, bad JSON, timeout)
 - Output contract enforcement (strict JSON array only)
 - Expansion term count cap and length cap
 - ai_search seam function
 - GET /search with ai=1 trigger
 - AI button visibility on search page
 - Graceful degradation on failure
 - ai_search_extra_hints appended to prompt
 - ai_search_enabled toggle
 """
 from unittest.mock import patch
 import httpx
 import pytest
 from app.llm import (
    _MAX_EXPANSION_TERMS,
    _MAX_TERM_LENGTH,
    ExpansionResult,
    LLMResult,
    _parse_json_string_array,
    expand_query,
    is_configured,
 )
 from app.main import _ai_search, _build_search_results
 from app.models import AppSetting, Box, Item, SubItem
 from app.settings_store import LLMConfig, get_app_settings, save_app_settings
 # ---------------------------------------------------------------------------
 # Helper: configure AI search for route tests
 # ---------------------------------------------------------------------------
 # Fully populated LLM configuration with AI search switched on. The
 # base_url / model / api_key values are the same ones that
 # _enable_ai_search() posts to the settings form.
 _AI_CFG = LLMConfig(
    enabled=True,
    base_url="https://api.example.com/v1",
    model="gpt-4o-mini",
    api_key="sk-test-key",
    ai_search_enabled=True,
 )
 def _enable_ai_search(client, db_session):
    """Persist a fully-configured AI search setup via the settings route."""
    client.post(
        "/settings",
        data={
            "enabled": "on",
            "base_url": "https://api.example.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-test-key",
            "ai_search_enabled": "on",
        },
        follow_redirects=False,
    )
 # ---------------------------------------------------------------------------
 # _parse_json_string_array: strict JSON contract enforcement
 # ---------------------------------------------------------------------------
 class TestParseJsonStringArray:
    """Contract tests for ``_parse_json_string_array``.
    The parser must accept only a JSON array of strings (optionally wrapped
    in a Markdown code fence) and return ``[]`` for everything else.
    """
    def test_valid_json_array(self):
        """A plain JSON string array is returned as-is, in order."""
        result = _parse_json_string_array('["炒锅","平底锅","汤锅"]')
        assert result == ["炒锅", "平底锅", "汤锅"]
    def test_json_array_with_code_fence(self):
        """A ```json fenced array is unwrapped before parsing."""
        result = _parse_json_string_array('```json\n["锅","铲子"]\n```')
        assert result == ["锅", "铲子"]
    def test_json_array_with_code_fence_no_lang(self):
        """A fence without a language tag is unwrapped too."""
        result = _parse_json_string_array('```\n["锅","铲子"]\n```')
        assert result == ["锅", "铲子"]
    def test_empty_string_returns_empty(self):
        """Empty and whitespace-only input yield no terms."""
        assert _parse_json_string_array("") == []
        assert _parse_json_string_array("  ") == []
    def test_prose_returns_empty(self):
        """Prose text does NOT become expansion terms — strict contract."""
        assert _parse_json_string_array("I cannot help with that.") == []
    def test_prose_newlines_returns_empty(self):
        """Line-separated prose does NOT become expansion terms."""
        assert _parse_json_string_array("炒锅\n平底锅\n汤锅") == []
    def test_prose_commas_returns_empty(self):
        """Comma-separated prose does NOT become expansion terms."""
        assert _parse_json_string_array("炒锅, 平底锅, 汤锅") == []
    def test_bad_json_returns_empty(self):
        """Invalid JSON returns empty — no fallback."""
        assert _parse_json_string_array("{invalid json") == []
    def test_json_object_returns_empty(self):
        """JSON object (non-array) returns empty."""
        assert _parse_json_string_array('{"terms":["锅","厨具"]}') == []
    def test_json_array_with_numbers_returns_empty(self):
        """Non-string items in array cause rejection — strict contract."""
        assert _parse_json_string_array('[1, 2, 3]') == []
    def test_json_array_with_mixed_types_returns_empty(self):
        """Mixed string/number array is rejected."""
        assert _parse_json_string_array('["锅", 1]') == []
    def test_empty_json_array(self):
        """An empty JSON array is valid and yields no terms."""
        result = _parse_json_string_array('[]')
        assert result == []
    def test_capped_at_max_terms(self):
        """More than _MAX_EXPANSION_TERMS items are truncated."""
        # Size the input from the cap itself so the test keeps exercising
        # truncation whatever value _MAX_EXPANSION_TERMS is set to.
        terms = [f"词{i}" for i in range(_MAX_EXPANSION_TERMS + 5)]
        json_str = "[" + ",".join(f'"{t}"' for t in terms) + "]"
        result = _parse_json_string_array(json_str)
        assert len(result) == _MAX_EXPANSION_TERMS
    def test_long_terms_filtered_out(self):
        """Terms exceeding _MAX_TERM_LENGTH are silently dropped."""
        short = "锅"
        long_term = "A" * (_MAX_TERM_LENGTH + 1)
        json_str = f'["{short}", "{long_term}"]'
        result = _parse_json_string_array(json_str)
        assert result == ["锅"]
    def test_whitespace_stripped(self):
        """Surrounding whitespace is removed from each term."""
        result = _parse_json_string_array('["  锅  ", " 平底锅 "]')
        assert result == ["锅", "平底锅"]
    def test_empty_strings_filtered(self):
        """Empty and whitespace-only array entries are dropped."""
        result = _parse_json_string_array('["锅", "", "  ", "平底锅"]')
        assert result == ["锅", "平底锅"]
 # ---------------------------------------------------------------------------
 # expand_query: prompt, hints, graceful degradation
 # ---------------------------------------------------------------------------
 class TestExpandQueryNew:
    def test_returns_empty_when_not_configured(self):
        cfg = LLMConfig(enabled=False)
        result = expand_query(cfg, "锅")
        assert result.terms == []
        assert result.error is None
    @patch("app.llm._call_chat_completion")
    def test_parses_valid_json_response(self, mock_call):
        mock_call.return_value = {
            "choices": [{"message": {"content": '["炒锅","平底锅","汤锅","厨具"]'}}]
        }
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, "锅")
        assert "炒锅" in result.terms
        assert "平底锅" in result.terms
        assert "厨具" in result.terms
        assert result.error is None
    @patch("app.llm._call_chat_completion")
    def test_handles_json_with_code_fence(self, mock_call):
        mock_call.return_value = {
            "choices": [
                {"message": {"content": '```json\n["炒锅","平底锅"]\n```'}}
            ]
        }
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, "锅")
        assert "炒锅" in result.terms
        assert result.error is None
    @patch("app.llm._call_chat_completion")
    def test_prose_response_returns_empty_no_error(self, mock_call):
        """Prose from model → empty terms, no error (successful call, unparseable output)."""
        mock_call.return_value = {
            "choices": [{"message": {"content": "I cannot help with that."}}]
        }
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, "锅")
        assert result.terms == []
        assert result.error is None
    @patch("app.llm._call_chat_completion")
    def test_json_object_response_returns_empty_no_error(self, mock_call):
        """JSON object (non-array) → empty terms, no error."""
        mock_call.return_value = {
            "choices": [{"message": {"content": '{"terms":["锅","厨具"]}'}}]
        }
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, "锅")
        assert result.terms == []
        assert result.error is None
    @patch("app.llm._call_chat_completion")
    def test_timeout_returns_error(self, mock_call):
        """Timeout → empty terms + error message."""
        mock_call.side_effect = httpx.TimeoutException("timeout")
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, "锅")
        assert result.terms == []
        assert result.error is not None
        assert "超时" in result.error
    @patch("app.llm._call_chat_completion")
    def test_network_error_returns_error(self, mock_call):
        """Network error → empty terms + error message."""
        mock_call.side_effect = httpx.ConnectError("refused")
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, "锅")
        assert result.terms == []
        assert result.error is not None
        assert "无法连接" in result.error
    @patch("app.llm._call_chat_completion")
    def test_http_error_returns_error(self, mock_call):
        """HTTP error → empty terms + error message."""
        mock_call.side_effect = httpx.HTTPStatusError(
            "401",
            request=httpx.Request("POST", "http://x"),
            response=httpx.Response(401),
        )
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, "锅")
        assert result.terms == []
        assert result.error is not None
        assert "错误" in result.error
    @patch("app.llm._call_chat_completion")
    def test_returns_empty_on_empty_choices(self, mock_call):
        """A response with an empty ``choices`` list yields no terms and no error."""
        mock_call.return_value = {"choices": []}
        llm_cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        outcome = expand_query(llm_cfg, "锅")
        assert outcome.terms == []
        assert outcome.error is None
    @patch("app.llm._call_chat_completion")
    def test_extra_hints_appended_to_system_prompt(self, mock_call):
        """Non-empty extra_hints must show up inside the system prompt."""
        hints = "用户物品主要涉及厨房用品"
        mock_call.return_value = {
            "choices": [{"message": {"content": '["扩展词"]'}}]
        }
        llm_cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        expand_query(llm_cfg, "锅", extra_hints=hints)
        # The messages list may have been passed by keyword or as the second
        # positional argument; accept either form.
        positional, keyword = mock_call.call_args
        if "messages" in keyword:
            messages = keyword["messages"]
        else:
            messages = positional[1]
        system_prompt = messages[0]["content"]
        assert "用户物品主要涉及厨房用品" in system_prompt
    @patch("app.llm._call_chat_completion")
    def test_extra_hints_ignored_when_empty(self, mock_call):
        """An empty extra_hints leaves the system prompt identical to the no-hints prompt.

        Checking only for base-prompt fragments would also pass if something
        were appended, so the prompt is compared against the one produced by a
        call that passes no extra_hints at all.
        """
        mock_call.return_value = {
            "choices": [{"message": {"content": '["扩展词"]'}}]
        }
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")

        def last_system_prompt():
            # The messages list may arrive by keyword or as the second positional arg.
            args, kwargs = mock_call.call_args
            messages = kwargs["messages"] if "messages" in kwargs else args[1]
            return messages[0]["content"]

        expand_query(cfg, "锅")
        baseline_prompt = last_system_prompt()
        expand_query(cfg, "锅", extra_hints="")
        system_content = last_system_prompt()
        # Should be the base prompt only
        assert "搬家物品搜索助手" in system_content
        assert "JSON 字符串数组" in system_content
        assert system_content == baseline_prompt
    @patch("app.llm._call_chat_completion")
    def test_temperature_zero_passed(self, mock_call):
        """expand_query must request temperature=0 (passed as a keyword argument)."""
        mock_call.return_value = {
            "choices": [{"message": {"content": '["扩展词"]'}}]
        }
        llm_cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        expand_query(llm_cfg, "锅")
        _positional, keyword = mock_call.call_args
        assert keyword["temperature"] == 0
 # ---------------------------------------------------------------------------
 # _ai_search: seam function
 # ---------------------------------------------------------------------------
 class TestAiSearchSeam:
    """Exercise ``_ai_search`` directly with ``expand_query`` mocked out."""
    @patch("app.main.expand_query")
    def test_returns_expanded_terms_and_results(self, mock_expand, client, db_session):
        """Expanded terms are returned and the result set covers their matches."""
        db_session.add(Box(name="厨房箱", note="装了炒锅和铲子"))
        db_session.commit()
        mock_expand.return_value = ExpansionResult(terms=["炒锅", "平底锅", "汤锅"])
        settings = get_app_settings(db_session)
        expanded, results, error = _ai_search(db_session, settings, "平底锅")

        def is_kitchen_hit(row):
            # Match either on the box name or on its note text.
            return "厨房箱" in row["name"] or "炒锅" in (row.get("note") or "")

        assert "炒锅" in expanded
        assert error is None
        assert len(results) >= 1
        assert any(is_kitchen_hit(row) for row in results)
    @patch("app.main.expand_query")
    def test_includes_original_query_in_search(self, mock_expand, client, db_session):
        """The user's own query still participates in the search alongside expansions."""
        db_session.add(Box(name="冬季衣物箱"))
        db_session.commit()
        mock_expand.return_value = ExpansionResult(terms=["羽绒服"])
        settings = get_app_settings(db_session)
        expanded, results, error = _ai_search(db_session, settings, "衣物")
        assert error is None
        matching_names = [row["name"] for row in results if "冬季衣物箱" in row["name"]]
        assert matching_names
    @patch("app.main.expand_query")
    def test_empty_expansion_returns_normal_results_no_error(self, mock_expand, client, db_session):
        """An empty expansion without error gives plain results and no error."""
        db_session.add(Box(name="书房箱"))
        db_session.commit()
        mock_expand.return_value = ExpansionResult(terms=[])
        settings = get_app_settings(db_session)
        expanded, results, error = _ai_search(db_session, settings, "书房")
        assert expanded == []
        assert error is None
        assert any("书房箱" in row["name"] for row in results)
    @patch("app.main.expand_query")
    def test_llm_failure_returns_normal_results_with_error(self, mock_expand, client, db_session):
        """A failed expansion still returns plain results and surfaces the error text."""
        db_session.add(Box(name="厨房箱", note="装了炒锅"))
        db_session.commit()
        failure = ExpansionResult(terms=[], error="AI 搜索请求超时，请稍后再试。")
        mock_expand.return_value = failure
        settings = get_app_settings(db_session)
        expanded, results, error = _ai_search(db_session, settings, "厨房")
        assert expanded == []
        assert error is not None
        assert "超时" in error
        assert len(results) >= 1
 # ---------------------------------------------------------------------------
 # _build_search_results: multi-keyword support
 # ---------------------------------------------------------------------------
 class TestBuildSearchResultsMultiKeyword:
    """``_build_search_results`` accepts one keyword string or a list of keywords."""
    def test_single_keyword_works_as_before(self, db_session):
        """A plain string keyword returns the single matching box."""
        db_session.add(Box(name="厨房箱"))
        db_session.commit()
        found = _build_search_results(db_session, "厨房")
        assert len(found) == 1
        assert found[0]["name"] == "厨房箱"
    def test_multiple_keywords_match_any(self, db_session):
        """Each keyword in the list contributes its own matches."""
        kitchen = Box(name="厨房箱")
        bedroom = Box(name="卧室箱")
        db_session.add_all([kitchen, bedroom])
        db_session.commit()
        found = _build_search_results(db_session, ["厨房", "卧室"])
        assert len(found) == 2
    def test_multiple_keywords_dedupes_results(self, db_session):
        """A box hit by several keywords is reported exactly once."""
        db_session.add(Box(name="厨房箱", note="装了厨房用品"))
        db_session.commit()
        found = _build_search_results(db_session, ["厨房", "用品"])
        assert len(found) == 1
    def test_empty_keywords_returns_empty(self, db_session):
        """An empty keyword list produces an empty result list."""
        assert _build_search_results(db_session, []) == []
 # ---------------------------------------------------------------------------
 # Routes: GET /search with ai=1
 # ---------------------------------------------------------------------------
 class TestSearchRouteAI:
    """Route-level checks of ``GET /search`` with and without the ``ai=1`` flag.

    The chat-completion call is patched in every test, so no test can reach a
    real LLM endpoint; tests that expect no AI involvement assert that the
    patched call was never made.
    """
    @patch("app.llm._call_chat_completion")
    def test_ai_search_finds_more_results(self, mock_call, client, db_session):
        """Original query misses, but expanded term finds items."""
        box = Box(name="杂物箱")
        item = Item(name="炒锅", box=box, is_container=False)
        db_session.add_all([box, item])
        db_session.commit()
        mock_call.return_value = {
            "choices": [{"message": {"content": '["炒锅","平底锅","汤锅"]'}}]
        }
        _enable_ai_search(client, db_session)
        # Normal search for "平底锅" — no results
        response = client.get("/search?q=平底锅")
        assert "没有找到匹配结果" in response.text
        # AI search for "平底锅" — finds "炒锅" via expansion
        response = client.get("/search?q=平底锅&ai=1")
        assert response.status_code == 200
        assert "炒锅" in response.text
        assert "AI 帮你扩展了" in response.text
    @patch("app.llm._call_chat_completion")
    def test_ai_search_includes_original_results(self, mock_call, client, db_session):
        """AI search should also include results from original query."""
        box = Box(name="厨房箱")
        item1 = Item(name="锅铲", box=box, is_container=False)
        item2 = Item(name="平底锅", box=box, is_container=False)
        db_session.add_all([box, item1, item2])
        db_session.commit()
        mock_call.return_value = {
            "choices": [{"message": {"content": '["炒锅","汤锅"]'}}]
        }
        _enable_ai_search(client, db_session)
        response = client.get("/search?q=锅&ai=1")
        assert response.status_code == 200
        # Original result "平底锅" should still be there
        assert "平底锅" in response.text
    @patch("app.llm._call_chat_completion")
    def test_ai_search_shows_expansion_banner(self, mock_call, client, db_session):
        """When AI search is activated, a banner shows expanded terms."""
        box = Box(name="厨房箱")
        db_session.add(box)
        db_session.commit()
        mock_call.return_value = {
            "choices": [{"message": {"content": '["炒锅","平底锅"]'}}]
        }
        _enable_ai_search(client, db_session)
        response = client.get("/search?q=锅&ai=1")
        assert response.status_code == 200
        assert "AI 帮你扩展了" in response.text
        assert "炒锅" in response.text
    @patch("app.llm._call_chat_completion")
    def test_ai_search_without_flag_does_normal_search(self, mock_call, client, db_session):
        """Without ai=1, search behaves normally even when AI is configured."""
        box = Box(name="厨房箱")
        db_session.add(box)
        db_session.commit()
        _enable_ai_search(client, db_session)
        # Forget anything recorded while enabling so only the search request is judged.
        mock_call.reset_mock()
        response = client.get("/search?q=厨房")
        assert response.status_code == 200
        assert "厨房箱" in response.text
        assert "AI 帮你扩展了" not in response.text
        # The LLM must not be contacted when the flag is absent.
        mock_call.assert_not_called()
    @patch("app.llm._call_chat_completion")
    def test_ai_search_without_configuration_ignores_flag(self, mock_call, client, db_session):
        """ai=1 is ignored when AI is not configured."""
        box = Box(name="厨房箱")
        db_session.add(box)
        db_session.commit()
        response = client.get("/search?q=厨房&ai=1")
        assert response.status_code == 200
        assert "厨房箱" in response.text
        assert "AI 帮你扩展了" not in response.text
        mock_call.assert_not_called()
    @patch("app.llm._call_chat_completion")
    def test_ai_search_graceful_degradation_on_llm_failure(self, mock_call, client, db_session):
        """LLM failure (timeout) → normal results + friendly error banner."""
        box = Box(name="厨房箱", note="装了炒锅")
        db_session.add(box)
        db_session.commit()
        # The patched call raises a timeout; the route must still answer 200.
        mock_call.side_effect = httpx.TimeoutException("timeout")
        _enable_ai_search(client, db_session)
        response = client.get("/search?q=厨房&ai=1")
        assert response.status_code == 200
        assert "厨房箱" in response.text
        # Should show error banner — timeout is a real failure
        assert "超时" in response.text or "不可用" in response.text
    @patch("app.llm._call_chat_completion")
    def test_ai_search_empty_query_does_nothing(self, mock_call, client, db_session):
        """ai=1 with empty query does not trigger AI."""
        _enable_ai_search(client, db_session)
        # Forget anything recorded while enabling so only the search request is judged.
        mock_call.reset_mock()
        response = client.get("/search?ai=1")
        assert response.status_code == 200
        assert "AI 帮你扩展了" not in response.text
        # With no query there is nothing to expand, so the LLM must stay untouched.
        mock_call.assert_not_called()
    @patch("app.llm._call_chat_completion")
    def test_ai_search_disabled_ignores_flag(self, mock_call, client, db_session):
        """ai=1 is ignored when ai_search_enabled is False."""
        box = Box(name="厨房箱")
        db_session.add(box)
        db_session.commit()
        # Enable LLM but NOT ai_search_enabled
        client.post(
            "/settings",
            data={
                "enabled": "on",
                "base_url": "https://api.example.com/v1",
                "model": "gpt-4o-mini",
                "api_key": "sk-test-key",
            },
            follow_redirects=False,
        )
        response = client.get("/search?q=厨房&ai=1")
        assert response.status_code == 200
        assert "厨房箱" in response.text
        assert "AI 帮你扩展了" not in response.text
        mock_call.assert_not_called()
 # ---------------------------------------------------------------------------
 # Button visibility on search page
 # ---------------------------------------------------------------------------
 class TestAIButtonVisibility:
    """When the AI search button (or its status text) appears on the search page."""
    @patch("app.llm._call_chat_completion")
    def test_button_visible_when_configured_and_enabled(self, mock_call, client, db_session):
        """With AI search enabled and a query present, the button is rendered."""
        _enable_ai_search(client, db_session)
        page = client.get("/search?q=测试")
        assert page.status_code == 200
        assert "AI 智能搜索" in page.text
    def test_button_hidden_when_not_configured(self, client, db_session):
        """With no LLM settings saved, the button is absent."""
        page = client.get("/search?q=测试")
        assert page.status_code == 200
        assert "AI 智能搜索" not in page.text
    def test_button_hidden_when_ai_search_disabled(self, client, db_session):
        """LLM settings saved without ai_search_enabled keep the button hidden."""
        llm_only_form = {
            "enabled": "on",
            "base_url": "https://api.example.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-test-key",
        }
        client.post("/settings", data=llm_only_form, follow_redirects=False)
        page = client.get("/search?q=测试")
        assert "AI 智能搜索" not in page.text
    @patch("app.llm._call_chat_completion")
    def test_button_hidden_on_empty_query(self, mock_call, client, db_session):
        """Without a query string there is nothing to expand, so no button."""
        _enable_ai_search(client, db_session)
        page = client.get("/search")
        assert "AI 智能搜索" not in page.text
    @patch("app.llm._call_chat_completion")
    def test_button_link_includes_current_query(self, mock_call, client, db_session):
        """The button's link carries both ai=1 and the current query."""
        from urllib.parse import quote
        _enable_ai_search(client, db_session)
        page = client.get("/search?q=锅")
        assert page.status_code == 200
        assert "ai=1" in page.text
        # The template may emit the query percent-encoded or verbatim.
        encoded_form = f"q={quote('锅')}"
        raw_form = "q=锅"
        assert encoded_form in page.text or raw_form in page.text
    @patch("app.llm._call_chat_completion")
    def test_no_button_when_ai_already_activated(self, mock_call, client, db_session):
        """An ai=1 request shows the "already enabled" status text."""
        mock_call.return_value = {
            "choices": [{"message": {"content": '["炒锅"]'}}]
        }
        _enable_ai_search(client, db_session)
        page = client.get("/search?q=锅&ai=1")
        assert page.status_code == 200
        assert "AI 搜索已启用" in page.text
 # ---------------------------------------------------------------------------
 # Settings: ai_search_extra_hints
 # ---------------------------------------------------------------------------
 class TestExtraHintsSettings:
    """Persistence and settings-page rendering of the AI search options."""
    def test_extra_hints_defaults_to_empty(self, db_session):
        """Fresh settings carry an empty extra-hints string."""
        assert get_app_settings(db_session).ai_search_extra_hints == ""
    def test_save_extra_hints(self, db_session):
        """A saved hint string is read back unchanged."""
        save_app_settings(db_session, ai_search_extra_hints="用户物品主要涉及厨房")
        stored = get_app_settings(db_session)
        assert stored.ai_search_extra_hints == "用户物品主要涉及厨房"
    def test_save_extra_hints_empty_string(self, db_session):
        """Saving an empty string clears a previously stored hint."""
        for value in ("厨房用品", ""):
            save_app_settings(db_session, ai_search_extra_hints=value)
        stored = get_app_settings(db_session)
        assert stored.ai_search_extra_hints == ""
    def test_settings_page_has_extra_hints_textarea(self, client):
        """The settings page exposes the extra-hints field and its label."""
        page = client.get("/settings")
        assert page.status_code == 200
        assert 'name="ai_search_extra_hints"' in page.text
        assert "额外领域提示" in page.text
    def test_settings_page_has_ai_search_checkbox(self, client):
        """The settings page exposes the AI search toggle and its label."""
        page = client.get("/settings")
        assert page.status_code == 200
        assert 'name="ai_search_enabled"' in page.text
        assert "启用 AI 智能搜索" in page.text
    def test_save_ai_search_settings_via_route(self, client, db_session):
        """Posting the form stores both the toggle and the hint text."""
        form = {
            "enabled": "on",
            "base_url": "https://api.example.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-key",
            "ai_search_enabled": "on",
            "ai_search_extra_hints": "用户物品主要涉及厨房用品",
        }
        client.post("/settings", data=form, follow_redirects=False)
        stored = get_app_settings(db_session)
        assert stored.ai_search_enabled is True
        assert stored.ai_search_extra_hints == "用户物品主要涉及厨房用品"
    def test_save_preserves_extra_hints_on_other_changes(self, client, db_session):
        """A later save that changes the model and sends a blank key keeps the hints."""
        first_form = {
            "enabled": "on",
            "base_url": "https://api.example.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-key",
            "ai_search_enabled": "on",
            "ai_search_extra_hints": "厨房用品和电子产品",
        }
        # Second submission: same fields, new model, blank API key.
        second_form = {**first_form, "model": "gpt-4o", "api_key": ""}
        for form in (first_form, second_form):
            client.post("/settings", data=form, follow_redirects=False)
        stored = get_app_settings(db_session)
        assert stored.ai_search_extra_hints == "厨房用品和电子产品"
        assert stored.model == "gpt-4o"
 # ---------------------------------------------------------------------------
 # Regression: existing features still work without AI
 # ---------------------------------------------------------------------------
 class TestRegressionWithoutAI:
    """Plain search keeps working when no AI settings exist."""
    def test_normal_search_still_works(self, client, db_session):
        """A keyword query lists the matching box."""
        db_session.add(Box(name="测试箱"))
        db_session.commit()
        page = client.get("/search?q=测试")
        assert page.status_code == 200
        assert "测试箱" in page.text
    def test_search_page_no_results(self, client):
        """A query with no matches shows the no-results message."""
        page = client.get("/search?q=不存在")
        assert "没有找到匹配结果" in page.text
    def test_search_empty_query(self, client):
        """Opening the page without a query shows the input prompt."""
        page = client.get("/search")
        assert "输入关键词后" in page.text
@@ -0,0 +1,661 @@
 """Tests for the Alembic migration wrapper (app.migrate).
 Covers:
 - Fresh DB: empty → upgrade head (tables created, version at head)
 - Unmanaged DB (2a): has tables + matches baseline → stamp V1 → upgrade head
 - Unmanaged DB (2b): schema mismatch → fail-close, no changes
 - Non-empty non-app DB (rogue tables) → unmanaged → fail-close
 - Managed DB: already at head → upgrade head is a no-op
 - verify_schema_is_current: pass when at head, fail-close otherwise
 - verify_schema_is_current: no write side-effects (no file creation)
 - init_db startup: fail-close when DB not at head, pass when at head
 - Data preservation: adoption does not lose existing data
 - Schema correctness: tables match the ORM model definitions
 - V1_REVISION constant matches the actual revision in versions/
 - _detect_db_state correctly identifies all three states
 - _schema_matches_baseline checks FK, indexes, PK, types — not just column names
 - CLI entry point: python -m app.migrate
 """
 from pathlib import Path
 import shutil
 import pytest
 from sqlalchemy import create_engine, inspect, text
 import app.models  # noqa: F401 — register models on Base.metadata
 from app.db import Base, SessionLocal, configure_database
 from app.migrate import (
    V1_REVISION,
    _detect_db_state,
    _make_alembic_config,
    run_migrations,
    verify_schema_is_current,
 )
 from app.main import create_app
 from fastapi.testclient import TestClient
 def _get_head_revision() -> str:
    """Return the head revision id found in the Alembic migration scripts."""
    from alembic.script import ScriptDirectory
    # Only the script directory is consulted, so the database URL is a placeholder.
    alembic_cfg = _make_alembic_config("sqlite:///")
    return ScriptDirectory.from_config(alembic_cfg).get_current_head()
 # Head revision, resolved once when this module is imported; tests compare the
 # version stored in alembic_version against it.
 HEAD_REVISION = _get_head_revision()
@pytest.fixture()
 def tmp_db_path(tmp_path):
    """Provide a temporary SQLite database path."""
    return tmp_path / "test.db"
@pytest.fixture()
 def tmp_db_url(tmp_db_path):
    """Provide a temporary SQLite database URL."""
    return f"sqlite:///{tmp_db_path}"
 # ---------------------------------------------------------------------------
 # Fresh DB: empty → upgrade head
 # ---------------------------------------------------------------------------
 class TestFreshDBMigration:
    """Running migrations against an empty database builds the schema at head."""
    @staticmethod
    def _migrated_table_names(db_url):
        """Migrate ``db_url`` and return the set of table names it then contains."""
        run_migrations(db_url)
        engine = create_engine(db_url)
        names = set(inspect(engine).get_table_names())
        engine.dispose()
        return names
    @staticmethod
    def _migrated_column_names(db_url, table):
        """Migrate ``db_url`` and return the set of column names of ``table``."""
        run_migrations(db_url)
        engine = create_engine(db_url)
        names = {column["name"] for column in inspect(engine).get_columns(table)}
        engine.dispose()
        return names
    def test_creates_all_tables(self, tmp_db_url):
        """All four application tables exist after migration."""
        present = self._migrated_table_names(tmp_db_url)
        assert "boxes" in present
        assert "items" in present
        assert "subitems" in present
        assert "app_settings" in present
    def test_creates_alembic_version_table(self, tmp_db_url):
        """Alembic's bookkeeping table exists after migration."""
        assert "alembic_version" in self._migrated_table_names(tmp_db_url)
    def test_version_at_head(self, tmp_db_url):
        """The stored version number equals the head revision of the scripts."""
        run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        with engine.begin() as conn:
            stored = conn.execute(text("SELECT version_num FROM alembic_version")).scalar()
        engine.dispose()
        assert stored == HEAD_REVISION
    def test_boxes_table_has_all_columns(self, tmp_db_url):
        """``boxes`` has exactly the expected column set."""
        wanted = {
            "id", "name", "note", "room", "status",
            "image_blob", "image_mime_type", "image_width", "image_height",
            "created_at", "updated_at",
        }
        assert self._migrated_column_names(tmp_db_url, "boxes") == wanted
    def test_items_table_has_all_columns(self, tmp_db_url):
        """``items`` has exactly the expected column set."""
        wanted = {
            "id", "box_id", "name", "note", "quantity", "is_container",
            "image_blob", "image_mime_type", "image_width", "image_height",
            "created_at", "updated_at",
        }
        assert self._migrated_column_names(tmp_db_url, "items") == wanted
    def test_subitems_table_has_all_columns(self, tmp_db_url):
        """``subitems`` has exactly the expected column set."""
        wanted = {
            "id", "parent_item_id", "name", "note", "quantity",
            "image_blob", "image_mime_type", "image_width", "image_height",
            "created_at", "updated_at",
        }
        assert self._migrated_column_names(tmp_db_url, "subitems") == wanted
    def test_foreign_keys_exist(self, tmp_db_url):
        """``items`` and ``subitems`` each carry one FK on the expected column."""
        run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        fks_of_items = inspect(engine).get_foreign_keys("items")
        fks_of_subitems = inspect(engine).get_foreign_keys("subitems")
        engine.dispose()
        assert len(fks_of_items) == 1
        assert fks_of_items[0]["constrained_columns"] == ["box_id"]
        assert len(fks_of_subitems) == 1
        assert fks_of_subitems[0]["constrained_columns"] == ["parent_item_id"]
    def test_indexes_exist(self, tmp_db_url):
        """Each table has its ``ix_<table>_id`` index."""
        run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        indexes_by_table = {}
        for table in ("boxes", "items", "subitems"):
            indexes_by_table[table] = inspect(engine).get_indexes(table)
        engine.dispose()
        assert any("ix_boxes_id" in ix["name"] for ix in indexes_by_table["boxes"])
        assert any("ix_items_id" in ix["name"] for ix in indexes_by_table["items"])
        assert any("ix_subitems_id" in ix["name"] for ix in indexes_by_table["subitems"])
 # ---------------------------------------------------------------------------
 # Unmanaged DB adoption — 2a: matches baseline
 # ---------------------------------------------------------------------------
 class TestUnmanagedDBAdoption2a:
    """An unmanaged database whose tables match the baseline is adopted and upgraded."""
    def _create_old_db(self, db_url: str) -> None:
        """Build a pre-Alembic database: the three V1 tables plus one box and one item."""
        engine = create_engine(db_url)
        # app_settings is deliberately left out: only boxes/items/subitems are V1.
        v1_tables = ("boxes", "items", "subitems")
        for name in v1_tables:
            Base.metadata.tables[name].create(bind=engine)
        insert_box = text(
            "INSERT INTO boxes (name, room, status, created_at, updated_at) "
            "VALUES ('Kitchen Box', 'Kitchen', 'packed', '2026-01-01 00:00:00', '2026-01-01 00:00:00')"
        )
        insert_item = text(
            "INSERT INTO items (box_id, name, quantity, is_container, created_at, updated_at) "
            "VALUES (1, 'Plates', 4, 0, '2026-01-01 00:00:00', '2026-01-01 00:00:00')"
        )
        with engine.begin() as conn:
            conn.execute(insert_box)
            conn.execute(insert_item)
        engine.dispose()
    def test_stamp_and_upgrade(self, tmp_db_url):
        """The old database is seen as unmanaged, then ends up at head."""
        self._create_old_db(tmp_db_url)
        assert _detect_db_state(tmp_db_url) == "unmanaged"
        run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        with engine.begin() as conn:
            stored = conn.execute(text("SELECT version_num FROM alembic_version")).scalar()
        engine.dispose()
        assert stored == HEAD_REVISION
    def test_data_preserved_after_adoption(self, tmp_db_url):
        """Rows inserted before adoption are still present afterwards."""
        self._create_old_db(tmp_db_url)
        run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        with engine.begin() as conn:
            boxes_total = conn.execute(text("SELECT COUNT(*) FROM boxes")).scalar()
            items_total = conn.execute(text("SELECT COUNT(*) FROM items")).scalar()
            first_box_name = conn.execute(text("SELECT name FROM boxes WHERE id = 1")).scalar()
        engine.dispose()
        assert boxes_total == 1
        assert items_total == 1
        assert first_box_name == "Kitchen Box"
    def test_no_extra_tables_beyond_migrations(self, tmp_db_url):
        """After adoption the database holds exactly the migrated tables."""
        self._create_old_db(tmp_db_url)
        run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        present = set(inspect(engine).get_table_names())
        engine.dispose()
        assert present == {"alembic_version", "boxes", "items", "subitems", "app_settings"}
    def test_adoption_is_idempotent(self, tmp_db_url):
        """A second run_migrations call neither fails nor duplicates rows."""
        self._create_old_db(tmp_db_url)
        for _attempt in range(2):
            run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        with engine.begin() as conn:
            boxes_total = conn.execute(text("SELECT COUNT(*) FROM boxes")).scalar()
            stored = conn.execute(text("SELECT version_num FROM alembic_version")).scalar()
        engine.dispose()
        assert boxes_total == 1
        assert stored == HEAD_REVISION
 # ---------------------------------------------------------------------------
 # Unmanaged DB — 2b: schema mismatch → fail-close
 # ---------------------------------------------------------------------------
 class TestUnmanagedDBMismatch2b:
    """Database with schema not matching baseline → fail-close, no changes."""
    def _create_mismatched_db(self, db_url: str) -> None:
        """Create a DB that has tables but with wrong columns (missing image cols).

        One row is inserted into ``boxes`` so tests can confirm data survives.
        """
        eng = create_engine(db_url)
        with eng.begin() as conn:
            conn.execute(text(
                "CREATE TABLE boxes ("
                "id INTEGER PRIMARY KEY, name TEXT NOT NULL, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            conn.execute(text(
                "CREATE TABLE items ("
                "id INTEGER PRIMARY KEY, box_id INTEGER NOT NULL, name TEXT NOT NULL, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            conn.execute(text(
                "CREATE TABLE subitems ("
                "id INTEGER PRIMARY KEY, parent_item_id INTEGER NOT NULL, name TEXT NOT NULL, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            conn.execute(text(
                "INSERT INTO boxes (name, created_at, updated_at) "
                "VALUES ('Bad Box', '2026-01-01 00:00:00', '2026-01-01 00:00:00')"
            ))
        eng.dispose()
    def test_fail_close_on_mismatch(self, tmp_db_url):
        """A mismatched DB is detected as unmanaged and run_migrations exits."""
        self._create_mismatched_db(tmp_db_url)
        assert _detect_db_state(tmp_db_url) == "unmanaged"
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)
    def test_db_unchanged_after_fail_close(self, tmp_db_url):
        """After the fail-close exit, no version table exists and the row is intact."""
        self._create_mismatched_db(tmp_db_url)
        with pytest.raises(SystemExit):
            run_migrations(tmp_db_url)
        # DB should be completely unchanged
        eng = create_engine(tmp_db_url)
        tables = set(inspect(eng).get_table_names())
        with eng.begin() as conn:
            count = conn.execute(text("SELECT COUNT(*) FROM boxes")).scalar()
        # Dispose before asserting so a failing assertion cannot leave the engine open.
        eng.dispose()
        assert "alembic_version" not in tables
        assert count == 1  # original data still there
    def test_extra_table_causes_fail_close(self, tmp_db_url):
        """A DB with the correct tables PLUS an extra one should fail."""
        eng = create_engine(tmp_db_url)
        # Create only the V1 baseline tables (not app_settings) so that the
        # rogue table is the sole deviation from the baseline under test.
        for table_name in ("boxes", "items", "subitems"):
            Base.metadata.tables[table_name].create(bind=eng)
        with eng.begin() as conn:
            conn.execute(text("CREATE TABLE rogue_table (id INTEGER PRIMARY KEY)"))
        eng.dispose()
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)
    def test_missing_fk_causes_fail_close(self, tmp_db_url):
        """Tables with correct columns but missing FK should fail."""
        eng = create_engine(tmp_db_url)
        with eng.begin() as conn:
            conn.execute(text(
                "CREATE TABLE boxes (id INTEGER PRIMARY KEY, name VARCHAR(100) NOT NULL, "
                "note TEXT, room VARCHAR(100), status VARCHAR(50), "
                "image_blob BLOB, image_mime_type VARCHAR(50), "
                "image_width INTEGER, image_height INTEGER, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            conn.execute(text(
                "CREATE TABLE items (id INTEGER PRIMARY KEY, box_id INTEGER NOT NULL, "
                "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, "
                "is_container BOOLEAN NOT NULL DEFAULT 0, "
                "image_blob BLOB, image_mime_type VARCHAR(50), "
                "image_width INTEGER, image_height INTEGER, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            conn.execute(text(
                "CREATE TABLE subitems (id INTEGER PRIMARY KEY, parent_item_id INTEGER NOT NULL, "
                "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, "
                "image_blob BLOB, image_mime_type VARCHAR(50), "
                "image_width INTEGER, image_height INTEGER, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
        eng.dispose()
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)
    def test_missing_index_causes_fail_close(self, tmp_db_url):
        """Tables with correct columns and FK but missing index should fail."""
        eng = create_engine(tmp_db_url)
        with eng.begin() as conn:
            conn.execute(text(
                "CREATE TABLE boxes (id INTEGER PRIMARY KEY, name VARCHAR(100) NOT NULL, "
                "note TEXT, room VARCHAR(100), status VARCHAR(50), "
                "image_blob BLOB, image_mime_type VARCHAR(50), "
                "image_width INTEGER, image_height INTEGER, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            conn.execute(text(
                "CREATE TABLE items (id INTEGER PRIMARY KEY, box_id INTEGER NOT NULL, "
                "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, "
                "is_container BOOLEAN NOT NULL DEFAULT 0, "
                "image_blob BLOB, image_mime_type VARCHAR(50), "
                "image_width INTEGER, image_height INTEGER, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL, "
                "FOREIGN KEY(box_id) REFERENCES boxes(id) ON DELETE CASCADE)"
            ))
            conn.execute(text(
                "CREATE TABLE subitems (id INTEGER PRIMARY KEY, parent_item_id INTEGER NOT NULL, "
                "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, "
                "image_blob BLOB, image_mime_type VARCHAR(50), "
                "image_width INTEGER, image_height INTEGER, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL, "
                "FOREIGN KEY(parent_item_id) REFERENCES items(id) ON DELETE CASCADE)"
            ))
            # No indexes created — should fail
        eng.dispose()
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)
 # ---------------------------------------------------------------------------
 # Non-empty non-app DB (rogue tables) — treated as unmanaged, fail-close
 # ---------------------------------------------------------------------------
 class TestRogueDatabase:
    """Databases holding only unrelated tables count as unmanaged and fail closed."""
    def test_rogue_table_detected_as_unmanaged(self, tmp_db_url):
        """With rogue_table as its only table, the DB state is 'unmanaged' rather than 'empty'."""
        engine = create_engine(tmp_db_url)
        with engine.begin() as connection:
            connection.execute(text("CREATE TABLE rogue_table (id INTEGER PRIMARY KEY)"))
        engine.dispose()
        detected_state = _detect_db_state(tmp_db_url)
        assert detected_state == "unmanaged"
    def test_rogue_table_migration_fails_closed(self, tmp_db_url):
        """run_migrations exits on a rogue DB and leaves its set of tables untouched."""
        setup_engine = create_engine(tmp_db_url)
        with setup_engine.begin() as connection:
            connection.execute(text("CREATE TABLE rogue_table (id INTEGER PRIMARY KEY)"))
        setup_engine.dispose()
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)
        # Re-open the database and confirm the failed run added nothing.
        check_engine = create_engine(tmp_db_url)
        table_names = set(inspect(check_engine).get_table_names())
        check_engine.dispose()
        assert table_names == {"rogue_table"}  # the rogue table is still the only one
        assert "boxes" not in table_names
        assert "alembic_version" not in table_names
 # ---------------------------------------------------------------------------
 # Managed DB (already at head)
 # ---------------------------------------------------------------------------
 class TestManagedDBMigration:
    """Running migrations again on an already-managed database keeps it at head."""
    def test_upgrade_head_is_noop(self, tmp_db_url):
        # Initial run builds the schema and leaves the DB in the "managed" state.
        run_migrations(tmp_db_url)
        assert _detect_db_state(tmp_db_url) == "managed"
        # Repeat run: the recorded revision must still be the head revision.
        run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        with engine.begin() as connection:
            result = connection.execute(text("SELECT version_num FROM alembic_version"))
            stamped_revision = result.scalar()
        engine.dispose()
        assert stamped_revision == HEAD_REVISION
 # ---------------------------------------------------------------------------
 # _detect_db_state
 # ---------------------------------------------------------------------------
 class TestDetectDBState:
    """_detect_db_state classification: 'empty', 'unmanaged' or 'managed'."""
    def test_empty_db(self, tmp_db_url):
        state = _detect_db_state(tmp_db_url)
        assert state == "empty"
    def test_unmanaged_db(self, tmp_db_url):
        # Tables come straight from the ORM metadata, not from run_migrations.
        engine = create_engine(tmp_db_url)
        Base.metadata.create_all(bind=engine)
        engine.dispose()
        state = _detect_db_state(tmp_db_url)
        assert state == "unmanaged"
    def test_managed_db(self, tmp_db_url):
        run_migrations(tmp_db_url)
        state = _detect_db_state(tmp_db_url)
        assert state == "managed"
    def test_rogue_table_is_unmanaged(self, tmp_db_url):
        """A DB that has some table but no alembic_version table is 'unmanaged'."""
        engine = create_engine(tmp_db_url)
        with engine.begin() as connection:
            connection.execute(text("CREATE TABLE something (id INTEGER)"))
        engine.dispose()
        state = _detect_db_state(tmp_db_url)
        assert state == "unmanaged"
 # ---------------------------------------------------------------------------
 # verify_schema_is_current (read-only startup check)
 # ---------------------------------------------------------------------------
 class TestVerifySchemaIsCurrent:
    """Tests for verify_schema_is_current, the check-only startup verification."""
    def test_passes_when_at_head(self, tmp_db_url):
        run_migrations(tmp_db_url)
        # Returning without an exception is the whole assertion here.
        verify_schema_is_current(tmp_db_url)
    def test_fails_on_empty_db(self, tmp_db_url):
        with pytest.raises(RuntimeError, match="empty"):
            verify_schema_is_current(tmp_db_url)
    def test_fails_on_unmanaged_db(self, tmp_db_url):
        engine = create_engine(tmp_db_url)
        Base.metadata.create_all(bind=engine)
        engine.dispose()
        with pytest.raises(RuntimeError, match="alembic_version"):
            verify_schema_is_current(tmp_db_url)
    def test_fails_on_wrong_revision(self, tmp_db_url):
        """A DB re-stamped with a bogus revision is rejected with that revision in the error."""
        run_migrations(tmp_db_url)
        restamp_statements = (
            "DELETE FROM alembic_version",
            "INSERT INTO alembic_version VALUES ('fake_old_rev')",
        )
        engine = create_engine(tmp_db_url)
        with engine.begin() as connection:
            for statement in restamp_statements:
                connection.execute(text(statement))
        engine.dispose()
        with pytest.raises(RuntimeError, match="fake_old_rev"):
            verify_schema_is_current(tmp_db_url)
    def test_does_not_modify_db(self, tmp_db_url):
        """A failed verification of an empty DB leaves it without any tables."""
        with pytest.raises(RuntimeError):
            verify_schema_is_current(tmp_db_url)
        engine = create_engine(tmp_db_url)
        table_names = set(inspect(engine).get_table_names())
        engine.dispose()
        assert table_names == set()  # nothing was created
    def test_no_file_creation_for_missing_sqlite(self, tmp_path):
        """A missing SQLite file is reported and neither it nor its directory gets created."""
        missing_dir = tmp_path / "nonexistent"
        missing_path = missing_dir / "missing.db"
        db_url = f"sqlite:///{missing_path}"
        with pytest.raises(RuntimeError, match="does not exist"):
            verify_schema_is_current(db_url)
        assert not missing_path.exists()
        assert not missing_dir.exists()
 # ---------------------------------------------------------------------------
 # V1_REVISION constant
 # ---------------------------------------------------------------------------
 class TestV1RevisionConstant:
    """Checks that V1_REVISION identifies the real baseline migration file."""
    @staticmethod
    def _find_revision_files():
        """Locate migration files whose name contains V1_REVISION.

        Returns:
            A ``(versions_dir, revision_files)`` tuple: the ``migrations/versions``
            directory two levels above this file, and the sorted list of ``*.py``
            files in it whose name contains V1_REVISION.
        """
        versions_dir = Path(__file__).resolve().parent.parent / "migrations" / "versions"
        # sorted() keeps the failure message stable across file systems.
        return versions_dir, sorted(versions_dir.glob(f"*{V1_REVISION}*.py"))
    def test_revision_file_exists(self):
        """V1_REVISION must point to an actual migration file."""
        versions_dir, revision_files = self._find_revision_files()
        assert len(revision_files) == 1, (
            f"Expected exactly one file matching revision {V1_REVISION} "
            f"in {versions_dir}, found: {revision_files}"
        )
    def test_revision_matches_baseline(self):
        """V1_REVISION must be the baseline (no down_revision)."""
        import importlib.util
        versions_dir, revision_files = self._find_revision_files()
        assert len(revision_files) == 1, (
            f"Expected exactly one file matching revision {V1_REVISION} "
            f"in {versions_dir}, found: {revision_files}"
        )
        spec = importlib.util.spec_from_file_location("v1_migration", revision_files[0])
        # spec_from_file_location may return None (and a spec may lack a loader);
        # fail with a readable message instead of an AttributeError below.
        assert spec is not None and spec.loader is not None, (
            f"Could not build an import spec for {revision_files[0]}"
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        assert mod.down_revision is None, "V1 baseline must have down_revision = None"
        assert mod.revision == V1_REVISION
 # ---------------------------------------------------------------------------
 # Integration: init_db startup verification
 # ---------------------------------------------------------------------------
 class TestInitDBStartupVerify:
    """Startup-time schema verification as seen through create_app and TestClient."""
    def test_app_starts_when_db_at_head(self, tmp_path):
        """With the DB migrated first, the app starts and GET /boxes answers 200."""
        database_url = f"sqlite:///{tmp_path / 'integration.db'}"
        run_migrations(database_url)
        configure_database(database_url)
        with TestClient(create_app()) as client:
            boxes_page = client.get("/boxes", follow_redirects=False)
            assert boxes_page.status_code == 200
    def test_init_db_fails_on_empty_db(self, tmp_path):
        """Entering the TestClient on a never-migrated DB raises RuntimeError mentioning 'empty'."""
        database_url = f"sqlite:///{tmp_path / 'empty.db'}"
        configure_database(database_url)
        app = create_app()
        with pytest.raises(RuntimeError, match="empty"), TestClient(app):
            pass
    def test_init_db_fails_on_unmanaged_db(self, tmp_path):
        """Entering the TestClient on a DB lacking alembic_version raises RuntimeError."""
        database_url = f"sqlite:///{tmp_path / 'unmanaged.db'}"
        # Build the tables directly from the ORM metadata, so no
        # alembic_version table exists.
        engine = create_engine(database_url)
        Base.metadata.create_all(bind=engine)
        engine.dispose()
        configure_database(database_url)
        app = create_app()
        with pytest.raises(RuntimeError, match="alembic_version"), TestClient(app):
            pass
    def test_full_crud_after_migration(self, tmp_path):
        """Create a box and an item, delete the box, all against a freshly migrated DB."""
        database_url = f"sqlite:///{tmp_path / 'crud.db'}"
        run_migrations(database_url)
        configure_database(database_url)
        accepted_statuses = (200, 302, 303)
        with TestClient(create_app()) as client:
            # Step 1: create a box.
            box_form = {
                "name": "Test Box",
                "room": "Living Room",
                "status": "ready",
            }
            created_box = client.post("/boxes", data=box_form, follow_redirects=False)
            assert created_box.status_code in accepted_statuses
            # Step 2: the box shows up in the listing.
            listing = client.get("/boxes")
            assert "Test Box" in listing.text
            # Step 3: add an item to that box.
            item_form = {
                "name": "Test Item",
                "quantity": "3",
            }
            created_item = client.post("/boxes/1/items", data=item_form, follow_redirects=False)
            assert created_item.status_code in accepted_statuses
            # Step 4: delete the box (cascade).
            deleted = client.post("/boxes/1/delete", follow_redirects=False)
            assert deleted.status_code in accepted_statuses
            # Step 5: the listing no longer mentions the box.
            listing = client.get("/boxes")
            assert "Test Box" not in listing.text
 # ---------------------------------------------------------------------------
 # Production DB copy adoption
 # ---------------------------------------------------------------------------
 class TestProdDBCopyAdoption:
    """Migrate a temporary copy of the real production DB and check no rows are lost."""
    # Tables whose row counts must be identical before and after migration.
    _COUNTED_TABLES = ("boxes", "items", "subitems")
    @classmethod
    def _row_counts(cls, conn):
        """Return ``{table: row count}`` for every table in ``_COUNTED_TABLES``."""
        return {
            table: conn.execute(text(f"SELECT COUNT(*) FROM {table}")).scalar()
            for table in cls._COUNTED_TABLES
        }
    def test_adopt_prod_copy(self, tmp_path):
        """run_migrations on a copy of data/app.db ends at HEAD_REVISION with row counts intact.

        The test is skipped when the production DB file is not present.
        """
        # Anchor the lookup on this file instead of the current working
        # directory, so the test does not silently skip when pytest is started
        # from somewhere else. Assumes the repository root is two levels above
        # this file, the same layout the migrations lookup in this module uses.
        prod_db = Path(__file__).resolve().parent.parent / "data" / "app.db"
        if not prod_db.exists():
            pytest.skip("data/app.db not present — skipping prod copy test")
        copy_path = tmp_path / "prod_copy.db"
        shutil.copy2(prod_db, copy_path)
        db_url = f"sqlite:///{copy_path}"
        # Record row counts before; dispose the engine even if a query fails.
        eng = create_engine(db_url)
        try:
            with eng.begin() as conn:
                counts_before = self._row_counts(conn)
        finally:
            eng.dispose()
        # Run migration (handles managed, unmanaged, or empty)
        run_migrations(db_url)
        # Verify version at head and data preserved
        eng = create_engine(db_url)
        try:
            with eng.begin() as conn:
                version = conn.execute(text("SELECT version_num FROM alembic_version")).scalar()
                counts_after = self._row_counts(conn)
        finally:
            eng.dispose()
        assert version == HEAD_REVISION
        assert counts_after == counts_before
@@ -0,0 +1,656 @@
 """Tests for the settings store, LLM client, and settings routes.
 All LLM calls are mocked — CI never touches the network.
 """
 from unittest.mock import patch
 import pytest
 import app.llm as llm_module
 from app.llm import LLMResult, expand_query, is_configured
 from app.llm import ExpansionResult
 from app.models import AppSetting
 from app.settings_store import LLMConfig, get_app_settings, save_app_settings
 # Alias to avoid pytest collecting it as a test function
 _test_connection = llm_module.test_connection
 # ---------------------------------------------------------------------------
 # LLMConfig dataclass defaults
 # ---------------------------------------------------------------------------
 class TestLLMConfigDefaults:
    """Field values of an LLMConfig built without arguments."""
    def test_default_values(self):
        defaults = LLMConfig()
        # (field name, expected default), checked in declaration order.
        expected = (
            ("enabled", False),
            ("base_url", "https://api.openai.com/v1"),
            ("model", ""),
            ("api_key", ""),
            ("ai_search_enabled", False),
            ("ai_search_extra_hints", ""),
        )
        for field_name, want in expected:
            got = getattr(defaults, field_name)
            if isinstance(want, bool):
                # Booleans are compared by identity: the value must be the real False.
                assert got is want, field_name
            else:
                assert got == want, field_name
 # ---------------------------------------------------------------------------
 # settings_store: get_app_settings
 # ---------------------------------------------------------------------------
 class TestGetAppSettings:
    """get_app_settings: defaults, stored rows, and NULL values."""
    def test_returns_defaults_when_no_rows(self, db_session):
        loaded = get_app_settings(db_session)
        assert loaded.enabled is False
        assert loaded.base_url == "https://api.openai.com/v1"
        assert loaded.model == ""
        assert loaded.api_key == ""
        assert loaded.ai_search_enabled is False
    def test_reads_stored_values(self, db_session):
        stored_rows = {
            "llm_enabled": "true",
            "llm_base_url": "https://custom.api/v1",
            "llm_model": "gpt-4o",
            "llm_api_key": "sk-test-key",
            "ai_search_enabled": "true",
        }
        for setting_key, setting_value in stored_rows.items():
            db_session.add(AppSetting(key=setting_key, value=setting_value))
        db_session.commit()
        loaded = get_app_settings(db_session)
        assert loaded.enabled is True
        assert loaded.base_url == "https://custom.api/v1"
        assert loaded.model == "gpt-4o"
        assert loaded.api_key == "sk-test-key"
        assert loaded.ai_search_enabled is True
    def test_handles_null_value_as_default(self, db_session):
        # A row whose value is NULL must read back as the empty-string default.
        null_row = AppSetting(key="llm_model", value=None)
        db_session.add(null_row)
        db_session.commit()
        loaded = get_app_settings(db_session)
        assert loaded.model == ""
 # ---------------------------------------------------------------------------
 # settings_store: save_app_settings
 # ---------------------------------------------------------------------------
 class TestSaveAppSettings:
    """save_app_settings: first save, overwrite, and partial-update behaviour."""
    def test_saves_new_settings(self, db_session):
        new_values = {
            "enabled": True,
            "base_url": "https://my-api.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-new-key",
        }
        save_app_settings(db_session, **new_values)
        stored = get_app_settings(db_session)
        assert stored.enabled is True
        assert stored.base_url == "https://my-api.com/v1"
        assert stored.model == "gpt-4o-mini"
        assert stored.api_key == "sk-new-key"
    def test_updates_existing_settings(self, db_session):
        save_app_settings(db_session, enabled=True, model="old-model", api_key="key1")
        save_app_settings(db_session, model="new-model")
        stored = get_app_settings(db_session)
        assert stored.model == "new-model"
        # The second save did not pass `enabled`, so the first value survives.
        assert stored.enabled is True
    def test_api_key_none_preserves_old_key(self, db_session):
        save_app_settings(db_session, api_key="sk-original")
        # api_key=None means "leave the stored key alone".
        save_app_settings(db_session, model="gpt-4o", api_key=None)
        stored = get_app_settings(db_session)
        assert stored.api_key == "sk-original"
        assert stored.model == "gpt-4o"
    def test_api_key_empty_string_overwrites(self, db_session):
        for key_value in ("sk-original", ""):
            save_app_settings(db_session, api_key=key_value)
        stored = get_app_settings(db_session)
        assert stored.api_key == ""
    def test_partial_save_only_updates_specified_fields(self, db_session):
        save_app_settings(db_session, enabled=True, model="gpt-4o")
        save_app_settings(db_session, base_url="https://new.url/v1")
        stored = get_app_settings(db_session)
        assert stored.enabled is True
        assert stored.model == "gpt-4o"
        assert stored.base_url == "https://new.url/v1"
 # ---------------------------------------------------------------------------
 # is_configured
 # ---------------------------------------------------------------------------
 class TestIsConfigured:
    """is_configured is True only with enabled, a model, and an API key all set."""
    def test_false_when_disabled(self):
        verdict = is_configured(LLMConfig(enabled=False, model="gpt-4o", api_key="sk-key"))
        assert verdict is False
    def test_false_when_no_model(self):
        verdict = is_configured(LLMConfig(enabled=True, model="", api_key="sk-key"))
        assert verdict is False
    def test_false_when_no_api_key(self):
        verdict = is_configured(LLMConfig(enabled=True, model="gpt-4o", api_key=""))
        assert verdict is False
    def test_true_when_all_set(self):
        verdict = is_configured(LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key"))
        assert verdict is True
 # ---------------------------------------------------------------------------
 # test_connection (mocked)
 # ---------------------------------------------------------------------------
 class TestTestConnection:
    """Outcomes of llm.test_connection with the chat-completion call mocked out."""
    def test_returns_failure_when_not_configured(self):
        outcome = _test_connection(LLMConfig(enabled=False))
        assert outcome.success is False
        assert "未配置" in outcome.message
    @patch("app.llm._call_chat_completion")
    def test_success_when_configured(self, mock_call):
        canned_reply = {"choices": [{"message": {"content": "Hi"}}]}
        mock_call.return_value = canned_reply
        outcome = _test_connection(LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key"))
        assert outcome.success is True
        assert "连接成功" in outcome.message
        assert "gpt-4o" in outcome.message
    @patch("app.llm._call_chat_completion")
    def test_handles_http_error(self, mock_call):
        import httpx
        # Simulate the endpoint answering 401.
        unauthorized = httpx.HTTPStatusError(
            "401",
            request=httpx.Request("POST", "http://x"),
            response=httpx.Response(401),
        )
        mock_call.side_effect = unauthorized
        outcome = _test_connection(LLMConfig(enabled=True, model="gpt-4o", api_key="sk-bad"))
        assert outcome.success is False
        assert "401" in outcome.message
    @patch("app.llm._call_chat_completion")
    def test_handles_connect_error(self, mock_call):
        import httpx
        refused = httpx.ConnectError("refused")
        mock_call.side_effect = refused
        outcome = _test_connection(LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key"))
        assert outcome.success is False
        assert "无法连接" in outcome.message
    @patch("app.llm._call_chat_completion")
    def test_handles_timeout(self, mock_call):
        import httpx
        timed_out = httpx.TimeoutException("timeout")
        mock_call.side_effect = timed_out
        outcome = _test_connection(LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key"))
        assert outcome.success is False
        assert "超时" in outcome.message
 # ---------------------------------------------------------------------------
 # expand_query (mocked)
 # ---------------------------------------------------------------------------
 class TestExpandQuery:
    """expand_query results with the chat-completion call mocked out."""
    def test_returns_empty_when_not_configured(self):
        expansion = expand_query(LLMConfig(enabled=False), "锅")
        assert expansion.terms == []
        assert expansion.error is None
    @patch("app.llm._call_chat_completion")
    def test_expands_query_successfully(self, mock_call):
        # The model's reply content is a JSON array of related terms.
        reply_content = '["平底锅","炒锅","锅具","厨房锅"]'
        mock_call.return_value = {
            "choices": [
                {"message": {"content": reply_content}}
            ]
        }
        expansion = expand_query(LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key"), "锅")
        assert "平底锅" in expansion.terms
        assert "炒锅" in expansion.terms
        assert expansion.error is None
    @patch("app.llm._call_chat_completion")
    def test_fallback_on_api_failure(self, mock_call):
        # Any exception from the call yields no terms plus a recorded error.
        mock_call.side_effect = Exception("network down")
        expansion = expand_query(LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key"), "锅")
        assert expansion.terms == []
        assert expansion.error is not None
    @patch("app.llm._call_chat_completion")
    def test_fallback_on_empty_response(self, mock_call):
        # Empty reply content yields no terms and no error.
        mock_call.return_value = {"choices": [{"message": {"content": ""}}]}
        expansion = expand_query(LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key"), "锅")
        assert expansion.terms == []
        assert expansion.error is None
 # ---------------------------------------------------------------------------
 # Routes: GET /settings
 # ---------------------------------------------------------------------------
 class TestSettingsPage:
    """Rendering checks for GET /settings and the navigation link to it."""
    def test_settings_page_returns_200(self, client):
        assert client.get("/settings").status_code == 200
    def test_settings_page_has_form_elements(self, client):
        page = client.get("/settings").text
        required_fragments = (
            "设置",
            'name="enabled"',
            'name="base_url"',
            'name="model"',
            'name="api_key"',
            "保存设置",
            "测试连接",
        )
        for fragment in required_fragments:
            assert fragment in page
    def test_settings_page_shows_nav_link(self, client):
        # The link is looked for on the /boxes page, not on /settings itself.
        page = client.get("/boxes").text
        assert "设置" in page
        assert 'href="/settings"' in page
    def test_settings_page_no_api_key_echoed(self, client, db_session):
        secret = "sk-super-secret-key-12345"
        save_app_settings(db_session, api_key=secret)
        page = client.get("/settings").text
        # The stored key must not be rendered; an "already configured" marker is shown.
        assert secret not in page
        assert "已配置" in page
    def test_settings_page_shows_placeholder_when_no_key(self, client):
        page = client.get("/settings").text
        assert "输入 API Key" in page
    def test_settings_page_shows_default_base_url(self, client):
        page = client.get("/settings").text
        assert "https://api.openai.com/v1" in page
 # ---------------------------------------------------------------------------
 # Routes: POST /settings
 # ---------------------------------------------------------------------------
 class TestSaveSettingsRoute:
    """POST /settings: redirect, persistence, API-key handling, and the AI-search checkbox."""
    @staticmethod
    def _submit(client, **form):
        """POST ``form`` to /settings without following the redirect; return the response."""
        return client.post("/settings", data=form, follow_redirects=False)
    def test_save_settings_redirects(self, client):
        reply = self._submit(
            client,
            enabled="on",
            base_url="https://my-api.com/v1",
            model="gpt-4o-mini",
            api_key="sk-test-key",
        )
        assert reply.status_code == 303
        assert reply.headers["location"] == "/settings"
    def test_saved_settings_persist(self, client, db_session):
        self._submit(
            client,
            enabled="on",
            base_url="https://my-api.com/v1",
            model="gpt-4o-mini",
            api_key="sk-test-key",
        )
        stored = get_app_settings(db_session)
        assert stored.enabled is True
        assert stored.base_url == "https://my-api.com/v1"
        assert stored.model == "gpt-4o-mini"
        assert stored.api_key == "sk-test-key"
    def test_save_with_blank_api_key_preserves_old(self, client, db_session):
        # Initial save carries a key.
        self._submit(client, enabled="on", model="gpt-4o", api_key="sk-original")
        # Follow-up save leaves the key field blank.
        self._submit(client, enabled="on", model="gpt-4o", api_key="")
        stored = get_app_settings(db_session)
        assert stored.api_key == "sk-original"
    def test_save_disabled_state(self, client, db_session):
        # Enable first.
        self._submit(client, enabled="on", model="gpt-4o", api_key="sk-key")
        # Then submit without the 'enabled' checkbox field at all.
        self._submit(client, model="gpt-4o", api_key="")
        stored = get_app_settings(db_session)
        assert stored.enabled is False
    def test_save_settings_no_api_key_in_redirect_page(self, client):
        self._submit(client, enabled="on", model="gpt-4o", api_key="sk-secret-key")
        page = client.get("/settings").text
        assert "sk-secret-key" not in page
    def test_save_refuses_when_base_url_changes_and_key_blank(self, client, db_session):
        """A changed base_url combined with a blank api_key is refused with an error message."""
        self._submit(
            client,
            enabled="on",
            base_url="https://old-api.com/v1",
            model="gpt-4o",
            api_key="sk-old-key",
        )
        # Different base_url, no key. This request deliberately uses the
        # client's default redirect handling, unlike the other saves.
        changed_endpoint_form = {
            "enabled": "on",
            "base_url": "https://new-api.com/v1",
            "model": "gpt-4o",
            "api_key": "",  # blank + base_url changed → refuse
        }
        reply = client.post("/settings", data=changed_endpoint_form)
        assert reply.status_code == 200
        assert "请重新输入 API Key 后保存" in reply.text
        # Nothing may have been written: the earlier config is still in place.
        stored = get_app_settings(db_session)
        assert stored.base_url == "https://old-api.com/v1"
        assert stored.api_key == "sk-old-key"
    def test_save_preserves_key_when_endpoint_unchanged_and_key_blank(self, client, db_session):
        """Re-saving the same base_url with a blank api_key keeps the stored key."""
        self._submit(
            client,
            enabled="on",
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="sk-original",
        )
        # Identical endpoint, blank key.
        self._submit(
            client,
            enabled="on",
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="",
        )
        stored = get_app_settings(db_session)
        assert stored.api_key == "sk-original"
    def test_save_preserves_key_when_only_model_changes_and_key_blank(self, client, db_session):
        """Changing only the model (same base_url, blank api_key) keeps the stored key."""
        self._submit(
            client,
            enabled="on",
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="sk-original",
        )
        # Only the model differs; the key field is blank.
        self._submit(
            client,
            enabled="on",
            base_url="https://api.openai.com/v1",
            model="gpt-4o-mini",
            api_key="",
        )
        stored = get_app_settings(db_session)
        assert stored.model == "gpt-4o-mini"
        assert stored.api_key == "sk-original"
    def test_save_includes_ai_search_enabled_checkbox(self, client, db_session):
        """Saving without the ai_search_enabled field turns a previously true flag off."""
        # Seed the flag as true directly in the table.
        seeded_flag = AppSetting(key="ai_search_enabled", value="true")
        db_session.add(seeded_flag)
        db_session.commit()
        # The form omits the checkbox, so the flag is expected to become False.
        self._submit(client, enabled="on", model="gpt-4o", api_key="sk-key")
        stored = get_app_settings(db_session)
        assert stored.ai_search_enabled is False
    def test_save_preserves_ai_search_enabled_when_checked(self, client, db_session):
        """Saving with ai_search_enabled checked stores it as True."""
        self._submit(
            client,
            enabled="on",
            model="gpt-4o",
            api_key="sk-key",
            ai_search_enabled="on",
        )
        stored = get_app_settings(db_session)
        assert stored.ai_search_enabled is True
 # ---------------------------------------------------------------------------
 # Routes: POST /settings/test
 # ---------------------------------------------------------------------------
 class TestTestConnectionRoute:
    @patch("app.llm._call_chat_completion")
    def test_test_connection_success(self, mock_call, client):
        """A mocked successful completion renders the success text and the model name."""
        mock_call.return_value = {"choices": [{"message": {"content": "Hi"}}]}
        form = {
            "enabled": "on",
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o",
            "api_key": "sk-test",
        }
        reply = client.post("/settings/test", data=form)
        assert reply.status_code == 200
        page = reply.text
        assert "连接成功" in page
        assert "gpt-4o" in page
    @patch("app.llm._call_chat_completion")
    def test_test_connection_failure(self, mock_call, client):
        """A mocked 401 from the completion call renders the failure text with the status."""
        import httpx
        unauthorized = httpx.HTTPStatusError(
            "401",
            request=httpx.Request("POST", "http://x"),
            response=httpx.Response(401),
        )
        mock_call.side_effect = unauthorized
        form = {
            "enabled": "on",
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o",
            "api_key": "sk-bad",
        }
        reply = client.post("/settings/test", data=form)
        # The route itself still answers 200; the failure is reported in the page.
        assert reply.status_code == 200
        page = reply.text
        assert "连接失败" in page
        assert "401" in page
    def test_test_connection_not_configured(self, client):
        """An unchecked 'enabled' with blank model and key renders the not-configured text."""
        form = {
            "enabled": "",  # checkbox left unchecked
            "base_url": "https://api.openai.com/v1",
            "model": "",
            "api_key": "",
        }
        reply = client.post("/settings/test", data=form)
        assert reply.status_code == 200
        assert "未配置" in reply.text
    @patch("app.llm._call_chat_completion")
    def test_test_connection_uses_stored_key_when_endpoint_matches(self, mock_call, client, db_session):
        """When api_key is blank but base_url and model match saved config, the stored key should be used."""
        mock_call.return_value = {"choices": [{"message": {"content": "Hi"}}]}
        # Store a config first
        save_app_settings(
            db_session,
            enabled=True,
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="sk-stored-key",
        )
        response = client.post(
            "/settings/test",
            data={
                "enabled": "on",
                "base_url": "https://api.openai.com/v1",
                "model": "gpt-4o",
                "api_key": "",  # blank → use stored key (endpoint matches)
            },
        )
        assert response.status_code == 200
        assert "连接成功" in response.text
    @patch("app.llm._call_chat_completion")
    def test_test_connection_uses_stored_key_when_only_model_changes(self, mock_call, client, db_session):
        """Model changes under the same base_url can reuse the stored key."""
        captured = {}
        def fake_call(cfg, **kwargs):
            captured["base_url"] = cfg.base_url
            captured["model"] = cfg.model
            captured["api_key"] = cfg.api_key
            return {"choices": [{"message": {"content": "Hi"}}]}
        mock_call.side_effect = fake_call
        save_app_settings(
            db_session,
            enabled=True,
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="sk-stored-key",
        )
        response = client.post(
            "/settings/test",
            data={
                "enabled": "on",
                "base_url": "https://api.openai.com/v1",
                "model": "gpt-4o-mini",
                "api_key": "",
            },
        )
        assert response.status_code == 200
        assert "连接成功" in response.text
        assert captured == {
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-stored-key",
        }
    def test_test_connection_refuses_stored_key_when_endpoint_changed(self, client, db_session):
        """When base_url changed and api_key is blank, refuse to test."""
        save_app_settings(
            db_session,
            enabled=True,
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="sk-stored-key",
        )
        response = client.post(
            "/settings/test",
            data={
                "enabled": "on",
                "base_url": "https://attacker.example/v1",  # different endpoint
                "model": "gpt-4o",
                "api_key": "",  # blank → refuse
            },
        )
        assert response.status_code == 200
        assert "请重新输入 API Key" in response.text
    def test_test_connection_result_shows_on_settings_page(self, client):
        """Test result is rendered on the same settings page."""
        response = client.post(
            "/settings/test",
            data={
                "enabled": "",
                "base_url": "https://api.openai.com/v1",
                "model": "",
                "api_key": "",
            },
        )
        assert response.status_code == 200
        assert "设置" in response.text
        assert "保存设置" in response.text
 # ---------------------------------------------------------------------------
 # Graceful degradation: unconfigured LLM does not affect existing features
 # ---------------------------------------------------------------------------
 class TestGracefulDegradation:
    def test_boxes_page_works_without_llm_config(self, client):
        response = client.get("/boxes")
        assert response.status_code == 200
    def test_search_page_works_without_llm_config(self, client):
        response = client.get("/search?q=test")
        assert response.status_code == 200
    def test_crud_works_without_llm_config(self, client, db_session):
        from app.models import Box
        response = client.post(
            "/boxes",
            data={"name": "No LLM Box"},
            follow_redirects=False,
        )
        assert response.status_code == 303
        assert db_session.query(Box).count() == 1
Author	SHA1	Message	Date
tliu93	70b0cf08ee	Add AI search query expansion test / pytest (push) Successful in 1m20s Details docker-image / build-and-push (push) Successful in 5m6s Details	2026-06-01 21:28:29 +02:00
tliu93	d36b940981	Add LLM settings integration test / pytest (push) Successful in 1m13s Details Add app_settings migration, settings UI, and OpenAI-compatible httpx LLM client with mocked tests. Preserve API keys on blank form submissions, require a fresh key when base_url changes, and keep AI search settings untouched for step 3. Update docs/design LLM integration and step 3 AI search notes, including prompt contract and extra-hints planning.	2026-06-01 20:06:22 +02:00
tliu93	8b8bd9f38f	Add Alembic migration foundation test / pytest (push) Successful in 1m34s Details	2026-06-01 16:02:43 +02:00
tliu93	c42cc2ddb6	docs: add LLM integration design and three-step implementation plan Add docs/: a bilingual repository brief, plus docs/design/ with the high-level design (Alembic migration foundation, LLM integration, basic AI search) and a self-contained per-step implementation plan (step 1-3).	2026-06-01 13:10:59 +02:00
tliu93	b9b65838c9	refine backup test / pytest (push) Successful in 44s Details	2026-05-04 11:03:44 +02:00
tliu93	13be98570d	refine backup script test / pytest (push) Successful in 1m8s Details	2026-05-04 10:49:27 +02:00