6 Commits

Author SHA1 Message Date
tliu93 70b0cf08ee Add AI search query expansion
test / pytest (push) Successful in 1m20s
docker-image / build-and-push (push) Successful in 5m6s
2026-06-01 21:28:29 +02:00
tliu93 d36b940981 Add LLM settings integration
test / pytest (push) Successful in 1m13s
Add app_settings migration, settings UI, and OpenAI-compatible httpx LLM client with mocked tests.

Preserve API keys on blank form submissions, require a fresh key when base_url changes, and keep AI search settings untouched for step 3.

Update docs/design LLM integration and step 3 AI search notes, including prompt contract and extra-hints planning.
2026-06-01 20:06:22 +02:00
tliu93 8b8bd9f38f Add Alembic migration foundation
test / pytest (push) Successful in 1m34s
2026-06-01 16:02:43 +02:00
tliu93 c42cc2ddb6 docs: add LLM integration design and three-step implementation plan
Add docs/: a bilingual repository brief, plus docs/design/ with the high-level design (Alembic migration foundation, LLM integration, basic AI search) and a self-contained per-step implementation plan (step 1-3).
2026-06-01 13:10:59 +02:00
tliu93 b9b65838c9 refine backup
test / pytest (push) Successful in 44s
2026-05-04 11:03:44 +02:00
tliu93 13be98570d refine backup script
test / pytest (push) Successful in 1m8s
2026-05-04 10:49:27 +02:00
32 changed files with 4617 additions and 112 deletions
+21 -3
View File
@@ -1,9 +1,28 @@
# Local environment and secrets
.env
.venv/ .venv/
# Runtime data and local review notes
data/
review-notes/
Review-Notes/
# Python cache/test/build output
__pycache__/ __pycache__/
.pytest_cache/ .pytest_cache/
.mypy_cache/
.ruff_cache/
.coverage
htmlcov/
build/
dist/
*.egg-info/
*.pyc *.pyc
.env
data/*.db # Local tool state
.codex
.claude/settings.local.json
backups/
# macOS generated files # macOS generated files
.DS_Store .DS_Store
@@ -14,4 +33,3 @@ data/*.db
**/.Spotlight-V100 **/.Spotlight-V100
.Trashes .Trashes
**/.Trashes **/.Trashes
.codex
+2
View File
@@ -10,6 +10,8 @@ WORKDIR /app
COPY requirements.txt . COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt
COPY alembic.ini .
COPY migrations ./migrations
COPY app ./app COPY app ./app
RUN mkdir -p /app/data RUN mkdir -p /app/data
+148
View File
@@ -0,0 +1,148 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
# ini file
script_location = %(here)s/migrations
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory. for multiple paths, the path separator
# is defined by "path_separator" below.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =
# max length of characters to apply to the "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to <script_location>/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
# files such as alembic.ini.
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
# to provide os-dependent path splitting.
#
# Note that in order to support legacy alembic.ini files, this default does NOT
# take place if path_separator is not present in alembic.ini. If this
# option is omitted entirely, fallback logic is as follows:
#
# 1. Parsing of the version_locations option falls back to using the legacy
# "version_path_separator" key, which if absent then falls back to the legacy
# behavior of splitting on spaces and/or commas.
# 2. Parsing of the prepend_sys_path option falls back to the legacy
# behavior of splitting on spaces, commas, or colons.
#
# Valid values for path_separator are:
#
# path_separator = :
# path_separator = ;
# path_separator = space
# path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
# database URL. This is consumed by the user-maintained env.py script only.
# other means of configuring database URLs may be customized within the env.py
# file.
# sqlalchemy.url is set dynamically in migrations/env.py from app.config
# Do NOT set it here.
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
# hooks = ruff
# ruff.type = module
# ruff.module = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Alternatively, use the exec runner to execute a binary found on your PATH
# hooks = ruff
# ruff.type = exec
# ruff.executable = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Logging configuration. This is also consumed by the user-maintained
# env.py script only.
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARNING
handlers = console
qualname =
[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
+6 -39
View File
@@ -1,6 +1,6 @@
from typing import Generator from typing import Generator
from sqlalchemy import create_engine, event, text from sqlalchemy import create_engine, event
from sqlalchemy.engine import make_url from sqlalchemy.engine import make_url
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
@@ -62,47 +62,14 @@ def get_db() -> Generator[Session, None, None]:
def init_db(database_url: str | None = None) -> None: def init_db(database_url: str | None = None) -> None:
from app import models from app import models # noqa: F401 — register models on Base.metadata
if engine is None or database_url is not None: if engine is None or database_url is not None:
configure_database(database_url) configure_database(database_url)
Base.metadata.create_all(bind=engine) from app.migrate import verify_schema_is_current
_sync_sqlite_image_columns()
resolved_url = str(engine.url)
verify_schema_is_current(resolved_url)
def _sync_sqlite_image_columns() -> None:
    """Add any missing image columns to the SQLite tables.

    Does nothing when no engine is configured or the engine's dialect is not
    SQLite. Otherwise, for each of ``boxes``, ``items`` and ``subitems`` it
    reads ``PRAGMA table_info`` and issues ``ALTER TABLE ... ADD COLUMN`` for
    every image column that is not already present.
    """
    if engine is None or engine.dialect.name != "sqlite":
        return

    # The same four image columns are expected on every table.
    wanted_columns = (
        ("image_blob", "BLOB"),
        ("image_mime_type", "VARCHAR(50)"),
        ("image_width", "INTEGER"),
        ("image_height", "INTEGER"),
    )

    with engine.begin() as connection:
        for table_name in ("boxes", "items", "subitems"):
            table_info = connection.execute(text(f"PRAGMA table_info({table_name})"))
            # Index 1 of each row is used as the column name.
            present = {row[1] for row in table_info}
            for column_name, column_type in wanted_columns:
                if column_name in present:
                    continue
                connection.execute(
                    text(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}")
                )
+272
View File
@@ -0,0 +1,272 @@
"""LLM client module — all network egress is concentrated here.
Uses ``httpx`` (already in requirements) to call OpenAI-compatible endpoints.
No ``openai`` SDK dependency. Sync functions are fine: FastAPI runs sync
handlers in a threadpool.
Public API:
- ``is_configured(cfg)`` — returns True when the client can make calls.
- ``test_connection(cfg)`` — minimal request to verify credentials.
- ``expand_query(cfg, query)`` — query-term expansion (step 3 consumer).
Returns ``ExpansionResult`` with ``terms`` and optional ``error``.
- ``analyze_image(...)`` — **reserved stub, not implemented**.
All calls go through ``_call_chat_completion()`` so tests can mock a single
boundary.
"""
from __future__ import annotations
import json
import re
from dataclasses import dataclass
from typing import Any
import httpx
from app.settings_store import LLMConfig
# Sensible defaults
_TIMEOUT_SECONDS = 30
# ── Prompt for query expansion (Step 3) ──────────────────────────────────
# System prompt (written in Chinese) used by expand_query(). It tells the model
# it is a search assistant for a moving/packing inventory, asks for related
# terms (synonyms, common aliases, broader categories, specific kinds) in the
# same language as the query, forbids explanations and unrelated terms, caps
# the answer at 8 terms, and requires the output to be a single JSON array of
# strings.
_EXPAND_QUERY_SYSTEM_PROMPT = (
"你是搬家物品搜索助手。用户在搜索自己打包的箱子与物品(家居/搬家场景)。"
"给定一个搜索词,列出用户可能用来命名同一类物品的相关词:"
"近义词、常见别称、上位类别、具体品类。"
"规则:用与查询相同的语言;"
"只给与该物品紧密相关、有助于在清单里找到它的词;"
"不要解释、不要造无关词;最多 8 个;"
"只输出一个 JSON 字符串数组,例如 "
'`["炒锅","平底锅","汤锅","厨具"]`。'
)
@dataclass
class LLMResult:
    """Outcome of an LLM operation, in one shape for every caller.

    ``success`` says whether the operation worked, ``message`` is the
    human-readable text describing the outcome, and ``data`` optionally
    carries a payload (it defaults to ``None``).
    """

    success: bool
    message: str
    data: Any = None
@dataclass
class ExpansionResult:
    """What ``expand_query`` hands back to its caller.

    ``terms`` is always a list and may be empty. ``error`` stays ``None``
    whenever the call succeeded, including when the model legitimately
    produced no terms; when the call failed (timeout, network error, HTTP
    error) it holds a human-friendly message instead.
    """

    terms: list[str]
    error: str | None = None
def is_configured(cfg: LLMConfig) -> bool:
    """Tell whether ``cfg`` is usable for making LLM calls.

    All three conditions must hold: ``cfg.enabled`` is truthy, and both
    ``cfg.model`` and ``cfg.api_key`` are non-empty.
    """
    if not cfg.enabled:
        return False
    return bool(cfg.model) and bool(cfg.api_key)
def test_connection(cfg: LLMConfig) -> LLMResult:
    """Probe the configured endpoint with a one-token chat completion.

    The request is a single ``"Hi"`` user message with ``max_tokens=1`` to keep
    the cost minimal. Every failure is reported through the returned
    ``LLMResult`` rather than raised:

    - not configured / not enabled → failure, no request is made;
    - ``httpx.HTTPStatusError`` → failure mentioning the HTTP status code;
    - ``httpx.ConnectError`` / ``httpx.TimeoutException`` → failure with a hint
      about ``base_url`` or the network;
    - any other exception → failure carrying the exception text.

    On success the parsed response body is attached as ``data``.
    """
    if not is_configured(cfg):
        return LLMResult(
            success=False,
            message="LLM 未配置或未启用(缺少 model 或 api_key)。",
        )

    try:
        body = _call_chat_completion(
            cfg,
            messages=[{"role": "user", "content": "Hi"}],
            max_tokens=1,
        )
        return LLMResult(
            success=True,
            message=f"连接成功(模型:{cfg.model})。",
            data=body,
        )
    except httpx.HTTPStatusError as exc:
        failure = (
            f"连接失败(HTTP {exc.response.status_code})。请检查 base_url、model 和 api_key。"
        )
    except httpx.ConnectError:
        failure = "无法连接到服务器。请检查 base_url 是否正确。"
    except httpx.TimeoutException:
        failure = "连接超时。请检查网络或 base_url 是否可达。"
    except Exception as exc:  # noqa: BLE001 — graceful degradation
        failure = f"未知错误:{exc}"

    return LLMResult(success=False, message=failure)
def expand_query(
    cfg: LLMConfig,
    query: str,
    extra_hints: str = "",
) -> ExpansionResult:
    """Expand a search query into related terms via the LLM.

    Args:
        cfg: LLM configuration; when it is not usable (see ``is_configured``)
            no request is made and an empty, error-free result is returned.
        query: The user's search text, sent as the user message.
        extra_hints: Optional extra instructions appended to the system prompt
            on a new line when non-blank.

    Returns:
        An ``ExpansionResult``. On success ``terms`` holds the parsed terms
        (possibly empty) and ``error`` is ``None``. On a transport failure
        (timeout, connect error, HTTP error, any other exception raised by the
        call) ``terms`` is ``[]`` and ``error`` holds a human-friendly message.
        A 2xx response whose body does not have the expected
        ``choices[0].message.content`` string is treated as an empty expansion
        rather than an error.

    This function never raises for a bad response shape.
    """
    if not is_configured(cfg):
        return ExpansionResult(terms=[])

    system_prompt = _EXPAND_QUERY_SYSTEM_PROMPT
    hints = (extra_hints or "").strip()
    if hints:
        system_prompt += "\n" + hints

    try:
        response = _call_chat_completion(
            cfg,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query},
            ],
            max_tokens=200,
            temperature=0,
        )
    except httpx.TimeoutException:
        return ExpansionResult(
            terms=[],
            error="AI 搜索请求超时,请稍后再试。",
        )
    except httpx.ConnectError:
        return ExpansionResult(
            terms=[],
            error="无法连接到 AI 服务,请检查网络或设置。",
        )
    except httpx.HTTPStatusError:
        return ExpansionResult(
            terms=[],
            error="AI 服务返回错误,请检查配置。",
        )
    except Exception:  # noqa: BLE001 — graceful degradation
        return ExpansionResult(
            terms=[],
            error="AI 搜索暂时不可用,请稍后再试。",
        )

    # A 2xx body is not guaranteed to follow the chat-completion shape:
    # "message" or "content" may be null, or the body may not be an object at
    # all. Walk the structure defensively so a surprising payload degrades to
    # "no expansion" instead of raising outside the try block above.
    choices = response.get("choices") if isinstance(response, dict) else None
    if not isinstance(choices, list) or not choices:
        return ExpansionResult(terms=[])

    first_choice = choices[0]
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    if not isinstance(content, str):
        return ExpansionResult(terms=[])

    return ExpansionResult(terms=_parse_json_string_array(content))
# ── Constants for output contract enforcement ────────────────────────────
_MAX_EXPANSION_TERMS = 8
_MAX_TERM_LENGTH = 30
def _parse_json_string_array(content: str) -> list[str]:
"""Parse LLM output into a list of strings.
Strict contract enforcement:
1. Strip markdown code fences;
2. Try ``json.loads`` — only accept a JSON **array of strings**;
3. Anything else (prose, JSON objects, bad JSON) → return ``[]``.
This ensures the output contract is enforced by code: no matter what
the model returns or what ``ai_search_extra_hints`` contains, only a
valid JSON string array is accepted.
"""
text = content.strip()
if not text:
return []
# Strip markdown code fences
text = re.sub(r"^```(?:json)?\s*", "", text)
text = re.sub(r"\s*```$", "", text)
text = text.strip()
# Attempt JSON parse — strictly require a list
try:
parsed = json.loads(text)
except (json.JSONDecodeError, ValueError):
return []
if not isinstance(parsed, list):
return []
# Validate every element is a string; reject non-string items
terms: list[str] = []
for item in parsed:
if not isinstance(item, str):
return []
cleaned = item.strip()
if cleaned and len(cleaned) <= _MAX_TERM_LENGTH:
terms.append(cleaned)
# Cap total count
return terms[:_MAX_EXPANSION_TERMS]
def analyze_image(cfg: LLMConfig, image_data: bytes, prompt: str) -> LLMResult:
    """Placeholder for LLM-based image analysis.

    .. note:: **Reserved stub — not implemented.** None of the arguments are
       used yet; the function always returns a failed ``LLMResult`` saying the
       feature is not available. The signature is fixed so callers can already
       depend on it.
    """
    # TODO: Implement in future round for image analysis.
    not_implemented = LLMResult(success=False, message="图片分析功能尚未实现。")
    return not_implemented
# ------------------------------------------------------------------
# Internal boundary — all network calls go through this single function
# ------------------------------------------------------------------
def _call_chat_completion(
    cfg: LLMConfig,
    *,
    messages: list[dict[str, str]],
    max_tokens: int = 1,
    temperature: float | None = None,
) -> dict:
    """POST a chat-completion request to the OpenAI-compatible endpoint.

    The target is ``cfg.base_url`` (trailing slashes removed) plus
    ``/chat/completions``; ``cfg.api_key`` is sent as a bearer token.
    ``temperature`` is only included in the payload when it is not ``None``.

    Returns the decoded JSON body of a 2xx response. A non-2xx status raises
    ``httpx.HTTPStatusError``; network problems surface as other ``httpx``
    exceptions. Callers are expected to catch these and degrade gracefully.
    """
    endpoint = f"{cfg.base_url.rstrip('/')}/chat/completions"

    request_body: dict[str, Any] = {
        "model": cfg.model,
        "messages": messages,
        "max_tokens": max_tokens,
    }
    if temperature is not None:
        request_body["temperature"] = temperature

    request_headers = {
        "Authorization": f"Bearer {cfg.api_key}",
        "Content-Type": "application/json",
    }

    # A fresh client per call; the context manager closes it afterwards.
    with httpx.Client(timeout=_TIMEOUT_SECONDS) as client:
        reply = client.post(endpoint, json=request_body, headers=request_headers)
        reply.raise_for_status()
        return reply.json()
+298 -34
View File
@@ -5,12 +5,15 @@ from fastapi import Depends, FastAPI, File, Form, HTTPException, Request, Upload
from fastapi.responses import FileResponse, RedirectResponse, Response from fastapi.responses import FileResponse, RedirectResponse, Response
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from sqlalchemy import func, or_ from sqlalchemy import func, false, or_
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.db import get_db, init_db from app.db import get_db, init_db
from app.images import process_upload from app.images import process_upload
from app.llm import expand_query, is_configured, test_connection
from app.llm import LLMResult
from app.models import Box, Item, SubItem from app.models import Box, Item, SubItem
from app.settings_store import LLMConfig, get_app_settings, save_app_settings
templates = Jinja2Templates(directory="app/templates") templates = Jinja2Templates(directory="app/templates")
STATIC_DIR = Path("app/static") STATIC_DIR = Path("app/static")
@@ -88,6 +91,46 @@ def _wants_add_next(submit_action: str | None) -> bool:
return submit_action == "save_and_add_next" return submit_action == "save_and_add_next"
def _validate_settings_origin(request: Request) -> str | None:
"""Check Origin/Referer for same-host browser requests.
Returns an error message if validation fails, or None if OK.
Missing both headers (e.g. curl, API call) is allowed for now.
"""
origin = request.headers.get("origin")
referer = request.headers.get("referer")
if origin:
host = request.headers.get("host", "")
# origin includes scheme, host only has host:port
from urllib.parse import urlparse
parsed = urlparse(origin)
origin_host = parsed.netloc
if origin_host != host:
return "请求来源与当前站点不一致,操作被拒绝。"
elif referer:
host = request.headers.get("host", "")
from urllib.parse import urlparse
parsed = urlparse(referer)
referer_host = parsed.netloc
if referer_host != host:
return "请求来源与当前站点不一致,操作被拒绝。"
return None
def _validate_base_url_scheme(base_url: str) -> str | None:
"""Return an error message if base_url scheme is not allowed, else None."""
from urllib.parse import urlparse
parsed = urlparse(base_url)
if parsed.scheme not in ("https", "http"):
return "Base URL 必须以 http:// 或 https:// 开头。"
return None
def _format_average(total: int, divisor: int) -> str: def _format_average(total: int, divisor: int) -> str:
if divisor == 0: if divisor == 0:
return "0.0" return "0.0"
@@ -117,24 +160,41 @@ def _build_boxes_overview_summary(db: Session) -> dict[str, int | str]:
} }
def _build_search_results(db: Session, query: str) -> list[dict]: def _build_search_results(db: Session, query: str | list[str]) -> list[dict]:
keyword = f"%{query.lower()}%" """Search Box / Item / SubItem by name and note using case-insensitive LIKE.
Accepts either a single query string or a list of keywords.
When multiple keywords are given, they are combined with OR — a match on
*any* keyword is sufficient.
"""
keywords = [query] if isinstance(query, str) else query
patterns = [f"%{kw.lower()}%" for kw in keywords]
def _or_like(column, note_column):
"""Build an OR filter that matches any pattern on either column."""
conditions = []
for pat in patterns:
conditions.append(func.lower(column).like(pat))
conditions.append(func.lower(func.coalesce(note_column, "")).like(pat))
return or_(false(), *conditions) if conditions else false()
results: list[dict] = [] results: list[dict] = []
seen_ids: set[tuple[str, int]] = set()
def _add(result_type: str, obj_id: int, entry: dict) -> None:
key = (result_type, obj_id)
if key not in seen_ids:
seen_ids.add(key)
results.append(entry)
box_matches = ( box_matches = (
db.query(Box) db.query(Box)
.filter( .filter(_or_like(Box.name, Box.note))
or_(
func.lower(Box.name).like(keyword),
func.lower(func.coalesce(Box.note, "")).like(keyword),
)
)
.order_by(Box.id.desc()) .order_by(Box.id.desc())
.all() .all()
) )
for box in box_matches: for box in box_matches:
results.append( _add("Box", box.id, {
{
"type": "Box", "type": "Box",
"name": box.name, "name": box.name,
"note": box.note, "note": box.note,
@@ -145,24 +205,17 @@ def _build_search_results(db: Session, query: str) -> list[dict]:
"path": "顶层箱子", "path": "顶层箱子",
"is_container": None, "is_container": None,
"image_url": f"/boxes/{box.id}/image" if box.image_blob else None, "image_url": f"/boxes/{box.id}/image" if box.image_blob else None,
} })
)
item_matches = ( item_matches = (
db.query(Item) db.query(Item)
.join(Item.box) .join(Item.box)
.filter( .filter(_or_like(Item.name, Item.note))
or_(
func.lower(Item.name).like(keyword),
func.lower(func.coalesce(Item.note, "")).like(keyword),
)
)
.order_by(Item.id.desc()) .order_by(Item.id.desc())
.all() .all()
) )
for item in item_matches: for item in item_matches:
results.append( _add("Item", item.id, {
{
"type": "Item", "type": "Item",
"name": item.name, "name": item.name,
"note": item.note, "note": item.note,
@@ -173,25 +226,18 @@ def _build_search_results(db: Session, query: str) -> list[dict]:
"path": f"位于箱子:{item.box.name}", "path": f"位于箱子:{item.box.name}",
"is_container": item.is_container, "is_container": item.is_container,
"image_url": f"/items/{item.id}/image" if item.image_blob else None, "image_url": f"/items/{item.id}/image" if item.image_blob else None,
} })
)
subitem_matches = ( subitem_matches = (
db.query(SubItem) db.query(SubItem)
.join(SubItem.parent_item) .join(SubItem.parent_item)
.join(Item.box) .join(Item.box)
.filter( .filter(_or_like(SubItem.name, SubItem.note))
or_(
func.lower(SubItem.name).like(keyword),
func.lower(func.coalesce(SubItem.note, "")).like(keyword),
)
)
.order_by(SubItem.id.desc()) .order_by(SubItem.id.desc())
.all() .all()
) )
for subitem in subitem_matches: for subitem in subitem_matches:
results.append( _add("SubItem", subitem.id, {
{
"type": "SubItem", "type": "SubItem",
"name": subitem.name, "name": subitem.name,
"note": subitem.note, "note": subitem.note,
@@ -205,12 +251,39 @@ def _build_search_results(db: Session, query: str) -> list[dict]:
), ),
"is_container": None, "is_container": None,
"image_url": f"/subitems/{subitem.id}/image" if subitem.image_blob else None, "image_url": f"/subitems/{subitem.id}/image" if subitem.image_blob else None,
} })
)
return results return results
def _ai_search(db: Session, cfg: "LLMConfig", query: str) -> tuple[list[str], list[dict], str | None]:
    """Swappable seam for AI-assisted search.

    Asks the LLM to expand ``query`` (passing ``cfg.ai_search_extra_hints``)
    and returns ``(expanded_terms, results, error_message)``:

    - expansion failed → ``([], plain results for query, the error message)``;
    - expansion came back empty → ``([], plain results for query, None)``;
    - otherwise → ``(terms, results for query + terms, None)``, where the
      search list has duplicates removed with the original query first.
    """
    expansion = expand_query(cfg, query, extra_hints=cfg.ai_search_extra_hints)

    if expansion.error or not expansion.terms:
        # Either a real failure or a legitimately empty expansion: both fall
        # back to the ordinary single-keyword search. Only a failure carries
        # an error message.
        plain_results = _build_search_results(db, query)
        return [], plain_results, expansion.error or None

    # dict.fromkeys keeps first-seen order while dropping repeats.
    search_terms = list(dict.fromkeys([query, *expansion.terms]))
    broadened_results = _build_search_results(db, search_terms)
    return expansion.terms, broadened_results, None
def create_app() -> FastAPI: def create_app() -> FastAPI:
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
@@ -242,10 +315,28 @@ def create_app() -> FastAPI:
def search_page( def search_page(
request: Request, request: Request,
q: str | None = None, q: str | None = None,
ai: str | None = None,
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
query = (q or "").strip() query = (q or "").strip()
results = _build_search_results(db, query) if query else [] cfg = get_app_settings(db)
ai_requested = ai == "1"
ai_available = cfg.ai_search_enabled and is_configured(cfg)
expanded_terms: list[str] = []
ai_error: str | None = None
if query:
if ai_requested and ai_available:
try:
expanded_terms, results, ai_error = _ai_search(db, cfg, query)
except Exception: # noqa: BLE001 — graceful degradation
ai_error = "AI 搜索暂时不可用,已回退到普通搜索。"
results = _build_search_results(db, query)
else:
results = _build_search_results(db, query)
else:
results = []
return templates.TemplateResponse( return templates.TemplateResponse(
request=request, request=request,
name="search/index.html", name="search/index.html",
@@ -254,6 +345,10 @@ def create_app() -> FastAPI:
"query": query, "query": query,
"results": results, "results": results,
"searched": bool(query), "searched": bool(query),
"ai_activated": ai_requested and ai_available and bool(query),
"expanded_terms": expanded_terms,
"ai_error": ai_error,
"ai_available": ai_available,
}, },
) )
@@ -267,6 +362,175 @@ def create_app() -> FastAPI:
context={"page_title": "箱子", "boxes": boxes, "summary": summary}, context={"page_title": "箱子", "boxes": boxes, "summary": summary},
) )
# ------------------------------------------------------------------
# Settings
# ------------------------------------------------------------------
@app.get("/settings")
def settings_page(request: Request, db: Session = Depends(get_db)):
    """Render the settings form filled in from the stored configuration.

    The template receives the stored config, a flag telling whether an API
    key is currently stored, and no test result.
    """
    stored_cfg = get_app_settings(db)
    page_context = {
        "page_title": "设置",
        "config": stored_cfg,
        "api_key_configured": bool(stored_cfg.api_key),
        "test_result": None,
    }
    return templates.TemplateResponse(
        request=request,
        name="settings/form.html",
        context=page_context,
    )
@app.post("/settings")
def save_settings(
request: Request,
enabled: str | None = Form(default=None),
base_url: str | None = Form(default=None),
model: str | None = Form(default=None),
api_key: str | None = Form(default=None),
ai_search_enabled: str | None = Form(default=None),
ai_search_extra_hints: str | None = Form(default=None),
db: Session = Depends(get_db),
) -> Response:
"""Persist the submitted settings form.

Flow:
1. Reject with 403 when the Origin/Referer check fails.
2. Reject with 400 when the base URL fails validation; a blank base URL
   falls back to ``https://api.openai.com/v1`` first.
3. If the base URL differs from the stored one and no API key was
   submitted, do not save: re-render the form with an error message.
4. Otherwise save and redirect to ``/settings`` with 303.

The two checkbox fields count as enabled only when their submitted value is
exactly ``"on"``.
"""
# Origin/Referer check for browser requests
origin_error = _validate_settings_origin(request)
if origin_error:
raise HTTPException(status_code=403, detail=origin_error)
resolved_base_url = _clean_text(base_url) or "https://api.openai.com/v1"
# Validate base_url scheme
scheme_error = _validate_base_url_scheme(resolved_base_url)
if scheme_error:
raise HTTPException(status_code=400, detail=scheme_error)
resolved_model = _clean_text(model) or ""
# Only base_url change counts as an endpoint change — model switches
# under the same base_url do not require a new key.
existing_cfg = get_app_settings(db)
# NOTE(review): _clean_text is defined outside this view; the `is None`
# checks below assume it returns None for a blank submission — confirm.
submitted_key = _clean_text(api_key)
base_url_changed = resolved_base_url != existing_cfg.base_url
if base_url_changed and submitted_key is None:
# base_url changed but no new key provided — refuse to save,
# return to settings page with a clear error message.
return templates.TemplateResponse(
request=request,
name="settings/form.html",
context={
"page_title": "设置",
# Echo the submitted (unsaved) values back so the form keeps them.
"config": LLMConfig(
enabled=enabled == "on",
base_url=resolved_base_url,
model=resolved_model,
# NOTE(review): the stored key is put into the template context here,
# while the equivalent branch in test_settings_connection passes
# api_key="" — confirm the template never renders config.api_key.
api_key=existing_cfg.api_key,
ai_search_enabled=ai_search_enabled == "on",
ai_search_extra_hints=_clean_text(ai_search_extra_hints) or "",
),
"api_key_configured": bool(existing_cfg.api_key),
"test_result": LLMResult(
success=False,
message="Base URL 已变更,请重新输入 API Key 后保存。",
),
},
)
# submitted_key is None → keep old key; str (including "") → use new value
# NOTE(review): the "None keeps the old key" behavior lives in
# save_app_settings, which is not visible here — verify.
resolved_api_key = submitted_key
resolved_extra_hints = _clean_text(ai_search_extra_hints) or ""
save_app_settings(
db,
enabled=enabled == "on",
base_url=resolved_base_url,
model=resolved_model,
api_key=resolved_api_key,
ai_search_enabled=ai_search_enabled == "on",
ai_search_extra_hints=resolved_extra_hints,
)
# 303 makes the browser follow up with a GET of the settings page.
return RedirectResponse(url="/settings", status_code=status.HTTP_303_SEE_OTHER)
@app.post("/settings/test")
def test_settings_connection(
request: Request,
enabled: str | None = Form(default=None),
base_url: str | None = Form(default=None),
model: str | None = Form(default=None),
api_key: str | None = Form(default=None),
ai_search_enabled: str | None = Form(default=None),
ai_search_extra_hints: str | None = Form(default=None),
db: Session = Depends(get_db),
):
"""Test the submitted (not yet saved) settings against the LLM endpoint.

Nothing is persisted in this handler. After the same origin (403) and base
URL (400) checks as the save handler, it picks the API key to test with:

- base URL unchanged and no key submitted → the stored key;
- a key submitted → that key;
- base URL changed and no key submitted → no test; the form is re-rendered
  with an error message.

The form is then re-rendered with the tested values and the result of
``test_connection``.
"""
# Origin/Referer check for browser requests
origin_error = _validate_settings_origin(request)
if origin_error:
raise HTTPException(status_code=403, detail=origin_error)
resolved_base_url = _clean_text(base_url) or "https://api.openai.com/v1"
# Validate base_url scheme
scheme_error = _validate_base_url_scheme(resolved_base_url)
if scheme_error:
raise HTTPException(status_code=400, detail=scheme_error)
resolved_model = _clean_text(model) or ""
# Only reuse stored key if base_url matches saved config. Model switches
# under the same base_url can use the same key; a base_url change cannot.
existing_cfg = get_app_settings(db)
# NOTE(review): assumes _clean_text (defined outside this view) returns
# None for a blank submission — confirm.
submitted_key = _clean_text(api_key)
base_url_matches = resolved_base_url == existing_cfg.base_url
if base_url_matches and submitted_key is None:
resolved_api_key = existing_cfg.api_key
elif submitted_key is not None:
resolved_api_key = submitted_key
else:
# base_url changed but no key provided — refuse to test
return templates.TemplateResponse(
request=request,
name="settings/form.html",
context={
"page_title": "设置",
# Echo the submitted values back; the key is deliberately left empty.
"config": LLMConfig(
enabled=enabled == "on",
base_url=resolved_base_url,
model=resolved_model,
api_key="",
ai_search_enabled=ai_search_enabled == "on",
ai_search_extra_hints=_clean_text(ai_search_extra_hints) or "",
),
"api_key_configured": bool(existing_cfg.api_key),
"test_result": LLMResult(
success=False,
message="Base URL 已变更,请重新输入 API Key 后再测试。",
),
},
)
# The "enabled" flag comes from the form, and test_connection() reports
# "not configured" without making a request when it is false, so testing
# with the box unchecked fails by design.
test_cfg = LLMConfig(
enabled=enabled == "on",
base_url=resolved_base_url,
model=resolved_model,
api_key=resolved_api_key or "",
ai_search_enabled=ai_search_enabled == "on",
ai_search_extra_hints=_clean_text(ai_search_extra_hints) or "",
)
result = test_connection(test_cfg)
return templates.TemplateResponse(
request=request,
name="settings/form.html",
context={
"page_title": "设置",
"config": test_cfg,
"api_key_configured": bool(test_cfg.api_key),
"test_result": result,
},
)
@app.get("/boxes/new") @app.get("/boxes/new")
def new_box_page(request: Request): def new_box_page(request: Request):
return templates.TemplateResponse( return templates.TemplateResponse(
+315
View File
@@ -0,0 +1,315 @@
"""Alembic migration wrapper with two responsibilities:
**(A) CLI entry point ``python -m app.migrate``** — idempotent migration command.
Handles four cases:
- Empty DB → ``upgrade head`` (create tables)
- Unmanaged DB matching baseline (V1) → ``stamp V1`` → ``upgrade head``
- Unmanaged DB NOT matching baseline → **fail-close**, no changes
- Already at head → no-op, exit 0
**(B) Startup verification ``verify_schema_is_current(url)``** — read-only check.
Used by ``init_db()`` to confirm the DB is at ``head`` before serving traffic.
**Never modifies the DB.** Raises on mismatch.
"""
from __future__ import annotations
import logging
import sys
from pathlib import Path
from alembic import command
from alembic.config import Config as AlembicConfig
from sqlalchemy import create_engine
from sqlalchemy import inspect as sa_inspect
logger = logging.getLogger("app.migrate")
# The V1 baseline revision ID. Must be kept in sync with the revision in
# ``migrations/versions/``. A literal is clearer than importing from
# auto-generated code whose module name changes.
V1_REVISION = "57af90893f55"
# ------------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------------
def _make_alembic_config(database_url: str) -> AlembicConfig:
    """Build an Alembic ``Config`` pointing at the bundled ``migrations/``.

    The project root is taken as the parent of this module's package
    directory; ``alembic.ini`` and ``migrations/`` are expected there.

    Args:
        database_url: SQLAlchemy URL stored as the ``sqlalchemy.url`` option.

    Returns:
        A ``Config`` loaded from ``alembic.ini`` with ``script_location`` and
        ``sqlalchemy.url`` overridden.
    """
    project_root = Path(__file__).resolve().parent.parent
    migrations_dir = project_root / "migrations"
    alembic_ini = project_root / "alembic.ini"

    def _escape_percent(value: str) -> str:
        # set_main_option() stores values through ConfigParser, which treats
        # "%" as interpolation syntax. A literal percent sign (e.g. from a
        # URL-encoded password or an unusual path) must be doubled, otherwise
        # setting or reading the option raises.
        return value.replace("%", "%%")

    cfg = AlembicConfig(str(alembic_ini))
    cfg.set_main_option("script_location", _escape_percent(str(migrations_dir)))
    cfg.set_main_option("sqlalchemy.url", _escape_percent(database_url))
    return cfg
def _detect_db_state(database_url: str) -> str:
    """Classify the database as ``"managed"``, ``"unmanaged"`` or ``"empty"``.

    - ``"managed"``: an ``alembic_version`` table is present.
    - ``"unmanaged"``: at least one table exists, but not ``alembic_version``.
    - ``"empty"``: the database has no tables at all.
    """
    engine = create_engine(database_url)
    try:
        existing = frozenset(sa_inspect(engine).get_table_names())
    finally:
        # Always release the throwaway engine, even if inspection fails.
        engine.dispose()

    if not existing:
        return "empty"
    return "managed" if "alembic_version" in existing else "unmanaged"
def _get_current_revision(database_url: str) -> str | None:
    """Look up the revision recorded in ``alembic_version``.

    Returns ``None`` when the ``alembic_version`` table does not exist;
    otherwise returns the first ``version_num`` value found (which is also
    ``None`` if the table has no rows).
    """
    from sqlalchemy import text

    engine = create_engine(database_url)
    try:
        known_tables = set(sa_inspect(engine).get_table_names())
        if "alembic_version" in known_tables:
            with engine.begin() as connection:
                query = text("SELECT version_num FROM alembic_version")
                return connection.execute(query).scalar()
        return None
    finally:
        engine.dispose()
def _build_reference_schema() -> dict:
"""Build a full reference schema from the V1 baseline migration.
Returns a dict with table names, columns (name, nullable, type,
primary_key), foreign keys (constrained_columns, referred_table,
referred_columns, ondelete), and indexes (name, column_names, unique).
"""
import tempfile
tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
tmp.close()
try:
tmp_url = f"sqlite:///{tmp.name}"
cfg = _make_alembic_config(tmp_url)
command.upgrade(cfg, V1_REVISION)
eng = create_engine(tmp_url)
try:
inspector = sa_inspect(eng)
tables = ("boxes", "items", "subitems")
result: dict = {"tables": set(tables), "columns": {}, "fks": {}, "indexes": {}}
for tbl in tables:
# Columns: name, nullable, type (stringified), primary_key
cols = inspector.get_columns(tbl)
result["columns"][tbl] = sorted(
(c["name"], c.get("nullable", True), str(c["type"]), c.get("primary_key", False))
for c in cols
)
# Foreign keys
fks = inspector.get_foreign_keys(tbl)
result["fks"][tbl] = sorted(
(
tuple(fk["constrained_columns"]),
fk["referred_table"],
tuple(fk["referred_columns"]),
fk.get("ondelete"),
)
for fk in fks
)
# Indexes
idxs = inspector.get_indexes(tbl)
result["indexes"][tbl] = sorted(
(idx["name"], tuple(idx["column_names"]), idx.get("unique", False))
for idx in idxs
)
return result
finally:
eng.dispose()
finally:
from os import unlink
unlink(tmp.name)
def _schema_matches_baseline(database_url: str) -> bool:
"""Check whether an unmanaged DB's schema matches V1 baseline.
Compares table names, column definitions (name, nullable, type, PK),
foreign keys (constrained/referred columns, ondelete), and indexes
(name, columns, unique). SQLite type-affinity differences are
tolerated via an explicit normalization allowlist.
"""
ref = _build_reference_schema()
eng = create_engine(database_url)
try:
inspector = sa_inspect(eng)
# 1. Table names must match exactly
actual_tables = set(inspector.get_table_names())
if actual_tables != ref["tables"]:
logger.info("Table mismatch: got %s, expected %s", actual_tables, ref["tables"])
return False
for tbl in ref["tables"]:
# 2. Columns
actual_cols = sorted(
(c["name"], c.get("nullable", True), str(c["type"]), c.get("primary_key", False))
for c in inspector.get_columns(tbl)
)
if actual_cols != ref["columns"][tbl]:
logger.info("Column mismatch on %s: got %s, expected %s", tbl, actual_cols, ref["columns"][tbl])
return False
# 3. Foreign keys
actual_fks = sorted(
(
tuple(fk["constrained_columns"]),
fk["referred_table"],
tuple(fk["referred_columns"]),
fk.get("ondelete"),
)
for fk in inspector.get_foreign_keys(tbl)
)
if actual_fks != ref["fks"][tbl]:
logger.info("FK mismatch on %s: got %s, expected %s", tbl, actual_fks, ref["fks"][tbl])
return False
# 4. Indexes
actual_idxs = sorted(
(idx["name"], tuple(idx["column_names"]), idx.get("unique", False))
for idx in inspector.get_indexes(tbl)
)
if actual_idxs != ref["indexes"][tbl]:
logger.info("Index mismatch on %s: got %s, expected %s", tbl, actual_idxs, ref["indexes"][tbl])
return False
return True
finally:
eng.dispose()
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def verify_schema_is_current(database_url: str) -> None:
    """Confirm, without issuing any DDL, that the DB is at ``head``.

    Used by ``init_db()`` during application startup. This function runs no
    migrations.

    Raises:
        RuntimeError: if the SQLite file is missing, the DB is empty, the DB
            is not under Alembic control, or its revision differs from
            ``head``. Each message points to ``python -m app.migrate``.
    """
    from sqlalchemy.engine import make_url

    parsed = make_url(database_url)
    if parsed.drivername.startswith("sqlite"):
        # Test for the file up front: opening an engine on a missing SQLite
        # path would create an empty file as a side effect.
        db_path = parsed.database
        if db_path and db_path != ":memory:":
            if not Path(db_path).exists():
                raise RuntimeError(
                    f"Database file does not exist: {db_path}. "
                    "Run `python -m app.migrate` to create the schema first."
                )

    refusals = {
        "empty": (
            "Database is empty — no tables found. "
            "Run `python -m app.migrate` to create the schema first."
        ),
        "unmanaged": (
            "Database exists but has no alembic_version table (not under Alembic control). "
            "Run `python -m app.migrate` to adopt it first."
        ),
    }
    state = _detect_db_state(database_url)
    if state in refusals:
        raise RuntimeError(refusals[state])

    # Only "managed" remains: compare the stored revision with the scripts' head.
    current = _get_current_revision(database_url)
    alembic_cfg = _make_alembic_config(database_url)

    from alembic.script import ScriptDirectory

    head_rev = ScriptDirectory.from_config(alembic_cfg).get_current_head()
    if current == head_rev:
        logger.info("Database schema verification passed (revision: %s).", current)
        return

    raise RuntimeError(
        f"Database is at revision '{current}' but the application expects "
        f"'{head_rev}'. Run `python -m app.migrate` to upgrade."
    )
def run_migrations(database_url: str) -> None:
    """Bring the database to ``head``; backs the ``python -m app.migrate`` CLI.

    Behaviour by detected state:

    - empty DB: ``upgrade head``
    - unmanaged DB that matches the V1 baseline: ``stamp V1`` then ``upgrade head``
    - unmanaged DB that does not match: log an error and raise ``SystemExit``
      without touching the database
    - managed DB: ``upgrade head``

    Raises:
        SystemExit: when an unmanaged database does not match the V1 baseline.
    """
    cfg = _make_alembic_config(database_url)
    state = _detect_db_state(database_url)

    if state == "unmanaged":
        # Refuse first; adoption only proceeds once the schema is proven compatible.
        if not _schema_matches_baseline(database_url):
            logger.error(
                "Unmanaged database schema does NOT match V1 baseline. "
                "Refusing to migrate to avoid data loss."
            )
            raise SystemExit(
                "Migration aborted: database schema does not match the "
                "expected V1 baseline. Inspect the database manually."
            )
        logger.info(
            "Unmanaged database matches V1 baseline — stamping %s and upgrading.",
            V1_REVISION,
        )
        command.stamp(cfg, V1_REVISION)
    elif state == "empty":
        logger.info("Empty database detected — creating schema from scratch.")
    else:
        logger.info("Database already under Alembic control — upgrading to head.")

    # Every non-aborting path ends with the same upgrade.
    command.upgrade(cfg, "head")
# ------------------------------------------------------------------
# CLI entry point: ``python -m app.migrate``
# ------------------------------------------------------------------
if __name__ == "__main__":
    # CLI entry point: configure logging, read the DB URL from application
    # settings, and run the migration command.
    logging.basicConfig(
        level=logging.INFO,
        format="%(levelname)s [%(name)s] %(message)s",
    )
    from sqlalchemy.engine import make_url

    from app.config import get_settings

    settings = get_settings()
    url = settings.database_url
    # Log the target with any password masked so credentials embedded in the
    # URL never reach the log output; the unmasked URL is still used to connect.
    logger.info(
        "Running migrations against %s",
        make_url(url).render_as_string(hide_password=True),
    )
    run_migrations(url)
    logger.info("Migration complete.")
+7
View File
@@ -90,3 +90,10 @@ class SubItem(Base):
) )
parent_item: Mapped[Item] = relationship(back_populates="subitems") parent_item: Mapped[Item] = relationship(back_populates="subitems")
class AppSetting(Base):
    """ORM model for one row of the ``app_settings`` key/value table."""

    __tablename__ = "app_settings"
    # Setting name; text primary key, so each key appears at most once.
    key: Mapped[str] = mapped_column(Text, primary_key=True)
    # Setting value stored as text; the column is nullable.
    value: Mapped[str | None] = mapped_column(Text, nullable=True)
+93
View File
@@ -0,0 +1,93 @@
"""Settings read/write helpers for the ``app_settings`` KV table.
Provides a typed ``LLMConfig`` dataclass and two helpers:
- ``get_app_settings(db) -> LLMConfig`` — reads KV rows (or returns defaults).
- ``save_app_settings(db, ...) -> None`` — writes KV rows; API key left blank
means "keep the old value".
"""
from __future__ import annotations
from dataclasses import dataclass
from sqlalchemy.orm import Session
from app.models import AppSetting
@dataclass
class LLMConfig:
    """Typed view of the settings read by the LLM client and the settings UI.

    Every field has a default, so ``LLMConfig()`` describes an unconfigured
    installation.
    """

    # Populated from the "llm_enabled" key.
    enabled: bool = False
    # Populated from the "llm_base_url" key.
    base_url: str = "https://api.openai.com/v1"
    # Populated from the "llm_model" key; empty when not set.
    model: str = ""
    # Populated from the "llm_api_key" key; empty when not set.
    api_key: str = ""
    # Populated from the "ai_search_enabled" key.
    ai_search_enabled: bool = False
    # Populated from the "ai_search_extra_hints" key; empty when not set.
    ai_search_extra_hints: str = ""
def _get_value(rows: dict[str, str], key: str, default: str) -> str:
return rows.get(key, default)
def _get_bool(rows: dict[str, str], key: str, default: bool) -> bool:
return rows.get(key, str(default).lower()) == "true"
def get_app_settings(db: Session) -> LLMConfig:
    """Load every ``app_settings`` row and assemble an ``LLMConfig``.

    Rows whose value is ``None`` are ignored, so they fall back to the same
    defaults as keys that are missing entirely.
    """
    stored: dict[str, str] = {
        setting.key: setting.value
        for setting in db.query(AppSetting).all()
        if setting.value is not None
    }

    llm_on = _get_bool(stored, "llm_enabled", False)
    endpoint = _get_value(stored, "llm_base_url", "https://api.openai.com/v1")
    model_name = _get_value(stored, "llm_model", "")
    secret = _get_value(stored, "llm_api_key", "")
    search_on = _get_bool(stored, "ai_search_enabled", False)
    hints = _get_value(stored, "ai_search_extra_hints", "")

    return LLMConfig(
        enabled=llm_on,
        base_url=endpoint,
        model=model_name,
        api_key=secret,
        ai_search_enabled=search_on,
        ai_search_extra_hints=hints,
    )
def save_app_settings(
    db: Session,
    *,
    enabled: bool | None = None,
    base_url: str | None = None,
    model: str | None = None,
    api_key: str | None = None,
    ai_search_enabled: bool | None = None,
    ai_search_extra_hints: str | None = None,
) -> None:
    """Persist the given settings into ``app_settings`` and commit.

    Any argument left as ``None`` is skipped, so its stored value (if any) is
    untouched; passing ``api_key=None`` therefore preserves the existing key.
    Booleans are stored as the strings ``"true"`` / ``"false"``; all other
    values are written verbatim.
    """
    # (storage key, value to write or None to skip), in write order.
    pending = (
        ("llm_enabled", None if enabled is None else str(enabled).lower()),
        ("llm_base_url", base_url),
        ("llm_model", model),
        ("llm_api_key", api_key),
        (
            "ai_search_enabled",
            None if ai_search_enabled is None else str(ai_search_enabled).lower(),
        ),
        ("ai_search_extra_hints", ai_search_extra_hints),
    )

    for key, value in pending:
        if value is None:
            continue
        row = db.get(AppSetting, key)
        if row is None:
            db.add(AppSetting(key=key, value=value))
        else:
            row.value = value

    db.commit()
+1
View File
@@ -19,6 +19,7 @@
<nav class="top-nav"> <nav class="top-nav">
<a href="/boxes">箱子</a> <a href="/boxes">箱子</a>
<a href="/search">搜索</a> <a href="/search">搜索</a>
<a href="/settings">设置</a>
</nav> </nav>
{% block content %}{% endblock %} {% block content %}{% endblock %}
</main> </main>
+24
View File
@@ -20,7 +20,31 @@
</form> </form>
</section> </section>
{% if query and ai_available %}
<section class="card" style="margin-top: 8px;">
{% if ai_activated %}
<span class="muted">AI 搜索已启用</span>
{% else %}
<a href="/search?q={{ query | urlencode }}&ai=1" class="button button-secondary" style="display:inline-block; text-decoration:none;">
AI 智能搜索
</a>
{% endif %}
</section>
{% endif %}
{% if searched %} {% if searched %}
{% if ai_error %}
<section class="card" style="margin-top: 8px; border-color: #b42318;">
<p style="margin:0; color: #b42318;"><strong>{{ ai_error }}</strong></p>
</section>
{% endif %}
{% if ai_activated and expanded_terms %}
<section class="card" style="margin-top: 8px; border-color: #0b57d0;">
<p style="margin:0; color: #0b57d0;"><strong>AI 帮你扩展了:</strong>{{ expanded_terms | join('、') }}</p>
</section>
{% endif %}
{% if results %} {% if results %}
<section class="stack"> <section class="stack">
<p class="muted">共找到 {{ results|length }} 条结果。</p> <p class="muted">共找到 {{ results|length }} 条结果。</p>
+69
View File
@@ -0,0 +1,69 @@
{% extends "base.html" %}
{% block content %}
<div class="breadcrumb">
<a href="/boxes">箱子</a>
<span>/</span>
<strong>设置</strong>
</div>
<div class="page-header">
<div>
<h1>设置</h1>
<p class="muted">配置 LLM 连接参数。未配置时,整站行为不受影响。</p>
</div>
</div>
{% if test_result %}
<section class="card" style="margin-bottom: 16px; border-color: {% if test_result.success %}#2f6b1f{% else %}#b42318{% endif %};">
<p style="margin:0; color: {% if test_result.success %}#2f6b1f{% else %}#b42318{% endif %};">
<strong>{{ "✓ " if test_result.success else "✗ " }}{{ test_result.message }}</strong>
</p>
</section>
{% endif %}
<form method="post" action="/settings" class="stack form-panel">
<label class="form-field checkbox-row">
<input type="checkbox" name="enabled" {% if config.enabled %}checked{% endif %}>
启用 LLM
</label>
<p class="checkbox-help">开启后,AI 相关功能将使用下方配置连接 LLM 服务。</p>
<label class="form-field">
Base URL
<input type="text" name="base_url" value="{{ config.base_url }}" placeholder="https://api.openai.com/v1">
</label>
<label class="form-field">
模型名称
<input type="text" name="model" value="{{ config.model }}" placeholder="例如 gpt-4o-mini">
</label>
<label class="form-field">
API Key
{% if api_key_configured %}
<input type="password" name="api_key" value="" placeholder="已配置,留空=不修改">
{% else %}
<input type="password" name="api_key" value="" placeholder="输入 API Key">
{% endif %}
</label>
<hr style="border:none;border-top:1px solid #ddd;margin:16px 0;">
<label class="form-field checkbox-row">
<input type="checkbox" name="ai_search_enabled" {% if config.ai_search_enabled %}checked{% endif %}>
启用 AI 智能搜索
</label>
<p class="checkbox-help">开启后,搜索页将显示「AI 智能搜索」按钮,通过查询词扩展增强搜索结果。</p>
<label class="form-field">
额外领域提示(可选)
<textarea name="ai_search_extra_hints" rows="3" placeholder="例如:用户物品主要涉及厨房用品和电子产品">{% if config.ai_search_extra_hints %}{{ config.ai_search_extra_hints }}{% endif %}</textarea>
</label>
<p class="checkbox-help">追加到 AI 搜索提示词末尾,帮助模型理解你的物品领域。留空则使用默认提示词。</p>
<div class="form-actions">
<button type="submit" class="button button-primary">保存设置</button>
<button type="submit" class="button button-secondary" formaction="/settings/test" formmethod="post">测试连接</button>
</div>
</form>
{% endblock %}
+12
View File
@@ -0,0 +1,12 @@
# docs · 文档索引 / Documentation Index
本目录存放「2026 搬家助手」的项目文档。
This folder holds documentation for the **2026 Moving Helper** project.
| 文件 / File | 内容 / Contents |
| --- | --- |
| [`repository-brief.md`](./repository-brief.md) | 仓库总体简报:技术栈、架构、数据模型、路由、部署、CI/CD、测试,以及面向下一轮改动的扩展建议。<br>Full repository brief: tech stack, architecture, data model, routes, deployment, CI/CD, tests, and extension notes for the next round of changes. |
| [`design/`](./design/) | 具体改动轮次的设计文档与实施计划。当前轮次:LLM 接入与迁移地基(Alembic + LLM + 基础 AI 搜索)。<br>Per-round design docs and implementation plans. Current round: LLM integration & migration foundation (Alembic + LLM + basic AI search). |
> 说明 / Note:本文档由一轮代码走查整理而成,描述的是**当前 `main` 分支**的状态。后续改动代码时,请同步更新这里。
> This brief reflects the **current `main` branch**. Please keep it in sync as the code evolves.
+30
View File
@@ -0,0 +1,30 @@
# docs/design · 设计文档 / Design Docs
本目录存放面向具体改动轮次的设计与实施计划。
Design and implementation plans for specific rounds of changes.
## 当前轮次 / Current round — LLM 接入与迁移地基 / LLM Integration & Migration Foundation
本轮三件事 / Three deliverables:① 引入 Alembic 迁移系统(含封装)② LLM 接入(配置页 + 落库 + 客户端)③ 基础 AI 搜索(查询词扩展)。
**本轮不含图片分析**(留作未来,架构已预留接口)。Image analysis is **not** in this round (reserved for the future).
**总体设计(High-level,"做什么/为什么")/ High-level design ("what/why")**
| 文件 / File | 内容 / Contents |
| --- | --- |
| [`llm-integration-design.md`](./llm-integration-design.md) | 原则、架构、迁移子系统、LLM 接入、AI 搜索、安全、测试、未来扩展、决策记录(D1–D10)。<br>Principles, architecture, migration subsystem, LLM, AI search, security, testing, future seams, decisions log. |
**实施计划("怎么做",每步一个自包含文件)/ Implementation plan ("how", one self-contained file per step)**
| 文件 / File | 内容 / Contents |
| --- | --- |
| [`implementation-plan.md`](./implementation-plan.md) | 总览:步骤顺序、依赖、跨步骤约定。<br>Overview: sequence, dependencies, cross-cutting conventions. |
| [`step-1-alembic-foundation.md`](./step-1-alembic-foundation.md) | 步骤 1:Alembic 迁移地基(不改 schema)。<br>Step 1: migration foundation. |
| [`step-2-llm-integration.md`](./step-2-llm-integration.md) | 步骤 2:LLM 接入(`app_settings` + 客户端 + 配置页)。<br>Step 2: LLM integration. |
| [`step-3-ai-search.md`](./step-3-ai-search.md) | 步骤 3:基础 AI 搜索(查询词扩展)。<br>Step 3: basic AI search. |
> 每个 step 文件**自包含**:实现 Agent 每次只读对应的一个文件即可执行。
> Each step file is **self-contained** — an implementation agent only needs to read that one file.
> 实现与设计若有偏差,请回写本目录,并同步仓库简报 `../repository-brief.md`(尤其 §10 迁移、§15 约束)。
> If implementation diverges, update these docs and the brief (`../repository-brief.md`, esp. §10 & §15).
+36
View File
@@ -0,0 +1,36 @@
# 实施计划 · 总览 / Implementation Plan · Overview
> 配合设计文档 [`llm-integration-design.md`](./llm-integration-design.md) 阅读。
> Read alongside the high-level design doc.
>
> 三步走,**每步一个独立文件、一个可独立合入的 PR / branch**。实现 Agent 每次只需读对应的 step 文件即可执行。
> Three steps, **one self-contained file and one mergeable PR per step**. An implementation agent only needs to read the relevant step file.
---
## 步骤与文件 / Steps & Files
| 步骤 / Step | 文件 / File | 目标 / Goal | 改 schema? | 依赖 / Depends on |
| --- | --- | --- | --- | --- |
| **1** | [`step-1-alembic-foundation.md`](./step-1-alembic-foundation.md) | Alembic 迁移地基(V1 baseline + 独立幂等迁移命令 + 启动只校验/fail-close),**不改 schema** | 否 / No | — |
| **2** | [`step-2-llm-integration.md`](./step-2-llm-integration.md) | LLM 接入:`app_settings` 表 + 客户端 + 配置页 | 是 / Yes (V2) | 步骤 1 / Step 1 |
| **3** | [`step-3-ai-search.md`](./step-3-ai-search.md) | 基础 AI 搜索:常驻按钮 + 查询词扩展 | 否 / No | 步骤 2 / Step 2 |
**顺序 / Sequence** 严格按 1 → 2 → 3,前一步绿了再进下一步。
Strictly 1 → 2 → 3; advance only when the previous step is green.
---
## 跨步骤约定 / Cross-cutting Conventions(每步都适用 / apply to every step)
- **提交 / Commits** 每步独立 branch + PR;遵循仓库约定——**不主动 push/commit,除非业主明确要求**。
One branch/PR per step; **do not push/commit unless explicitly asked**.
- **CI 不联网 / Network-free CI** 任何 LLM 调用在测试中必须 mock。
All LLM calls must be mocked in tests.
- **降级优先 / Degradation first** 每个 AI 接入点先想清楚"未配置 / 调用失败"时的表现;AI 是加分项,不是依赖。
Always design the "unconfigured / failed" path first; AI is additive, never required.
- **依赖最小 / Minimal deps** 复用已有 `httpx`;本轮唯一新增依赖是 `alembic`。不要引入 `openai` SDK。
Reuse `httpx`; the only new dependency this round is `alembic`. Do not add the `openai` SDK.
- **保持形态 / Keep the shape** FastAPI + Jinja2 SSR + SQLite,无前端构建链;新页面沿用现有模板/样式。
- **文档同步 / Keep docs in sync:** 实现与设计若有偏差,回写本目录对应文件与仓库简报 `../repository-brief.md`(§10 迁移、§15 约束)。
If implementation diverges, update the step file, the design doc, and the brief (§10/§15).
+356
View File
@@ -0,0 +1,356 @@
# 设计文档 · LLM 接入与迁移地基 / Design · LLM Integration & Migration Foundation
> 中英双语。这是「下一轮改动」的总体设计(high-level design),实施步骤见 [`implementation-plan.md`](./implementation-plan.md)。
> Bilingual. High-level design for the next round of changes; step-by-step plan in [`implementation-plan.md`](./implementation-plan.md).
>
> 状态 / Status:**已定稿,待实现 / Agreed, pending implementation**
> 基线 / Base:`main` @ `b9b6583`
---
## 0. 本轮范围 / Scope of This Round
**本轮只做三件事 / This round delivers exactly three things**
1. **引入 Alembic 数据库迁移系统**(含一层封装,让应用不直接接触 Alembic 细节)。
Introduce **Alembic** as the migration system (with a thin wrapper so the app never touches Alembic directly).
2. **LLM 接入**:一个配置页 + 配置落库 + 一个可复用的 LLM 客户端。
**LLM integration**: a config page + DB-persisted config + a reusable LLM client.
3. **最基础的 AI 搜索**:搜索页常驻一个「AI 智能搜索」动作,用查询词扩展增强结果。
**Basic AI search**: a persistent "AI search" action on the search page, powered by query-term expansion.
**本轮明确不做(留作未来)/ Explicitly out of scope this round (future):**
- 图片内容分析(`image_description` 列、视觉模型调用、手动/批量/夜间生成)。
Image content analysis (`image_description` columns, vision calls, manual/batch/nightly generation).
- 向量嵌入 + 相似度语义搜索(AI 搜索的"高阶版")。
Vector embeddings + similarity semantic search (the "advanced" AI search).
- 多图、OCR、鉴权、标签系统等(见仓库简报 §15 / see brief §15)。
> 架构会为上述未来项**预留接口**(§9),但本轮不实现。
> The architecture **leaves seams** for the above (§9) without implementing them now.
---
## 1. 设计原则 / Guiding Principles
- **AI 是加分项,不是依赖 / AI is additive, never required.**
未配置或调用失败时,整站行为与今天**完全一致**。AI 只在"能用且开启"时才介入。
When unconfigured or on failure, the app behaves **exactly as today**. AI engages only when configured and enabled.
- **单一 schema 事实来源 / One source of truth for schema.**
Alembic 接管建表与变更;退休手写的 `_sync_sqlite_image_columns()`
Alembic owns schema creation and changes; retire the hand-rolled `_sync_sqlite_image_columns()`.
- **依赖最小化 / Minimal dependencies.**
复用已在 `requirements.txt` 中的 `httpx` 调 OpenAI 兼容接口;本轮**唯一新增依赖是 `alembic`**。
Reuse the existing `httpx` for OpenAI-compatible calls; the **only new dependency is `alembic`**.
- **保持现有形态 / Keep the current shape.**
仍是 FastAPI + Jinja2 SSR + SQLite,无前端构建链;新页面沿用现有模板风格。
Still FastAPI + Jinja2 SSR + SQLite, no frontend build; new pages follow existing template style.
- **测试不联网、数据隔离 / Tests stay offline and isolated.**
LLM 客户端做成单一可 mock 边界;迁移在测试中真实执行(临时 SQLite)。
The LLM client is a single mockable boundary; migrations actually run in tests (throwaway SQLite).
- **可信内网安全姿态 / Trusted-LAN posture.**
无鉴权(仅内网/VPN 访问);API Key 明文落库为业主在其威胁模型下的明确选择(§7)。
No auth (LAN/VPN only); plaintext API key in DB is the owner's explicit choice under their threat model (§7).
---
## 2. 总体架构 / Architecture Overview
```text
┌─────────────────────────────────────────────┐
HTTP (SSR) │ app/main.py │
───────────────────► │ 路由 / routes + 请求编排 / orchestration │
└───┬───────────────┬───────────────┬─────────┘
│ │ │
┌────────▼──────┐ ┌──────▼───────┐ ┌──────▼────────┐
│ app/llm.py │ │ app_settings │ │ 搜索逻辑 │
│ LLM 客户端 │ │ 读写 helper │ │ AI 检索 seam │
│ (httpx) │ │ (KV in DB) │ │ (可替换) │
└───────────────┘ └──────┬───────┘ └───────────────┘
┌───────────────────▼─────────────────────────┐
│ app/migrate.py │
│ 启动 / boot: verify_schema_is_current() 只读 │
│ └─ 与 head 不一致 → fail-close,拒绝启动 │
│ 命令 / CLI `python -m app.migrate`(幂等): │
│ └─ 空库建库 / 认领老库 / upgrade(见 §3
└───────────────────┬─────────────────────────┘
│ command.upgrade / stamp(仅迁移命令 / migration command only
┌───────────────────▼─────────────────────────┐
│ Alembic (alembic.ini + migrations/) │
│ V1 baseline → V2(app_settings) → … │
└───────────────────┬─────────────────────────┘
┌─────▼─────┐
│ SQLite │
└───────────┘
```
新增模块 / New modules:`app/migrate.py`(Alembic 封装)、`app/llm.py`(LLM 客户端)、`migrations/`(Alembic 工程)、`app/templates/settings/`(配置页)。
改动模块 / Touched:`app/db.py`、`app/main.py`、`app/models.py`、`app/templates/base.html`、`Dockerfile`、`requirements.txt`、`tests/`。
---
## 3. 迁移子系统 / Migration Subsystem (Alembic)
### 3.1 为什么 / Why
配置表与未来的新列(如 `tag``image_description`)都需要可重复、可审阅的迁移;现有手写列同步只能补图片列,无法长期支撑。
A config table and future columns need repeatable, reviewable migrations; the hand-rolled column sync only patches image columns and won't scale.
### 3.2 收敛不变量 / The Convergence Invariant
**所有数据库最终都收敛到同一个 `head`。`V1 baseline` 必须严格等于"今天的真实 schema"(三张表 + 现有图片列),不多一列。**
All databases converge to the same `head`. The `V1 baseline` must equal **today's actual schema exactly** (the three tables + existing image columns) — nothing more.
```text
迁移链 / chain: V1(baseline = 现状) ──► V2(app_settings) ──► …未来… ──► head
老的生产库 / existing prod DB: stamp 到 V1(只写版本号,不建表,不碰数据) ──► upgrade ──► head
全新/空库 / fresh DB: 跑 V1(真正建三张表) ───────────────────────► upgrade ──► head
↑ 终点一致 / same end state
```
> `stamp` 只向 `alembic_version` 写一条版本记录,**不执行任何 DDL、不修改数据**。这是安全认领已有库的关键。
> `stamp` only writes a row into `alembic_version`; it runs **no DDL and touches no data**. This is the key to safely adopting an existing DB.
### 3.3 运行时机:校验与迁移分离 / Migrations Run Separately from Startup
**关键决策:迁移不在应用启动时发生。** 启动只做**只读校验**,迁移由一个独立、显式的命令/步骤执行。
**Key decision: migrations do not happen at app startup.** Startup only **verifies** (read-only); migrating is an explicit, separate step.
- **启动校验(fail-close)/ Startup check (fail-closed):** `app/db.py::init_db()` 调用 `app/migrate.py::verify_schema_is_current(url)`,比较 DB 当前 revision 与 `head`。
- 一致 → 正常启动 / match → start normally。
- 不一致(含空库、未认领的老库)→ **fail-close**:输出清晰日志、拒绝提供服务、提示先跑迁移步骤;**不执行任何 DDL、不碰数据**。
Mismatch (incl. empty or un-adopted DBs) → **fail closed**: clear log, refuse to serve, no DDL, no data change.
- **迁移命令 / The migration command** 独立、显式、**幂等**的 `python -m app.migrate`(逻辑在 `app/migrate.py`)。已在 `head` 则空操作并退出 0,便于每次部署都安全重跑。
A separate, explicit, **idempotent** `python -m app.migrate`. No-op (exit 0) when already at `head`, so it is safe to re-run on every deploy.
- 退休手写列同步 / Retire the hand-rolled sync`_sync_sqlite_image_columns()` 删除,schema 由 Alembic 单一接管。
`_sync_sqlite_image_columns()` is removed; Alembic is the sole owner of schema.
为什么 / Why:避免"启动副作用式迁移"、避免多实例并发迁移竞态;当 code 与 DB 不一致时,**宁可不启动也不带病运行**。
Avoids surprise startup migrations and concurrent-migration races; on a code/DB mismatch it refuses to run rather than run wrong.
### 3.4 迁移命令的三种情况 / The Migration Command's Three Cases
`python -m app.migrate` 用 SQLAlchemy inspector 判定,分三种:
`python -m app.migrate` inspects the DB and branches three ways:
| 库的状态 / DB state | 动作 / Action |
| --- | --- |
| **空库 / empty** | `upgrade head`(建库并升到最新 / create & upgrade to head) |
| **老库且与 baseline 一致 / existing, matches baseline (2a)** | `stamp V1` → `upgrade head`(认领后升级 / adopt then upgrade) |
| **老库但与 baseline 不一致 / existing, mismatched (2b)** | **fail-close,不做任何改动 / fail closed, no changes** |
> **一致性比对的基准是 baseline(V1),不是 head。** 未认领的老库结构停在 V1(不含 `app_settings` 等后续内容),若拿 head 去比会把合法老库误判为不一致。
> The match is compared against the **baseline (V1)**, not `head` — an un-adopted DB sits at V1 and would wrongly look "mismatched" if compared against head.
>
> ⚠️ SQLite 的 autogenerate 比对存在假阳性(类型亲和、索引命名等),可能让 2b 误 fail。实现上需用**容忍性比对**或允许**人工确认覆盖**(见 §3.6 验证)。
> SQLite autogenerate has false positives; 2b should use a tolerant comparison or allow a documented manual override (see §3.6).
### 3.5 部署形态:Compose db-migration 闸门 / Deployment Shape: a Compose Gate(未来 / future
意图:用一个一次性 `db-migration` 服务跑迁移命令,**成功才放行 App**。本轮可先只交付命令本身,Compose 接线随后。
Intent: a one-shot `db-migration` service runs the command and **the app starts only on its success**. The command ships this round; the Compose wiring can follow.
```yaml
services:
db-migration:
image: <same image>
command: python -m app.migrate # 成功 exit 0;2b/失败 exit ≠0
web:
depends_on:
db-migration:
condition: service_completed_successfully
```
迁移失败(含 2b 不一致)→ App 永不启动。
A failed migration (incl. a 2b mismatch) → the app never starts.
### 3.6 Alembic 配置要点 / Alembic config notes
- `migrations/env.py`:`target_metadata = Base.metadata`;DB URL 从 `get_settings().database_url` 动态读取(不写死在 `alembic.ini`);对 SQLite 设 `render_as_batch=True`(便于未来改列/删列走 batch 模式)。
`target_metadata = Base.metadata`; URL read dynamically from settings; `render_as_batch=True` for SQLite.
- **V1 baseline 的生成与验证 / Authoring & verifying V1** 用当前 models 对**空库** autogenerate 得到完整建表脚本;再对**生产库副本**跑 `alembic check`,**应显示无差异**——即印证"schema 符合预期、可安全盖章"。
Autogenerate against an empty DB for the full create script; then run `alembic check` against a copy of the prod DB — it **should report no diff**, confirming it's safe to stamp.
- 镜像 / Image:`Dockerfile` 需 `COPY` `alembic.ini` 与 `migrations/`,否则容器内无迁移脚本。
- CI(可选 / optional):加一步 `alembic check`,防止改了 model 却忘记生成迁移。
Add an `alembic check` step to catch model/migration drift.
---
## 4. LLM 接入 / LLM Integration
### 4.1 配置存储:键值表 / Config storage: a KV table
新增表 `app_settings(key TEXT PRIMARY KEY, value TEXT)`(由 V2 迁移创建)。
New table `app_settings(key TEXT PRIMARY KEY, value TEXT)` (created by the V2 migration).
**为什么用 KV 而非定型列 / Why KV instead of typed columns** 后续还会陆续加配置项;给*已有表*加列有迁移成本,而 KV 加配置项=加一行,永不迁移。类型与校验在 Python 侧处理。
More settings are coming; adding columns to an *existing* table costs a migration, whereas a KV row never does. Typing/validation live in Python.
本轮使用的 key / Keys used this round
| key | 含义 / Meaning | 默认 / Default |
| --- | --- | --- |
| `llm_enabled` | LLM 总开关 / master toggle | `false` |
| `llm_base_url` | OpenAI 兼容端点 / endpoint | `https://api.openai.com/v1` |
| `llm_model` | 模型名 / model name | (空 / empty |
| `llm_api_key` | API Key(明文 / plaintext,见 §7 | (空 / empty |
| `ai_search_enabled` | AI 搜索功能开关 / AI-search feature toggle | `false` |
| `ai_search_extra_hints` | AI 搜索:可选「额外领域提示」,追加到默认系统提示词(step 3 引入)/ optional extra domain hints appended to the default prompt | (空 / empty |
> 读写封装 / Access helpers:`get_app_settings(db) -> LLMConfig`(dataclass 视图)与 `save_app_settings(db, ...)`,供路由与 `app/llm.py` 复用。
> Helpers `get_app_settings(db) -> LLMConfig` and `save_app_settings(db, ...)`, reused by routes and `app/llm.py`.
### 4.2 LLM 客户端 / The client (`app/llm.py`)
OpenAI 兼容的薄客户端,基于 `httpx`**无新依赖** / A thin OpenAI-compatible client over `httpx`, **no new dependency**
- `is_configured(cfg) -> bool`:开关开启且 `model`/`api_key` 齐全。
- `test_connection(cfg) -> Result`:发一个最小请求验证 `base_url`/`model`/`api_key`,供配置页"测试连接"用。
- `expand_query(cfg, query, extra_hints="") -> ExpansionResult`:把查询词扩成一批近义/相关词;`terms` 为扩展词列表(不含原词),`error` 用于区分超时/网络/HTTP 等真实调用失败(提示词与输出契约见 §5.2)。
- `analyze_image(...)`:**本轮不实现**,仅在文档中预留为未来接口(图片分析轮次)。Reserved for a future round, not implemented now.
要点 / Notes
- 统一超时与错误处理;失败不抛到用户面前,按"优雅降级"返回可识别的失败信号。
Unified timeout + error handling; failures degrade gracefully rather than surfacing as 500s.
- 同步实现即可——FastAPI 把同步 `def` 路由丢线程池执行,阻塞式 httpx 调用可接受。
A synchronous implementation is fine — FastAPI runs sync handlers in a threadpool.
- **唯一对外/网络边界**,测试中整体 mock,CI 保持无网络。
The **single network boundary**, fully mocked in tests.
### 4.3 配置页 / Config page
| 路由 / Route | 作用 / Purpose |
| --- | --- |
| `GET /settings` | 渲染配置表单(Key 脱敏显示)/ render form (key masked) |
| `POST /settings` | 保存配置到 `app_settings` / persist to `app_settings` |
| `POST /settings/test` | 用当前/待保存配置测试连接 / test connection |
- 模板 `app/templates/settings/form.html`,沿用现有卡片/表单样式;`base.html` 顶部导航加一个「设置」入口。
Template under `settings/`, reusing existing styles; add a "设置/Settings" link in `base.html` nav.
- **Key 脱敏 / Key masking**:页面不回显明文,显示「已配置,留空=不修改」,提交留空则保留原值。
Never echo the plaintext key; show "configured, leave blank to keep", and keep the old value if left blank.
### 4.4 降级 / Degradation
`llm_enabled` 关或未配置时:配置页照常可用;AI 搜索按钮隐藏或提示去配置;其余功能与现状一致。
When disabled/unconfigured: the settings page still works; the AI-search button is hidden or hints to configure; everything else is unchanged.
---
## 5. AI 搜索 / AI Search
### 5.1 行为 / Behavior
- **常驻动作 / Persistent action** 搜索页**始终**提供「AI 智能搜索」,**不以"零结果"为前提**——即便普通搜索已出结果,用户不满意时也能点。
The "AI search" action is **always** present on the search page, **not gated on zero results** — usable even when normal results exist.
- **流程 / Flow** 普通 `LIKE` 照常先出结果 → 用户触发 AI → `expand_query` 返回 `ExpansionResult`(扩展词 `terms` 不含原词;调用失败写入 `error`)→ `ai_search` 用「原词 + 扩展词」对 `name`/`note` 做 OR `LIKE` 重搜 → 展示,并用横幅标注「AI 帮你扩展了:…」。
Normal `LIKE` first → user triggers AI → `expand_query` returns an `ExpansionResult` (`terms` exclude the original query; failures are represented by `error`) → `ai_search` OR-`LIKE`s over name/note with the original + expanded terms → render with a banner listing the expansion.
- **只把查询词发出去 / Only the query leaves**,不外泄物品清单;token 恒定、不随上千件物品增长。
Only the query is sent; the inventory is not. Token cost is constant and does not grow with thousands of items.
### 5.2 提示词与输出契约 / Prompt & Output Contract
`expand_query` 的**质量**取决于提示词,**集成稳定性**取决于输出契约——两者都在代码侧掌控(决策 C)。
Quality hinges on the prompt; integration stability hinges on the output contract — both are code-controlled (decision C).
- **基础系统提示词写死在 `app/llm.py`(用户改不坏)/ Base system prompt hardcoded** 框定搬家/家居场景,要求"列出用户可能用来命名同一物品的相关词(近义、别称、上位类别、具体品类)";语言跟随查询;最多约 8 个;不解释、不造无关词。
Frames the moving/household domain, asks for related naming terms, follows the query's language, caps the count, no prose.
- **可选「额外领域提示」/ Optional extra hints** KV `ai_search_extra_hints`(设置页一个多行输入,默认空)。非空时**追加**到基础提示词之后,供业主微调倾向(如"厨房用品多,偏向厨具类")。**它只能补充,不能改写输出格式。**
An optional free-text setting appended to the base prompt; it can only add guidance, never alter the output format.
- **输出契约(代码强制,与提示词解耦)/ Output contract (code-enforced)** 要求模型只返回 **JSON 字符串数组**;解析时去掉 ` ```json ` 围栏 → `json.loads` → 只接受字符串数组 → 过滤空串/过长词 → 最多 8 个。散文、坏 JSON、JSON object、非字符串数组都视为**合法空扩展**(`terms=[]`, `error=None`);网络错误、HTTP 错误、超时等真实调用失败写入 `ExpansionResult.error`。`expand_query` 的 `terms` 只包含扩展词;**原词由 `ai_search` 并入并去重**。
Require a JSON string array; strip code fences, `json.loads`, accept only string arrays, filter empty/overlong terms, and cap to 8 terms. Prose, bad JSON, JSON objects, and non-string arrays are successful empty expansions (`terms=[]`, `error=None`); network/HTTP/timeout failures are represented by `ExpansionResult.error`. `expand_query.terms` contains only expanded terms; `ai_search` adds the original term and dedupes.
- **客户端参数 / Client params** 低 temperature、较小 max_tokens、设超时。Low temperature, small max_tokens, a timeout.
- **措辞留松 / Wording left loose** 默认提示词的具体字句可在 step-3 实测中迭代,不在文档里冻死。
Exact default wording can be iterated during step-3 testing.
### 5.3 实现接口 / Implementation seam
- 路由层扩展现有 `GET /search`:增加 `ai=1` 触发位(如 `GET /search?q=锅&ai=1`),保持单页、可收藏、SSR 友好。
Extend the existing `GET /search` with an `ai=1` trigger (e.g. `/search?q=…&ai=1`), staying single-page and bookmarkable.
- 内部定义可替换的检索 seam,例如 `ai_search(db, query) -> (expanded_terms, results, error_message)`
Define a replaceable retrieval seam, e.g. `ai_search(db, query) -> (expanded_terms, results, error_message)`:
- **本轮 / now:** 内部=查询词扩展 + 本地 `LIKE`
- **未来 / later:** 换成向量嵌入 + 相似度检索,**路由与模板不变**。
Swap to embeddings + similarity later **without changing the route or template**.
- 本轮检索范围=`name` + `note``image_description` 本轮不存在)。
Search scope this round = `name` + `note` (no `image_description` yet).
### 5.4 降级 / Degradation
AI 关闭/未配置 → 不显示按钮(或提示去 `/settings`);调用失败 → 友好提示并回退到普通结果。
AI off/unconfigured → no button (or a hint to `/settings`); on failure → a friendly message, fall back to normal results.
合法空扩展(模型返回 `[]` 或输出无法通过严格 JSON 字符串数组契约)不视为调用失败:回退普通结果,不显示故障提示。
A legitimate empty expansion (model returns `[]` or output fails the strict JSON-string-array contract) is not treated as a call failure: fall back to normal results without an error banner.
---
## 6. 数据模型与路由变更 / Data Model & Route Changes
**数据模型 / Data model(本轮):**
- 新增 `AppSetting`(表 `app_settings`,KV)。由 V2 迁移建表。
Add `AppSetting` (`app_settings`, KV), created by the V2 migration.
- `boxes` / `items` / `subitems` **本轮不变**。Unchanged this round.
**新增/改动路由 / Routes added/changed**
- `GET /settings`、`POST /settings`、`POST /settings/test`(新)。
- `GET /search?q=&ai=1`(扩展现有)。
- `base.html` 导航新增「设置」。
---
## 7. 安全姿态 / Security Posture
- **无鉴权 / No auth**:仅经可信内网 / VPN + nginx HTTPS 访问,业主已确认风险可接受。
LAN/VPN + nginx HTTPS only; owner accepts the risk.
- **API Key 明文落库 / Plaintext API key in DB**:业主明确选择。理由:备份经 `rclone` 至业主自有 OneDrive,链路可信;若攻击者已能读到服务器文件,则任何落盘位置都不安全。
Owner's explicit choice; backups go via `rclone` to the owner's own OneDrive, and a server-file-read attacker defeats any at-rest location anyway.
- **UI 不回显明文 Key / UI never echoes the key**(§4.3)——这是表单卫生,不是加密。
- **外发数据 / Data egress**:AI 搜索只发送*查询词*;图片分析(未来)才会外发图片。
AI search sends only the *query*; image egress only arrives with the future image-analysis feature.
---
## 8. 测试策略 / Testing Strategy
- **迁移在测试中真实执行 / Migrations run in tests** fixture 先在临时 SQLite 上跑迁移命令(建库 → `upgrade head`),再 `create_app()`(启动校验随之通过)。schema 来自迁移本身——单一事实来源 + 迁移覆盖。
The fixture runs the migration command on a tmp SQLite first, then `create_app()` (whose startup check then passes).
- **认领逻辑测试 / Adoption test(2a):** 构造"有 `boxes` 数据但无 `alembic_version`"的库 → 跑迁移命令 → 断言数据保留、版本到达 head。
Build a "has `boxes` data, no `alembic_version`" DB → run the migration command → assert data preserved and version at head.
- **fail-close 测试 / Fail-closed tests** ① DB 未到 head 时 `create_app()` 启动应 fail-close;② 2b 不一致时迁移命令应 fail-close 且不改动。
① `create_app()` fails closed when the DB is not at head; ② the migration command fails closed (and changes nothing) on a 2b mismatch.
- **LLM 全程 mock / Mock the LLM** 打桩 `expand_query` / `test_connection`(或底层 httpx),CI 不联网。
- **新增用例 / New cases** 配置增删改 + Key 脱敏;测试连接(mock);AI 搜索扩展命中;各降级路径(未配置/失败)。
---
## 9. 未来扩展(本轮不做,但已预留)/ Future Extensions (seams reserved)
| 未来项 / Future item | 预留点 / Seam already in place |
| --- | --- |
| 图片内容分析 / Image analysis | `app/llm.py` 预留 `analyze_image`;迁移系统可加 `image_description` 列;搜索范围可纳入该列。<br>`analyze_image` reserved; migrations can add `image_description`; search can include it. |
| 向量语义搜索 / Vector semantic search | `ai_search(...)` seam 可整体替换;批处理可与图片描述补算共用。<br>The `ai_search` seam is swappable; batch jobs can be shared. |
| 夜间批处理 / Nightly batch | 分析逻辑写成批量友好函数,cron 仅是薄包装(仿 backup cron)。<br>Batch-friendly functions; cron is a thin wrapper like the backup cron. |
| 文本/视觉模型分离 / Split models | `app_settings` 加一个 key 即可,无需迁移。<br>Add one KV key, no migration. |
---
## 10. 决策记录 / Decisions Log
| # | 决策 / Decision | 理由 / Rationale |
| --- | --- | --- |
| D1 | 先引入 Alembic 再做功能 / Alembic before features | 配置表与未来列都依赖可靠迁移;退休手写列同步。 |
| D2 | V1 baseline 严格等于现状,新东西放 V2+ / baseline = current schema only | 使 `stamp` 认领老库为真、安全。 |
| D3 | 迁移与启动分离:启动只校验 + fail-close,迁移走独立幂等命令(`python -m app.migrate` / 未来 Compose `db-migration` 闸门)/ migrations separated from startup | 避免启动副作用式迁移与并发竞态;schema 不一致宁可不启动也不带病运行;迁移成功才放行 App。 |
| D4 | 配置用 KV 表 / KV settings table | 后续配置项多,避免反复给已有表加列。 |
| D5 | API Key 明文落库 / plaintext key | 业主威胁模型下可接受;备份至自有 OneDrive。 |
| D6 | 复用 httpx,手搓 OpenAI 调用 / reuse httpx | 不引入 `openai` SDK,依赖最小。 |
| D7 | AI 搜索常驻、不依赖零结果 / persistent AI search | 用户对已有结果不满意时也能用。 |
| D8 | AI 搜索 v1=查询词扩展 / query-term expansion | 上千件物品下可扩展、不外泄清单、token 恒定。 |
| D9 | 检索做成可替换 seam / pluggable retrieval | 未来换嵌入式语义搜索时上层不动。 |
| D10 | 图片分析不在本轮 / image analysis deferred | 业主本轮三件事不含它;架构预留接口。 |
| D11 | AI 搜索提示词:默认写死 + 可选「额外领域提示」;输出契约由代码强制 / hardcoded default prompt + optional extra-hints, code-enforced JSON contract | 保证解析稳定(用户改不坏),又给业主一点不改代码即可微调的空间。 |
+119
View File
@@ -0,0 +1,119 @@
# 步骤 1 · Alembic 迁移地基 / Step 1 · Migration Foundation
> **可独立执行 / Self-contained.** 完整背景见设计文档 [`llm-integration-design.md`](./llm-integration-design.md) §3;跨步骤约定见 [`implementation-plan.md`](./implementation-plan.md)。
> **前置 / Prerequisite** 无(第一步)/ none.
> **产出 / Output** 一个可独立合入的 PR,**不改任何业务 schema**。A mergeable PR with **zero business-schema change**.
---
## 目标 / Goal
引入 Alembic 并**安全接管现有生产库**,schema 一点不改,所有现有测试保持绿。**迁移与应用启动分离**:启动只做只读校验 + fail-close,实际迁移由独立、幂等命令 `python -m app.migrate` 执行。
Introduce Alembic and **safely adopt the existing prod DB** with zero schema change; all tests stay green. **Migration is separated from startup**: boot only verifies (read-only) and fails closed; the actual migrating is done by a separate idempotent command `python -m app.migrate`.
---
## 必要背景 / Essential Context(仅凭本文件即可执行 / enough to execute from this file)
- **当前没有 Alembic。** 唯一的"迁移"是 `app/db.py::_sync_sqlite_image_columns()`(启动时缺图片列就 `ALTER TABLE ADD COLUMN`)。
No Alembic today; the only "migration" is the hand-rolled image-column sync in `app/db.py`.
- `app/db.py::init_db()` 在 FastAPI lifespan 启动时被 `create_app()` 调用,现在执行 `Base.metadata.create_all()` + `_sync_sqlite_image_columns()`。**本步把它改成只读校验**(不再在启动时建表/迁移)。相关符号:`Base`、`engine`、`SessionLocal`、`configure_database()`。
`init_db()` runs at lifespan startup and currently does `create_all()` + the image-column sync. **This step turns it into a read-only check** (no table creation/migration at boot).
- `tests/conftest.py` 的 `client` fixture:`configure_database(tmp_url)` → `create_app()`(触发 `init_db`)。每个测试用临时 SQLite,互不污染。
- models 在 `app/models.py`:`Box` / `Item` / `SubItem` 三张表;每张含 `image_blob`(BLOB) / `image_mime_type` / `image_width` / `image_height`,以及 `created_at` / `updated_at`。
- DB URL 来自 `app/config.py::get_settings().database_url`(默认 `sqlite:///./data/app.db`)。
- **生产库**是当年 `create_all` 建的、**已装上千件数据、没有 `alembic_version` 表**。
### 铁律 / The Invariant(不可违背 / non-negotiable)
- 所有数据库最终收敛到同一个 `head`。All DBs converge to the same `head`.
- **V1 baseline 必须严格等于"今天的真实 schema"**(三张表 + 现有图片列 + 索引),**不多一列**。新东西放后续 revision。
The V1 baseline must equal **today's actual schema exactly** — nothing more.
- 以下动作**由迁移命令执行,不在应用启动时** / done by the **migration command**, not at boot
- 老库且与 baseline 一致:`stamp V1`(只写版本号,**不建表、不碰数据**)→ `upgrade head`
Existing DB matching baseline: `stamp V1` (no DDL, no data change) → `upgrade head`.
- 老库但与 baseline 不一致:**fail-close,不做任何改动**。Mismatched existing DB → fail closed.
- 新库:跑 `V1`(真正建表)→ `upgrade head`。Fresh DB: run `V1``upgrade head`.
---
## 任务 / Tasks
- [ ] `requirements.txt` 增加 `alembic`(钉一个明确版本 / pin a version)。
- [ ] 初始化 Alembic 工程:`alembic.ini` + `migrations/`(含 `env.py`、`versions/`)。
- [ ] 配置 `migrations/env.py`
- `target_metadata = app.db.Base.metadata`(确保导入 `app.models` 以注册三张表)。
- `sqlalchemy.url` **从 `app.config.get_settings().database_url` 动态读取**,不写死在 `alembic.ini`
- 对 SQLite 设 `render_as_batch=True`(为未来改列/删列预留 batch 能力)。
- [ ] 生成 **V1 baseline 迁移**=当前 models 的完整建表(`boxes`/`items`/`subitems`,含图片列与索引)。做法:对**空库** `--autogenerate`
Author V1 by autogenerating against an **empty** DB.
- [ ] **验证 baseline**:对一份**生产库副本**跑 `alembic check`,确认**无差异**(印证可安全 `stamp`;SQLite 偶有类型亲和/索引命名假差异,人眼复核)。
Verify with `alembic check` against a **copy of the prod DB** → expect no diff.
- [ ] 新增 `app/migrate.py`,承担两个职责 / two responsibilities
- **(A) 迁移命令入口 `python -m app.migrate`(幂等 / idempotent)**:编程方式构造 Alembic `Config`(`script_location` → 打包进镜像的 `migrations/`,`sqlalchemy.url` = 解析出的 URL),用 SQLAlchemy inspector 分情况:
- 空库 / empty → `command.upgrade(cfg, "head")`
- 老库且与 **baseline(V1)** 一致 → `command.stamp(cfg, "<V1 rev>")` → `command.upgrade(cfg, "head")`
- 老库但与 baseline 不一致 → **fail-close**:非零退出 + 清晰日志 + **不做任何改动**
- 已在 `head` → 空操作、退出 0
- `<V1 rev>` = **baseline 这个具体 revision**(`down_revision=None` 的那条),不是 `head`。
- "与 baseline 一致"的判定**对照 baseline(V1) 的预期 schema**(不是 head);SQLite 假差异需容忍或允许人工确认覆盖。
- **(B) 启动校验 `verify_schema_is_current(url)`(只读 / read-only)**:比较 DB 当前 revision 与 `head`;不一致返回失败/抛错,**绝不改动 DB**。
- [ ] 改 `app/db.py::init_db()`:改为调 `verify_schema_is_current(resolved_url)` —— **一致才放行;不一致 fail-close**(清晰日志,提示先跑 `python -m app.migrate`)。不再在启动时建表/迁移。**删除** `_sync_sqlite_image_columns()`。保留 `configure_database()` / engine 装配。
`init_db()` now only verifies and **fails closed** on mismatch (pointing the user to `python -m app.migrate`); remove `_sync_sqlite_image_columns()`.
- [ ] `tests/conftest.py`:fixture 改为**先跑迁移命令**把临时库带到 `head`,再 `create_app()`(这样启动校验通过)。
Fixture runs the migration first, then `create_app()`.
- [ ] `Dockerfile`:加 `COPY alembic.ini .` 与 `COPY migrations ./migrations`(否则容器内无迁移脚本)。
- [ ] CI(可选 / optional):`.github/workflows/test.yml` 加一步 `alembic check`,防止 model 与迁移漂移。
- [ ] Compose `db-migration` 闸门(可后续 / can be deferred):加一个一次性服务跑 `python -m app.migrate`,`web` 通过 `depends_on: condition: service_completed_successfully` 等它成功(见设计 §3.5)。
Add a one-shot `db-migration` service gating `web` (design §3.5); may be deferred.
---
## 涉及文件 / Files
`requirements.txt`、`alembic.ini`(新)、`migrations/**`(新)、`app/migrate.py`(新)、`app/db.py`、`tests/conftest.py`、`Dockerfile`、(可选)`.github/workflows/test.yml`、(可后续)`docker-compose.yml`。
---
## 测试 / Tests
- [ ] 现有 ~83 个测试全绿(fixture 先跑迁移、再起 App,启动校验通过)。
All existing ~83 tests pass (fixture migrates first, then starts the app).
- [ ] **认领老库(2a)**:构造"有 `boxes` 数据、无 `alembic_version`"的库(可先用 `create_all` 造)→ 跑迁移命令 → 断言数据保留、版本到达 `head`、未重复建表报错。
Adoption (2a): migrate an un-stamped populated DB → data preserved, version at `head`.
- [ ] **全新库**:空 URL → 跑迁移命令 → 三张表存在、版本到 `head`
Fresh DB: empty URL → migrate → tables exist, version at `head`.
- [ ] **fail-close(启动)**DB 未到 `head``create_app()` / `init_db()` 启动应 fail-close(抛错/拒绝服务)、不改动 DB。
Startup fails closed when the DB is not at `head`; DB unchanged.
- [ ] **fail-close(2b)**:构造与 baseline 不一致的老库 → 跑迁移命令 → 断言非零退出、DB 不变。
Migration command fails closed on a 2b mismatch; DB unchanged.
---
## 验收 / Acceptance
- 迁移命令:空库建到 `head`;老库一致则认领并到 `head`;老库不一致则 **fail-close 不改动**;已在 `head` 则幂等空操作。
Migration command: empty→head; matching existing→adopt+head; mismatch→fail closed; already-at-head→no-op.
- 启动校验:DB 未到 `head` 时**拒绝启动**并输出清晰日志;到 `head` 才正常起。
Startup refuses to boot (clear log) unless the DB is at `head`.
- 模拟老库认领后**数据无损**。Adopted existing-like DB keeps data intact.
- 全部测试绿;schema 与本步骤前**逐列一致**(本步不改业务 schema)。
All tests green; schema identical to before (no business-schema change).
---
## 风险与缓解 / Risks & Mitigations
- **baseline 与现状有偏差 → `stamp` 失真。** 缓解:`alembic check` 对生产副本校验 + 人眼复核 SQLite 假差异。
Baseline drift → `alembic check` against a prod copy + manual eyeball.
- **2b 一致性比对假阳性 → 合法老库被误 fail-close。** 缓解:比对基准用 baseline(V1) 而非 head;容忍已知 SQLite 噪声,或提供"人工确认覆盖"的开关。
2b false positives wrongly fail a legit DB → compare against baseline (not head); tolerate known SQLite noise or offer a manual-confirm override.
- **容器内找不到迁移脚本。** 缓解:确认 `Dockerfile` 已 `COPY` `alembic.ini` 与 `migrations/`,且 `script_location` 用绝对/相对镜像 WORKDIR(`/app`) 正确解析。
Migrations missing in image → ensure they're `COPY`-ed and `script_location` resolves under `/app`.
---
## 相关约定 / Conventions(详见 implementation-plan.md)
- 不主动 push/commit,除非业主要求。Don't push/commit unless asked.
- 实现与设计若有偏差 → 回写设计文档 §3 与仓库简报 `../repository-brief.md` §10。
+102
View File
@@ -0,0 +1,102 @@
# 步骤 2 · LLM 接入 / Step 2 · LLM Integration
> **可独立执行 / Self-contained.** 完整背景见设计文档 [`llm-integration-design.md`](./llm-integration-design.md) §4;跨步骤约定见 [`implementation-plan.md`](./implementation-plan.md)。
> **前置 / Prerequisite** [步骤 1](./step-1-alembic-foundation.md) 已合入(Alembic 已就位——**schema 变更一律通过新建迁移完成,并经迁移命令 `python -m app.migrate` / `db-migration` 步骤生效,非应用启动时**)。Step 1 merged; Alembic is in place — **schema changes go through a new migration, applied by the migration command, not at app startup**.
> **产出 / Output** 一个可独立合入的 PR。
---
## 目标 / Goal
提供一个配置页:能填写并测试 OpenAI 兼容的 `base_url`/`model`/`api_key`,配置落库到 `app_settings`;并提供一个可复用、可 mock 的 LLM 客户端。**未配置时整站行为不变。**
A settings page to enter & test the LLM config, persisted to `app_settings`, plus a reusable, mockable LLM client. **App behavior is unchanged when unconfigured.**
---
## 必要背景 / Essential Context
- 路由全部在 `app/main.py::create_app()`;模板在 `app/templates/`,基础模板 `base.html` 顶部有导航(现有「箱子」「搜索」两个链接)。
All routes live in `create_app()`; templates under `app/templates/`; nav lives in `base.html`.
- DB 会话依赖:`Depends(get_db)`(`app/db.py`)。models 在 `app/models.py`,`Base` 在 `app/db.py`。
- **同步 handler 即可**:FastAPI 把同步 `def` 路由丢线程池执行,阻塞式 `httpx` 调用可接受。
Sync handlers are fine — FastAPI runs them in a threadpool, so blocking `httpx` is acceptable.
- `httpx` 已在 `requirements.txt`,**不要新增依赖**(不引入 `openai` SDK)。
`httpx` is already a dependency; **add no new deps**.
### 关键决策 / Key Decisions
- **配置存储用键值表**,不是定型列:`app_settings(key TEXT PRIMARY KEY, value TEXT)`。原因:后续配置项会变多,KV 加项=加一行、永不迁移;类型/校验在 Python 侧。
KV table, not typed columns — future settings = new rows, never a migration.
- **API Key 明文落库**(业主在其威胁模型下的明确选择),但**配置页绝不回显明文**:显示「已配置,留空=不修改」,提交留空则保留原值。
Plaintext key in DB (owner's explicit choice), but the **UI never echoes it** — show "configured, leave blank to keep".
- **优雅降级**:`llm_enabled` 关或缺 `model`/`api_key` 时,`is_configured()` 为假;调用失败不抛 500,返回可识别的失败信号。
Graceful degradation throughout.
### 本轮使用的 key / Keys this round
| key | 含义 / Meaning | 默认 / Default |
| --- | --- | --- |
| `llm_enabled` | LLM 总开关 / master toggle | `false` |
| `llm_base_url` | OpenAI 兼容端点 / endpoint | `https://api.openai.com/v1` |
| `llm_model` | 模型名 / model name | (空 / empty) |
| `llm_api_key` | API Key(明文 / plaintext) | (空 / empty) |
| `ai_search_enabled` | AI 搜索功能开关(步骤 3 用)/ AI-search toggle | `false` |
---
## 任务 / Tasks
- [ ] **新建 V2 迁移**(用 Alembic,遵循步骤 1 的工作流):创建 `app_settings(key TEXT PRIMARY KEY, value TEXT)`
New V2 Alembic migration creating `app_settings`.
- [ ] `app/models.py`:新增 `AppSetting` 模型(映射 `app_settings`)。
- [ ] 配置读写 helper(建议放 `app/settings_store.py`,`app/config.py` 旁):
- `get_app_settings(db) -> LLMConfig`(dataclass:`enabled`/`base_url`/`model`/`api_key`/`ai_search_enabled`,含默认值)。
- `save_app_settings(db, ...)`:写回 KV;Key 留空则不覆盖原值。
- [ ] 新增 `app/llm.py`(基于 `httpx`):
- [ ] `is_configured(cfg) -> bool`
- [ ] `test_connection(cfg) -> Result`(发最小请求验证 `base_url`/`model`/`api_key`)。
- [ ] `expand_query(cfg, query) -> ExpansionResult`(查询词扩展;**步骤 3 会校准提示词与输出契约**;`terms` 为扩展词列表,`error` 用于区分超时/网络/HTTP 等真实调用失败)。
- [ ] 统一超时 + 错误处理;失败优雅降级。
- [ ] **(预留,不实现)** `analyze_image(...)`:仅留 TODO/签名占位 + 注释指向"未来图片分析轮次"。Reserved, not implemented.
- [ ] 把所有网络调用收敛到**单一函数边界**,便于测试整体 mock。
- [ ] 路由(`app/main.py`):
- [ ] `GET /settings`:渲染配置表单(Key 脱敏)。
- [ ] `POST /settings`:保存到 `app_settings`(303 重定向,沿用现有 POST 风格)。
- [ ] `POST /settings/test`:用当前/待保存配置测试连接,回显结果。
- [ ] 模板:`app/templates/settings/form.html`(沿用现有卡片/表单样式);`base.html` 导航加「设置」入口。
- [ ] 测试(LLM 全程 mock,CI 不联网):
- [ ] 保存/读取配置;**Key 脱敏**(响应 HTML 不含明文;提交留空不覆盖原 Key)。
- [ ] `POST /settings/test` 成功/失败两条分支(mock `test_connection` 或底层 httpx)。
- [ ] 未配置时 `is_configured()` 为假;配置页在 `llm_enabled=false` 下仍可正常打开保存。
---
## 涉及文件 / Files
`migrations/versions/**`(V2)、`app/models.py`、`app/llm.py`(新)、`app/settings_store.py`(新,或并入既有模块)、`app/main.py`、`app/templates/settings/form.html`(新)、`app/templates/base.html`、`tests/`。
---
## 验收 / Acceptance
-`/settings` 填入配置 → 保存 → 重启应用后仍在(已落库)。Config persists across restarts.
- 「测试连接」对真实 OpenAI 端点可用(手动验证);自动化测试中走 mock。
- 配置页 HTML **不含明文 Key**;留空提交保留原值。
- `llm_enabled=false` 或缺 Key 时,全站行为与步骤 1 后一致(无回归)。
---
## 风险与缓解 / Risks & Mitigations
- **把网络调用散落各处 → 难 mock、CI 易联网。** 缓解:所有外呼集中在 `app/llm.py` 单一边界。
Scattered network calls → keep all egress in `app/llm.py`.
- **Key 不慎回显。** 缓解:模板永不输出 `api_key` 值,仅输出"是否已配置"。
Accidental key echo → template never prints the key value.
---
## 相关约定 / Conventions(详见 implementation-plan.md)
- 不主动 push/commit,除非业主要求。
- 无新依赖(用 `httpx`)。CI 不联网(mock LLM)。
- 实现与设计若有偏差 → 回写设计文档 §4 与仓库简报 §15。
+103
View File
@@ -0,0 +1,103 @@
# 步骤 3 · 基础 AI 搜索 / Step 3 · Basic AI Search
> **可独立执行 / Self-contained.** 完整背景见设计文档 [`llm-integration-design.md`](./llm-integration-design.md) §5;跨步骤约定见 [`implementation-plan.md`](./implementation-plan.md)。
> **前置 / Prerequisite** [步骤 2](./step-2-llm-integration.md) 已合入(`app/llm.py::expand_query`、`app_settings` 配置、`ai_search_enabled` 开关均已就绪)。Step 2 merged.
> **产出 / Output** 一个可独立合入的 PR,**不改 schema**。
---
## 目标 / Goal
在搜索页提供一个**常驻**的「AI 智能搜索」动作:点击后用查询词扩展增强搜索结果。**不以"零结果"为前提**——即便普通搜索已出结果,用户不满意时也能用。
A **persistent** "AI search" action on the search page that broadens results via query-term expansion. **Not gated on zero results** — usable even when normal results exist.
---
## 必要背景 / Essential Context
- 现有搜索:`app/main.py::_build_search_results(db, query)` 对 `Box`/`Item`/`SubItem` 的 `name`、`note` 做大小写不敏感 `LIKE`,返回结果列表;路由 `GET /search`(函数 `search_page`,参数 `q`)渲染 `app/templates/search/index.html`。
Existing search: `_build_search_results(db, query)` does case-insensitive `LIKE` over name/note; route `GET /search` renders `search/index.html`.
- 步骤 2 已提供:`app/llm.py::expand_query` 的基础能力、配置读取 `get_app_settings(db)`、开关 `ai_search_enabled`、`is_configured(cfg)`、设置页 `app/templates/settings/form.html`;本步将 `expand_query` 校准为返回结构化 `ExpansionResult(terms, error)`。
- 本步**新增**配置项 `ai_search_extra_hints`(可选「额外领域提示」)并在设置页加一个多行输入——这是本步**唯一**触及设置页之处。
This step adds the `ai_search_extra_hints` setting + a textarea on the settings page (the only settings-page change here).
- 本轮检索范围=`name` + `note``image_description` 本轮不存在,属未来图片分析轮次)。
Search scope = `name` + `note` (no `image_description` this round).
### 关键决策 / Key Decisions
- **常驻、不依赖零结果。** 普通 `LIKE` 照常先出结果;AI 动作始终可用(开启且已配置时)。
Persistent and not gated on zero results.
- **流程:** 触发 AI → `expand_query` 返回 `ExpansionResult`(扩展词 `terms` 不含原词,调用失败写入 `error`)→ `ai_search` 合并「原词 + 扩展词」并对 `name`/`note` 做 OR `LIKE` 重搜 → 展示,并用横幅标注「AI 帮你扩展了:…」。**只把查询词发出去**,不外泄物品清单。
Trigger → `expand_query` returns an `ExpansionResult` (`terms` exclude the original query; failures are represented by `error`) → `ai_search` OR-`LIKE`s over the original + expanded terms → render with a banner of the expansion. Only the query leaves.
- **可替换的检索 seam。** 把 AI 检索抽成一个函数(如 `ai_search(db, query) -> (expanded_terms, results, error_message)`),本轮内部=查询词扩展 + 本地 `LIKE`;**未来换成向量嵌入 + 相似度时,路由与模板不变**。
Wrap AI retrieval behind a swappable seam so embeddings can replace it later without touching route/template.
- **提示词(决策 C,详见设计 §5.2)。** 基础系统提示词**写死在 `app/llm.py`**;设置页可选的 `ai_search_extra_hints` 非空时**追加**到其后;**输出契约由代码强制**(只接受 JSON 字符串数组;散文/坏 JSON/非字符串数组解析为合法空扩展;网络/超时/HTTP 失败写入 `ExpansionResult.error`),用户改 hints 也改不坏解析。
Base prompt hardcoded; optional extra hints appended; output contract enforced in code: only a JSON string array is accepted; prose/bad JSON/non-string arrays become a successful empty expansion; network/timeout/HTTP failures are represented by `ExpansionResult.error`.
- **优雅降级。** AI 关闭/未配置 → 不显示按钮(或提示去 `/settings`);调用失败 → 友好提示 + 回退普通结果。
---
## 任务 / Tasks
- [ ] **落地/校准 `expand_query` 的提示词(按设计 §5.2)**
- 基础系统提示词写死在 `app/llm.py`(搬家/家居场景、列相关命名词、跟随查询语言、≤ ~8 个、不解释、不造无关词)。默认提示词起点(**可迭代** / a starting point, tune during testing):
> 你是搬家物品搜索助手。用户在搜索自己打包的箱子与物品(家居/搬家场景)。给定一个搜索词,列出用户可能用来命名同一类物品的相关词:近义词、常见别称、上位类别、具体品类。规则:用与查询相同的语言;只给与该物品紧密相关、有助于在清单里找到它的词;不要解释、不要造无关词;最多 8 个;只输出一个 JSON 字符串数组,例如 `["炒锅","平底锅","汤锅","厨具"]`。
- 读取 `ai_search_extra_hints`,非空则**追加**到基础提示词之后(只补充,不改格式)。
- **返回契约**`expand_query(cfg, query, extra_hints="") -> ExpansionResult`,其中 `terms` 是扩展词列表(**不含原词**),`error` 在成功时为 `None`
- **输出契约**:要求模型只回 JSON 字符串数组;解析去 ` ```json ` 围栏 → `json.loads` → 只接受字符串数组 → 过滤空串/过长词 → 最多 8 个;散文、坏 JSON、JSON object、非字符串数组都返回 `terms=[]`、`error=None`(合法空扩展);网络错误、HTTP 错误、超时等调用失败返回 `terms=[]`、`error=<友好错误>`;不向上抛 500。
- [ ] **新增配置项 `ai_search_extra_hints`**:KV 默认空;纳入 `get_app_settings` / `save_app_settings`;设置页 `app/templates/settings/form.html` 加一个多行输入(沿用 step 2 风格)。
- [ ] 实现检索 seam:在 `app/main.py`(或抽一个小搜索模块 `app/search.py`)加 `ai_search(db, query) -> (expanded_terms, results, error_message)`
-`expand_query(cfg, query)` 得到 `ExpansionResult`
-`result.error` 非空:回退普通搜索,并把友好错误传给模板;
-`result.terms` 为空且无错误:视为合法空扩展,回退普通搜索,不显示故障提示;
- 用「原词 + 扩展词」对 `name`/`note` 做 OR `LIKE`**复用现有 `_build_search_results` 的匹配逻辑**,避免重复实现),去重。
- 注意:现有 `_build_search_results(db, query)` 只接收单个查询词;建议把它泛化为接收一组关键词(对多个词做 OR),让 AI 搜索与普通搜索共用同一套匹配逻辑,避免分叉。
Note: `_build_search_results` currently takes a single query — generalize it to accept multiple keywords so AI and normal search share one matching path.
- [ ] 扩展 `GET /search`:支持 `ai=1` 触发位(如 `GET /search?q=锅&ai=1`),保持单页、可收藏、SSR 友好。
- `ai=1` 且 AI 开启且 `is_configured()` → 走 `ai_search`,把 `expanded_terms` 传给模板做横幅。
- 否则走原有普通搜索。
- [ ] 模板 `app/templates/search/index.html`
- 常驻「AI 智能搜索」按钮,链接到 `?q=<当前词>&ai=1`
- AI 关闭/未配置时隐藏按钮(或显示去 `/settings` 的提示);
- `ai=1` 结果页顶部显示横幅「AI 帮你扩展了:term1、term2…」。
- [ ] 降级:`ai_search` 内部调用失败时捕获,渲染友好提示并回退到普通 `LIKE` 结果。
- [ ] 测试(mock `expand_query`,CI 不联网):
- [ ] 扩展词驱动命中:原词 `LIKE` 搜不到、扩展后能搜到。
- [ ] 已有结果时点 AI 仍可用,且结果集被扩大(含原结果)。
- [ ] 按钮可见性随 `ai_search_enabled` + `is_configured()` 门控。
- [ ] 调用失败(超时/网络/HTTP)→ 回退普通结果、显示友好提示、页面不报错。
- [ ] `expand_query` 输出解析:模型回合法 JSON 数组 → 正确解析;回散文/坏 JSON/非字符串数组 → `terms=[]`、`error=None`;超时/网络/HTTP 失败 → `terms=[]`、`error` 非空;均不抛错。
Output parsing: valid JSON array → parsed; prose/bad JSON/non-string arrays → `terms=[]`, `error=None`; timeout/network/HTTP failures → `terms=[]`, non-empty `error`; no raise.
- [ ] `ai_search_extra_hints` 非空时确被追加进请求(可对构造的请求体断言)。
Extra hints, when set, are appended to the request.
---
## 涉及文件 / Files
`app/llm.py`、`app/main.py`、(可选 `app/search.py`)、`app/templates/search/index.html`、`app/templates/settings/form.html`、配置读写 helper(step 2 的 settings store)、`tests/`。
---
## 验收 / Acceptance
- 搜索页在 AI 开启时**始终**可见「AI 智能搜索」;点击后结果按扩展词扩大,并标注扩展词。
- 未配置/失败时优雅降级,普通搜索完全不受影响。
- 检索逻辑收敛在 `ai_search` seam,未来可整体替换为向量语义搜索而不动路由/模板。
---
## 风险与缓解 / Risks & Mitigations
- **扩展词过多/过散 → 结果噪声大。** 缓解:限制扩展词数量;横幅透明展示扩展词,让用户理解结果来源。
Too many/too-loose terms → cap the expansion count and show it transparently.
- **AI 调用慢/失败拖累搜索页。** 缓解:仅在 `ai=1` 时才调用(普通搜索零开销);设超时;失败回退。
Slow/failed calls → only call on `ai=1`, set a timeout, fall back.
---
## 相关约定 / Conventions(详见 implementation-plan.md)
- 不主动 push/commit,除非业主要求。
- CI 不联网(mock `expand_query`)。
- 实现与设计若有偏差 → 回写设计文档 §5 与仓库简报 §15。
+300
View File
@@ -0,0 +1,300 @@
# 仓库简报 / Repository Brief — 2026 搬家助手 (Moving Helper)
> 面向「下一轮改动」前的快速理解文档。中英双语对照。
> A bilingual orientation doc to read before the next round of changes.
>
> 对应版本 / Snapshot: `main` @ `b9b6583`(撰写时 / at time of writing)
---
## 1. 一句话定位 / In One Sentence
**中文:** 一个轻量的、面向**可信家庭内网**的搬家装箱记录工具:记录「有哪些箱子、每个箱子里有什么物品、容器型物品里又装了什么」,支持单图、全局搜索,并以 Docker 长期运行。
**EN:** A lightweight, **trusted-home-LAN** moving inventory tool: track *which boxes exist, what items each box holds, and what sub-items sit inside container-type items*. Supports one image per record, global search, and runs long-term via Docker.
设计取向 / Design stance:小而稳、易于几个月后回来继续扩展;**不是**企业平台、**不**追求复杂运维。
Small, stable, easy to pick back up months later; **not** an enterprise platform.
---
## 2. 技术栈 / Tech Stack
| 层 / Layer | 选型 / Choice | 版本 / Version |
| --- | --- | --- |
| Web 框架 / Framework | FastAPI | 0.116.1 |
| ASGI 服务器 / Server | Uvicorn (`[standard]`) | 0.35.0 |
| 模板 / Templating | Jinja2(服务端渲染 SSR) | 3.1.6 |
| ORM | SQLAlchemy 2.x(`Mapped` / `mapped_column` 风格) | 2.0.43 |
| 数据库 / DB | SQLite(文件库 / file-based) | — |
| 表单 / Forms | python-multipart | 0.0.20 |
| 图片处理 / Images | Pillow(HEIC 时可选 `pillow_heif` / `sips` 兜底) | 11.2.1 |
| HTTP 客户端 / Client | requests(仅 Notion 导入用) | 2.32.3 |
| 测试 / Tests | pytest + Starlette `TestClient`(httpx) | 8.4.1 / 0.28.1 |
| 部署 / Deploy | Docker / Docker Compose + nginx 反代 | — |
**没有前端构建链 / No frontend build chain**:纯 SSR + 一个 `style.css` + `base.html` 里的少量原生 JS。无 npm / Node / 打包器。
Pure SSR + one `style.css` + a little vanilla JS inline in `base.html`. No npm/Node/bundler.
---
## 3. 目录结构 / Project Layout
```text
.
├── app/
│ ├── __init__.py
│ ├── config.py # 环境变量配置 (Settings dataclass)
│ ├── db.py # SQLAlchemy engine/session、init_db(启动时只读校验 schema,不再做迁移)
│ ├── images.py # 图片处理管线 (Pillow + HEIC 兜底)
│ ├── main.py # 所有路由 + 应用工厂 create_app() ← 核心
│ ├── models.py # Box / Item / SubItem 三个 ORM 模型
│ ├── notion_import.py # 一次性 Notion 导入的解析/写入逻辑
│ ├── static/ # style.css, manifest, service-worker.js, PWA 图标
│ └── templates/ # Jinja2 模板 (boxes/ items/ subitems/ search/ + base.html)
├── scripts/
│ ├── install.sh # 一键安装:渲染 nginx/backup/compose + cron
│ ├── deploy.sh # 仓库内的轻量更新脚本 (git pull + compose up)
│ ├── backup_db.sh # 备份脚本模板 (占位符由 install.sh 渲染)
│ ├── import_notion.py # Notion 导入 CLI 入口
│ └── nginx/moving-helper.nginx.template
├── tests/ # pytest:test_app.py (~74) + test_notion_import.py (9)
├── data/ # 运行期 SQLite (data/app.db),已 gitignore
├── .github/workflows/ # test.yml (CI) + docker-image.yml (CD)
├── Dockerfile, docker-compose.yml, .dockerignore
├── .env.example # 部署配置示例(被 shell 脚本 source)
├── pytest.ini, requirements.txt, README.md
└── docs/ # ← 本文档所在
```
**关键入口 / Key entry point** `app/main.py` 里的 `create_app()` 注册了**全部**路由。整个后端逻辑几乎都在这一个文件里。
Almost all backend logic lives in the single `create_app()` factory in `app/main.py`.
---
## 4. 数据模型 / Data Model
固定**三级层次**,不是无限树 / A fixed **3-level hierarchy**, not an arbitrary tree
```text
Box (顶层容器 / top container:纸箱、行李箱…)
└── Item (箱子里的物品 / an item in the box)
└── SubItem (仅容器型 Item 才有 / only under container items)
```
定义于 `app/models.py`
| 模型 / Model | 表 / Table | 关键字段 / Key fields | 关系 / Relations |
| --- | --- | --- | --- |
| `Box` | `boxes` | `name`, `note`, `room`, `status` + 图片字段 + 时间戳 | `items` → 多个 Item(`cascade="all, delete-orphan"`) |
| `Item` | `items` | `box_id`(FK), `name`, `note`, `quantity`, `is_container` + 图片字段 + 时间戳 | 属于一个 `Box`;`subitems` → 多个 SubItem(级联删除) |
| `SubItem` | `subitems` | `parent_item_id`(FK), `name`, `note`, `quantity` + 图片字段 + 时间戳 | 属于一个 `Item` |
**核心规则 / Core rules**
- 只有 `Item.is_container == True` 的物品才允许拥有 `SubItem`;非容器去建子物品会返回 **400**(`_require_container_item`)。
Only container items may hold sub-items; otherwise the API returns **400**.
- 在更新 Item 时,若取消勾选 `is_container`,会**清空其所有 SubItem**(`item.subitems.clear()`,`main.py:454`)。
Un-checking `is_container` on update **clears all sub-items**.
- 删除级联:删 Box → 删其 Item → 删其 SubItem,由 ORM `cascade` + 外键 `ondelete="CASCADE"` 双重保障(SQLite 还启用了 `PRAGMA foreign_keys=ON`)。
Delete cascades top-down, enforced both by ORM cascade and FK `ondelete`.
**图片字段(每个模型都有同一组)/ Image fields (same set on every model)**
`image_blob` (BLOB)、`image_mime_type`、`image_width`、`image_height`。
→ 图片直接以二进制存进 SQLite,**不落地为文件**。Images are stored **inline as BLOBs in SQLite**, not as files.
时间戳 / Timestamps:`created_at`、`updated_at`(UTC;`updated_at` 带 `onupdate`)。
---
## 5. 路由总览 / Route Map
全部在 `app/main.py`。POST 后统一 **303 See Other** 重定向(避免重复提交)。
All in `app/main.py`. POSTs redirect with **303 See Other**.
| 方法 路径 / Method Path | 作用 / Purpose |
| --- | --- |
| `GET /` | 302 跳转到 `/boxes` |
| `GET /manifest.webmanifest`, `GET /service-worker.js` | PWA 资源(从根路径返回) |
| `GET /search?q=` | 全局搜索页 |
| `GET /boxes` | 箱子列表 + 概览统计 |
| `GET /boxes/new` · `POST /boxes` | 新建箱子表单 / 提交 |
| `GET /boxes/{id}` | 箱子详情(含其 Item 列表) |
| `GET /boxes/{id}/edit` · `POST /boxes/{id}/update` | 编辑 / 保存 |
| `POST /boxes/{id}/delete` | 删除箱子 |
| `GET /boxes/{id}/image` · `POST /boxes/{id}/image/delete` | 取图 / 删图 |
| `GET /boxes/{id}/items/new` · `POST /boxes/{id}/items` | 在箱子下新建物品 |
| `GET /items/{id}` | 物品详情(容器型则含 SubItem 列表) |
| `GET /items/{id}/edit` · `POST /items/{id}/update` · `POST /items/{id}/delete` | 编辑 / 保存 / 删除 |
| `GET /items/{id}/image` · `POST /items/{id}/image/delete` | 取图 / 删图 |
| `GET /items/{id}/subitems/new` · `POST /items/{id}/subitems` | 在容器型物品下新建子物品 |
| `GET /subitems/{id}/edit` · `POST /subitems/{id}/update` · `POST /subitems/{id}/delete` | 编辑 / 保存 / 删除 |
| `GET /subitems/{id}/image` · `POST /subitems/{id}/image/delete` | 取图 / 删图 |
**重定向行为细节 / Redirect nuances**(`main.py` 创建逻辑):
- 创建物品时,若点「保存并添加下一个」(`submit_action=save_and_add_next`) → 回到新建表单;若是容器型 → 跳到物品详情;否则 → 回箱子详情。
On create: *save & add next* → back to new form; container → item detail; else → box detail.
- 子物品的「保存并添加下一个」同理回到子物品新建表单。
Sub-item *save & add next* returns to its new-form too.
> 没有 JSON API / no JSON API:全部返回 HTML(图片路由返回二进制)。FastAPI 的自动 `/docs` 仍可用,但业务路由均是表单驱动的 SSR。
---
## 6. 图片处理管线 / Image Pipeline (`app/images.py`)
每次上传都会经过 `process_upload()` → `_prepare_image()` 统一处理:
Every upload is normalized through `process_upload()` → `_prepare_image()`:
1. 读取字节,空内容 → 400;非法图片 → 400。Read bytes; empty/invalid → 400.
2. **按 EXIF 方向矫正**(`ImageOps.exif_transpose`),再处理。Apply EXIF orientation first.
3. 去元数据并转 RGB(`RGBA/LA` 贴白底、`P` 转 RGB)。Strip metadata, flatten to RGB.
4. **最长边缩放到 ≤ 1600px**(`thumbnail`)。Downscale longest side to ≤ 1600px.
5. 存为 **JPEG,质量 80,`optimize=True`**。Save as JPEG q80.
6. 写入 `image_blob` + 记录 mime / 宽 / 高。Store blob + dimensions.
**HEIC/HEIF 兜底 / fallback** 先尝试 `pillow_heif`(若已安装);否则在 macOS 上用 `sips` 转 JPEG;都不行则返回中文错误提示让用户先转格式。
Tries `pillow_heif`, then macOS `sips`, else a clear error asking to convert first.
> 注意 / Note:`pillow_heif` **不在** `requirements.txt` 里,所以默认环境 HEIC 依赖系统 `sips`(仅 macOS)。Linux 容器里上传 HEIC 会得到「请先转换」的提示。
每个对象**最多一张图**,支持上传 / 替换 / 删除,不支持多图。
One image per object; upload/replace/delete; no multi-image.
---
## 7. 全局搜索 / Global Search (`_build_search_results`, `main.py`)
- `GET /search?q=关键词`,对 `Box / Item / SubItem` 的 `name`、`note` 做 **SQLite `LOWER(...) LIKE %q%`** 模糊匹配(大小写不敏感)。
Case-insensitive `LIKE` over `name` + `note` across all three types.
- 结果带:类型标签、归属路径(Item 显示所属 Box;SubItem 显示所属 Item + Box)、若有图则带缩略图链接。
Results include type, location path, and a thumbnail link if an image exists.
- 无外部搜索引擎、无全文索引。No external search engine / full-text index.
`/boxes` 概览页另有统计(`_build_boxes_overview_summary`):箱子数、物品数(含/不含子物品)、每箱平均物品数、每容器型 Item 平均子物品数。
---
## 8. PWA 支持 / PWA Support
最小可安装 PWA,**不改 SSR 结构** / minimal installable PWA without changing SSR:
- 根路径提供 `manifest.webmanifest`(正确 mime)和 `service-worker.js`
- `base.html` 注入 theme-color、apple-touch-icon、安装相关 meta,并注册 service worker。
- 图标:180(apple-touch)、192、512、512-maskable,位于 `app/static/icons/`。
**当前 service worker 仅做 `skipWaiting` + `clients.claim()`,没有任何缓存/离线能力。**
The service worker only claims clients — **no caching, no offline** yet.
`base.html` 里的原生 JS 还实现了:可点击卡片(`.clickable-card[data-href]`,含键盘 Enter/Space 支持)、表单内回车跳到下一个字段。
---
## 9. 配置与环境变量 / Configuration
**应用运行时 / App runtime**(`app/config.py` 的 `Settings` dataclass):
| 变量 / Var | 默认 / Default | 说明 |
| --- | --- | --- |
| `DATABASE_URL` | `sqlite:///./data/app.db` | 数据库连接串 |
| `HOST` | `0.0.0.0` | (定义了但 uvicorn 在 CMD 里写死) |
| `PORT` | `10000` | 同上 |
**部署时 / Deploy-time**`.env`,被 shell 脚本 `source`**非**应用直接读取):
`HOST_DOMAIN`、`SSL_PATH`、`APP_DIR`、`BACKUP_DIR`、`BACKUP_REMOTE`、`APP_PORT`、`DATA_DIR`、`DATABASE_URL`、`COMPOSE_PROJECT_NAME`。详见 `.env.example`。
约定 / Conventions:容器内固定监听 `0.0.0.0:10000``APP_PORT` 只控制宿主机暴露端口;SQLite 固定写 `/app/data/app.db`(容器内)。
---
## 10. 数据库初始化与迁移 / DB Init & Migrations (`app/migrate.py` + `app/db.py`)
- **Alembic 接管 schema**:迁移系统由 Alembic 管理(`alembic.ini` + `migrations/`),V1 baseline 等于当前三表 schema。
Alembic owns schema creation and changes (`alembic.ini` + `migrations/`); V1 baseline equals the current three-table schema.
- **迁移与启动分离 / Migrations separated from startup**
- `init_db()``app/db.py`)在 FastAPI lifespan 启动时调用 `verify_schema_is_current()`,只做**只读校验**——检查 DB 是否在 `head`,不一致则 **fail-close**(拒绝启动、不执行任何 DDL)。
`init_db()` calls `verify_schema_is_current()` at startup — read-only check, fails closed on mismatch, no DDL.
- 实际迁移由独立幂等命令 `python -m app.migrate``app/migrate.py`)执行:空库建表、老库认领(stamp V1 → upgrade head)、已在 head 则空操作。老库 schema 不匹配则 fail-close 不改动。
Actual migration via standalone idempotent command `python -m app.migrate`: fresh DB → create, matching existing → adopt, already-at-head → no-op, mismatch → fail closed.
- SQLite 连接开启 `PRAGMA foreign_keys=ON`
- 手写列同步 `_sync_sqlite_image_columns()` 已退休删除。
The hand-rolled `_sync_sqlite_image_columns()` has been retired and removed.
---
## 11. 部署 / Deployment
**Docker**`Dockerfile`):`python:3.12-slim` → 装依赖 → 拷贝 `app/``uvicorn app.main:app --host 0.0.0.0 --port 10000`
**Compose**`docker-compose.yml`):
- 镜像固定 `code.wanderingbadger.dev/tliu93/2026-moving-helper:latest`,同时保留 `build:` 用于本地构建。
- `user: "1000:1000"`,仅 `127.0.0.1:${APP_PORT}:10000` 暴露,`${DATA_DIR}:/app/data` 持久化,`restart: unless-stopped`
**一键安装**`scripts/install.sh`,需 root/sudo 写 nginx):读 `.env` → 拷 compose/.env/渲染后的 backup 脚本到 `APP_DIR` → 渲染并启用 nginx 站点 → `nginx -t` + reload → `docker compose pull && up -d` → 写每日 `02:10` 备份 cron。无 `.env` 直接退出。
**轻量更新**`scripts/deploy.sh`):`git pull --ff-only``docker compose pull web``up -d` → 打印状态/日志。
**nginx 模板**`scripts/nginx/...`):80→443 跳转、443 启用 SSL、反代到 `127.0.0.1:${APP_PORT}``client_max_body_size 0`。证书由用户自备于 `SSL_PATH``fullchain.pem` / `privkey.key`)。
**备份**`scripts/backup_db.sh`,模板带占位符由 install 渲染):用 `sqlite3 .backup` 取事务一致快照(不停容器),文件名带时间戳,**最多保留 5 个**,`BACKUP_REMOTE` 非空时 `rclone sync` 到远端。
---
## 12. CI / CD`.github/workflows/`
- **`test.yml`(CI)**:任意分支 `push` 触发 → Python 3.12 → 装依赖 → `pytest`。无需外部服务/DB。
- **`docker-image.yml`(CD)**:`v*` tag 触发;**先校验该 tag 提交可从 `origin/main` 到达**,再 buildx 构建 `linux/amd64` + `linux/arm64`,推 `:${tag}` 和 `:latest`。
- 需在仓库 Secrets 配 `REGISTRY_USERNAME` / `REGISTRY_TOKEN`(Gitea container registry)。
发布流程 / Release`git tag vX.Y.Z && git push origin main --tags`
---
## 13. 测试 / Tests
- `tests/conftest.py`:每个测试用 `tmp_path` 建独立 SQLite`configure_database(...)` 切换,再 `create_app()` —— **不污染** `data/app.db`,无需 Docker。
Each test gets an isolated tmp SQLite; never touches dev data.
- `tests/test_app.py`(约 74 个):Box/Item/SubItem CRUD、级联删除、404、图片上传/替换/删除/错误路径、EXIF 矫正、图片路由、搜索(name/note/路径/缩略图)、重定向行为、页面结构与 UX 文案、概览统计。
- `tests/test_notion_import.py`9 个):page id 提取、heading/bullet 解析、容器判定、超层级警告、媒体跳过、dry-run 不写库、apply 写库结构。
运行 / Run`python -m pytest`
---
## 14. Notion 一次性导入 / One-time Notion Import
`app/notion_import.py` + `scripts/import_notion.py`(交互式 CLI`--dry-run` / `--apply`)。
结构映射 / Mapping`heading_2``Box`;其下一级 bullet → `Item`;二级 bullet → `SubItem`(此时父 bullet 自动判为容器型)。更深层级只警告不导入;**不导入任何图片/媒体**。
定位 / Positioning**一次性 migration 工具,非长期同步**;建议导入前先备份 `data/app.db``NOTION_VERSION = "2026-03-11"`
---
## 15. 已知约束 & 下一轮改动建议 / Constraints & Notes for the Next Round
**当前明确「未实现」/ Explicitly out of scope(见 README):** 离线缓存/同步、多图、OCR、AI 识别、图片标签/分类、登录鉴权、标签系统、前后端分离、复杂 UI。
**改动前值得注意的点 / Things to watch before changing things**
1. **无鉴权 / No auth.** 设计前提是「可信内网 + nginx HTTPS」。任何要暴露到公网的改动都需先加访问控制。
2. **迁移必须走 Alembic / Migrations go through Alembic(§10).** 模型里加新字段不会自动建列:需要在 `migrations/` 下新增 Alembic revision 并执行 `python -m app.migrate`;否则启动时的只读校验会 fail-close、拒绝启动。手写的 `_sync_sqlite_image_columns` 已删除,不要再沿用该思路。
3. **图片存在 SQLite 里 / Images live in the DB.** 好处是备份/迁移只需一个文件;代价是库体积随图增长、备份成本上升。若要支持多图或大图归档,应考虑改为对象存储/文件系统 + 路径引用。
4. **逻辑高度集中在 `main.py` / Logic concentrated in `main.py`.** 路由、表单解析、查询、统计、搜索都在一个文件。新增大功能时可考虑拆分 router/service 模块,但要保留 `create_app()` 工厂以维持测试隔离。
5. **Service Worker 是空壳 / SW is a stub.** README 写的「PWA」目前不含离线能力;要做离线需真正实现缓存策略。
6. **固定 3 级层次 / Fixed 3 levels.** `Box → Item → SubItem` 写死在模型、路由、模板、Notion 解析多处;若要变成可嵌套树,是一次跨层改动。
7. **HEIC 在 Linux 容器里不可用 / HEIC fails in Linux containers**(`pillow_heif` 未列入依赖,`sips` 仅 macOS)。若用户多用 iPhone 原图,考虑把 `pillow_heif` 加进 `requirements.txt`。
8. **UI 全中文、SSR、单 `style.css`.** 前端改动直接编辑 `app/templates/*``app/static/style.css`,无构建步骤。
---
## 16. 本地快速启动 / Quick Local Start
```bash
python3 -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt
uvicorn app.main:app --reload --host 0.0.0.0 --port 10000
# 打开 / open http://localhost:10000 (默认数据库 / default DB: ./data/app.db)
python -m pytest # 跑测试 / run tests
```
+1
View File
@@ -0,0 +1 @@
Generic single-database configuration.
+80
View File
@@ -0,0 +1,80 @@
from logging.config import fileConfig
from sqlalchemy import pool
from sqlalchemy import engine_from_config
from alembic import context
# Import Base and models so Alembic can see all tables for autogenerate.
from app.db import Base
import app.models # noqa: F401 — registers Box, Item, SubItem on Base.metadata
config = context.config

# Dynamically set sqlalchemy.url from app config (not hardcoded in alembic.ini).
# When called programmatically via app.migrate.run_migrations(), the URL is
# already set on the Config object — respect it. Fall back to get_settings()
# only when invoked from the ``alembic`` CLI.
from app.config import get_settings

if not config.get_main_option("sqlalchemy.url"):
    settings = get_settings()
    # Config.set_main_option() stores the value through ConfigParser, which
    # treats "%" as the start of an interpolation. Double any raw "%" (for
    # example a percent-encoded password in the URL) so reading the option
    # back yields the original URL instead of raising an interpolation error.
    config.set_main_option(
        "sqlalchemy.url", settings.database_url.replace("%", "%%")
    )

# Configure Python logging from the ini file, when Alembic was given one.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata that autogenerate compares against the database.
target_metadata = Base.metadata
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the ``sqlalchemy.url`` option alone; no
    Engine is created here, so a DBAPI does not even need to be available.
    Calls to ``context.execute()`` emit the given string to the script output.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "render_as_batch": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.``-prefixed options of the main ini
    section, opens a connection, and runs the migrations on that connection
    inside a single transaction block.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as live_connection:
        context.configure(
            connection=live_connection,
            target_metadata=target_metadata,
            render_as_batch=True,
        )
        with context.begin_transaction():
            context.run_migrations()
# Pick the runner that matches the mode reported by the Alembic context.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
+28
View File
@@ -0,0 +1,28 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}
@@ -0,0 +1,96 @@
"""V1 baseline
Revision ID: 57af90893f55
Revises:
Create Date: 2026-06-01 13:49:15.867487
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '57af90893f55'
down_revision: Union[str, Sequence[str], None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the V1 baseline tables: ``boxes``, ``items`` and ``subitems``."""

    def image_columns() -> list:
        # Built fresh on every call so each table gets its own Column objects.
        return [
            sa.Column('image_blob', sa.LargeBinary(), nullable=True),
            sa.Column('image_mime_type', sa.String(length=50), nullable=True),
            sa.Column('image_width', sa.Integer(), nullable=True),
            sa.Column('image_height', sa.Integer(), nullable=True),
        ]

    def timestamp_columns() -> list:
        return [
            sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
            sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
        ]

    def create_id_index(table_name: str) -> None:
        # Non-unique index on the primary key column, named ix_<table>_id.
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.create_index(
                batch_op.f(f'ix_{table_name}_id'), ['id'], unique=False
            )

    op.create_table(
        'boxes',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=100), nullable=False),
        sa.Column('note', sa.Text(), nullable=True),
        sa.Column('room', sa.String(length=100), nullable=True),
        sa.Column('status', sa.String(length=50), nullable=True),
        *image_columns(),
        *timestamp_columns(),
        sa.PrimaryKeyConstraint('id'),
    )
    create_id_index('boxes')

    op.create_table(
        'items',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('box_id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=100), nullable=False),
        sa.Column('note', sa.Text(), nullable=True),
        sa.Column('quantity', sa.Integer(), nullable=True),
        sa.Column('is_container', sa.Boolean(), nullable=False),
        *image_columns(),
        *timestamp_columns(),
        sa.ForeignKeyConstraint(['box_id'], ['boxes.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    create_id_index('items')

    op.create_table(
        'subitems',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('parent_item_id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(length=100), nullable=False),
        sa.Column('note', sa.Text(), nullable=True),
        sa.Column('quantity', sa.Integer(), nullable=True),
        *image_columns(),
        *timestamp_columns(),
        sa.ForeignKeyConstraint(['parent_item_id'], ['items.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    create_id_index('subitems')
def downgrade() -> None:
    """Drop the V1 baseline tables together with their ``ix_<table>_id`` indexes."""
    # Same order as before: subitems, then items, then boxes.
    for table_name in ('subitems', 'items', 'boxes'):
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.drop_index(batch_op.f(f'ix_{table_name}_id'))
        op.drop_table(table_name)
@@ -0,0 +1,32 @@
"""V2 app_settings
Revision ID: a1b2c3d4e5f6
Revises: 57af90893f55
Create Date: 2026-06-01 14:00:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = 'a1b2c3d4e5f6'
down_revision: Union[str, Sequence[str], None] = '57af90893f55'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``app_settings`` table: a text ``key`` primary key and a nullable text ``value``."""
    key_column = sa.Column('key', sa.Text(), nullable=False)
    value_column = sa.Column('value', sa.Text(), nullable=True)
    op.create_table(
        'app_settings',
        key_column,
        value_column,
        sa.PrimaryKeyConstraint('key'),
    )
def downgrade() -> None:
    """Remove the ``app_settings`` table created by this revision's upgrade."""
    settings_table = 'app_settings'
    op.drop_table(settings_table)
+2
View File
@@ -1,4 +1,6 @@
[pytest] [pytest]
pythonpath = . pythonpath = .
testpaths = tests
norecursedirs = app .venv
filterwarnings = filterwarnings =
ignore:'asyncio\.iscoroutinefunction' is deprecated and slated for removal in Python 3\.16; use inspect\.iscoroutinefunction\(\) instead:DeprecationWarning:fastapi\.routing ignore:'asyncio\.iscoroutinefunction' is deprecated and slated for removal in Python 3\.16; use inspect\.iscoroutinefunction\(\) instead:DeprecationWarning:fastapi\.routing
+1
View File
@@ -2,6 +2,7 @@ fastapi==0.116.1
uvicorn[standard]==0.35.0 uvicorn[standard]==0.35.0
jinja2==3.1.6 jinja2==3.1.6
sqlalchemy==2.0.43 sqlalchemy==2.0.43
alembic==1.16.5
python-multipart==0.0.20 python-multipart==0.0.20
pillow==11.2.1 pillow==11.2.1
requests==2.32.3 requests==2.32.3
+2 -3
View File
@@ -73,9 +73,8 @@ for backup_file in $(find "$BACKUP_DIR" -maxdepth 1 -type f -name 'app-*.db' | s
done done
if [ -n "${BACKUP_REMOTE:-}" ]; then if [ -n "${BACKUP_REMOTE:-}" ]; then
remote_target=${BACKUP_REMOTE%/}/$(basename "$FINAL_BACKUP") rclone sync "$BACKUP_DIR" "${BACKUP_REMOTE%/}"
rclone copyto "$FINAL_BACKUP" "$remote_target" echo "Backup uploaded to remote: $BACKUP_REMOTE"
echo "Backup uploaded to remote: $remote_target"
else else
echo "BACKUP_REMOTE is empty; skipping remote upload" echo "BACKUP_REMOTE is empty; skipping remote upload"
fi fi
+4
View File
@@ -6,6 +6,7 @@ from sqlalchemy.orm import Session
from app.db import SessionLocal, configure_database from app.db import SessionLocal, configure_database
from app.main import create_app from app.main import create_app
from app.migrate import run_migrations
@pytest.fixture @pytest.fixture
@@ -13,6 +14,9 @@ def client(tmp_path: Path):
test_db_path = tmp_path / "test.db" test_db_path = tmp_path / "test.db"
database_url = f"sqlite:///{test_db_path}" database_url = f"sqlite:///{test_db_path}"
# Run migration first so DB is at head before app starts.
run_migrations(database_url)
configure_database(database_url) configure_database(database_url)
app = create_app() app = create_app()
+707
View File
@@ -0,0 +1,707 @@
"""Tests for AI search (Step 3).
All LLM calls are mocked — CI never touches the network.
Coverage areas:
- expand_query JSON output parsing (valid, fenced, prose, bad JSON, timeout)
- Output contract enforcement (strict JSON array only)
- Expansion term count cap and length cap
- ai_search seam function
- GET /search with ai=1 trigger
- AI button visibility on search page
- Graceful degradation on failure
- ai_search_extra_hints appended to prompt
- ai_search_enabled toggle
"""
from unittest.mock import patch
import httpx
import pytest
from app.llm import (
_MAX_EXPANSION_TERMS,
_MAX_TERM_LENGTH,
ExpansionResult,
LLMResult,
_parse_json_string_array,
expand_query,
is_configured,
)
from app.main import _ai_search, _build_search_results
from app.models import AppSetting, Box, Item, SubItem
from app.settings_store import LLMConfig, get_app_settings, save_app_settings
# ---------------------------------------------------------------------------
# Helper: configure AI search for route tests
# ---------------------------------------------------------------------------
# LLM configuration with every field set and AI search switched on.
_AI_CFG = LLMConfig(
    base_url="https://api.example.com/v1",
    model="gpt-4o-mini",
    api_key="sk-test-key",
    enabled=True,
    ai_search_enabled=True,
)
def _enable_ai_search(client, db_session):
"""Persist a fully-configured AI search setup via the settings route."""
client.post(
"/settings",
data={
"enabled": "on",
"base_url": "https://api.example.com/v1",
"model": "gpt-4o-mini",
"api_key": "sk-test-key",
"ai_search_enabled": "on",
},
follow_redirects=False,
)
# ---------------------------------------------------------------------------
# _parse_json_string_array: strict JSON contract enforcement
# ---------------------------------------------------------------------------
class TestParseJsonStringArray:
    """Contract checks for ``_parse_json_string_array``."""

    def test_valid_json_array(self):
        parsed = _parse_json_string_array('["炒锅","平底锅","汤锅"]')
        assert parsed == ["炒锅", "平底锅", "汤锅"]

    def test_json_array_with_code_fence(self):
        fenced = '```json\n["","铲子"]\n```'
        assert _parse_json_string_array(fenced) == ["", "铲子"]

    def test_json_array_with_code_fence_no_lang(self):
        fenced = '```\n["","铲子"]\n```'
        assert _parse_json_string_array(fenced) == ["", "铲子"]

    def test_empty_string_returns_empty(self):
        for blank in ("", " "):
            assert _parse_json_string_array(blank) == []

    def test_prose_returns_empty(self):
        """Free-form prose must not be turned into expansion terms."""
        prose = "I cannot help with that."
        assert _parse_json_string_array(prose) == []

    def test_prose_newlines_returns_empty(self):
        """Newline-separated words are not accepted as terms."""
        prose = "炒锅\n平底锅\n汤锅"
        assert _parse_json_string_array(prose) == []

    def test_prose_commas_returns_empty(self):
        """Comma-separated words are not accepted as terms."""
        prose = "炒锅, 平底锅, 汤锅"
        assert _parse_json_string_array(prose) == []

    def test_bad_json_returns_empty(self):
        """Malformed JSON yields an empty list."""
        assert _parse_json_string_array("{invalid json") == []

    def test_json_object_returns_empty(self):
        """A JSON object instead of an array yields an empty list."""
        payload = '{"terms":["","厨具"]}'
        assert _parse_json_string_array(payload) == []

    def test_json_array_with_numbers_returns_empty(self):
        """An array of numbers is rejected."""
        assert _parse_json_string_array('[1, 2, 3]') == []

    def test_json_array_with_mixed_types_returns_empty(self):
        """An array mixing strings and numbers is rejected."""
        assert _parse_json_string_array('["", 1]') == []

    def test_empty_json_array(self):
        assert _parse_json_string_array('[]') == []

    def test_capped_at_max_terms(self):
        """Twenty input terms are cut down to ``_MAX_EXPANSION_TERMS``."""
        quoted_terms = ",".join(f'"{i}"' for i in range(20))
        parsed = _parse_json_string_array("[" + quoted_terms + "]")
        assert len(parsed) == _MAX_EXPANSION_TERMS

    def test_long_terms_filtered_out(self):
        """A term one character over ``_MAX_TERM_LENGTH`` is dropped."""
        short = ""
        oversized = "A" * (_MAX_TERM_LENGTH + 1)
        parsed = _parse_json_string_array(f'["{short}", "{oversized}"]')
        assert parsed == [""]

    def test_whitespace_stripped(self):
        parsed = _parse_json_string_array('["", " 平底锅 "]')
        assert parsed == ["", "平底锅"]

    def test_empty_strings_filtered(self):
        parsed = _parse_json_string_array('["", "", " ", "平底锅"]')
        assert parsed == ["", "平底锅"]
# ---------------------------------------------------------------------------
# expand_query: prompt, hints, graceful degradation
# ---------------------------------------------------------------------------
class TestExpandQueryNew:
    """``expand_query`` behaviour with ``app.llm._call_chat_completion`` mocked."""

    @staticmethod
    def _configured():
        # Enabled config with a model and API key, as used by most tests here.
        return LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")

    @staticmethod
    def _completion(content):
        # Minimal chat-completion payload carrying a single message.
        return {"choices": [{"message": {"content": content}}]}

    @staticmethod
    def _system_prompt(mock_call):
        # Messages may have been passed by keyword or as the second positional arg.
        call_args = mock_call.call_args
        messages = call_args[1]["messages"] if "messages" in call_args[1] else call_args[0][1]
        return messages[0]["content"]

    def test_returns_empty_when_not_configured(self):
        outcome = expand_query(LLMConfig(enabled=False), "")
        assert outcome.terms == []
        assert outcome.error is None

    @patch("app.llm._call_chat_completion")
    def test_parses_valid_json_response(self, mock_call):
        mock_call.return_value = self._completion('["炒锅","平底锅","汤锅","厨具"]')
        outcome = expand_query(self._configured(), "")
        for expected in ("炒锅", "平底锅", "厨具"):
            assert expected in outcome.terms
        assert outcome.error is None

    @patch("app.llm._call_chat_completion")
    def test_handles_json_with_code_fence(self, mock_call):
        mock_call.return_value = self._completion('```json\n["炒锅","平底锅"]\n```')
        outcome = expand_query(self._configured(), "")
        assert "炒锅" in outcome.terms
        assert outcome.error is None

    @patch("app.llm._call_chat_completion")
    def test_prose_response_returns_empty_no_error(self, mock_call):
        """A prose reply gives no terms and no error."""
        mock_call.return_value = self._completion("I cannot help with that.")
        outcome = expand_query(self._configured(), "")
        assert outcome.terms == []
        assert outcome.error is None

    @patch("app.llm._call_chat_completion")
    def test_json_object_response_returns_empty_no_error(self, mock_call):
        """A JSON object reply gives no terms and no error."""
        mock_call.return_value = self._completion('{"terms":["","厨具"]}')
        outcome = expand_query(self._configured(), "")
        assert outcome.terms == []
        assert outcome.error is None

    @patch("app.llm._call_chat_completion")
    def test_timeout_returns_error(self, mock_call):
        """A timeout gives no terms plus an error message."""
        mock_call.side_effect = httpx.TimeoutException("timeout")
        outcome = expand_query(self._configured(), "")
        assert outcome.terms == []
        assert outcome.error is not None
        assert "超时" in outcome.error

    @patch("app.llm._call_chat_completion")
    def test_network_error_returns_error(self, mock_call):
        """A connection error gives no terms plus an error message."""
        mock_call.side_effect = httpx.ConnectError("refused")
        outcome = expand_query(self._configured(), "")
        assert outcome.terms == []
        assert outcome.error is not None
        assert "无法连接" in outcome.error

    @patch("app.llm._call_chat_completion")
    def test_http_error_returns_error(self, mock_call):
        """An HTTP status error gives no terms plus an error message."""
        unauthorized = httpx.HTTPStatusError(
            "401",
            request=httpx.Request("POST", "http://x"),
            response=httpx.Response(401),
        )
        mock_call.side_effect = unauthorized
        outcome = expand_query(self._configured(), "")
        assert outcome.terms == []
        assert outcome.error is not None
        assert "错误" in outcome.error

    @patch("app.llm._call_chat_completion")
    def test_returns_empty_on_empty_choices(self, mock_call):
        mock_call.return_value = {"choices": []}
        outcome = expand_query(self._configured(), "")
        assert outcome.terms == []
        assert outcome.error is None

    @patch("app.llm._call_chat_completion")
    def test_extra_hints_appended_to_system_prompt(self, mock_call):
        """Non-empty ``extra_hints`` must show up in the system prompt."""
        mock_call.return_value = self._completion('["扩展词"]')
        expand_query(self._configured(), "", extra_hints="用户物品主要涉及厨房用品")
        assert "用户物品主要涉及厨房用品" in self._system_prompt(mock_call)

    @patch("app.llm._call_chat_completion")
    def test_extra_hints_ignored_when_empty(self, mock_call):
        """With empty ``extra_hints`` the base prompt text is still present."""
        mock_call.return_value = self._completion('["扩展词"]')
        expand_query(self._configured(), "", extra_hints="")
        system_content = self._system_prompt(mock_call)
        assert "搬家物品搜索助手" in system_content
        assert "JSON 字符串数组" in system_content

    @patch("app.llm._call_chat_completion")
    def test_temperature_zero_passed(self, mock_call):
        """``temperature=0`` is passed to the chat-completion call by keyword."""
        mock_call.return_value = self._completion('["扩展词"]')
        expand_query(self._configured(), "")
        assert mock_call.call_args[1]["temperature"] == 0
# ---------------------------------------------------------------------------
# _ai_search: seam function
# ---------------------------------------------------------------------------
class TestAiSearchSeam:
    """``_ai_search`` with ``app.main.expand_query`` mocked."""

    @staticmethod
    def _store_box(db_session, **fields):
        # Insert one Box and commit so the search can see it.
        db_session.add(Box(**fields))
        db_session.commit()

    @patch("app.main.expand_query")
    def test_returns_expanded_terms_and_results(self, mock_expand, client, db_session):
        """Expansion terms are returned and at least one row matches."""
        self._store_box(db_session, name="厨房箱", note="装了炒锅和铲子")
        mock_expand.return_value = ExpansionResult(terms=["炒锅", "平底锅", "汤锅"])

        settings = get_app_settings(db_session)
        expanded, results, error = _ai_search(db_session, settings, "平底锅")

        assert "炒锅" in expanded
        assert error is None
        assert len(results) >= 1
        assert any(
            "厨房箱" in row["name"] or "炒锅" in (row.get("note") or "")
            for row in results
        )

    @patch("app.main.expand_query")
    def test_includes_original_query_in_search(self, mock_expand, client, db_session):
        """The original query term is searched alongside the expansion."""
        self._store_box(db_session, name="冬季衣物箱")
        mock_expand.return_value = ExpansionResult(terms=["羽绒服"])

        settings = get_app_settings(db_session)
        expanded, results, error = _ai_search(db_session, settings, "衣物")

        assert error is None
        assert any("冬季衣物箱" in row["name"] for row in results)

    @patch("app.main.expand_query")
    def test_empty_expansion_returns_normal_results_no_error(self, mock_expand, client, db_session):
        """An empty expansion still returns the plain results, without an error."""
        self._store_box(db_session, name="书房箱")
        mock_expand.return_value = ExpansionResult(terms=[])

        settings = get_app_settings(db_session)
        expanded, results, error = _ai_search(db_session, settings, "书房")

        assert expanded == []
        assert error is None
        assert any("书房箱" in row["name"] for row in results)

    @patch("app.main.expand_query")
    def test_llm_failure_returns_normal_results_with_error(self, mock_expand, client, db_session):
        """A failed expansion returns the plain results plus the error message."""
        self._store_box(db_session, name="厨房箱", note="装了炒锅")
        mock_expand.return_value = ExpansionResult(terms=[], error="AI 搜索请求超时,请稍后再试。")

        settings = get_app_settings(db_session)
        expanded, results, error = _ai_search(db_session, settings, "厨房")

        assert expanded == []
        assert error is not None
        assert "超时" in error
        assert len(results) >= 1
# ---------------------------------------------------------------------------
# _build_search_results: multi-keyword support
# ---------------------------------------------------------------------------
class TestBuildSearchResultsMultiKeyword:
    """``_build_search_results`` called with one keyword or a list of keywords."""

    def test_single_keyword_works_as_before(self, db_session):
        db_session.add(Box(name="厨房箱"))
        db_session.commit()

        hits = _build_search_results(db_session, "厨房")

        assert len(hits) == 1
        assert hits[0]["name"] == "厨房箱"

    def test_multiple_keywords_match_any(self, db_session):
        kitchen = Box(name="厨房箱")
        bedroom = Box(name="卧室箱")
        db_session.add_all([kitchen, bedroom])
        db_session.commit()

        hits = _build_search_results(db_session, ["厨房", "卧室"])

        assert len(hits) == 2

    def test_multiple_keywords_dedupes_results(self, db_session):
        """A box hit by two keywords is listed once."""
        db_session.add(Box(name="厨房箱", note="装了厨房用品"))
        db_session.commit()

        hits = _build_search_results(db_session, ["厨房", "用品"])

        assert len(hits) == 1

    def test_empty_keywords_returns_empty(self, db_session):
        assert _build_search_results(db_session, []) == []
# ---------------------------------------------------------------------------
# Routes: GET /search with ai=1
# ---------------------------------------------------------------------------
class TestSearchRouteAI:
    """``GET /search`` with and without the ``ai=1`` flag."""

    @staticmethod
    def _store(db_session, *rows):
        # Insert the given ORM objects and commit.
        db_session.add_all(list(rows))
        db_session.commit()

    @staticmethod
    def _completion(content):
        # Minimal chat-completion payload carrying a single message.
        return {"choices": [{"message": {"content": content}}]}

    @patch("app.llm._call_chat_completion")
    def test_ai_search_finds_more_results(self, mock_call, client, db_session):
        """The plain query misses; an expanded term finds the item."""
        misc_box = Box(name="杂物箱")
        wok = Item(name="炒锅", box=misc_box, is_container=False)
        self._store(db_session, misc_box, wok)

        mock_call.return_value = self._completion('["炒锅","平底锅","汤锅"]')
        _enable_ai_search(client, db_session)

        # Plain search: nothing matches.
        plain = client.get("/search?q=平底锅")
        assert "没有找到匹配结果" in plain.text

        # AI search: the expansion brings in the stored item.
        with_ai = client.get("/search?q=平底锅&ai=1")
        assert with_ai.status_code == 200
        assert "炒锅" in with_ai.text
        assert "AI 帮你扩展了" in with_ai.text

    @patch("app.llm._call_chat_completion")
    def test_ai_search_includes_original_results(self, mock_call, client, db_session):
        """Rows matching the original query stay in the AI result page."""
        kitchen = Box(name="厨房箱")
        spatula = Item(name="锅铲", box=kitchen, is_container=False)
        pan = Item(name="平底锅", box=kitchen, is_container=False)
        self._store(db_session, kitchen, spatula, pan)

        mock_call.return_value = self._completion('["炒锅","汤锅"]')
        _enable_ai_search(client, db_session)

        page = client.get("/search?q=锅&ai=1")
        assert page.status_code == 200
        assert "平底锅" in page.text

    @patch("app.llm._call_chat_completion")
    def test_ai_search_shows_expansion_banner(self, mock_call, client, db_session):
        """The AI result page shows the banner with the expanded terms."""
        self._store(db_session, Box(name="厨房箱"))

        mock_call.return_value = self._completion('["炒锅","平底锅"]')
        _enable_ai_search(client, db_session)

        page = client.get("/search?q=锅&ai=1")
        assert page.status_code == 200
        assert "AI 帮你扩展了" in page.text
        assert "炒锅" in page.text

    def test_ai_search_without_flag_does_normal_search(self, client, db_session):
        """Without ``ai=1`` no expansion banner appears, even with AI enabled."""
        self._store(db_session, Box(name="厨房箱"))
        _enable_ai_search(client, db_session)

        page = client.get("/search?q=厨房")
        assert page.status_code == 200
        assert "厨房箱" in page.text
        assert "AI 帮你扩展了" not in page.text

    @patch("app.llm._call_chat_completion")
    def test_ai_search_without_configuration_ignores_flag(self, mock_call, client, db_session):
        """``ai=1`` does nothing when no AI settings were saved."""
        self._store(db_session, Box(name="厨房箱"))

        page = client.get("/search?q=厨房&ai=1")
        assert page.status_code == 200
        assert "厨房箱" in page.text
        assert "AI 帮你扩展了" not in page.text
        mock_call.assert_not_called()

    @patch("app.llm._call_chat_completion")
    def test_ai_search_graceful_degradation_on_llm_failure(self, mock_call, client, db_session):
        """On an LLM timeout the page still lists plain results and shows an error."""
        self._store(db_session, Box(name="厨房箱", note="装了炒锅"))

        mock_call.side_effect = httpx.TimeoutException("timeout")
        _enable_ai_search(client, db_session)

        page = client.get("/search?q=厨房&ai=1")
        assert page.status_code == 200
        assert "厨房箱" in page.text
        assert "超时" in page.text or "不可用" in page.text

    def test_ai_search_empty_query_does_nothing(self, client, db_session):
        """``ai=1`` without a query shows no expansion banner."""
        _enable_ai_search(client, db_session)

        page = client.get("/search?ai=1")
        assert page.status_code == 200
        assert "AI 帮你扩展了" not in page.text

    @patch("app.llm._call_chat_completion")
    def test_ai_search_disabled_ignores_flag(self, mock_call, client, db_session):
        """``ai=1`` does nothing when the saved settings omit ``ai_search_enabled``."""
        self._store(db_session, Box(name="厨房箱"))

        # LLM settings saved, but the AI search toggle is left out of the form.
        llm_only_form = {
            "enabled": "on",
            "base_url": "https://api.example.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-test-key",
        }
        client.post("/settings", data=llm_only_form, follow_redirects=False)

        page = client.get("/search?q=厨房&ai=1")
        assert page.status_code == 200
        assert "厨房箱" in page.text
        assert "AI 帮你扩展了" not in page.text
        mock_call.assert_not_called()
# ---------------------------------------------------------------------------
# Button visibility on search page
# ---------------------------------------------------------------------------
class TestAIButtonVisibility:
    """Checks when the search page shows, hides, or replaces the AI search button."""

    @patch("app.llm._call_chat_completion")
    def test_button_visible_when_configured_and_enabled(self, mock_call, client, db_session):
        """AI search button is visible when ai_search_enabled and configured."""
        _enable_ai_search(client, db_session)
        response = client.get("/search?q=测试")
        assert response.status_code == 200
        assert "AI 智能搜索" in response.text

    def test_button_hidden_when_not_configured(self, client, db_session):
        """AI search button is hidden when LLM is not configured."""
        response = client.get("/search?q=测试")
        assert response.status_code == 200
        assert "AI 智能搜索" not in response.text

    def test_button_hidden_when_ai_search_disabled(self, client, db_session):
        """AI search button is hidden when ai_search_enabled is False."""
        # LLM settings are saved without the ai_search_enabled checkbox.
        client.post(
            "/settings",
            data={
                "enabled": "on",
                "base_url": "https://api.example.com/v1",
                "model": "gpt-4o-mini",
                "api_key": "sk-test-key",
            },
            follow_redirects=False,
        )
        response = client.get("/search?q=测试")
        assert "AI 智能搜索" not in response.text

    @patch("app.llm._call_chat_completion")
    def test_button_hidden_on_empty_query(self, mock_call, client, db_session):
        """AI search button is not shown when there's no query."""
        _enable_ai_search(client, db_session)
        response = client.get("/search")
        assert "AI 智能搜索" not in response.text

    @patch("app.llm._call_chat_completion")
    def test_button_link_includes_current_query(self, mock_call, client, db_session):
        """AI button link includes the current query parameter."""
        from urllib.parse import quote

        _enable_ai_search(client, db_session)
        response = client.get("/search?q=锅")
        assert response.status_code == 200
        assert "ai=1" in response.text
        # Accept either the percent-encoded or the raw form of the query.
        # The encoded form must be built from the actual query text: quoting an
        # empty string yields "", which would reduce this check to `"q=" in text`.
        assert f"q={quote('锅')}" in response.text or "q=锅" in response.text

    @patch("app.llm._call_chat_completion")
    def test_no_button_when_ai_already_activated(self, mock_call, client, db_session):
        """When AI is already activated, show status text instead of button."""
        # Canned chat-completion payload whose content is a JSON list of terms.
        mock_call.return_value = {
            "choices": [{"message": {"content": '["炒锅"]'}}]
        }
        _enable_ai_search(client, db_session)
        response = client.get("/search?q=锅&ai=1")
        assert response.status_code == 200
        assert "AI 搜索已启用" in response.text
# ---------------------------------------------------------------------------
# Settings: ai_search_extra_hints
# ---------------------------------------------------------------------------
class TestExtraHintsSettings:
    """Persistence and form handling for the ai_search_extra_hints setting."""

    @staticmethod
    def _settings_form(hints, **overrides):
        """Build the /settings form payload; ``overrides`` replace individual fields in place."""
        form = {
            "enabled": "on",
            "base_url": "https://api.example.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-key",
            "ai_search_enabled": "on",
            "ai_search_extra_hints": hints,
        }
        # Updating existing keys keeps their original position in the dict.
        form.update(overrides)
        return form

    def test_extra_hints_defaults_to_empty(self, db_session):
        """With nothing saved, the hints field reads back as an empty string."""
        assert get_app_settings(db_session).ai_search_extra_hints == ""

    def test_save_extra_hints(self, db_session):
        """A saved hint string round-trips through the settings store."""
        hint = "用户物品主要涉及厨房"
        save_app_settings(db_session, ai_search_extra_hints=hint)
        assert get_app_settings(db_session).ai_search_extra_hints == hint

    def test_save_extra_hints_empty_string(self, db_session):
        """Saving an empty string replaces a previously stored hint."""
        for value in ("厨房用品", ""):
            save_app_settings(db_session, ai_search_extra_hints=value)
        assert get_app_settings(db_session).ai_search_extra_hints == ""

    def test_settings_page_has_extra_hints_textarea(self, client):
        """The settings page exposes the hints field and its label."""
        page = client.get("/settings")
        assert page.status_code == 200
        assert 'name="ai_search_extra_hints"' in page.text
        assert "额外领域提示" in page.text

    def test_settings_page_has_ai_search_checkbox(self, client):
        """The settings page exposes the AI search toggle and its label."""
        page = client.get("/settings")
        assert page.status_code == 200
        assert 'name="ai_search_enabled"' in page.text
        assert "启用 AI 智能搜索" in page.text

    def test_save_ai_search_settings_via_route(self, client, db_session):
        """Posting the form stores both the toggle and the hint text."""
        hint = "用户物品主要涉及厨房用品"
        client.post("/settings", data=self._settings_form(hint), follow_redirects=False)

        stored = get_app_settings(db_session)
        assert stored.ai_search_enabled is True
        assert stored.ai_search_extra_hints == hint

    def test_save_preserves_extra_hints_on_other_changes(self, client, db_session):
        """Changing LLM settings should not clear extra hints."""
        hint = "厨房用品和电子产品"
        client.post("/settings", data=self._settings_form(hint), follow_redirects=False)
        # Second submission: different model, blank API key, same hint text.
        client.post(
            "/settings",
            data=self._settings_form(hint, model="gpt-4o", api_key=""),
            follow_redirects=False,
        )

        stored = get_app_settings(db_session)
        assert stored.ai_search_extra_hints == hint
        assert stored.model == "gpt-4o"
# ---------------------------------------------------------------------------
# Regression: existing features still work without AI
# ---------------------------------------------------------------------------
class TestRegressionWithoutAI:
    """Plain keyword search keeps working when no AI settings are touched."""

    def test_normal_search_still_works(self, client, db_session):
        """A stored box is found by a substring of its name."""
        db_session.add(Box(name="测试箱"))
        db_session.commit()

        page = client.get("/search?q=测试")

        assert page.status_code == 200
        assert "测试箱" in page.text

    def test_search_page_no_results(self, client):
        """A query with no matches shows the no-results message."""
        page = client.get("/search?q=不存在")
        assert "没有找到匹配结果" in page.text

    def test_search_empty_query(self, client):
        """Opening the search page without a query shows the input prompt."""
        page = client.get("/search")
        assert "输入关键词后" in page.text
+661
View File
@@ -0,0 +1,661 @@
"""Tests for the Alembic migration wrapper (app.migrate).
Covers:
- Fresh DB: empty → upgrade head (tables created, version at head)
- Unmanaged DB (2a): has tables + matches baseline → stamp V1 → upgrade head
- Unmanaged DB (2b): schema mismatch → fail-close, no changes
- Non-empty non-app DB (rogue tables) → unmanaged → fail-close
- Managed DB: already at head → upgrade head is a no-op
- verify_schema_is_current: pass when at head, fail-close otherwise
- verify_schema_is_current: no write side-effects (no file creation)
- init_db startup: fail-close when DB not at head, pass when at head
- Data preservation: adoption does not lose existing data
- Schema correctness: tables match the ORM model definitions
- V1_REVISION constant matches the actual revision in versions/
- _detect_db_state correctly identifies all three states
- _schema_matches_baseline checks FK, indexes, PK, types — not just column names
- CLI entry point: python -m app.migrate
"""
from pathlib import Path
import shutil
import pytest
from sqlalchemy import create_engine, inspect, text
import app.models # noqa: F401 — register models on Base.metadata
from app.db import Base, SessionLocal, configure_database
from app.migrate import (
V1_REVISION,
_detect_db_state,
_make_alembic_config,
run_migrations,
verify_schema_is_current,
)
from app.main import create_app
from fastapi.testclient import TestClient
def _get_head_revision() -> str:
    """Return the head revision id as reported by the Alembic script directory."""
    from alembic.script import ScriptDirectory

    # The URL is a placeholder; per the original note it is not used for script lookup.
    alembic_cfg = _make_alembic_config("sqlite:///")
    return ScriptDirectory.from_config(alembic_cfg).get_current_head()


# Resolved once at import time so every test compares against the same head id.
HEAD_REVISION = _get_head_revision()
@pytest.fixture()
def tmp_db_path(tmp_path):
    """Path of a SQLite file named ``test.db`` inside pytest's per-test temp directory."""
    return tmp_path.joinpath("test.db")
@pytest.fixture()
def tmp_db_url(tmp_db_path):
    """SQLAlchemy SQLite URL pointing at the ``tmp_db_path`` file."""
    return "sqlite:///" + str(tmp_db_path)
# ---------------------------------------------------------------------------
# Fresh DB: empty → upgrade head
# ---------------------------------------------------------------------------
class TestFreshDBMigration:
    """Running migrations against an empty database builds the full schema."""

    @staticmethod
    def _inspect_after_migration(db_url, probe):
        """Migrate ``db_url``, call ``probe(inspector)`` and return what it returns."""
        run_migrations(db_url)
        engine = create_engine(db_url)
        try:
            return probe(inspect(engine))
        finally:
            engine.dispose()

    def _column_names(self, db_url, table):
        """Set of column names reflected for ``table`` after migrating."""
        return self._inspect_after_migration(
            db_url, lambda insp: {column["name"] for column in insp.get_columns(table)}
        )

    def test_creates_all_tables(self, tmp_db_url):
        """All four application tables exist after migrating."""
        present = self._inspect_after_migration(
            tmp_db_url, lambda insp: set(insp.get_table_names())
        )
        for required in ("boxes", "items", "subitems", "app_settings"):
            assert required in present

    def test_creates_alembic_version_table(self, tmp_db_url):
        """Alembic's bookkeeping table is created as well."""
        present = self._inspect_after_migration(
            tmp_db_url, lambda insp: set(insp.get_table_names())
        )
        assert "alembic_version" in present

    def test_version_at_head(self, tmp_db_url):
        """The recorded revision equals the head of the migration scripts."""
        run_migrations(tmp_db_url)
        engine = create_engine(tmp_db_url)
        with engine.begin() as conn:
            recorded = conn.execute(text("SELECT version_num FROM alembic_version")).scalar()
        engine.dispose()
        assert recorded == HEAD_REVISION

    def test_boxes_table_has_all_columns(self, tmp_db_url):
        """``boxes`` has exactly the expected column set."""
        assert self._column_names(tmp_db_url, "boxes") == {
            "id", "name", "note", "room", "status",
            "image_blob", "image_mime_type", "image_width", "image_height",
            "created_at", "updated_at",
        }

    def test_items_table_has_all_columns(self, tmp_db_url):
        """``items`` has exactly the expected column set."""
        assert self._column_names(tmp_db_url, "items") == {
            "id", "box_id", "name", "note", "quantity", "is_container",
            "image_blob", "image_mime_type", "image_width", "image_height",
            "created_at", "updated_at",
        }

    def test_subitems_table_has_all_columns(self, tmp_db_url):
        """``subitems`` has exactly the expected column set."""
        assert self._column_names(tmp_db_url, "subitems") == {
            "id", "parent_item_id", "name", "note", "quantity",
            "image_blob", "image_mime_type", "image_width", "image_height",
            "created_at", "updated_at",
        }

    def test_foreign_keys_exist(self, tmp_db_url):
        """``items`` and ``subitems`` each carry one FK on their parent column."""
        item_fks, subitem_fks = self._inspect_after_migration(
            tmp_db_url,
            lambda insp: (insp.get_foreign_keys("items"), insp.get_foreign_keys("subitems")),
        )
        assert len(item_fks) == 1
        assert item_fks[0]["constrained_columns"] == ["box_id"]
        assert len(subitem_fks) == 1
        assert subitem_fks[0]["constrained_columns"] == ["parent_item_id"]

    def test_indexes_exist(self, tmp_db_url):
        """Each table has an index whose name contains ``ix_<table>_id``."""
        indexes_by_table = self._inspect_after_migration(
            tmp_db_url,
            lambda insp: {
                table: insp.get_indexes(table) for table in ("boxes", "items", "subitems")
            },
        )
        assert any("ix_boxes_id" in idx["name"] for idx in indexes_by_table["boxes"])
        assert any("ix_items_id" in idx["name"] for idx in indexes_by_table["items"])
        assert any("ix_subitems_id" in idx["name"] for idx in indexes_by_table["subitems"])
# ---------------------------------------------------------------------------
# Unmanaged DB adoption — 2a: matches baseline
# ---------------------------------------------------------------------------
class TestUnmanagedDBAdoption2a:
    """A pre-Alembic database whose tables match the baseline is adopted and upgraded."""

    @staticmethod
    def _scalars(db_url, *statements):
        """Run each SQL statement on one connection and return the scalar results in order."""
        engine = create_engine(db_url)
        with engine.begin() as conn:
            results = [conn.execute(text(sql)).scalar() for sql in statements]
        engine.dispose()
        return results

    def _create_old_db(self, db_url: str) -> None:
        """Simulate a pre-Alembic DB: create V1 tables only + insert data."""
        engine = create_engine(db_url)
        # Only the V1 tables (boxes, items, subitems) are created — not app_settings.
        for v1_table in ("boxes", "items", "subitems"):
            Base.metadata.tables[v1_table].create(bind=engine)
        seed_statements = (
            "INSERT INTO boxes (name, room, status, created_at, updated_at) "
            "VALUES ('Kitchen Box', 'Kitchen', 'packed', '2026-01-01 00:00:00', '2026-01-01 00:00:00')",
            "INSERT INTO items (box_id, name, quantity, is_container, created_at, updated_at) "
            "VALUES (1, 'Plates', 4, 0, '2026-01-01 00:00:00', '2026-01-01 00:00:00')",
        )
        with engine.begin() as conn:
            for sql in seed_statements:
                conn.execute(text(sql))
        engine.dispose()

    def test_stamp_and_upgrade(self, tmp_db_url):
        """An unmanaged baseline DB ends up at the head revision."""
        self._create_old_db(tmp_db_url)
        assert _detect_db_state(tmp_db_url) == "unmanaged"

        run_migrations(tmp_db_url)

        (recorded,) = self._scalars(tmp_db_url, "SELECT version_num FROM alembic_version")
        assert recorded == HEAD_REVISION

    def test_data_preserved_after_adoption(self, tmp_db_url):
        """Rows inserted before adoption are still present afterwards."""
        self._create_old_db(tmp_db_url)
        run_migrations(tmp_db_url)

        box_count, item_count, box_name = self._scalars(
            tmp_db_url,
            "SELECT COUNT(*) FROM boxes",
            "SELECT COUNT(*) FROM items",
            "SELECT name FROM boxes WHERE id = 1",
        )
        assert box_count == 1
        assert item_count == 1
        assert box_name == "Kitchen Box"

    def test_no_extra_tables_beyond_migrations(self, tmp_db_url):
        """After adoption the table set is exactly the migrated schema."""
        self._create_old_db(tmp_db_url)
        run_migrations(tmp_db_url)

        engine = create_engine(tmp_db_url)
        present = set(inspect(engine).get_table_names())
        engine.dispose()
        assert present == {"alembic_version", "boxes", "items", "subitems", "app_settings"}

    def test_adoption_is_idempotent(self, tmp_db_url):
        """Running run_migrations twice does not error or duplicate data."""
        self._create_old_db(tmp_db_url)
        for _ in range(2):
            run_migrations(tmp_db_url)

        box_count, recorded = self._scalars(
            tmp_db_url,
            "SELECT COUNT(*) FROM boxes",
            "SELECT version_num FROM alembic_version",
        )
        assert box_count == 1
        assert recorded == HEAD_REVISION
# ---------------------------------------------------------------------------
# Unmanaged DB — 2b: schema mismatch → fail-close
# ---------------------------------------------------------------------------
class TestUnmanagedDBMismatch2b:
    """Database with schema not matching baseline → fail-close, no changes."""

    # Hand-written DDL carrying the full column list but none of the indexes.
    # The items/subitems statements are left open so a test can close them
    # with or without a FOREIGN KEY clause.
    _BOXES_DDL = (
        "CREATE TABLE boxes (id INTEGER PRIMARY KEY, name VARCHAR(100) NOT NULL, "
        "note TEXT, room VARCHAR(100), status VARCHAR(50), "
        "image_blob BLOB, image_mime_type VARCHAR(50), "
        "image_width INTEGER, image_height INTEGER, "
        "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
    )
    _ITEMS_DDL_OPEN = (
        "CREATE TABLE items (id INTEGER PRIMARY KEY, box_id INTEGER NOT NULL, "
        "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, "
        "is_container BOOLEAN NOT NULL DEFAULT 0, "
        "image_blob BLOB, image_mime_type VARCHAR(50), "
        "image_width INTEGER, image_height INTEGER, "
        "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL"
    )
    _SUBITEMS_DDL_OPEN = (
        "CREATE TABLE subitems (id INTEGER PRIMARY KEY, parent_item_id INTEGER NOT NULL, "
        "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, "
        "image_blob BLOB, image_mime_type VARCHAR(50), "
        "image_width INTEGER, image_height INTEGER, "
        "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL"
    )
    _ITEMS_FK = ", FOREIGN KEY(box_id) REFERENCES boxes(id) ON DELETE CASCADE"
    _SUBITEMS_FK = ", FOREIGN KEY(parent_item_id) REFERENCES items(id) ON DELETE CASCADE"

    def _create_mismatched_db(self, db_url: str) -> None:
        """Create a DB that has tables but with wrong columns (missing image cols)."""
        eng = create_engine(db_url)
        with eng.begin() as conn:
            conn.execute(text(
                "CREATE TABLE boxes ("
                "id INTEGER PRIMARY KEY, name TEXT NOT NULL, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            conn.execute(text(
                "CREATE TABLE items ("
                "id INTEGER PRIMARY KEY, box_id INTEGER NOT NULL, name TEXT NOT NULL, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            conn.execute(text(
                "CREATE TABLE subitems ("
                "id INTEGER PRIMARY KEY, parent_item_id INTEGER NOT NULL, name TEXT NOT NULL, "
                "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)"
            ))
            # One row, so the "unchanged" test can verify data survives.
            conn.execute(text(
                "INSERT INTO boxes (name, created_at, updated_at) "
                "VALUES ('Bad Box', '2026-01-01 00:00:00', '2026-01-01 00:00:00')"
            ))
        eng.dispose()

    def _create_full_column_db(self, db_url: str, with_foreign_keys: bool) -> None:
        """Create the three tables with every column, optionally with FKs, and no indexes."""
        items_tail = self._ITEMS_FK if with_foreign_keys else ""
        subitems_tail = self._SUBITEMS_FK if with_foreign_keys else ""
        eng = create_engine(db_url)
        with eng.begin() as conn:
            conn.execute(text(self._BOXES_DDL))
            conn.execute(text(self._ITEMS_DDL_OPEN + items_tail + ")"))
            conn.execute(text(self._SUBITEMS_DDL_OPEN + subitems_tail + ")"))
        eng.dispose()

    def test_fail_close_on_mismatch(self, tmp_db_url):
        """A mismatched unmanaged DB makes run_migrations exit with 'does not match'."""
        self._create_mismatched_db(tmp_db_url)
        assert _detect_db_state(tmp_db_url) == "unmanaged"
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)

    def test_db_unchanged_after_fail_close(self, tmp_db_url):
        """After the fail-close there is no alembic_version table and the row is intact."""
        self._create_mismatched_db(tmp_db_url)
        with pytest.raises(SystemExit):
            run_migrations(tmp_db_url)
        # DB should be completely unchanged
        eng = create_engine(tmp_db_url)
        tables = set(inspect(eng).get_table_names())
        assert "alembic_version" not in tables
        with eng.begin() as conn:
            count = conn.execute(text("SELECT COUNT(*) FROM boxes")).scalar()
        eng.dispose()
        assert count == 1  # original data still there

    def test_extra_table_causes_fail_close(self, tmp_db_url):
        """A DB with the correct tables PLUS an extra one should fail."""
        eng = create_engine(tmp_db_url)
        # Build only the V1 tables, the same way the adoption tests in this file
        # do. Base.metadata.create_all() would also create app_settings, which
        # those tests treat as post-V1, so the mismatch could then come from
        # app_settings instead of the rogue table this test is about.
        for table_name in ("boxes", "items", "subitems"):
            Base.metadata.tables[table_name].create(bind=eng)
        with eng.begin() as conn:
            conn.execute(text("CREATE TABLE rogue_table (id INTEGER PRIMARY KEY)"))
        eng.dispose()
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)

    def test_missing_fk_causes_fail_close(self, tmp_db_url):
        """Tables with correct columns but missing FK should fail."""
        self._create_full_column_db(tmp_db_url, with_foreign_keys=False)
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)

    def test_missing_index_causes_fail_close(self, tmp_db_url):
        """Tables with correct columns and FK but missing index should fail."""
        # No indexes created — should fail
        self._create_full_column_db(tmp_db_url, with_foreign_keys=True)
        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)
# ---------------------------------------------------------------------------
# Non-empty non-app DB (rogue tables) — treated as unmanaged, fail-close
# ---------------------------------------------------------------------------
class TestRogueDatabase:
    """A database holding only unrelated tables is unmanaged and must fail-close."""

    @staticmethod
    def _make_rogue_db(db_url):
        """Create a database whose only table is ``rogue_table``."""
        engine = create_engine(db_url)
        with engine.begin() as conn:
            conn.execute(text("CREATE TABLE rogue_table (id INTEGER PRIMARY KEY)"))
        engine.dispose()

    def test_rogue_table_detected_as_unmanaged(self, tmp_db_url):
        """A DB with only rogue_table should be 'unmanaged', not 'empty'."""
        self._make_rogue_db(tmp_db_url)
        assert _detect_db_state(tmp_db_url) == "unmanaged"

    def test_rogue_table_migration_fails_closed(self, tmp_db_url):
        """Migration should fail-close, NOT create app tables in rogue DB."""
        self._make_rogue_db(tmp_db_url)

        with pytest.raises(SystemExit, match="does not match"):
            run_migrations(tmp_db_url)

        # Re-open the file and confirm nothing was added to it.
        engine = create_engine(tmp_db_url)
        remaining = set(inspect(engine).get_table_names())
        engine.dispose()
        assert remaining == {"rogue_table"}  # only the original rogue table
        assert "boxes" not in remaining
        assert "alembic_version" not in remaining
# ---------------------------------------------------------------------------
# Managed DB (already at head)
# ---------------------------------------------------------------------------
class TestManagedDBMigration:
    """Re-running migrations on an already-managed database leaves it at head."""

    def test_upgrade_head_is_noop(self, tmp_db_url):
        """A second run_migrations call succeeds and the revision is still head."""
        # First run creates the schema and puts the DB under Alembic control.
        run_migrations(tmp_db_url)
        assert _detect_db_state(tmp_db_url) == "managed"

        # Second run has nothing left to apply.
        run_migrations(tmp_db_url)

        engine = create_engine(tmp_db_url)
        with engine.begin() as conn:
            recorded = conn.execute(text("SELECT version_num FROM alembic_version")).scalar()
        engine.dispose()
        assert recorded == HEAD_REVISION
# ---------------------------------------------------------------------------
# _detect_db_state
# ---------------------------------------------------------------------------
class TestDetectDBState:
    """``_detect_db_state`` reports 'empty', 'unmanaged' or 'managed'."""

    def test_empty_db(self, tmp_db_url):
        """A database with no tables is 'empty'."""
        state = _detect_db_state(tmp_db_url)
        assert state == "empty"

    def test_unmanaged_db(self, tmp_db_url):
        """ORM-created tables without alembic_version are 'unmanaged'."""
        engine = create_engine(tmp_db_url)
        Base.metadata.create_all(bind=engine)
        engine.dispose()

        state = _detect_db_state(tmp_db_url)
        assert state == "unmanaged"

    def test_managed_db(self, tmp_db_url):
        """A database built by run_migrations is 'managed'."""
        run_migrations(tmp_db_url)

        state = _detect_db_state(tmp_db_url)
        assert state == "managed"

    def test_rogue_table_is_unmanaged(self, tmp_db_url):
        """Any DB with tables but no alembic_version is 'unmanaged'."""
        engine = create_engine(tmp_db_url)
        with engine.begin() as conn:
            conn.execute(text("CREATE TABLE something (id INTEGER)"))
        engine.dispose()

        state = _detect_db_state(tmp_db_url)
        assert state == "unmanaged"
# ---------------------------------------------------------------------------
# verify_schema_is_current (read-only startup check)
# ---------------------------------------------------------------------------
class TestVerifySchemaIsCurrent:
    """``verify_schema_is_current`` only checks the database; it never changes it."""

    def test_passes_when_at_head(self, tmp_db_url):
        """No exception is raised for a freshly migrated database."""
        run_migrations(tmp_db_url)
        verify_schema_is_current(tmp_db_url)

    def test_fails_on_empty_db(self, tmp_db_url):
        """An empty database is rejected with a message mentioning 'empty'."""
        with pytest.raises(RuntimeError, match="empty"):
            verify_schema_is_current(tmp_db_url)

    def test_fails_on_unmanaged_db(self, tmp_db_url):
        """Tables without alembic_version are rejected, naming that table."""
        engine = create_engine(tmp_db_url)
        Base.metadata.create_all(bind=engine)
        engine.dispose()

        with pytest.raises(RuntimeError, match="alembic_version"):
            verify_schema_is_current(tmp_db_url)

    def test_fails_on_wrong_revision(self, tmp_db_url):
        """Stamp at an old/fake revision, then verify should fail."""
        run_migrations(tmp_db_url)
        # Overwrite the recorded revision with an id no migration script has.
        engine = create_engine(tmp_db_url)
        with engine.begin() as conn:
            for sql in (
                "DELETE FROM alembic_version",
                "INSERT INTO alembic_version VALUES ('fake_old_rev')",
            ):
                conn.execute(text(sql))
        engine.dispose()

        with pytest.raises(RuntimeError, match="fake_old_rev"):
            verify_schema_is_current(tmp_db_url)

    def test_does_not_modify_db(self, tmp_db_url):
        """Calling verify on an empty DB must not create any tables."""
        with pytest.raises(RuntimeError):
            verify_schema_is_current(tmp_db_url)

        engine = create_engine(tmp_db_url)
        present = set(inspect(engine).get_table_names())
        engine.dispose()
        assert present == set()  # still empty

    def test_no_file_creation_for_missing_sqlite(self, tmp_path):
        """verify_schema_is_current must NOT create a missing SQLite file."""
        absent_db = tmp_path / "nonexistent" / "missing.db"

        with pytest.raises(RuntimeError, match="does not exist"):
            verify_schema_is_current(f"sqlite:///{absent_db}")

        # Neither the file nor its parent directory may have been created.
        assert not absent_db.exists()
        assert not absent_db.parent.exists()
# ---------------------------------------------------------------------------
# V1_REVISION constant
# ---------------------------------------------------------------------------
class TestV1RevisionConstant:
    """``V1_REVISION`` must name the baseline script under migrations/versions."""

    @staticmethod
    def _versions_dir():
        """Directory two levels above this file, then ``migrations/versions``."""
        return Path(__file__).resolve().parent.parent / "migrations" / "versions"

    def _matching_scripts(self):
        """Migration files whose name contains ``V1_REVISION``."""
        return list(self._versions_dir().glob(f"*{V1_REVISION}*.py"))

    def test_revision_file_exists(self):
        """V1_REVISION must point to an actual migration file."""
        versions_dir = self._versions_dir()
        revision_files = self._matching_scripts()
        assert len(revision_files) == 1, (
            f"Expected exactly one file matching revision {V1_REVISION} "
            f"in {versions_dir}, found: {revision_files}"
        )

    def test_revision_matches_baseline(self):
        """V1_REVISION must be the baseline (no down_revision)."""
        import importlib.util

        revision_files = self._matching_scripts()
        assert len(revision_files) == 1

        # Load the script by path so its module-level revision ids can be read.
        script_spec = importlib.util.spec_from_file_location("v1_migration", revision_files[0])
        script_module = importlib.util.module_from_spec(script_spec)
        script_spec.loader.exec_module(script_module)

        assert script_module.down_revision is None, "V1 baseline must have down_revision = None"
        assert script_module.revision == V1_REVISION
# ---------------------------------------------------------------------------
# Integration: init_db startup verification
# ---------------------------------------------------------------------------
class TestInitDBStartupVerify:
    """App startup succeeds only when the configured database is at head."""

    def test_app_starts_when_db_at_head(self, tmp_path):
        """App starts normally when DB has been migrated to head."""
        db_url = f"sqlite:///{tmp_path / 'integration.db'}"
        run_migrations(db_url)
        configure_database(db_url)

        with TestClient(create_app()) as http:
            listing = http.get("/boxes", follow_redirects=False)
            assert listing.status_code == 200

    def test_init_db_fails_on_empty_db(self, tmp_path):
        """init_db raises RuntimeError on empty DB — app must not start."""
        db_url = f"sqlite:///{tmp_path / 'empty.db'}"
        configure_database(db_url)
        application = create_app()

        # Entering the client context is what triggers the startup check.
        with pytest.raises(RuntimeError, match="empty"):
            with TestClient(application):
                pass

    def test_init_db_fails_on_unmanaged_db(self, tmp_path):
        """init_db raises RuntimeError on unmanaged DB — app must not start."""
        db_url = f"sqlite:///{tmp_path / 'unmanaged.db'}"
        # Tables are created straight from the ORM metadata, so there is no alembic_version.
        engine = create_engine(db_url)
        Base.metadata.create_all(bind=engine)
        engine.dispose()

        configure_database(db_url)
        application = create_app()

        with pytest.raises(RuntimeError, match="alembic_version"):
            with TestClient(application):
                pass

    def test_full_crud_after_migration(self, tmp_path):
        """Full CRUD works when DB is migrated first, then app starts."""
        db_url = f"sqlite:///{tmp_path / 'crud.db'}"
        run_migrations(db_url)
        configure_database(db_url)

        # Any of these statuses counts as a successful form submission.
        accepted = (200, 302, 303)
        new_box = {
            "name": "Test Box",
            "room": "Living Room",
            "status": "ready",
        }
        new_item = {
            "name": "Test Item",
            "quantity": "3",
        }

        with TestClient(create_app()) as http:
            # Create a box and confirm it is listed.
            created = http.post("/boxes", data=new_box, follow_redirects=False)
            assert created.status_code in accepted
            assert "Test Box" in http.get("/boxes").text

            # Add an item to that box.
            added = http.post("/boxes/1/items", data=new_item, follow_redirects=False)
            assert added.status_code in accepted

            # Delete the box and confirm it is gone from the listing.
            removed = http.post("/boxes/1/delete", follow_redirects=False)
            assert removed.status_code in accepted
            assert "Test Box" not in http.get("/boxes").text
# ---------------------------------------------------------------------------
# Production DB copy adoption
# ---------------------------------------------------------------------------
class TestProdDBCopyAdoption:
    """Verify migration works against a copy of the real production DB."""

    # Tables whose row counts must be identical before and after migrating.
    _DATA_TABLES = ("boxes", "items", "subitems")

    @classmethod
    def _row_counts(cls, db_url):
        """Return ``{table: row count}`` for the data tables; a missing table counts as 0.

        Checking for the table first lets the snapshot work on a copy that has
        no application tables yet, instead of raising on the first SELECT.
        """
        eng = create_engine(db_url)
        try:
            present = set(inspect(eng).get_table_names())
            counts = {}
            with eng.begin() as conn:
                for table in cls._DATA_TABLES:
                    if table in present:
                        # Table names come from the fixed tuple above, never from input.
                        counts[table] = conn.execute(
                            text(f"SELECT COUNT(*) FROM {table}")
                        ).scalar()
                    else:
                        counts[table] = 0
            return counts
        finally:
            eng.dispose()

    def test_adopt_prod_copy(self, tmp_path):
        """Migrating a copy of data/app.db reaches head without changing row counts."""
        # Anchor on this file's location (as TestV1RevisionConstant does) rather
        # than the current working directory, so the test is not silently
        # skipped when pytest is started from somewhere other than the repo root.
        prod_db = Path(__file__).resolve().parent.parent / "data" / "app.db"
        if not prod_db.exists():
            pytest.skip("data/app.db not present — skipping prod copy test")

        # Work on a throwaway copy; the real file is only read.
        copy_path = tmp_path / "prod_copy.db"
        shutil.copy2(prod_db, copy_path)
        db_url = f"sqlite:///{copy_path}"

        # Record row counts before
        counts_before = self._row_counts(db_url)

        # Run migration (handles managed, unmanaged, or empty)
        run_migrations(db_url)

        # Verify version at head and data preserved
        eng = create_engine(db_url)
        with eng.begin() as conn:
            version = conn.execute(text("SELECT version_num FROM alembic_version")).scalar()
        eng.dispose()
        counts_after = self._row_counts(db_url)

        assert version == HEAD_REVISION
        assert counts_after == counts_before
+656
View File
@@ -0,0 +1,656 @@
"""Tests for the settings store, LLM client, and settings routes.
All LLM calls are mocked — CI never touches the network.
"""
from unittest.mock import patch
import pytest
import app.llm as llm_module
from app.llm import LLMResult, expand_query, is_configured
from app.llm import ExpansionResult
from app.models import AppSetting
from app.settings_store import LLMConfig, get_app_settings, save_app_settings
# Bind test_connection under a private alias: importing it into this module by
# its own name would expose a `test_*` callable that pytest would try to
# collect as a test function.
_test_connection = llm_module.test_connection
# ---------------------------------------------------------------------------
# LLMConfig dataclass defaults
# ---------------------------------------------------------------------------
class TestLLMConfigDefaults:
    """A no-argument ``LLMConfig`` carries the expected default values."""

    def test_default_values(self):
        """Both flags are off, text fields are empty, and base_url is the OpenAI endpoint."""
        defaults = LLMConfig()

        # Boolean switches start disabled.
        assert defaults.enabled is False
        assert defaults.ai_search_enabled is False
        # Only the base URL has a non-empty default.
        assert defaults.base_url == "https://api.openai.com/v1"
        assert defaults.model == ""
        assert defaults.api_key == ""
        assert defaults.ai_search_extra_hints == ""
# ---------------------------------------------------------------------------
# settings_store: get_app_settings
# ---------------------------------------------------------------------------
class TestGetAppSettings:
    """Reading settings rows back into an ``LLMConfig`` via ``get_app_settings``."""

    def test_returns_defaults_when_no_rows(self, db_session):
        """With no stored rows, every field reports its default."""
        loaded = get_app_settings(db_session)

        assert loaded.enabled is False
        assert loaded.base_url == "https://api.openai.com/v1"
        assert loaded.model == ""
        assert loaded.api_key == ""
        assert loaded.ai_search_enabled is False

    def test_reads_stored_values(self, db_session):
        """Stored key/value rows are mapped onto the matching config fields."""
        stored_rows = [
            AppSetting(key="llm_enabled", value="true"),
            AppSetting(key="llm_base_url", value="https://custom.api/v1"),
            AppSetting(key="llm_model", value="gpt-4o"),
            AppSetting(key="llm_api_key", value="sk-test-key"),
            AppSetting(key="ai_search_enabled", value="true"),
        ]
        for row in stored_rows:
            db_session.add(row)
        db_session.commit()

        loaded = get_app_settings(db_session)

        assert loaded.enabled is True
        assert loaded.base_url == "https://custom.api/v1"
        assert loaded.model == "gpt-4o"
        assert loaded.api_key == "sk-test-key"
        assert loaded.ai_search_enabled is True

    def test_handles_null_value_as_default(self, db_session):
        """A row whose value is NULL reads back as the field's default."""
        db_session.add(AppSetting(key="llm_model", value=None))
        db_session.commit()

        assert get_app_settings(db_session).model == ""
# ---------------------------------------------------------------------------
# settings_store: save_app_settings
# ---------------------------------------------------------------------------
class TestSaveAppSettings:
    """Writing settings through ``save_app_settings`` and reading them back."""

    def test_saves_new_settings(self, db_session):
        """All four LLM fields round-trip on a first save."""
        wanted = {
            "enabled": True,
            "base_url": "https://my-api.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-new-key",
        }
        save_app_settings(db_session, **wanted)

        stored = get_app_settings(db_session)
        assert stored.enabled is True
        assert stored.base_url == "https://my-api.com/v1"
        assert stored.model == "gpt-4o-mini"
        assert stored.api_key == "sk-new-key"

    def test_updates_existing_settings(self, db_session):
        """A later save overwrites the fields it names and leaves the rest alone."""
        save_app_settings(db_session, enabled=True, model="old-model", api_key="key1")
        save_app_settings(db_session, model="new-model")

        stored = get_app_settings(db_session)
        assert stored.model == "new-model"
        # `enabled` was absent from the second call, so the first value stands.
        assert stored.enabled is True

    def test_api_key_none_preserves_old_key(self, db_session):
        """Passing api_key=None keeps the previously stored key."""
        save_app_settings(db_session, api_key="sk-original")
        save_app_settings(db_session, model="gpt-4o", api_key=None)

        stored = get_app_settings(db_session)
        assert stored.api_key == "sk-original"
        assert stored.model == "gpt-4o"

    def test_api_key_empty_string_overwrites(self, db_session):
        """Passing api_key='' replaces the stored key with an empty one."""
        for key_value in ("sk-original", ""):
            save_app_settings(db_session, api_key=key_value)

        assert get_app_settings(db_session).api_key == ""

    def test_partial_save_only_updates_specified_fields(self, db_session):
        """Fields from an earlier save survive a later save of a different field."""
        save_app_settings(db_session, enabled=True, model="gpt-4o")
        save_app_settings(db_session, base_url="https://new.url/v1")

        stored = get_app_settings(db_session)
        assert stored.enabled is True
        assert stored.model == "gpt-4o"
        assert stored.base_url == "https://new.url/v1"
# ---------------------------------------------------------------------------
# is_configured
# ---------------------------------------------------------------------------
class TestIsConfigured:
    """``is_configured`` is True only with enabled, a model and an API key all set."""

    def test_false_when_disabled(self):
        """Model and key are present but the integration is switched off."""
        candidate = LLMConfig(enabled=False, model="gpt-4o", api_key="sk-key")
        assert is_configured(candidate) is False

    def test_false_when_no_model(self):
        """Enabled with a key, but the model name is empty."""
        candidate = LLMConfig(enabled=True, model="", api_key="sk-key")
        assert is_configured(candidate) is False

    def test_false_when_no_api_key(self):
        """Enabled with a model, but the API key is empty."""
        candidate = LLMConfig(enabled=True, model="gpt-4o", api_key="")
        assert is_configured(candidate) is False

    def test_true_when_all_set(self):
        """Enabled, model and key all provided."""
        candidate = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        assert is_configured(candidate) is True
# ---------------------------------------------------------------------------
# test_connection (mocked)
# ---------------------------------------------------------------------------
class TestTestConnection:
    """Unit tests for ``_test_connection`` with the chat-completion call mocked out."""

    def test_returns_failure_when_not_configured(self):
        """A disabled config is reported as a failure with the "not configured" message."""
        outcome = _test_connection(LLMConfig(enabled=False))
        assert outcome.success is False
        assert "未配置" in outcome.message

    @patch("app.llm._call_chat_completion")
    def test_success_when_configured(self, mock_call):
        """A normal completion payload yields success and names the model in the message."""
        mock_call.return_value = {"choices": [{"message": {"content": "Hi"}}]}
        usable = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")

        outcome = _test_connection(usable)

        assert outcome.success is True
        assert "连接成功" in outcome.message
        assert "gpt-4o" in outcome.message

    @patch("app.llm._call_chat_completion")
    def test_handles_http_error(self, mock_call):
        """An HTTP 401 from the backend becomes a failure that mentions the status code."""
        import httpx

        unauthorized = httpx.HTTPStatusError(
            "401",
            request=httpx.Request("POST", "http://x"),
            response=httpx.Response(401),
        )
        mock_call.side_effect = unauthorized
        bad_key = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-bad")

        outcome = _test_connection(bad_key)

        assert outcome.success is False
        assert "401" in outcome.message

    @patch("app.llm._call_chat_completion")
    def test_handles_connect_error(self, mock_call):
        """A refused connection is reported with the "cannot connect" message."""
        import httpx

        mock_call.side_effect = httpx.ConnectError("refused")
        usable = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")

        outcome = _test_connection(usable)

        assert outcome.success is False
        assert "无法连接" in outcome.message

    @patch("app.llm._call_chat_completion")
    def test_handles_timeout(self, mock_call):
        """A timeout is reported with the "timed out" message."""
        import httpx

        mock_call.side_effect = httpx.TimeoutException("timeout")
        usable = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")

        outcome = _test_connection(usable)

        assert outcome.success is False
        assert "超时" in outcome.message
# ---------------------------------------------------------------------------
# expand_query (mocked)
# ---------------------------------------------------------------------------
class TestExpandQuery:
    """Unit tests for ``expand_query`` with the chat-completion call mocked out.

    Every test passes a real, non-empty query. With an empty query the
    not-configured and fallback assertions could pass (and the success
    assertion could fail) purely because of an empty-input short-circuit,
    without exercising the path the test is named after.
    """

    # Non-empty query matching the mocked expansion terms used below.
    QUERY = "锅"

    def test_returns_empty_when_not_configured(self):
        """A disabled config yields no terms and no error."""
        cfg = LLMConfig(enabled=False)
        result = expand_query(cfg, self.QUERY)
        assert result.terms == []
        assert result.error is None

    @patch("app.llm._call_chat_completion")
    def test_expands_query_successfully(self, mock_call):
        """A JSON array in the completion content is returned as expansion terms."""
        mock_call.return_value = {
            "choices": [
                {"message": {"content": '["平底锅","炒锅","锅具","厨房锅"]'}}
            ]
        }
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, self.QUERY)
        assert "平底锅" in result.terms
        assert "炒锅" in result.terms
        assert result.error is None

    @patch("app.llm._call_chat_completion")
    def test_fallback_on_api_failure(self, mock_call):
        """An exception from the backend yields no terms and a populated error."""
        mock_call.side_effect = Exception("network down")
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, self.QUERY)
        assert result.terms == []
        assert result.error is not None

    @patch("app.llm._call_chat_completion")
    def test_fallback_on_empty_response(self, mock_call):
        """Empty completion content yields no terms and is not treated as an error."""
        mock_call.return_value = {"choices": [{"message": {"content": ""}}]}
        cfg = LLMConfig(enabled=True, model="gpt-4o", api_key="sk-key")
        result = expand_query(cfg, self.QUERY)
        assert result.terms == []
        assert result.error is None
# ---------------------------------------------------------------------------
# Routes: GET /settings
# ---------------------------------------------------------------------------
class TestSettingsPage:
    """Rendering checks for ``GET /settings`` and the navigation link to it."""

    def test_settings_page_returns_200(self, client):
        """The settings page is reachable."""
        assert client.get("/settings").status_code == 200

    def test_settings_page_has_form_elements(self, client):
        """The page contains the title, all four form fields and both action buttons."""
        page = client.get("/settings").text
        expected_fragments = (
            "设置",
            'name="enabled"',
            'name="base_url"',
            'name="model"',
            'name="api_key"',
            "保存设置",
            "测试连接",
        )
        for fragment in expected_fragments:
            assert fragment in page

    def test_settings_page_shows_nav_link(self, client):
        """Another page (``/boxes``) links to the settings page."""
        page = client.get("/boxes").text
        assert "设置" in page
        assert 'href="/settings"' in page

    def test_settings_page_no_api_key_echoed(self, client, db_session):
        """A stored key is never rendered; only a "configured" marker is shown."""
        save_app_settings(db_session, api_key="sk-super-secret-key-12345")
        page = client.get("/settings").text
        assert "sk-super-secret-key-12345" not in page
        assert "已配置" in page

    def test_settings_page_shows_placeholder_when_no_key(self, client):
        """With no stored key the input prompts the user to enter one."""
        page = client.get("/settings").text
        assert "输入 API Key" in page

    def test_settings_page_shows_default_base_url(self, client):
        """The default OpenAI base URL appears on a fresh settings page."""
        page = client.get("/settings").text
        assert "https://api.openai.com/v1" in page
# ---------------------------------------------------------------------------
# Routes: POST /settings
# ---------------------------------------------------------------------------
class TestSaveSettingsRoute:
    """Behaviour of ``POST /settings``: redirect, persistence and API-key handling."""

    @staticmethod
    def _submit(client, form):
        """POST ``form`` to ``/settings`` without following the redirect."""
        return client.post("/settings", data=form, follow_redirects=False)

    def test_save_settings_redirects(self, client):
        """A successful save answers 303 and points back at the settings page."""
        form = {
            "enabled": "on",
            "base_url": "https://my-api.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-test-key",
        }
        reply = self._submit(client, form)
        assert reply.status_code == 303
        assert reply.headers["location"] == "/settings"

    def test_saved_settings_persist(self, client, db_session):
        """Submitted form values are readable from the settings store."""
        form = {
            "enabled": "on",
            "base_url": "https://my-api.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-test-key",
        }
        self._submit(client, form)

        stored = get_app_settings(db_session)
        assert stored.enabled is True
        assert stored.base_url == "https://my-api.com/v1"
        assert stored.model == "gpt-4o-mini"
        assert stored.api_key == "sk-test-key"

    def test_save_with_blank_api_key_preserves_old(self, client, db_session):
        """Submitting a blank key after a real one keeps the real one."""
        with_key = {"enabled": "on", "model": "gpt-4o", "api_key": "sk-original"}
        blank_key = {"enabled": "on", "model": "gpt-4o", "api_key": ""}
        self._submit(client, with_key)
        self._submit(client, blank_key)

        stored = get_app_settings(db_session)
        assert stored.api_key == "sk-original"

    def test_save_disabled_state(self, client, db_session):
        """Omitting the ``enabled`` checkbox on a later save switches the feature off."""
        enabled_form = {"enabled": "on", "model": "gpt-4o", "api_key": "sk-key"}
        # An unchecked checkbox is simply absent from the submitted form.
        unchecked_form = {"model": "gpt-4o", "api_key": ""}
        self._submit(client, enabled_form)
        self._submit(client, unchecked_form)

        stored = get_app_settings(db_session)
        assert stored.enabled is False

    def test_save_settings_no_api_key_in_redirect_page(self, client):
        """The page shown after saving never contains the submitted key."""
        form = {"enabled": "on", "model": "gpt-4o", "api_key": "sk-secret-key"}
        self._submit(client, form)

        page = client.get("/settings").text
        assert "sk-secret-key" not in page

    def test_save_refuses_when_base_url_changes_and_key_blank(self, client, db_session):
        """Changing ``base_url`` with a blank key is rejected and nothing is saved."""
        original_form = {
            "enabled": "on",
            "base_url": "https://old-api.com/v1",
            "model": "gpt-4o",
            "api_key": "sk-old-key",
        }
        self._submit(client, original_form)

        # Different base_url and no key: the route must refuse.
        changed_endpoint_form = {
            "enabled": "on",
            "base_url": "https://new-api.com/v1",
            "model": "gpt-4o",
            "api_key": "",
        }
        reply = client.post("/settings", data=changed_endpoint_form)
        assert reply.status_code == 200
        assert "请重新输入 API Key 后保存" in reply.text

        # The previously stored configuration is still in place.
        stored = get_app_settings(db_session)
        assert stored.base_url == "https://old-api.com/v1"
        assert stored.api_key == "sk-old-key"

    def test_save_preserves_key_when_endpoint_unchanged_and_key_blank(self, client, db_session):
        """Re-saving the same endpoint with a blank key keeps the stored key."""
        initial_form = {
            "enabled": "on",
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o",
            "api_key": "sk-original",
        }
        resubmitted_form = {
            "enabled": "on",
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o",
            "api_key": "",
        }
        self._submit(client, initial_form)
        self._submit(client, resubmitted_form)

        stored = get_app_settings(db_session)
        assert stored.api_key == "sk-original"

    def test_save_preserves_key_when_only_model_changes_and_key_blank(self, client, db_session):
        """A model change under the same ``base_url`` updates the model and keeps the key."""
        initial_form = {
            "enabled": "on",
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o",
            "api_key": "sk-original",
        }
        model_only_change = {
            "enabled": "on",
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "",
        }
        self._submit(client, initial_form)
        self._submit(client, model_only_change)

        stored = get_app_settings(db_session)
        assert stored.model == "gpt-4o-mini"
        assert stored.api_key == "sk-original"

    def test_save_includes_ai_search_enabled_checkbox(self, client, db_session):
        """Saving without the ``ai_search_enabled`` checkbox turns a stored ``true`` off."""
        # Seed the store with ai_search_enabled switched on.
        db_session.add(AppSetting(key="ai_search_enabled", value="true"))
        db_session.commit()

        # The form below does not carry the checkbox.
        form = {"enabled": "on", "model": "gpt-4o", "api_key": "sk-key"}
        self._submit(client, form)

        stored = get_app_settings(db_session)
        assert stored.ai_search_enabled is False

    def test_save_preserves_ai_search_enabled_when_checked(self, client, db_session):
        """Saving with the ``ai_search_enabled`` checkbox ticked persists it as true."""
        form = {
            "enabled": "on",
            "model": "gpt-4o",
            "api_key": "sk-key",
            "ai_search_enabled": "on",
        }
        self._submit(client, form)

        stored = get_app_settings(db_session)
        assert stored.ai_search_enabled is True
# ---------------------------------------------------------------------------
# Routes: POST /settings/test
# ---------------------------------------------------------------------------
class TestTestConnectionRoute:
    """Behaviour of ``POST /settings/test`` with the chat-completion call mocked out."""

    @patch("app.llm._call_chat_completion")
    def test_test_connection_success(self, mock_call, client):
        """A working backend renders the success message including the model name."""
        mock_call.return_value = {"choices": [{"message": {"content": "Hi"}}]}
        response = client.post(
            "/settings/test",
            data={
                "enabled": "on",
                "base_url": "https://api.openai.com/v1",
                "model": "gpt-4o",
                "api_key": "sk-test",
            },
        )
        assert response.status_code == 200
        assert "连接成功" in response.text
        assert "gpt-4o" in response.text

    @patch("app.llm._call_chat_completion")
    def test_test_connection_failure(self, mock_call, client):
        """An HTTP 401 from the backend renders the failure message with the status code."""
        import httpx

        mock_call.side_effect = httpx.HTTPStatusError(
            "401",
            request=httpx.Request("POST", "http://x"),
            response=httpx.Response(401),
        )
        response = client.post(
            "/settings/test",
            data={
                "enabled": "on",
                "base_url": "https://api.openai.com/v1",
                "model": "gpt-4o",
                "api_key": "sk-bad",
            },
        )
        assert response.status_code == 200
        assert "连接失败" in response.text
        assert "401" in response.text

    def test_test_connection_not_configured(self, client):
        """An unchecked, empty form renders the "not configured" message."""
        response = client.post(
            "/settings/test",
            data={
                "enabled": "",  # not checked
                "base_url": "https://api.openai.com/v1",
                "model": "",
                "api_key": "",
            },
        )
        assert response.status_code == 200
        assert "未配置" in response.text

    @patch("app.llm._call_chat_completion")
    def test_test_connection_uses_stored_key_when_endpoint_matches(self, mock_call, client, db_session):
        """A blank key with a matching saved endpoint still produces a successful test."""
        mock_call.return_value = {"choices": [{"message": {"content": "Hi"}}]}
        # Store a config first.
        save_app_settings(
            db_session,
            enabled=True,
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="sk-stored-key",
        )
        response = client.post(
            "/settings/test",
            data={
                "enabled": "on",
                "base_url": "https://api.openai.com/v1",
                "model": "gpt-4o",
                "api_key": "",  # blank: use stored key (endpoint matches)
            },
        )
        assert response.status_code == 200
        assert "连接成功" in response.text

    @patch("app.llm._call_chat_completion")
    def test_test_connection_uses_stored_key_when_only_model_changes(self, mock_call, client, db_session):
        """A model change under the same ``base_url`` reuses the stored key."""
        captured = {}

        def fake_call(cfg, **kwargs):
            # Record the config the route hands to the backend call.
            captured["base_url"] = cfg.base_url
            captured["model"] = cfg.model
            captured["api_key"] = cfg.api_key
            return {"choices": [{"message": {"content": "Hi"}}]}

        mock_call.side_effect = fake_call
        save_app_settings(
            db_session,
            enabled=True,
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="sk-stored-key",
        )
        response = client.post(
            "/settings/test",
            data={
                "enabled": "on",
                "base_url": "https://api.openai.com/v1",
                "model": "gpt-4o-mini",
                "api_key": "",
            },
        )
        assert response.status_code == 200
        assert "连接成功" in response.text
        assert captured == {
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o-mini",
            "api_key": "sk-stored-key",
        }

    @patch("app.llm._call_chat_completion")
    def test_test_connection_refuses_stored_key_when_endpoint_changed(self, mock_call, client, db_session):
        """A changed ``base_url`` with a blank key is refused without contacting the backend."""
        save_app_settings(
            db_session,
            enabled=True,
            base_url="https://api.openai.com/v1",
            model="gpt-4o",
            api_key="sk-stored-key",
        )
        response = client.post(
            "/settings/test",
            data={
                "enabled": "on",
                "base_url": "https://attacker.example/v1",  # different endpoint
                "model": "gpt-4o",
                "api_key": "",  # blank: refuse
            },
        )
        assert response.status_code == 200
        assert "请重新输入 API Key" in response.text
        # The stored key must never be sent to the new endpoint. Mocking the
        # call also keeps a regression from issuing a real network request.
        mock_call.assert_not_called()

    def test_test_connection_result_shows_on_settings_page(self, client):
        """The test result is rendered on the settings page itself."""
        response = client.post(
            "/settings/test",
            data={
                "enabled": "",
                "base_url": "https://api.openai.com/v1",
                "model": "",
                "api_key": "",
            },
        )
        assert response.status_code == 200
        assert "设置" in response.text
        assert "保存设置" in response.text
# ---------------------------------------------------------------------------
# Graceful degradation: unconfigured LLM does not affect existing features
# ---------------------------------------------------------------------------
class TestGracefulDegradation:
    """Existing pages and CRUD keep working when no LLM settings have been saved."""

    def test_boxes_page_works_without_llm_config(self, client):
        """The boxes listing renders."""
        assert client.get("/boxes").status_code == 200

    def test_search_page_works_without_llm_config(self, client):
        """A plain search request renders."""
        assert client.get("/search?q=test").status_code == 200

    def test_crud_works_without_llm_config(self, client, db_session):
        """Creating a box redirects and leaves exactly one row in the database."""
        from app.models import Box

        created = client.post(
            "/boxes",
            data={"name": "No LLM Box"},
            follow_redirects=False,
        )
        assert created.status_code == 303

        box_count = db_session.query(Box).count()
        assert box_count == 1