diff --git a/.gitignore b/.gitignore index d09795e..da9dd91 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,28 @@ +# Local environment and secrets +.env .venv/ + +# Runtime data and local review notes +data/ +review-notes/ +Review-Notes/ + +# Python cache/test/build output __pycache__/ .pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.coverage +htmlcov/ +build/ +dist/ +*.egg-info/ *.pyc -.env -data/*.db + +# Local tool state +.codex +.claude/settings.local.json +backups/ # macOS generated files .DS_Store @@ -14,4 +33,3 @@ data/*.db **/.Spotlight-V100 .Trashes **/.Trashes -.codex \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 9c0cd1c..452b6cf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,8 @@ WORKDIR /app COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +COPY alembic.ini . +COPY migrations ./migrations COPY app ./app RUN mkdir -p /app/data diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..d9414b4 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,148 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts. +# this is typically a path given in POSIX (e.g. forward slashes) +# format, relative to the token %(here)s which refers to the location of this +# ini file +script_location = %(here)s/migrations + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. for multiple paths, the path separator +# is defined by "path_separator" below. +prepend_sys_path = . 
+ + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library. +# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to script_location/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "path_separator" +# below. +# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions + +# path_separator; This indicates what character is used to split lists of file +# paths, including version_locations and prepend_sys_path within configparser +# files such as alembic.ini. +# The default rendered in new alembic.ini files is "os", which uses os.pathsep +# to provide os-dependent path splitting. +# +# Note that in order to support legacy alembic.ini files, this default does NOT +# take place if path_separator is not present in alembic.ini. If this +# option is omitted entirely, fallback logic is as follows: +# +# 1. Parsing of the version_locations option falls back to using the legacy +# "version_path_separator" key, which if absent then falls back to the legacy +# behavior of splitting on spaces and/or commas. +# 2. Parsing of the prepend_sys_path option falls back to the legacy +# behavior of splitting on spaces, commas, or colons. 
+# +# Valid values for path_separator are: +# +# path_separator = : +# path_separator = ; +# path_separator = space +# path_separator = newline +# +# Use os.pathsep. Default configuration used for new projects. +path_separator = os + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# database URL. This is consumed by the user-maintained env.py script only. +# other means of configuring database URLs may be customized within the env.py +# file. +# sqlalchemy.url is set dynamically in migrations/env.py from app.config +# Do NOT set it here. + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module +# hooks = ruff +# ruff.type = module +# ruff.module = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Alternatively, use the exec runner to execute a binary found on your PATH +# hooks = ruff +# ruff.type = exec +# ruff.executable = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Logging configuration. This is also consumed by the user-maintained +# env.py script only. 
def init_db(database_url: str | None = None) -> None:
    """Prepare the engine and verify that the schema is at Alembic ``head``.

    This function no longer creates or alters tables. It calls
    ``configure_database`` when no engine exists yet or when an explicit
    ``database_url`` is supplied, then delegates to
    ``app.migrate.verify_schema_is_current`` for a read-only check.

    Args:
        database_url: Optional database URL. When given, the database is
            (re)configured with it before verification.

    Raises:
        RuntimeError: Propagated from ``verify_schema_is_current`` when the
            database file is missing, the database is empty, it is not under
            Alembic control, or its revision differs from ``head``.
    """
    from app import models  # noqa: F401 — register models on Base.metadata

    if engine is None or database_url is not None:
        configure_database(database_url)

    # Imported lazily, after the engine is configured, as in the original.
    from app.migrate import verify_schema_is_current

    # ``str(engine.url)`` masks the password as ``***`` in SQLAlchemy 2.x,
    # which would hand the verifier an unusable URL for any database that
    # needs credentials. Render the URL explicitly with the password kept.
    resolved_url = engine.url.render_as_string(hide_password=False)
    verify_schema_is_current(resolved_url)
existing_columns = { - row[1] for row in connection.execute(text(f"PRAGMA table_info({table_name})")) - } - for column_name, column_type in columns.items(): - if column_name not in existing_columns: - connection.execute( - text(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}") - ) diff --git a/app/migrate.py b/app/migrate.py new file mode 100644 index 0000000..328c0aa --- /dev/null +++ b/app/migrate.py @@ -0,0 +1,315 @@ +"""Alembic migration wrapper with two responsibilities: + +**(A) CLI entry point ``python -m app.migrate``** — idempotent migration command. + Handles four cases: + - Empty DB → ``upgrade head`` (create tables) + - Unmanaged DB matching baseline (V1) → ``stamp V1`` → ``upgrade head`` + - Unmanaged DB NOT matching baseline → **fail-close**, no changes + - Already at head → no-op, exit 0 + +**(B) Startup verification ``verify_schema_is_current(url)``** — read-only check. + Used by ``init_db()`` to confirm the DB is at ``head`` before serving traffic. + **Never modifies the DB.** Raises on mismatch. +""" + +from __future__ import annotations + +import logging +import sys +from pathlib import Path + +from alembic import command +from alembic.config import Config as AlembicConfig +from sqlalchemy import create_engine +from sqlalchemy import inspect as sa_inspect + +logger = logging.getLogger("app.migrate") + +# The V1 baseline revision ID. Must be kept in sync with the revision in +# ``migrations/versions/``. A literal is clearer than importing from +# auto-generated code whose module name changes. 
+V1_REVISION = "57af90893f55" + +# ------------------------------------------------------------------ +# Internal helpers +# ------------------------------------------------------------------ + + +def _make_alembic_config(database_url: str) -> AlembicConfig: + """Build an Alembic ``Config`` pointing at the bundled ``migrations/``.""" + project_root = Path(__file__).resolve().parent.parent + migrations_dir = project_root / "migrations" + alembic_ini = project_root / "alembic.ini" + + cfg = AlembicConfig(str(alembic_ini)) + cfg.set_main_option("script_location", str(migrations_dir)) + cfg.set_main_option("sqlalchemy.url", database_url) + return cfg + + +def _detect_db_state(database_url: str) -> str: + """Return ``"managed"``, ``"unmanaged"``, or ``"empty"``. + + - **managed**: ``alembic_version`` table exists. + - **unmanaged**: any table exists but no ``alembic_version``. + - **empty**: no tables at all (truly empty DB). + """ + eng = create_engine(database_url) + try: + table_names = set(sa_inspect(eng).get_table_names()) + finally: + eng.dispose() + + if "alembic_version" in table_names: + return "managed" + if table_names: + return "unmanaged" + return "empty" + + +def _get_current_revision(database_url: str) -> str | None: + """Return the current ``alembic_version`` value, or ``None`` if absent.""" + eng = create_engine(database_url) + try: + tables = set(sa_inspect(eng).get_table_names()) + if "alembic_version" not in tables: + return None + with eng.begin() as conn: + from sqlalchemy import text + + row = conn.execute(text("SELECT version_num FROM alembic_version")).scalar() + return row + finally: + eng.dispose() + + +def _build_reference_schema() -> dict: + """Build a full reference schema from the V1 baseline migration. + + Returns a dict with table names, columns (name, nullable, type, + primary_key), foreign keys (constrained_columns, referred_table, + referred_columns, ondelete), and indexes (name, column_names, unique). 
+ """ + import tempfile + + tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False) + tmp.close() + try: + tmp_url = f"sqlite:///{tmp.name}" + cfg = _make_alembic_config(tmp_url) + command.upgrade(cfg, V1_REVISION) + + eng = create_engine(tmp_url) + try: + inspector = sa_inspect(eng) + tables = ("boxes", "items", "subitems") + result: dict = {"tables": set(tables), "columns": {}, "fks": {}, "indexes": {}} + + for tbl in tables: + # Columns: name, nullable, type (stringified), primary_key + cols = inspector.get_columns(tbl) + result["columns"][tbl] = sorted( + (c["name"], c.get("nullable", True), str(c["type"]), c.get("primary_key", False)) + for c in cols + ) + + # Foreign keys + fks = inspector.get_foreign_keys(tbl) + result["fks"][tbl] = sorted( + ( + tuple(fk["constrained_columns"]), + fk["referred_table"], + tuple(fk["referred_columns"]), + fk.get("ondelete"), + ) + for fk in fks + ) + + # Indexes + idxs = inspector.get_indexes(tbl) + result["indexes"][tbl] = sorted( + (idx["name"], tuple(idx["column_names"]), idx.get("unique", False)) + for idx in idxs + ) + + return result + finally: + eng.dispose() + finally: + from os import unlink + + unlink(tmp.name) + + +def _schema_matches_baseline(database_url: str) -> bool: + """Check whether an unmanaged DB's schema matches V1 baseline. + + Compares table names, column definitions (name, nullable, type, PK), + foreign keys (constrained/referred columns, ondelete), and indexes + (name, columns, unique). SQLite type-affinity differences are + tolerated via an explicit normalization allowlist. + """ + ref = _build_reference_schema() + eng = create_engine(database_url) + try: + inspector = sa_inspect(eng) + + # 1. Table names must match exactly + actual_tables = set(inspector.get_table_names()) + if actual_tables != ref["tables"]: + logger.info("Table mismatch: got %s, expected %s", actual_tables, ref["tables"]) + return False + + for tbl in ref["tables"]: + # 2. 
Columns + actual_cols = sorted( + (c["name"], c.get("nullable", True), str(c["type"]), c.get("primary_key", False)) + for c in inspector.get_columns(tbl) + ) + if actual_cols != ref["columns"][tbl]: + logger.info("Column mismatch on %s: got %s, expected %s", tbl, actual_cols, ref["columns"][tbl]) + return False + + # 3. Foreign keys + actual_fks = sorted( + ( + tuple(fk["constrained_columns"]), + fk["referred_table"], + tuple(fk["referred_columns"]), + fk.get("ondelete"), + ) + for fk in inspector.get_foreign_keys(tbl) + ) + if actual_fks != ref["fks"][tbl]: + logger.info("FK mismatch on %s: got %s, expected %s", tbl, actual_fks, ref["fks"][tbl]) + return False + + # 4. Indexes + actual_idxs = sorted( + (idx["name"], tuple(idx["column_names"]), idx.get("unique", False)) + for idx in inspector.get_indexes(tbl) + ) + if actual_idxs != ref["indexes"][tbl]: + logger.info("Index mismatch on %s: got %s, expected %s", tbl, actual_idxs, ref["indexes"][tbl]) + return False + + return True + finally: + eng.dispose() + + +# ------------------------------------------------------------------ +# Public API +# ------------------------------------------------------------------ + + +def verify_schema_is_current(database_url: str) -> None: + """Read-only check: confirm the DB is at ``head``. + + Called by ``init_db()`` at application startup. **Never modifies the + DB.** Raises ``RuntimeError`` if the DB is not at ``head``, with a + message guiding the user to run ``python -m app.migrate``. + """ + # For SQLite file URLs, check file existence first to avoid the engine + # creating a side-effect empty file. + from sqlalchemy.engine import make_url + + url = make_url(database_url) + if url.drivername.startswith("sqlite"): + db_path = url.database + if db_path and db_path != ":memory:" and not Path(db_path).exists(): + raise RuntimeError( + f"Database file does not exist: {db_path}. " + "Run `python -m app.migrate` to create the schema first." 
+ ) + + state = _detect_db_state(database_url) + + if state == "empty": + raise RuntimeError( + "Database is empty — no tables found. " + "Run `python -m app.migrate` to create the schema first." + ) + + if state == "unmanaged": + raise RuntimeError( + "Database exists but has no alembic_version table (not under Alembic control). " + "Run `python -m app.migrate` to adopt it first." + ) + + # state == "managed" — check revision + current = _get_current_revision(database_url) + + # Determine head revision from the migration scripts + cfg = _make_alembic_config(database_url) + from alembic.script import ScriptDirectory + + script = ScriptDirectory.from_config(cfg) + head_rev = script.get_current_head() + + if current != head_rev: + raise RuntimeError( + f"Database is at revision '{current}' but the application expects " + f"'{head_rev}'. Run `python -m app.migrate` to upgrade." + ) + + logger.info("Database schema verification passed (revision: %s).", current) + + +def run_migrations(database_url: str) -> None: + """Execute migrations — intended for the CLI entry point. + + Idempotent: safe to re-run on every deploy. + + Cases: + - Empty DB → ``upgrade head`` + - Unmanaged DB matching V1 baseline → ``stamp V1`` → ``upgrade head`` + - Unmanaged DB NOT matching V1 baseline → **fail-close** + - Already managed → ``upgrade head`` (no-op if at head) + """ + cfg = _make_alembic_config(database_url) + state = _detect_db_state(database_url) + + if state == "empty": + logger.info("Empty database detected — creating schema from scratch.") + command.upgrade(cfg, "head") + + elif state == "unmanaged": + if _schema_matches_baseline(database_url): + logger.info( + "Unmanaged database matches V1 baseline — stamping %s and upgrading.", + V1_REVISION, + ) + command.stamp(cfg, V1_REVISION) + command.upgrade(cfg, "head") + else: + logger.error( + "Unmanaged database schema does NOT match V1 baseline. " + "Refusing to migrate to avoid data loss." 
+ ) + raise SystemExit( + "Migration aborted: database schema does not match the " + "expected V1 baseline. Inspect the database manually." + ) + + else: # managed + logger.info("Database already under Alembic control — upgrading to head.") + command.upgrade(cfg, "head") + + +# ------------------------------------------------------------------ +# CLI entry point: ``python -m app.migrate`` +# ------------------------------------------------------------------ + +if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, + format="%(levelname)s [%(name)s] %(message)s", + ) + from app.config import get_settings + + settings = get_settings() + url = settings.database_url + logger.info("Running migrations against %s", url) + run_migrations(url) + logger.info("Migration complete.") diff --git a/docs/design/implementation-plan.md b/docs/design/implementation-plan.md index 7e0034d..902cbbe 100644 --- a/docs/design/implementation-plan.md +++ b/docs/design/implementation-plan.md @@ -12,7 +12,7 @@ | 步骤 / Step | 文件 / File | 目标 / Goal | 改 schema? 
| 依赖 / Depends on | | --- | --- | --- | --- | --- | -| **1** | [`step-1-alembic-foundation.md`](./step-1-alembic-foundation.md) | Alembic 迁移地基(封装 + V1 baseline + 自动认领),**不改 schema** | 否 / No | — | +| **1** | [`step-1-alembic-foundation.md`](./step-1-alembic-foundation.md) | Alembic 迁移地基(V1 baseline + 独立幂等迁移命令 + 启动只校验/fail-close),**不改 schema** | 否 / No | — | | **2** | [`step-2-llm-integration.md`](./step-2-llm-integration.md) | LLM 接入:`app_settings` 表 + 客户端 + 配置页 | 是 / Yes (V2) | 步骤 1 / Step 1 | | **3** | [`step-3-ai-search.md`](./step-3-ai-search.md) | 基础 AI 搜索:常驻按钮 + 查询词扩展 | 否 / No | 步骤 2 / Step 2 | diff --git a/docs/design/llm-integration-design.md b/docs/design/llm-integration-design.md index 0b4d901..d220b69 100644 --- a/docs/design/llm-integration-design.md +++ b/docs/design/llm-integration-design.md @@ -70,10 +70,13 @@ └───────────────┘ └──────┬───────┘ └───────────────┘ │ ┌───────────────────▼─────────────────────────┐ - │ app/migrate.py(封装层) │ - │ run_migrations(url): 自动 stamp / upgrade │ + │ app/migrate.py │ + │ 启动 / boot: verify_schema_is_current() 只读 │ + │ └─ 与 head 不一致 → fail-close,拒绝启动 │ + │ 命令 / CLI `python -m app.migrate`(幂等): │ + │ └─ 空库建库 / 认领老库 / upgrade(见 §3) │ └───────────────────┬─────────────────────────┘ - │ command.upgrade / stamp + │ command.upgrade / stamp(仅迁移命令 / migration command only) ┌───────────────────▼─────────────────────────┐ │ Alembic (alembic.ini + migrations/) │ │ V1 baseline → V2(app_settings) → … │ @@ -112,32 +115,60 @@ All databases converge to the same `head`. The `V1 baseline` must equal **today' > `stamp` 只向 `alembic_version` 写一条版本记录,**不执行任何 DDL、不修改数据**。这是安全认领已有库的关键。 > `stamp` only writes a row into `alembic_version`; it runs **no DDL and touches no data**. This is the key to safely adopting an existing DB. 
-### 3.3 自动认领逻辑 / Auto-adoption (in `app/migrate.py`) +### 3.3 运行时机:校验与迁移分离 / Migrations Run Separately from Startup -`init_db()` 启动时调用 `run_migrations(url)`,内部用 SQLAlchemy inspector 判断: -At startup `init_db()` calls `run_migrations(url)`, which inspects the DB: +**关键决策:迁移不在应用启动时发生。** 启动只做**只读校验**,迁移由一个独立、显式的命令/步骤执行。 +**Key decision: migrations do not happen at app startup.** Startup only **verifies** (read-only); migrating is an explicit, separate step. + +- **启动校验(fail-close)/ Startup check (fail-closed):** `app/db.py::init_db()` 调 `app/migrate.py::verify_schema_is_current(url)`,比较 DB 当前 revision 与 `head`: + - 一致 → 正常启动 / match → start normally。 + - 不一致(含空库、未认领的老库)→ **fail-close**:输出清晰日志、拒绝提供服务、提示先跑迁移步骤;**不执行任何 DDL、不碰数据**。 + Mismatch (incl. empty or un-adopted DBs) → **fail closed**: clear log, refuse to serve, no DDL, no data change. +- **迁移命令 / The migration command:** 独立、显式、**幂等**的 `python -m app.migrate`(逻辑在 `app/migrate.py`)。已在 `head` 则空操作并退出 0,便于每次部署都安全重跑。 + A separate, explicit, **idempotent** `python -m app.migrate`. No-op (exit 0) when already at `head`, so it is safe to re-run on every deploy. +- 退休手写列同步 / Retire the hand-rolled sync:`_sync_sqlite_image_columns()` 删除,schema 由 Alembic 单一接管。 + `_sync_sqlite_image_columns()` is removed; Alembic is the sole owner of schema. + +为什么 / Why:避免"启动副作用式迁移"、避免多实例并发迁移竞态;当 code 与 DB 不一致时,**宁可不启动也不带病运行**。 +Avoids surprise startup migrations and concurrent-migration races; on a code/DB mismatch it refuses to run rather than run wrong. 
+ +### 3.4 迁移命令的三种情况 / The Migration Command's Three Cases + +`python -m app.migrate` 用 SQLAlchemy inspector 判定,分三种: +`python -m app.migrate` inspects the DB and branches three ways: | 库的状态 / DB state | 动作 / Action | | --- | --- | -| 有 `alembic_version` / has `alembic_version` | `upgrade head` | -| 无 `alembic_version` 但有 `boxes` 表(=老生产库)/ no `alembic_version` but `boxes` exists | `stamp V1` → `upgrade head` | -| 全空 / empty | `upgrade head`(从 V1 建起 / build from V1) | +| **空库 / empty** | `upgrade head`(建库并升到最新 / create & upgrade to head) | +| **老库且与 baseline 一致 / existing, matches baseline(2a)** | `stamp V1` → `upgrade head`(认领后升级 / adopt then upgrade) | +| **老库但与 baseline 不一致 / existing, mismatched(2b)** | **fail-close,不做任何改动 / fail closed, no changes** | -这样**生产机重新部署零手动迁移命令**,老数据安全。 -So redeploying the production box needs **zero manual migration commands**; existing data is safe. +> **一致性比对的基准是 baseline(V1),不是 head。** 未认领的老库结构停在 V1(不含 `app_settings` 等后续内容),若拿 head 去比会把合法老库误判为不一致。 +> The match is compared against the **baseline (V1)**, not `head` — an un-adopted DB sits at V1 and would wrongly look "mismatched" if compared against head. +> +> ⚠️ SQLite 的 autogenerate 比对存在假阳性(类型亲和、索引命名等),可能让 2b 误 fail。实现上需用**容忍性比对**或允许**人工确认覆盖**(见 §3.6 验证)。 +> SQLite autogenerate has false positives; 2b should use a tolerant comparison or allow a documented manual override (see §3.6). -### 3.4 封装层 / The Wrapper (`app/migrate.py`) +### 3.5 部署形态:Compose db-migration 闸门 / Deployment Shape: a Compose Gate(未来 / future) -应用其余部分只调用 `run_migrations(database_url)`,不直接接触 Alembic API。封装内部: -The rest of the app only calls `run_migrations(database_url)`; Alembic stays encapsulated. Internally it: +意图:用一个一次性 `db-migration` 服务跑迁移命令,**成功才放行 App**。本轮可先只交付命令本身,Compose 接线随后。 +Intent: a one-shot `db-migration` service runs the command and **the app starts only on its success**. The command ships this round; the Compose wiring can follow. 
-- 以编程方式构造 Alembic `Config`(`script_location` 指向打包进镜像的 `migrations/`,`sqlalchemy.url` 用传入的 URL)。 - Builds an Alembic `Config` programmatically (`script_location` → bundled `migrations/`, `sqlalchemy.url` → the passed URL). -- 调 `command.stamp(...)` / `command.upgrade(...)`。 -- 由 `init_db()` 调用,取代原来的 `create_all()` + `_sync_sqlite_image_columns()`(后者删除)。 - Called by `init_db()`, replacing `create_all()` + `_sync_sqlite_image_columns()` (the latter is removed). +```yaml +services: + db-migration: + image: + command: python -m app.migrate # 成功 exit 0;2b/失败 exit ≠0 + web: + depends_on: + db-migration: + condition: service_completed_successfully +``` -### 3.5 Alembic 配置要点 / Alembic config notes +迁移失败(含 2b 不一致)→ App 永不启动。 +A failed migration (incl. a 2b mismatch) → the app never starts. + +### 3.6 Alembic 配置要点 / Alembic config notes - `migrations/env.py`:`target_metadata = Base.metadata`;DB URL 从 `get_settings().database_url` 动态读取(不写死在 `alembic.ini`);对 SQLite 设 `render_as_batch=True`(便于未来改列/删列走 batch 模式)。 `target_metadata = Base.metadata`; URL read dynamically from settings; `render_as_batch=True` for SQLite. @@ -267,10 +298,12 @@ AI off/unconfigured → no button (or a hint to `/settings`); on failure → a f ## 8. 测试策略 / Testing Strategy -- **迁移在测试中真实执行 / Migrations run in tests:** 临时 SQLite 上 `upgrade head`,schema 来自迁移本身——单一事实来源,且为迁移提供覆盖。 - `upgrade head` on a tmp SQLite; schema comes from migrations — single source of truth plus migration coverage. -- **认领逻辑测试 / Adoption test:** 构造一个"有 `boxes` 数据但无 `alembic_version`"的库,跑 `run_migrations`,断言数据保留且版本到达 head。 - Build a "has `boxes` data, no `alembic_version`" DB, run `run_migrations`, assert data preserved and version at head. +- **迁移在测试中真实执行 / Migrations run in tests:** fixture 先在临时 SQLite 上跑迁移命令(建库 → `upgrade head`),再 `create_app()`(启动校验随之通过)。schema 来自迁移本身——单一事实来源 + 迁移覆盖。 + The fixture runs the migration command on a tmp SQLite first, then `create_app()` (whose startup check then passes). 
+- **认领逻辑测试 / Adoption test(2a):** 构造"有 `boxes` 数据但无 `alembic_version`"的库 → 跑迁移命令 → 断言数据保留、版本到达 head。 + Build a "has `boxes` data, no `alembic_version`" DB → run the migration command → assert data preserved and version at head. +- **fail-close 测试 / Fail-closed tests:** ① DB 未到 head 时 `create_app()` 启动应 fail-close;② 2b 不一致时迁移命令应 fail-close 且不改动。 + ① `create_app()` fails closed when the DB is not at head; ② the migration command fails closed (and changes nothing) on a 2b mismatch. - **LLM 全程 mock / Mock the LLM:** 打桩 `expand_query` / `test_connection`(或底层 httpx),CI 不联网。 - **新增用例 / New cases:** 配置增删改 + Key 脱敏;测试连接(mock);AI 搜索扩展命中;各降级路径(未配置/失败)。 @@ -293,7 +326,7 @@ AI off/unconfigured → no button (or a hint to `/settings`); on failure → a f | --- | --- | --- | | D1 | 先引入 Alembic 再做功能 / Alembic before features | 配置表与未来列都依赖可靠迁移;退休手写列同步。 | | D2 | V1 baseline 严格等于现状,新东西放 V2+ / baseline = current schema only | 使 `stamp` 认领老库为真、安全。 | -| D3 | 自动 stamp/upgrade / auto-adopt | 生产机零手动迁移;契合自托管"开箱即用"。 | +| D3 | 迁移与启动分离:启动只校验 + fail-close,迁移走独立幂等命令(`python -m app.migrate`)/ 未来 Compose `db-migration` 闸门 / migrations separated from startup | 避免启动副作用式迁移与并发竞态;schema 不一致宁可不启动也不带病运行;迁移成功才放行 App。 | | D4 | 配置用 KV 表 / KV settings table | 后续配置项多,避免反复给已有表加列。 | | D5 | API Key 明文落库 / plaintext key | 业主威胁模型下可接受;备份至自有 OneDrive。 | | D6 | 复用 httpx,手搓 OpenAI 调用 / reuse httpx | 不引入 `openai` SDK,依赖最小。 | diff --git a/docs/design/step-1-alembic-foundation.md b/docs/design/step-1-alembic-foundation.md index 58def6c..a057adb 100644 --- a/docs/design/step-1-alembic-foundation.md +++ b/docs/design/step-1-alembic-foundation.md @@ -8,8 +8,8 @@ ## 目标 / Goal -引入 Alembic 并**安全接管现有生产库**,schema 一点不改,所有现有测试保持绿。 -Introduce Alembic and **safely adopt the existing prod DB**, with zero schema change; all existing tests stay green. 
+引入 Alembic 并**安全接管现有生产库**,schema 一点不改,所有现有测试保持绿。**迁移与应用启动分离**:启动只做只读校验 + fail-close,实际迁移由独立、幂等命令 `python -m app.migrate` 执行。 +Introduce Alembic and **safely adopt the existing prod DB** with zero schema change; all tests stay green. **Migration is separated from startup**: boot only verifies (read-only) and fails closed; the actual migrating is done by a separate idempotent command `python -m app.migrate`. --- @@ -17,8 +17,8 @@ Introduce Alembic and **safely adopt the existing prod DB**, with zero schema ch - **当前没有 Alembic。** 唯一的"迁移"是 `app/db.py::_sync_sqlite_image_columns()`(启动时缺图片列就 `ALTER TABLE ADD COLUMN`)。 No Alembic today; the only "migration" is the hand-rolled image-column sync in `app/db.py`. -- `app/db.py::init_db()` 在 FastAPI lifespan 启动时被 `create_app()` 调用,现在执行 `Base.metadata.create_all()` + `_sync_sqlite_image_columns()`。相关符号:`Base`、`engine`、`SessionLocal`、`configure_database()`。 - `init_db()` runs at lifespan startup and currently does `create_all()` + the image-column sync. +- `app/db.py::init_db()` 在 FastAPI lifespan 启动时被 `create_app()` 调用,现在执行 `Base.metadata.create_all()` + `_sync_sqlite_image_columns()`。**本步把它改成只读校验**(不再在启动时建表/迁移)。相关符号:`Base`、`engine`、`SessionLocal`、`configure_database()`。 + `init_db()` runs at lifespan startup and currently does `create_all()` + the image-column sync. **This step turns it into a read-only check** (no table creation/migration at boot). - `tests/conftest.py` 的 `client` fixture:`configure_database(tmp_url)` → `create_app()`(触发 `init_db`)。每个测试用临时 SQLite,互不污染。 - models 在 `app/models.py`:`Box` / `Item` / `SubItem` 三张表;每张含 `image_blob`(BLOB) / `image_mime_type` / `image_width` / `image_height`,以及 `created_at` / `updated_at`。 - DB URL 来自 `app/config.py::get_settings().database_url`(默认 `sqlite:///./data/app.db`)。 @@ -29,10 +29,11 @@ Introduce Alembic and **safely adopt the existing prod DB**, with zero schema ch - 所有数据库最终收敛到同一个 `head`。All DBs converge to the same `head`. 
- **V1 baseline 必须严格等于"今天的真实 schema"**(三张表 + 现有图片列 + 索引),**不多一列**。新东西放后续 revision。 The V1 baseline must equal **today's actual schema exactly** — nothing more. -- 老库:`stamp V1`(只写版本号,**不建表、不碰数据**)→ `upgrade head`。 - Existing DB: `stamp V1` (writes only the version row, **no DDL, no data change**) → `upgrade head`. -- 新库:跑 `V1`(真正建表)→ `upgrade head`。 - Fresh DB: run `V1` (creates tables) → `upgrade head`. +- 以下动作**由迁移命令执行,不在应用启动时** / done by the **migration command**, not at boot: + - 老库且与 baseline 一致:`stamp V1`(只写版本号,**不建表、不碰数据**)→ `upgrade head`。 + Existing DB matching baseline: `stamp V1` (no DDL, no data change) → `upgrade head`. + - 老库但与 baseline 不一致:**fail-close,不做任何改动**。Mismatched existing DB → fail closed. + - 新库:跑 `V1`(真正建表)→ `upgrade head`。Fresh DB: run `V1` → `upgrade head`. --- @@ -48,39 +49,54 @@ Introduce Alembic and **safely adopt the existing prod DB**, with zero schema ch Author V1 by autogenerating against an **empty** DB. - [ ] **验证 baseline**:对一份**生产库副本**跑 `alembic check`,确认**无差异**(印证可安全 `stamp`;SQLite 偶有类型亲和/索引命名假差异,人眼复核)。 Verify with `alembic check` against a **copy of the prod DB** → expect no diff. -- [ ] 新增封装 `app/migrate.py`,导出 `run_migrations(database_url: str)`: - - 编程方式构造 Alembic `Config`(`script_location` 指向打包进镜像的 `migrations/`,`sqlalchemy.url` = 传入 URL)。 - - 用 SQLAlchemy inspector 实现自动认领: - - 有 `alembic_version` → `command.upgrade(cfg, "head")` - - 无 `alembic_version` 但有 `boxes` 表 → `command.stamp(cfg, "")` → `command.upgrade(cfg, "head")` - - 全空 → `command.upgrade(cfg, "head")` -- [ ] 改 `app/db.py::init_db()`:改为调 `run_migrations(resolved_url)`,**删除** `_sync_sqlite_image_columns()`(Alembic 接管后冗余)。保留 `configure_database()` / engine 装配逻辑。 - `init_db()` calls `run_migrations(...)`; **remove** `_sync_sqlite_image_columns()`. 
+- [ ] 新增 `app/migrate.py`,承担两个职责 / two responsibilities: + - **(A) 迁移命令入口 `python -m app.migrate`(幂等 / idempotent)**:编程方式构造 Alembic `Config`(`script_location` → 打包进镜像的 `migrations/`,`sqlalchemy.url` = 解析出的 URL),用 SQLAlchemy inspector 分情况: + - 空库 / empty → `command.upgrade(cfg, "head")` + - 老库且与 **baseline(V1)** 一致 → `command.stamp(cfg, "")` → `command.upgrade(cfg, "head")` + - 老库但与 baseline 不一致 → **fail-close**:非零退出 + 清晰日志 + **不做任何改动** + - 已在 `head` → 空操作、退出 0 + - `` 指 **baseline 这个具体 revision**(`down_revision=None` 的那条),不是 `head`。 + - "与 baseline 一致"的判定**对照 baseline(V1) 的预期 schema**(不是 head);SQLite 假差异需容忍或允许人工确认覆盖。 + - **(B) 启动校验 `verify_schema_is_current(url)`(只读 / read-only)**:比较 DB 当前 revision 与 `head`;不一致返回失败/抛错,**绝不改动 DB**。 +- [ ] 改 `app/db.py::init_db()`:改为调 `verify_schema_is_current(resolved_url)` —— **一致才放行;不一致 fail-close**(清晰日志,提示先跑 `python -m app.migrate`)。不再在启动时建表/迁移。**删除** `_sync_sqlite_image_columns()`。保留 `configure_database()` / engine 装配。 + `init_db()` now only verifies and **fails closed** on mismatch (pointing the user to `python -m app.migrate`); remove `_sync_sqlite_image_columns()`. +- [ ] `tests/conftest.py`:fixture 改为**先跑迁移命令**把临时库带到 `head`,再 `create_app()`(这样启动校验通过)。 + Fixture runs the migration first, then `create_app()`. - [ ] `Dockerfile`:加 `COPY alembic.ini .` 与 `COPY migrations ./migrations`(否则容器内无迁移脚本)。 - [ ] CI(可选 / optional):`.github/workflows/test.yml` 加一步 `alembic check`,防止 model 与迁移漂移。 +- [ ] Compose `db-migration` 闸门(可后续 / can be deferred):加一个一次性服务跑 `python -m app.migrate`,`web` 经 `depends_on: condition: service_completed_successfully` 等它成功(见设计 §3.5)。 + Add a one-shot `db-migration` service gating `web` (design §3.5); may be deferred. 
--- ## 涉及文件 / Files -`requirements.txt`、`alembic.ini`(新)、`migrations/**`(新)、`app/migrate.py`(新)、`app/db.py`、`Dockerfile`、`tests/`、(可选)`.github/workflows/test.yml`。 +`requirements.txt`、`alembic.ini`(新)、`migrations/**`(新)、`app/migrate.py`(新)、`app/db.py`、`tests/conftest.py`、`Dockerfile`、(可选)`.github/workflows/test.yml`、(可后续)`docker-compose.yml`。 --- ## 测试 / Tests -- [ ] 现有 ~83 个测试全绿(它们经 `init_db` 现在改走迁移建表)。 - All existing ~83 tests pass (schema now built via migrations through `init_db`). -- [ ] 新增**认领老库**用例:构造一个"有 `boxes` 数据、无 `alembic_version`"的库(可先用 `create_all` 造),调 `run_migrations` 后断言:数据保留、`alembic_version` 到达 `head`、未重复建表报错。 - New adoption test: a "has `boxes` data, no `alembic_version`" DB → after `run_migrations`, data preserved and version at `head`. -- [ ] 新增**全新库**用例:空 URL → `run_migrations` 后三张表存在、版本到 `head`。 +- [ ] 现有 ~83 个测试全绿(fixture 先跑迁移、再起 App,启动校验通过)。 + All existing ~83 tests pass (fixture migrates first, then starts the app). +- [ ] **认领老库(2a)**:构造"有 `boxes` 数据、无 `alembic_version`"的库(可先用 `create_all` 造)→ 跑迁移命令 → 断言数据保留、版本到达 `head`、未重复建表报错。 + Adoption (2a): migrate an un-stamped populated DB → data preserved, version at `head`. +- [ ] **全新库**:指向空库(尚未建表)的 URL → 跑迁移命令 → 三张表存在、版本到 `head`。 + Fresh DB: a URL pointing at an empty (not-yet-created) DB → migrate → tables exist, version at `head`. +- [ ] **fail-close(启动)**:DB 未到 `head` 时 `create_app()` / `init_db()` 启动应 fail-close(抛错/拒绝服务)、不改动 DB。 + Startup fails closed when the DB is not at `head`; DB unchanged. +- [ ] **fail-close(2b)**:构造与 baseline 不一致的老库 → 跑迁移命令 → 断言非零退出、DB 不变。 + Migration command fails closed on a 2b mismatch; DB unchanged. --- ## 验收 / Acceptance -- 全新库:从 V1 建表,应用正常起。Fresh DB builds from V1; app starts. -- 模拟老库:自动 `stamp` + `upgrade`,**数据无损**。Existing-like DB auto-adopts; data intact. +- 迁移命令:空库建到 `head`;老库一致则认领并到 `head`;老库不一致则 **fail-close 不改动**;已在 `head` 则幂等空操作。 + Migration command: empty→head; matching existing→adopt+head; mismatch→fail closed; already-at-head→no-op.
+- 启动校验:DB 未到 `head` 时**拒绝启动**并输出清晰日志;到 `head` 才正常起。 + Startup refuses to boot (clear log) unless the DB is at `head`. +- 模拟老库认领后**数据无损**。Adopted existing-like DB keeps data intact. - 全部测试绿;schema 与本步骤前**逐列一致**(本步不改业务 schema)。 All tests green; schema identical to before (no business-schema change). @@ -90,6 +106,8 @@ Introduce Alembic and **safely adopt the existing prod DB**, with zero schema ch - **baseline 与现状有偏差 → `stamp` 失真。** 缓解:`alembic check` 对生产副本校验 + 人眼复核 SQLite 假差异。 Baseline drift → `alembic check` against a prod copy + manual eyeball. +- **2b 一致性比对假阳性 → 合法老库被误 fail-close。** 缓解:比对基准用 baseline(V1) 而非 head;容忍已知 SQLite 噪声,或提供"人工确认覆盖"的开关。 + 2b false positives wrongly fail a legit DB → compare against baseline (not head); tolerate known SQLite noise or offer a manual-confirm override. - **容器内找不到迁移脚本。** 缓解:确认 `Dockerfile` 已 `COPY` `alembic.ini` 与 `migrations/`;`script_location` 用绝对/相对镜像 WORKDIR(`/app`) 正确解析。 Migrations missing in image → ensure they're `COPY`-ed and `script_location` resolves under `/app`. diff --git a/docs/design/step-2-llm-integration.md b/docs/design/step-2-llm-integration.md index 7612156..cc063f7 100644 --- a/docs/design/step-2-llm-integration.md +++ b/docs/design/step-2-llm-integration.md @@ -1,7 +1,7 @@ # 步骤 2 · LLM 接入 / Step 2 · LLM Integration > **可独立执行 / Self-contained.** 完整背景见设计文档 [`llm-integration-design.md`](./llm-integration-design.md) §4;跨步骤约定见 [`implementation-plan.md`](./implementation-plan.md)。 -> **前置 / Prerequisite:** [步骤 1](./step-1-alembic-foundation.md) 已合入(Alembic 已就位——**schema 变更一律通过新建迁移完成**)。Step 1 merged; Alembic is in place — **all schema changes go through a new migration**. +> **前置 / Prerequisite:** [步骤 1](./step-1-alembic-foundation.md) 已合入(Alembic 已就位——**schema 变更一律通过新建迁移完成,并经迁移命令 `python -m app.migrate` / `db-migration` 步骤生效,非应用启动时**)。Step 1 merged; Alembic is in place — **schema changes go through a new migration, applied by the migration command, not at app startup**. 
> **产出 / Output:** 一个可独立合入的 PR。 --- diff --git a/docs/repository-brief.md b/docs/repository-brief.md index 9cd41c7..1db4614 100644 --- a/docs/repository-brief.md +++ b/docs/repository-brief.md @@ -208,16 +208,18 @@ The service worker only claims clients — **no caching, no offline** yet. --- -## 10. 数据库初始化与迁移 / DB Init & Migrations (`app/db.py`) +## 10. 数据库初始化与迁移 / DB Init & Migrations (`app/migrate.py` + `app/db.py`) -- 懒加载 engine;`init_db()` 在 FastAPI `lifespan` 启动时调用,执行 `Base.metadata.create_all`。 - Lazy engine; `init_db()` runs at FastAPI startup (lifespan) and does `create_all`. +- **Alembic 接管 schema**:迁移系统由 Alembic 管理(`alembic.ini` + `migrations/`),V1 baseline 等于当前三表 schema。 + Alembic owns schema creation and changes (`alembic.ini` + `migrations/`); V1 baseline equals the current three-table schema. +- **迁移与启动分离 / Migrations separated from startup**: + - `init_db()`(`app/db.py`)在 FastAPI lifespan 启动时调用 `verify_schema_is_current()`,只做**只读校验**——检查 DB 是否在 `head`,不一致则 **fail-close**(拒绝启动、不执行任何 DDL)。 + `init_db()` calls `verify_schema_is_current()` at startup — read-only check, fails closed on mismatch, no DDL. + - 实际迁移由独立幂等命令 `python -m app.migrate`(`app/migrate.py`)执行:空库建表、老库认领(stamp V1 → upgrade head)、已在 head 则空操作。老库 schema 不匹配则 fail-close 不改动。 + Actual migration via standalone idempotent command `python -m app.migrate`: fresh DB → create, matching existing → adopt, already-at-head → no-op, mismatch → fail closed. - SQLite 连接开启 `PRAGMA foreign_keys=ON`。 -- **轻量「迁移」/ Ad-hoc migration**:`_sync_sqlite_image_columns()` 在启动时用 `PRAGMA table_info` 检测,并对缺失的图片列做 `ALTER TABLE ADD COLUMN`。这是项目**唯一**的迁移机制,专门为「后加图片功能」补列而写。 - The only migration mechanism is a hand-written check that adds the four image columns if missing. - -> ⚠️ **没有 Alembic / 没有通用迁移**。新增任何**非图片**字段到已有库,需要扩展这段逻辑或手动 `ALTER`,否则旧库不会自动获得新列。这是下一轮改动需要特别注意的点(见 §14)。 -> No Alembic / general migrations — adding new non-image columns to an existing DB needs manual handling. 
+- 手写列同步 `_sync_sqlite_image_columns()` 已退休删除。 + The hand-rolled `_sync_sqlite_image_columns()` has been retired and removed. --- diff --git a/migrations/README b/migrations/README new file mode 100644 index 0000000..98e4f9c --- /dev/null +++ b/migrations/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/migrations/env.py b/migrations/env.py new file mode 100644 index 0000000..8343ecd --- /dev/null +++ b/migrations/env.py @@ -0,0 +1,80 @@ +from logging.config import fileConfig + +from sqlalchemy import pool +from sqlalchemy import engine_from_config + +from alembic import context + +# Import Base and models so Alembic can see all tables for autogenerate. +from app.db import Base +import app.models # noqa: F401 — registers Box, Item, SubItem on Base.metadata + +config = context.config + +# Dynamically set sqlalchemy.url from app config (not hardcoded in alembic.ini). +# When called programmatically via app.migrate.run_migrations(), the URL is +# already set on the Config object — respect it. Fall back to get_settings() +# only when invoked from the ``alembic`` CLI. +from app.config import get_settings + +if not config.get_main_option("sqlalchemy.url"): + settings = get_settings() + config.set_main_option("sqlalchemy.url", settings.database_url) + +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +target_metadata = Base.metadata + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. 
+ """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + render_as_batch=True, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata, + render_as_batch=True, + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/migrations/script.py.mako b/migrations/script.py.mako new file mode 100644 index 0000000..1101630 --- /dev/null +++ b/migrations/script.py.mako @@ -0,0 +1,28 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision: str = ${repr(up_revision)} +down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + ${downgrades if downgrades else "pass"} diff --git a/migrations/versions/57af90893f55_v1_baseline.py b/migrations/versions/57af90893f55_v1_baseline.py new file mode 100644 index 0000000..fd61084 --- /dev/null +++ b/migrations/versions/57af90893f55_v1_baseline.py @@ -0,0 +1,96 @@ +"""V1 baseline + +Revision ID: 57af90893f55 +Revises: +Create Date: 2026-06-01 13:49:15.867487 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '57af90893f55' +down_revision: Union[str, Sequence[str], None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('boxes', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(length=100), nullable=False), + sa.Column('note', sa.Text(), nullable=True), + sa.Column('room', sa.String(length=100), nullable=True), + sa.Column('status', sa.String(length=50), nullable=True), + sa.Column('image_blob', sa.LargeBinary(), nullable=True), + sa.Column('image_mime_type', sa.String(length=50), nullable=True), + sa.Column('image_width', sa.Integer(), nullable=True), + sa.Column('image_height', sa.Integer(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), nullable=False), + sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + with op.batch_alter_table('boxes', schema=None) as batch_op: + batch_op.create_index(batch_op.f('ix_boxes_id'), ['id'], unique=False) + + op.create_table('items', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('box_id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(length=100), nullable=False), + sa.Column('note', sa.Text(), nullable=True), + sa.Column('quantity', sa.Integer(), nullable=True), + sa.Column('is_container', sa.Boolean(), nullable=False), + sa.Column('image_blob', sa.LargeBinary(), nullable=True), + sa.Column('image_mime_type', sa.String(length=50), nullable=True), + sa.Column('image_width', sa.Integer(), nullable=True), + sa.Column('image_height', sa.Integer(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), nullable=False), + sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint(['box_id'], ['boxes.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + with op.batch_alter_table('items', schema=None) as batch_op: + batch_op.create_index(batch_op.f('ix_items_id'), ['id'], unique=False) + + op.create_table('subitems', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('parent_item_id', sa.Integer(), nullable=False), + 
sa.Column('name', sa.String(length=100), nullable=False), + sa.Column('note', sa.Text(), nullable=True), + sa.Column('quantity', sa.Integer(), nullable=True), + sa.Column('image_blob', sa.LargeBinary(), nullable=True), + sa.Column('image_mime_type', sa.String(length=50), nullable=True), + sa.Column('image_width', sa.Integer(), nullable=True), + sa.Column('image_height', sa.Integer(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), nullable=False), + sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False), + sa.ForeignKeyConstraint(['parent_item_id'], ['items.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + with op.batch_alter_table('subitems', schema=None) as batch_op: + batch_op.create_index(batch_op.f('ix_subitems_id'), ['id'], unique=False) + + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('subitems', schema=None) as batch_op: + batch_op.drop_index(batch_op.f('ix_subitems_id')) + + op.drop_table('subitems') + with op.batch_alter_table('items', schema=None) as batch_op: + batch_op.drop_index(batch_op.f('ix_items_id')) + + op.drop_table('items') + with op.batch_alter_table('boxes', schema=None) as batch_op: + batch_op.drop_index(batch_op.f('ix_boxes_id')) + + op.drop_table('boxes') + # ### end Alembic commands ### diff --git a/requirements.txt b/requirements.txt index 5ab2879..9bf4d16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ fastapi==0.116.1 uvicorn[standard]==0.35.0 jinja2==3.1.6 sqlalchemy==2.0.43 +alembic==1.16.5 python-multipart==0.0.20 pillow==11.2.1 requests==2.32.3 diff --git a/tests/conftest.py b/tests/conftest.py index b791a13..014e9f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,7 @@ from sqlalchemy.orm import Session from app.db import SessionLocal, configure_database from app.main import create_app +from app.migrate import 
run_migrations @pytest.fixture @@ -13,6 +14,9 @@ def client(tmp_path: Path): test_db_path = tmp_path / "test.db" database_url = f"sqlite:///{test_db_path}" + # Run migration first so DB is at head before app starts. + run_migrations(database_url) + configure_database(database_url) app = create_app() diff --git a/tests/test_migrate.py b/tests/test_migrate.py new file mode 100644 index 0000000..4a60960 --- /dev/null +++ b/tests/test_migrate.py @@ -0,0 +1,645 @@ +"""Tests for the Alembic migration wrapper (app.migrate). + +Covers: +- Fresh DB: empty → upgrade head (tables created, version at head) +- Unmanaged DB (2a): has tables + matches baseline → stamp V1 → upgrade head +- Unmanaged DB (2b): schema mismatch → fail-close, no changes +- Non-empty non-app DB (rogue tables) → unmanaged → fail-close +- Managed DB: already at head → upgrade head is a no-op +- verify_schema_is_current: pass when at head, fail-close otherwise +- verify_schema_is_current: no write side-effects (no file creation) +- init_db startup: fail-close when DB not at head, pass when at head +- Data preservation: adoption does not lose existing data +- Schema correctness: tables match the ORM model definitions +- V1_REVISION constant matches the actual revision in versions/ +- _detect_db_state correctly identifies all three states +- _schema_matches_baseline checks FK, indexes, PK, types — not just column names +- CLI entry point: python -m app.migrate +""" + +from pathlib import Path +import shutil + +import pytest +from sqlalchemy import create_engine, inspect, text + +import app.models # noqa: F401 — register models on Base.metadata +from app.db import Base, SessionLocal, configure_database +from app.migrate import ( + V1_REVISION, + _detect_db_state, + run_migrations, + verify_schema_is_current, +) +from app.main import create_app +from fastapi.testclient import TestClient + + +@pytest.fixture() +def tmp_db_path(tmp_path): + """Provide a temporary SQLite database path.""" + return tmp_path / 
"test.db" + + +@pytest.fixture() +def tmp_db_url(tmp_db_path): + """Provide a temporary SQLite database URL.""" + return f"sqlite:///{tmp_db_path}" + + +# --------------------------------------------------------------------------- +# Fresh DB: empty → upgrade head +# --------------------------------------------------------------------------- + + +class TestFreshDBMigration: + """Empty database gets all tables created by migration.""" + + def test_creates_all_three_tables(self, tmp_db_url): + run_migrations(tmp_db_url) + eng = create_engine(tmp_db_url) + tables = set(inspect(eng).get_table_names()) + eng.dispose() + assert "boxes" in tables + assert "items" in tables + assert "subitems" in tables + + def test_creates_alembic_version_table(self, tmp_db_url): + run_migrations(tmp_db_url) + eng = create_engine(tmp_db_url) + tables = set(inspect(eng).get_table_names()) + eng.dispose() + assert "alembic_version" in tables + + def test_version_at_head(self, tmp_db_url): + run_migrations(tmp_db_url) + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + version = conn.execute(text("SELECT version_num FROM alembic_version")).scalar() + eng.dispose() + assert version == V1_REVISION + + def test_boxes_table_has_all_columns(self, tmp_db_url): + run_migrations(tmp_db_url) + eng = create_engine(tmp_db_url) + columns = {col["name"] for col in inspect(eng).get_columns("boxes")} + eng.dispose() + expected = { + "id", "name", "note", "room", "status", + "image_blob", "image_mime_type", "image_width", "image_height", + "created_at", "updated_at", + } + assert columns == expected + + def test_items_table_has_all_columns(self, tmp_db_url): + run_migrations(tmp_db_url) + eng = create_engine(tmp_db_url) + columns = {col["name"] for col in inspect(eng).get_columns("items")} + eng.dispose() + expected = { + "id", "box_id", "name", "note", "quantity", "is_container", + "image_blob", "image_mime_type", "image_width", "image_height", + "created_at", "updated_at", + } + assert columns 
== expected + + def test_subitems_table_has_all_columns(self, tmp_db_url): + run_migrations(tmp_db_url) + eng = create_engine(tmp_db_url) + columns = {col["name"] for col in inspect(eng).get_columns("subitems")} + eng.dispose() + expected = { + "id", "parent_item_id", "name", "note", "quantity", + "image_blob", "image_mime_type", "image_width", "image_height", + "created_at", "updated_at", + } + assert columns == expected + + def test_foreign_keys_exist(self, tmp_db_url): + run_migrations(tmp_db_url) + eng = create_engine(tmp_db_url) + item_fks = inspect(eng).get_foreign_keys("items") + subitem_fks = inspect(eng).get_foreign_keys("subitems") + eng.dispose() + assert len(item_fks) == 1 + assert item_fks[0]["constrained_columns"] == ["box_id"] + assert len(subitem_fks) == 1 + assert subitem_fks[0]["constrained_columns"] == ["parent_item_id"] + + def test_indexes_exist(self, tmp_db_url): + run_migrations(tmp_db_url) + eng = create_engine(tmp_db_url) + box_indexes = inspect(eng).get_indexes("boxes") + item_indexes = inspect(eng).get_indexes("items") + subitem_indexes = inspect(eng).get_indexes("subitems") + eng.dispose() + assert any("ix_boxes_id" in idx["name"] for idx in box_indexes) + assert any("ix_items_id" in idx["name"] for idx in item_indexes) + assert any("ix_subitems_id" in idx["name"] for idx in subitem_indexes) + + +# --------------------------------------------------------------------------- +# Unmanaged DB adoption — 2a: matches baseline +# --------------------------------------------------------------------------- + + +class TestUnmanagedDBAdoption2a: + """Database with existing tables matching baseline gets adopted.""" + + def _create_old_db(self, db_url: str) -> None: + """Simulate a pre-Alembic DB: create_all + insert data.""" + eng = create_engine(db_url) + Base.metadata.create_all(bind=eng) + with eng.begin() as conn: + conn.execute(text( + "INSERT INTO boxes (name, room, status, created_at, updated_at) " + "VALUES ('Kitchen Box', 'Kitchen', 
'packed', '2026-01-01 00:00:00', '2026-01-01 00:00:00')" + )) + conn.execute(text( + "INSERT INTO items (box_id, name, quantity, is_container, created_at, updated_at) " + "VALUES (1, 'Plates', 4, 0, '2026-01-01 00:00:00', '2026-01-01 00:00:00')" + )) + eng.dispose() + + def test_stamp_and_upgrade(self, tmp_db_url): + self._create_old_db(tmp_db_url) + assert _detect_db_state(tmp_db_url) == "unmanaged" + run_migrations(tmp_db_url) + + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + version = conn.execute(text("SELECT version_num FROM alembic_version")).scalar() + eng.dispose() + assert version == V1_REVISION + + def test_data_preserved_after_adoption(self, tmp_db_url): + self._create_old_db(tmp_db_url) + run_migrations(tmp_db_url) + + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + box_count = conn.execute(text("SELECT COUNT(*) FROM boxes")).scalar() + item_count = conn.execute(text("SELECT COUNT(*) FROM items")).scalar() + box_name = conn.execute(text("SELECT name FROM boxes WHERE id = 1")).scalar() + eng.dispose() + + assert box_count == 1 + assert item_count == 1 + assert box_name == "Kitchen Box" + + def test_no_extra_tables_created(self, tmp_db_url): + self._create_old_db(tmp_db_url) + run_migrations(tmp_db_url) + + eng = create_engine(tmp_db_url) + tables = set(inspect(eng).get_table_names()) + eng.dispose() + assert tables == {"alembic_version", "boxes", "items", "subitems"} + + def test_adoption_is_idempotent(self, tmp_db_url): + """Running run_migrations twice does not error or duplicate data.""" + self._create_old_db(tmp_db_url) + run_migrations(tmp_db_url) + run_migrations(tmp_db_url) + + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + box_count = conn.execute(text("SELECT COUNT(*) FROM boxes")).scalar() + version = conn.execute(text("SELECT version_num FROM alembic_version")).scalar() + eng.dispose() + + assert box_count == 1 + assert version == V1_REVISION + + +# 
--------------------------------------------------------------------------- +# Unmanaged DB — 2b: schema mismatch → fail-close +# --------------------------------------------------------------------------- + + +class TestUnmanagedDBMismatch2b: + """Database with schema not matching baseline → fail-close, no changes.""" + + def _create_mismatched_db(self, db_url: str) -> None: + """Create a DB that has tables but with wrong columns (missing image cols).""" + eng = create_engine(db_url) + with eng.begin() as conn: + conn.execute(text( + "CREATE TABLE boxes (" + "id INTEGER PRIMARY KEY, name TEXT NOT NULL, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)" + )) + conn.execute(text( + "CREATE TABLE items (" + "id INTEGER PRIMARY KEY, box_id INTEGER NOT NULL, name TEXT NOT NULL, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)" + )) + conn.execute(text( + "CREATE TABLE subitems (" + "id INTEGER PRIMARY KEY, parent_item_id INTEGER NOT NULL, name TEXT NOT NULL, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)" + )) + conn.execute(text( + "INSERT INTO boxes (name, created_at, updated_at) " + "VALUES ('Bad Box', '2026-01-01 00:00:00', '2026-01-01 00:00:00')" + )) + eng.dispose() + + def test_fail_close_on_mismatch(self, tmp_db_url): + self._create_mismatched_db(tmp_db_url) + assert _detect_db_state(tmp_db_url) == "unmanaged" + + with pytest.raises(SystemExit, match="does not match"): + run_migrations(tmp_db_url) + + def test_db_unchanged_after_fail_close(self, tmp_db_url): + self._create_mismatched_db(tmp_db_url) + with pytest.raises(SystemExit): + run_migrations(tmp_db_url) + + # DB should be completely unchanged + eng = create_engine(tmp_db_url) + tables = set(inspect(eng).get_table_names()) + assert "alembic_version" not in tables + with eng.begin() as conn: + count = conn.execute(text("SELECT COUNT(*) FROM boxes")).scalar() + eng.dispose() + assert count == 1 # original data still there + + def 
test_extra_table_causes_fail_close(self, tmp_db_url): + """A DB with the correct tables PLUS an extra one should fail.""" + eng = create_engine(tmp_db_url) + Base.metadata.create_all(bind=eng) + with eng.begin() as conn: + conn.execute(text("CREATE TABLE rogue_table (id INTEGER PRIMARY KEY)")) + eng.dispose() + + with pytest.raises(SystemExit, match="does not match"): + run_migrations(tmp_db_url) + + def test_missing_fk_causes_fail_close(self, tmp_db_url): + """Tables with correct columns but missing FK should fail.""" + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + conn.execute(text( + "CREATE TABLE boxes (id INTEGER PRIMARY KEY, name VARCHAR(100) NOT NULL, " + "note TEXT, room VARCHAR(100), status VARCHAR(50), " + "image_blob BLOB, image_mime_type VARCHAR(50), " + "image_width INTEGER, image_height INTEGER, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)" + )) + conn.execute(text( + "CREATE TABLE items (id INTEGER PRIMARY KEY, box_id INTEGER NOT NULL, " + "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, " + "is_container BOOLEAN NOT NULL DEFAULT 0, " + "image_blob BLOB, image_mime_type VARCHAR(50), " + "image_width INTEGER, image_height INTEGER, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)" + )) + conn.execute(text( + "CREATE TABLE subitems (id INTEGER PRIMARY KEY, parent_item_id INTEGER NOT NULL, " + "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, " + "image_blob BLOB, image_mime_type VARCHAR(50), " + "image_width INTEGER, image_height INTEGER, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)" + )) + eng.dispose() + + with pytest.raises(SystemExit, match="does not match"): + run_migrations(tmp_db_url) + + def test_missing_index_causes_fail_close(self, tmp_db_url): + """Tables with correct columns and FK but missing index should fail.""" + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + conn.execute(text( + "CREATE TABLE boxes (id INTEGER PRIMARY KEY, name 
VARCHAR(100) NOT NULL, " + "note TEXT, room VARCHAR(100), status VARCHAR(50), " + "image_blob BLOB, image_mime_type VARCHAR(50), " + "image_width INTEGER, image_height INTEGER, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL)" + )) + conn.execute(text( + "CREATE TABLE items (id INTEGER PRIMARY KEY, box_id INTEGER NOT NULL, " + "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, " + "is_container BOOLEAN NOT NULL DEFAULT 0, " + "image_blob BLOB, image_mime_type VARCHAR(50), " + "image_width INTEGER, image_height INTEGER, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL, " + "FOREIGN KEY(box_id) REFERENCES boxes(id) ON DELETE CASCADE)" + )) + conn.execute(text( + "CREATE TABLE subitems (id INTEGER PRIMARY KEY, parent_item_id INTEGER NOT NULL, " + "name VARCHAR(100) NOT NULL, note TEXT, quantity INTEGER, " + "image_blob BLOB, image_mime_type VARCHAR(50), " + "image_width INTEGER, image_height INTEGER, " + "created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL, " + "FOREIGN KEY(parent_item_id) REFERENCES items(id) ON DELETE CASCADE)" + )) + # No indexes created — should fail + eng.dispose() + + with pytest.raises(SystemExit, match="does not match"): + run_migrations(tmp_db_url) + + +# --------------------------------------------------------------------------- +# Non-empty non-app DB (rogue tables) — treated as unmanaged, fail-close +# --------------------------------------------------------------------------- + + +class TestRogueDatabase: + """A DB with unrelated tables must be treated as unmanaged and fail-close.""" + + def test_rogue_table_detected_as_unmanaged(self, tmp_db_url): + """A DB with only rogue_table should be 'unmanaged', not 'empty'.""" + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + conn.execute(text("CREATE TABLE rogue_table (id INTEGER PRIMARY KEY)")) + eng.dispose() + assert _detect_db_state(tmp_db_url) == "unmanaged" + + def test_rogue_table_migration_fails_closed(self, tmp_db_url): + 
"""Migration should fail-close, NOT create app tables in rogue DB.""" + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + conn.execute(text("CREATE TABLE rogue_table (id INTEGER PRIMARY KEY)")) + eng.dispose() + + with pytest.raises(SystemExit, match="does not match"): + run_migrations(tmp_db_url) + + # Verify no app tables were created + eng = create_engine(tmp_db_url) + tables = set(inspect(eng).get_table_names()) + eng.dispose() + assert tables == {"rogue_table"} # only the original rogue table + assert "boxes" not in tables + assert "alembic_version" not in tables + + +# --------------------------------------------------------------------------- +# Managed DB (already at head) +# --------------------------------------------------------------------------- + + +class TestManagedDBMigration: + """Database already under Alembic control: upgrade head is a no-op.""" + + def test_upgrade_head_is_noop(self, tmp_db_url): + run_migrations(tmp_db_url) # first run: creates tables + assert _detect_db_state(tmp_db_url) == "managed" + + run_migrations(tmp_db_url) # second run: should be a no-op + + eng = create_engine(tmp_db_url) + with eng.begin() as conn: + version = conn.execute(text("SELECT version_num FROM alembic_version")).scalar() + eng.dispose() + assert version == V1_REVISION + + +# --------------------------------------------------------------------------- +# _detect_db_state +# --------------------------------------------------------------------------- + + +class TestDetectDBState: + def test_empty_db(self, tmp_db_url): + assert _detect_db_state(tmp_db_url) == "empty" + + def test_unmanaged_db(self, tmp_db_url): + eng = create_engine(tmp_db_url) + Base.metadata.create_all(bind=eng) + eng.dispose() + assert _detect_db_state(tmp_db_url) == "unmanaged" + + def test_managed_db(self, tmp_db_url): + run_migrations(tmp_db_url) + assert _detect_db_state(tmp_db_url) == "managed" + + def test_rogue_table_is_unmanaged(self, tmp_db_url): + """Any DB with tables 
# ---------------------------------------------------------------------------
# Shared helpers for the test classes below
# ---------------------------------------------------------------------------

# Repository root, anchored on this file so the tests do not depend on the
# directory pytest happens to be launched from.
_REPO_ROOT = Path(__file__).resolve().parent.parent

# Tables whose row counts must survive adoption of an existing database.
_DATA_TABLES = ("boxes", "items", "subitems")


def _v1_revision_files():
    """Return ``(versions_dir, files)`` for scripts whose name contains V1_REVISION."""
    versions_dir = _REPO_ROOT / "migrations" / "versions"
    return versions_dir, list(versions_dir.glob(f"*{V1_REVISION}*.py"))


def _row_counts(db_url):
    """Return ``{table: row count}`` for every table in ``_DATA_TABLES``."""
    eng = create_engine(db_url)
    try:
        with eng.begin() as conn:
            return {
                table: conn.execute(text(f"SELECT COUNT(*) FROM {table}")).scalar()
                for table in _DATA_TABLES
            }
    finally:
        # Dispose even when a query fails so the SQLite file handle is released.
        eng.dispose()


# ---------------------------------------------------------------------------
# verify_schema_is_current (read-only startup check)
# ---------------------------------------------------------------------------


class TestVerifySchemaIsCurrent:
    """verify_schema_is_current is read-only — only checks, never modifies."""

    def test_passes_when_at_head(self, tmp_db_url):
        """After run_migrations, verification must return without raising."""
        run_migrations(tmp_db_url)
        verify_schema_is_current(tmp_db_url)

    def test_fails_on_empty_db(self, tmp_db_url):
        """A database with no tables is rejected with an 'empty' error."""
        with pytest.raises(RuntimeError, match="empty"):
            verify_schema_is_current(tmp_db_url)

    def test_fails_on_unmanaged_db(self, tmp_db_url):
        """Tables created via the ORM metadata (no alembic_version) are rejected."""
        eng = create_engine(tmp_db_url)
        try:
            Base.metadata.create_all(bind=eng)
        finally:
            eng.dispose()

        with pytest.raises(RuntimeError, match="alembic_version"):
            verify_schema_is_current(tmp_db_url)

    def test_fails_on_wrong_revision(self, tmp_db_url):
        """Stamp at an old/fake revision, then verify should fail."""
        run_migrations(tmp_db_url)

        # Overwrite the recorded revision with one no migration script defines.
        eng = create_engine(tmp_db_url)
        try:
            with eng.begin() as conn:
                conn.execute(text("DELETE FROM alembic_version"))
                conn.execute(
                    text("INSERT INTO alembic_version VALUES ('fake_old_rev')")
                )
        finally:
            eng.dispose()

        with pytest.raises(RuntimeError, match="fake_old_rev"):
            verify_schema_is_current(tmp_db_url)

    def test_does_not_modify_db(self, tmp_db_url):
        """Calling verify on an empty DB must not create any tables."""
        with pytest.raises(RuntimeError):
            verify_schema_is_current(tmp_db_url)

        eng = create_engine(tmp_db_url)
        try:
            tables = set(inspect(eng).get_table_names())
        finally:
            eng.dispose()
        assert tables == set()  # still empty

    def test_no_file_creation_for_missing_sqlite(self, tmp_path):
        """verify_schema_is_current must NOT create a missing SQLite file."""
        missing_path = tmp_path / "nonexistent" / "missing.db"
        db_url = f"sqlite:///{missing_path}"

        with pytest.raises(RuntimeError, match="does not exist"):
            verify_schema_is_current(db_url)

        # Neither the file nor its parent directory may appear as a side effect.
        assert not missing_path.exists()
        assert not missing_path.parent.exists()


# ---------------------------------------------------------------------------
# V1_REVISION constant
# ---------------------------------------------------------------------------


class TestV1RevisionConstant:
    """V1_REVISION must identify exactly one baseline migration script."""

    def test_revision_file_exists(self):
        """V1_REVISION must point to an actual migration file."""
        versions_dir, revision_files = _v1_revision_files()
        assert len(revision_files) == 1, (
            f"Expected exactly one file matching revision {V1_REVISION} "
            f"in {versions_dir}, found: {revision_files}"
        )

    def test_revision_matches_baseline(self):
        """V1_REVISION must be the baseline (no down_revision)."""
        import importlib.util

        _, revision_files = _v1_revision_files()
        assert len(revision_files) == 1

        # Load the migration script directly from its path; it is not an
        # importable package module.
        spec = importlib.util.spec_from_file_location(
            "v1_migration", revision_files[0]
        )
        # spec_from_file_location may return None; fail with a clear message
        # instead of an AttributeError on the next line.
        assert spec is not None and spec.loader is not None, (
            f"Could not build an import spec for {revision_files[0]}"
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)

        assert mod.down_revision is None, "V1 baseline must have down_revision = None"
        assert mod.revision == V1_REVISION


# ---------------------------------------------------------------------------
# Integration: init_db startup verification
# ---------------------------------------------------------------------------


class TestInitDBStartupVerify:
    """init_db (called by create_app lifespan) verifies schema at startup."""

    def test_app_starts_when_db_at_head(self, tmp_path):
        """App starts normally when DB has been migrated to head."""
        test_db_path = tmp_path / "integration.db"
        database_url = f"sqlite:///{test_db_path}"

        run_migrations(database_url)
        configure_database(database_url)

        app = create_app()
        # Entering the TestClient context runs the app's startup (lifespan).
        with TestClient(app) as client:
            response = client.get("/boxes", follow_redirects=False)
            assert response.status_code == 200

    def test_init_db_fails_on_empty_db(self, tmp_path):
        """init_db raises RuntimeError on empty DB — app must not start."""
        test_db_path = tmp_path / "empty.db"
        database_url = f"sqlite:///{test_db_path}"

        configure_database(database_url)
        app = create_app()
        with pytest.raises(RuntimeError, match="empty"):
            with TestClient(app):
                pass

    def test_init_db_fails_on_unmanaged_db(self, tmp_path):
        """init_db raises RuntimeError on unmanaged DB — app must not start."""
        test_db_path = tmp_path / "unmanaged.db"
        database_url = f"sqlite:///{test_db_path}"

        # Create tables the old way (no alembic_version)
        eng = create_engine(database_url)
        try:
            Base.metadata.create_all(bind=eng)
        finally:
            eng.dispose()

        configure_database(database_url)
        app = create_app()
        with pytest.raises(RuntimeError, match="alembic_version"):
            with TestClient(app):
                pass

    def test_full_crud_after_migration(self, tmp_path):
        """Full CRUD works when DB is migrated first, then app starts."""
        test_db_path = tmp_path / "crud.db"
        database_url = f"sqlite:///{test_db_path}"

        run_migrations(database_url)
        configure_database(database_url)
        app = create_app()

        with TestClient(app) as client:
            # Create a box
            resp = client.post(
                "/boxes",
                data={
                    "name": "Test Box",
                    "room": "Living Room",
                    "status": "ready",
                },
                follow_redirects=False,
            )
            assert resp.status_code in (200, 302, 303)

            # Verify it's there
            resp = client.get("/boxes")
            assert resp.status_code == 200
            assert "Test Box" in resp.text

            # Create an item (the DB is fresh, so the box above has id 1)
            resp = client.post(
                "/boxes/1/items",
                data={
                    "name": "Test Item",
                    "quantity": "3",
                },
                follow_redirects=False,
            )
            assert resp.status_code in (200, 302, 303)

            # Delete the box (cascade)
            resp = client.post("/boxes/1/delete", follow_redirects=False)
            assert resp.status_code in (200, 302, 303)

            # Verify empty. Check the status first: an error page would also
            # lack "Test Box" and make the assertion below pass vacuously.
            resp = client.get("/boxes")
            assert resp.status_code == 200
            assert "Test Box" not in resp.text


# ---------------------------------------------------------------------------
# Production DB copy adoption
# ---------------------------------------------------------------------------


class TestProdDBCopyAdoption:
    """Verify migration works against a copy of the real production DB."""

    def test_adopt_prod_copy(self, tmp_path):
        """Migrating a copy of data/app.db keeps every row and records V1_REVISION."""
        # Resolve against the repository root rather than the current working
        # directory, matching how the migrations directory is located above.
        prod_db = _REPO_ROOT / "data" / "app.db"
        if not prod_db.exists():
            pytest.skip("data/app.db not present — skipping prod copy test")

        # Work on a throwaway copy so the real database is never touched.
        copy_path = tmp_path / "prod_copy.db"
        shutil.copy2(prod_db, copy_path)
        db_url = f"sqlite:///{copy_path}"

        # Record row counts before
        counts_before = _row_counts(db_url)

        # Run migration (handles managed, unmanaged, or empty)
        run_migrations(db_url)

        # Verify version at head and data preserved
        eng = create_engine(db_url)
        try:
            with eng.begin() as conn:
                version = conn.execute(
                    text("SELECT version_num FROM alembic_version")
                ).scalar()
        finally:
            eng.dispose()
        counts_after = _row_counts(db_url)

        assert version == V1_REVISION
        assert counts_after == counts_before