From 1a2f9c75d916de9e2f9687dd104f47c15fa15c0e Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Sun, 19 Apr 2026 23:02:43 +0200 Subject: [PATCH] Harden location db startup validation --- app/main.py | 13 ++++++ docs/location-recorder.md | 29 +++++++++---- docs/migration-notes.md | 6 +++ scripts/location_db_adopt.py | 59 ++++++++++++++++++++++++++ tests/conftest.py | 43 ++++++++++++++++++- tests/test_app.py | 81 ++++++++++++++++++++++++++++++++++++ tests/test_location.py | 52 +++++++++++++++++++---- 7 files changed, 266 insertions(+), 17 deletions(-) diff --git a/app/main.py b/app/main.py index 42efe4b..e1c3ee5 100644 --- a/app/main.py +++ b/app/main.py @@ -8,6 +8,18 @@ from app import models # noqa: F401 from app.api.routes import pages, status from app.api.routes.location import router as location_router from app.config import get_settings +from scripts.location_db_adopt import LocationDatabaseAdoptionError, validate_location_runtime_db + + +def ensure_location_db_ready() -> None: + settings = get_settings() + if settings.location_sqlite_path is None: + return + + try: + validate_location_runtime_db(settings.location_database_url) + except LocationDatabaseAdoptionError as exc: + raise RuntimeError(str(exc)) from exc def ensure_runtime_dirs() -> None: @@ -20,6 +32,7 @@ def ensure_runtime_dirs() -> None: @asynccontextmanager async def lifespan(_: FastAPI): ensure_runtime_dirs() + ensure_location_db_ready() yield diff --git a/docs/location-recorder.md b/docs/location-recorder.md index bb77ef2..6ed6d9b 100644 --- a/docs/location-recorder.md +++ b/docs/location-recorder.md @@ -31,8 +31,10 @@ PRAGMA user_version = 2; 1. 把上述 `location` schema 视为 Alembic baseline 2. 新数据库通过 Alembic `upgrade head` 初始化 -3. 已有 legacy SQLite 数据库,只要确认 schema 与 baseline 一致,就通过 `alembic stamp` 接管 -4. 未来不再以 `PRAGMA user_version` 作为主 migration 机制 +3. 已有 legacy SQLite 数据库,只要确认 schema 与 baseline 一致,再通过 `alembic stamp` 接管 +4. 如果数据库已经存在 `alembic_version`,则必须先确认当前 revision 与项目预期 baseline 一致 +5. 只有 revision 一致时,才视为该库已经被正确接管 +6. 未来不再以 `PRAGMA user_version` 作为主 migration 机制 当前 baseline revision 是: @@ -56,6 +58,15 @@ python -m scripts.location_db_adopt - 本地没有 DB 文件:按新库初始化 - 任一校验不通过:立即报错并停止 +应用本身在启动时不会自动替你初始化 `location` 数据库。 +应用启动时会对 `LOCATION_DATABASE_URL` 做只读校验: + +- 文件不存在:直接报错,并提示先运行接管脚本 +- 文件存在但还没有 `alembic_version`:直接报错,要求先完成 legacy 接管 +- 文件已被 Alembic 管理但 revision 不匹配:直接报错并拒绝启动 + +这是有意为之,用来避免应用在错误路径上静默创建新库,或带着错误数据库版本继续跑业务。 + ## 新数据库初始化 如果本地不存在 `LOCATION_DATABASE_URL` 指向的 DB 文件: @@ -77,11 +88,14 @@ alembic upgrade head 对于已经存在的 legacy SQLite 数据库: 1. 先确认 DB 文件存在 -2. 读取当前 DB 中 `location` 表的实际 schema -3. 与 baseline schema 做严格比对 -4. 再检查 `PRAGMA user_version` -5. 只有 schema 匹配且 `user_version = 2` 时,才执行 Alembic `stamp` -6. 接管完成后,后续 migration 才交给 Alembic 管理 +2. 如果已经存在 `alembic_version` 表,则先读取当前 revision +3. 如果 revision 等于 `20260419_01_location_baseline`,则视为该库已经被 Alembic 正确接管 +4. 如果 revision 不匹配,立即报错并停止,不做任何自动修复 +5. 如果还没有 `alembic_version` 表,则读取当前 DB 中 `location` 表的实际 schema +6. 与 baseline schema 做严格比对 +7. 再检查 `PRAGMA user_version` +8. 只有 schema 匹配且 `user_version = 2` 时,才执行 Alembic `stamp` +9. 接管完成后,后续 migration 才交给 Alembic 管理 示例: @@ -110,6 +124,7 @@ LOCATION_DATABASE_URL=sqlite:///./data/locationRecorder.db python scripts/locati - 找不到 `location` 表 - `location` 表 schema 与 baseline 不一致 - `PRAGMA user_version` 不等于 `2` +- 已有 `alembic_version`,但 revision 与预期 baseline 不一致 - 目标 DB 不是 SQLite URL 当前不会尝试: diff --git a/docs/migration-notes.md b/docs/migration-notes.md index 54079e4..f0d5a3b 100644 --- a/docs/migration-notes.md +++ b/docs/migration-notes.md @@ -68,6 +68,12 @@ CREATE TABLE location ( - 只有全部匹配才执行 Alembic `stamp` - 不匹配则直接失败,不自动修复 +同时,应用启动阶段现在也会对 location DB 做保守的只读校验: + +- DB 文件不存在时拒绝启动 +- DB 尚未被 Alembic 接管时拒绝启动 +- DB revision 与当前应用预期不一致时拒绝启动 + ## 后续建议顺序 建议继续沿用既有迁移文档中的顺序: diff --git a/scripts/location_db_adopt.py b/scripts/location_db_adopt.py index bdedf3a..5e3f782 100644 --- a/scripts/location_db_adopt.py +++ b/scripts/location_db_adopt.py @@ -54,6 +54,30 @@ def _location_table_exists(database_path: Path) -> bool: conn.close() +def _alembic_version_table_exists(database_path: Path) -> bool: + conn = sqlite3.connect(database_path) + try: + row = conn.execute( + "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'alembic_version'" + ).fetchone() + return row is not None + finally: + conn.close() + + +def _fetch_alembic_revision(database_path: Path) -> str: + conn = sqlite3.connect(database_path) + try: + row = conn.execute("SELECT version_num FROM alembic_version").fetchone() + if row is None: + raise LocationDatabaseAdoptionError( + "Alembic version table exists but contains no revision" + ) + return row[0] + finally: + conn.close() + + def _fetch_location_table_info(database_path: Path) -> list[tuple]: conn = sqlite3.connect(database_path) try: @@ -91,11 +115,44 @@ def validate_legacy_location_db(database_url: str) -> None: ) +def validate_location_runtime_db(database_url: str) -> None: + database_path = _database_path_from_url(database_url) + if not database_path.exists(): + raise LocationDatabaseAdoptionError( + "Location DB file was not found. Run 'python scripts/location_db_adopt.py' " + "first to initialize or adopt the location DB before starting the app." + ) + + if not _alembic_version_table_exists(database_path): + raise LocationDatabaseAdoptionError( + "Location DB exists but is not yet Alembic-managed. Run " + "'python scripts/location_db_adopt.py' first to adopt the legacy DB " + "before starting the app." + ) + + current_revision = _fetch_alembic_revision(database_path) + if current_revision != LOCATION_BASELINE_REVISION: + raise LocationDatabaseAdoptionError( + "Location DB revision mismatch. Refusing to start the app: " + f"expected {LOCATION_BASELINE_REVISION}, got {current_revision}" + ) + + def adopt_or_initialize_location_db(database_url: str) -> str: database_path = _database_path_from_url(database_url) alembic_config = _make_alembic_config(database_url) if database_path.exists(): + if _alembic_version_table_exists(database_path): + current_revision = _fetch_alembic_revision(database_path) + if current_revision != LOCATION_BASELINE_REVISION: + raise LocationDatabaseAdoptionError( + "Location DB is already Alembic-managed but revision does not match " + f"the expected baseline: expected {LOCATION_BASELINE_REVISION}, " + f"got {current_revision}" + ) + return "already_managed" + validate_legacy_location_db(database_url) command.stamp(alembic_config, LOCATION_BASELINE_REVISION) return "adopted" @@ -110,6 +167,8 @@ def main() -> None: result = adopt_or_initialize_location_db(settings.location_database_url) if result == "initialized": print("Initialized a new location DB via Alembic upgrade head.") + elif result == "already_managed": + print("Location DB is already Alembic-managed at the expected baseline revision.") else: print("Validated legacy location DB and stamped Alembic baseline successfully.") diff --git a/tests/conftest.py b/tests/conftest.py index 4f8484c..6df20d7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,12 +1,51 @@ +from pathlib import Path + import pytest +from alembic import command +from alembic.config import Config from fastapi.testclient import TestClient +from app.config import get_settings from app.main import create_app +def _make_alembic_config(database_url: str) -> Config: + config = Config("alembic.ini") + config.set_main_option("sqlalchemy.url", database_url) + return config + + @pytest.fixture -def app(): - return create_app() +def test_database_urls(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + location_database_path = tmp_path / "location_test.db" + poo_database_path = tmp_path / "poo_placeholder.db" + location_database_url = f"sqlite:///{location_database_path}" + poo_database_url = f"sqlite:///{poo_database_path}" + + monkeypatch.setenv("LOCATION_DATABASE_URL", location_database_url) + monkeypatch.setenv("POO_DATABASE_URL", poo_database_url) + get_settings.cache_clear() + + try: + yield { + "location_path": location_database_path, + "location_url": location_database_url, + "poo_path": poo_database_path, + "poo_url": poo_database_url, + } + finally: + get_settings.cache_clear() + + +@pytest.fixture +def ready_location_database(test_database_urls): + command.upgrade(_make_alembic_config(test_database_urls["location_url"]), "head") + return test_database_urls + + +@pytest.fixture +def app(ready_location_database): + yield create_app() @pytest.fixture diff --git a/tests/test_app.py b/tests/test_app.py index b6140f2..628acfd 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1,5 +1,19 @@ +import sqlite3 + +import anyio +import pytest +from alembic import command from fastapi.testclient import TestClient +from app.config import get_settings +from app.main import create_app +from tests.conftest import _make_alembic_config + + +async def _run_lifespan(app) -> None: + async with app.router.lifespan_context(app): + return None + def test_app_starts(client: TestClient) -> None: response = client.get("/") @@ -11,3 +25,70 @@ def test_status_endpoint(client: TestClient) -> None: assert response.status_code == 200 assert response.json() == {"status": "ok"} + +def test_app_start_fails_when_location_db_missing( + tmp_path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("LOCATION_DATABASE_URL", f"sqlite:///{tmp_path / 'missing.db'}") + monkeypatch.setenv("POO_DATABASE_URL", f"sqlite:///{tmp_path / 'poo_placeholder.db'}") + get_settings.cache_clear() + + app = create_app() + with pytest.raises(RuntimeError, match="Run 'python scripts/location_db_adopt.py' first"): + anyio.run(_run_lifespan, app) + + get_settings.cache_clear() + + +def test_app_start_fails_when_location_db_exists_but_is_not_adopted( + tmp_path, monkeypatch: pytest.MonkeyPatch +) -> None: + database_path = tmp_path / "legacy_only.db" + conn = sqlite3.connect(database_path) + conn.execute( + """ + CREATE TABLE location ( + person TEXT NOT NULL, + datetime TEXT NOT NULL, + latitude REAL NOT NULL, + longitude REAL NOT NULL, + altitude REAL, + PRIMARY KEY (person, datetime) + ) + """ + ) + conn.execute("PRAGMA user_version = 2") + conn.commit() + conn.close() + + monkeypatch.setenv("LOCATION_DATABASE_URL", f"sqlite:///{database_path}") + monkeypatch.setenv("POO_DATABASE_URL", f"sqlite:///{tmp_path / 'poo_placeholder.db'}") + get_settings.cache_clear() + + app = create_app() + with pytest.raises(RuntimeError, match="is not yet Alembic-managed"): + anyio.run(_run_lifespan, app) + + get_settings.cache_clear() + + +def test_app_start_fails_when_location_db_revision_mismatches( + tmp_path, monkeypatch: pytest.MonkeyPatch +) -> None: + database_path = tmp_path / "wrong_revision.db" + command.upgrade(_make_alembic_config(f"sqlite:///{database_path}"), "head") + + conn = sqlite3.connect(database_path) + conn.execute("UPDATE alembic_version SET version_num = 'wrong_revision'") + conn.commit() + conn.close() + + monkeypatch.setenv("LOCATION_DATABASE_URL", f"sqlite:///{database_path}") + monkeypatch.setenv("POO_DATABASE_URL", f"sqlite:///{tmp_path / 'poo_placeholder.db'}") + get_settings.cache_clear() + + app = create_app() + with pytest.raises(RuntimeError, match="Location DB revision mismatch"): + anyio.run(_run_lifespan, app) + + get_settings.cache_clear() diff --git a/tests/test_location.py b/tests/test_location.py index c746254..522e0d6 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -25,11 +25,8 @@ def _make_alembic_config(database_url: str) -> Config: @pytest.fixture -def location_client(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): - database_path = tmp_path / "location_test.db" - database_url = f"sqlite:///{database_path}" - - command.upgrade(_make_alembic_config(database_url), "head") +def location_client(ready_location_database, monkeypatch: pytest.MonkeyPatch): + database_url = ready_location_database["location_url"] engine = create_engine(database_url, connect_args={"check_same_thread": False}) session_local = sessionmaker(bind=engine, autoflush=False, autocommit=False) @@ -122,9 +119,11 @@ def test_location_record_endpoint_keeps_legacy_lenient_number_parsing(location_c def test_legacy_style_location_db_can_be_stamped_and_adopted( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch + test_database_urls, monkeypatch: pytest.MonkeyPatch ) -> None: - database_path = tmp_path / "legacy_location.db" + database_path = test_database_urls["location_path"] + database_url = test_database_urls["location_url"] + conn = sqlite3.connect(database_path) conn.execute( """ @@ -142,7 +141,6 @@ def test_legacy_style_location_db_can_be_stamped_and_adopted( conn.commit() conn.close() - database_url = f"sqlite:///{database_path}" command.stamp(_make_alembic_config(database_url), LOCATION_BASELINE_REVISION) engine = create_engine(database_url, connect_args={"check_same_thread": False}) @@ -228,6 +226,44 @@ def test_location_db_adoption_validates_and_stamps_legacy_db(tmp_path: Path) -> assert revision == LOCATION_BASELINE_REVISION +def test_location_db_adoption_accepts_already_managed_matching_revision( + tmp_path: Path, +) -> None: + database_path = tmp_path / "managed_location.db" + command.upgrade(_make_alembic_config(f"sqlite:///{database_path}"), "head") + + result = adopt_or_initialize_location_db(f"sqlite:///{database_path}") + + assert result == "already_managed" + + +def test_location_db_adoption_fails_closed_on_alembic_revision_mismatch( + tmp_path: Path, +) -> None: + database_path = tmp_path / "wrong_revision.db" + conn = sqlite3.connect(database_path) + conn.execute( + """ + CREATE TABLE location ( + person TEXT NOT NULL, + datetime TEXT NOT NULL, + latitude REAL NOT NULL, + longitude REAL NOT NULL, + altitude REAL, + PRIMARY KEY (person, datetime) + ) + """ + ) + conn.execute("CREATE TABLE alembic_version (version_num VARCHAR(32) NOT NULL)") + conn.execute("INSERT INTO alembic_version (version_num) VALUES ('wrong_revision')") + conn.execute(f"PRAGMA user_version = {EXPECTED_USER_VERSION}") + conn.commit() + conn.close() + + with pytest.raises(LocationDatabaseAdoptionError, match="revision does not match"): + adopt_or_initialize_location_db(f"sqlite:///{database_path}") + + def test_location_db_adoption_fails_closed_on_schema_mismatch(tmp_path: Path) -> None: database_path = tmp_path / "bad_schema.db" conn = sqlite3.connect(database_path)