M1-rework: harden legacy-migration reconciliation to full-row equality
Audit finding (review-notes/M1-full-review-1.md, FINDING 1): _reconcile previously checked only primary-key presence, so a source row that INSERT OR IGNORE skipped because a pre-existing target row with the same PK held different values would falsely pass. Compare ALL columns with SQLite's NULL-safe IS operator instead, so that reconciliation is a true full-row guarantee (idempotent re-runs still pass because the rows match column-for-column). Add tests for the value-mismatch abort and for idempotency under full-row reconciliation. Remove the now-unused pk_cols parameter. Verification: pytest 97 passed; ruff clean (pre-existing findings only); data-safety grep still returns no matches.
This commit is contained in:
@@ -53,17 +53,20 @@ def _sqlite_path_from_url(url: str) -> Path:
|
||||
def _reconcile(
|
||||
conn: sqlite3.Connection,
|
||||
table: str,
|
||||
pk_cols: list[str],
|
||||
columns: list[str],
|
||||
source_count: int,
|
||||
) -> int:
|
||||
"""Verify every legacy source row is present in the main (app) table.
|
||||
|
||||
Returns the count of source rows present in main.
|
||||
Raises RuntimeError if any rows are missing.
|
||||
Matches on ALL columns using SQLite's NULL-safe IS operator so that nullable
|
||||
columns (e.g. altitude) compare correctly. A row that was silently skipped
|
||||
by INSERT OR IGNORE due to a value difference will NOT satisfy this predicate
|
||||
even if its primary key is present in the target.
|
||||
|
||||
Returns the count of source rows whose full-row data is present in main.
|
||||
Raises RuntimeError if any rows are missing or differ in value.
|
||||
"""
|
||||
join_cond = " AND ".join(
|
||||
f"m.{col} = l.{col}" for col in pk_cols
|
||||
)
|
||||
join_cond = " AND ".join(f"m.{col} IS l.{col}" for col in columns)
|
||||
sql = (
|
||||
f"SELECT COUNT(*) FROM legacy.{table} l "
|
||||
f"WHERE EXISTS (SELECT 1 FROM main.{table} m WHERE {join_cond})"
|
||||
@@ -73,7 +76,7 @@ def _reconcile(
|
||||
missing = source_count - present
|
||||
raise RuntimeError(
|
||||
f"Reconciliation failed for table '{table}': "
|
||||
f"{missing} of {source_count} source rows are missing from the app DB."
|
||||
f"{missing} of {source_count} source rows are missing or differing in the app DB."
|
||||
)
|
||||
return present
|
||||
|
||||
@@ -114,7 +117,6 @@ def migrate_legacy_data(
|
||||
legacy_url=location_url,
|
||||
table="location",
|
||||
columns=["person", "datetime", "latitude", "longitude", "altitude"],
|
||||
pk_cols=["person", "datetime"],
|
||||
dry_run=dry_run,
|
||||
)
|
||||
|
||||
@@ -124,7 +126,6 @@ def migrate_legacy_data(
|
||||
legacy_url=poo_url,
|
||||
table="poo_records",
|
||||
columns=["timestamp", "status", "latitude", "longitude"],
|
||||
pk_cols=["timestamp"],
|
||||
dry_run=dry_run,
|
||||
)
|
||||
|
||||
@@ -137,7 +138,6 @@ def _migrate_table(
|
||||
legacy_url: str | None,
|
||||
table: str,
|
||||
columns: list[str],
|
||||
pk_cols: list[str],
|
||||
dry_run: bool,
|
||||
) -> dict:
|
||||
"""Migrate a single table from a legacy DB into the app DB.
|
||||
@@ -186,8 +186,8 @@ def _migrate_table(
|
||||
(after_count,) = conn.execute(f"SELECT COUNT(*) FROM main.{table}").fetchone()
|
||||
copied = after_count - before_count
|
||||
|
||||
# Reconciliation: every source row must be present
|
||||
_reconcile(conn, table, pk_cols, source_count)
|
||||
# Reconciliation: every source row must be present with matching values
|
||||
_reconcile(conn, table, columns, source_count)
|
||||
|
||||
conn.execute("DETACH DATABASE legacy")
|
||||
finally:
|
||||
|
||||
Reference in New Issue
Block a user