M1-rework: harden legacy-migration reconciliation to full-row equality

Audit finding (review-notes/M1-full-review-1.md, FINDING 1): _reconcile only checked primary-key presence, so a source row that INSERT OR IGNORE skipped because of a PK conflict with a pre-existing target row holding different non-PK values would falsely pass reconciliation. Compare ALL columns with SQLite's NULL-safe IS operator instead, so reconciliation is a true full-row guarantee (idempotent re-runs still pass because the rows match column-for-column). Add tests for the value-mismatch abort and for idempotency under full-row reconciliation. Remove the now-unused pk_cols parameter (callers pass columns instead). Verification: pytest 97 passed; ruff clean (pre-existing findings only); data-safety grep still empty.
2026-06-12 19:05:56 +02:00
parent 2f634006d2
commit 1cbe6c46d2
2 changed files with 70 additions and 13 deletions
@@ -261,7 +261,7 @@ def test_reconcile_raises_on_missing_rows(tmp_path: Path) -> None:
        _reconcile(
            conn,
            table="location",
-            pk_cols=["person", "datetime"],
+            columns=["person", "datetime", "latitude", "longitude", "altitude"],
            source_count=len(LOCATION_ROWS),
        )
    conn.execute("DETACH DATABASE legacy")
@@ -326,6 +326,63 @@ def test_cli_exits_nonzero_on_reconciliation_failure(
    assert exc_info.value.code != 0


+def test_reconcile_catches_value_mismatch_not_just_pk(tmp_path: Path) -> None:
+    """Full-row reconciliation catches value mismatch that PK-only check would miss.
+
+    Scenario: the app DB is PRE-POPULATED with a row that shares the same PK as
+    a legacy source row but has DIFFERENT non-PK column values.  INSERT OR IGNORE
+    skips the source row (PK conflict), so the target retains the stale data.
+    The old PK-only reconciliation would have incorrectly reported success.
+    The new full-row reconciliation must detect the mismatch and raise.
+    """
+    app_path, app_url = _upgraded_app_db(tmp_path)
+
+    # Legacy source has a row: person="alice", datetime="2026-01-01T10:00:00Z",
+    # latitude=1.23, longitude=4.56, altitude=7.89
+    legacy_path = tmp_path / "locationRecorder.db"
+    _make_legacy_location_db(legacy_path, [("alice", "2026-01-01T10:00:00Z", 1.23, 4.56, 7.89)])
+    legacy_url = f"sqlite:///{legacy_path}"
+
+    # App DB is pre-populated with the SAME PK but DIFFERENT non-PK values
+    # (latitude/longitude/altitude all differ from the source row)
+    conn = sqlite3.connect(app_path)
+    conn.execute(
+        "INSERT INTO location (person, datetime, latitude, longitude, altitude) "
+        "VALUES (?, ?, ?, ?, ?)",
+        ("alice", "2026-01-01T10:00:00Z", 99.0, 99.0, 99.0),
+    )
+    conn.commit()
+    conn.close()
+
+    # migrate_legacy_data must raise: the source row's data is NOT in the target
+    # (INSERT OR IGNORE skipped it because of PK conflict, retaining the 99.0 values)
+    with pytest.raises(RuntimeError, match="Reconciliation failed"):
+        migrate_legacy_data(app_url, legacy_url, None)
+
+
+def test_full_row_reconciliation_idempotent_on_identical_data(tmp_path: Path) -> None:
+    """Second run on already-migrated data still reconciles cleanly.
+
+    When the target already holds identical rows (from the first run), the full-row
+    IS predicate matches every column and reconciliation passes (no raise).
+    """
+    app_path, app_url = _upgraded_app_db(tmp_path)
+    legacy_path = tmp_path / "locationRecorder.db"
+    _make_legacy_location_db(legacy_path, LOCATION_ROWS)
+    legacy_url = f"sqlite:///{legacy_path}"
+
+    # First run: migrate all rows
+    result1 = migrate_legacy_data(app_url, legacy_url, None)
+    assert result1["location"]["copied"] == len(LOCATION_ROWS)
+
+    # Second run: rows already present, INSERT OR IGNORE skips all, full-row
+    # reconciliation must still pass because values are identical
+    result2 = migrate_legacy_data(app_url, legacy_url, None)
+    assert result2["location"]["copied"] == 0
+    assert result2["location"]["final"] == len(LOCATION_ROWS)
+    # No exception raised — idempotency holds under full-row reconciliation
+
+
 # ---------------------------------------------------------------------------
 # Test 4: dry_run
 # ---------------------------------------------------------------------------