sync: detect orphaned blobs (pepper rotation) + fix AESGCM arg order

Adds an 8-byte HKDF fingerprint of the current pepper to portfolio_sync
rows. On fetch, a mismatch surfaces as 410 Gone (distinct from genuine
GCM corruption → 500), and the UI silently cleans up the dead row and
shows a soft "please re-import" notice instead of a confusing PIN
re-prompt. Legacy rows (pepper_fp NULL) are probed optimistically and
backfilled on success.

Also fixes a latent bug in unwrap(): AESGCM.decrypt args were swapped
(ct, nonce instead of nonce, ct), so restore-from-cloud always failed
even when the pepper was correct.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-25 12:49:11 +02:00
parent f1903e1e61
commit 5c7cc4c6aa
8 changed files with 224 additions and 18 deletions

View file

@ -44,8 +44,16 @@ RATE_LIMIT_MAX = 6
class SyncCryptoError(Exception):
    """Outer-wrap decryption failed even though the pepper fingerprint
    matched, i.e. genuine corruption or tampering.

    A pepper change is *not* reported through this exception; it only
    covers failures that remain once the fingerprint check has passed.
    The router maps this to a 500.
    """
class SyncOrphanedError(Exception):
    """The row was wrapped with a different pepper than the one currently
    configured (typically: dev-time pepper rotation).

    The data is permanently unrecoverable, but this is a *known* state,
    not a server fault, so the router maps this to a 410 Gone.
    """
def _utcnow() -> datetime:
@ -72,6 +80,22 @@ def _server_key(user_id: int) -> bytes:
).derive(_pepper_bytes())
_FP_LEN = 8
def current_pepper_fp() -> bytes:
    """Return the `_FP_LEN`-byte HKDF-derived fingerprint of the current pepper.

    The fingerprint does not leak the pepper itself (HKDF is one-way).
    At 8 bytes (64 bits) it is long enough that an accidental collision
    between two different peppers is effectively impossible: any given
    pair collides with probability 2^-64, and a birthday collision needs
    on the order of 2^32 distinct peppers. That is more than enough for
    a few-row dev install.
    """
    # A dedicated salt/info pair keeps this derivation separate from any
    # other value derived from the same pepper.
    return HKDF(
        algorithm=hashes.SHA256(),
        length=_FP_LEN,
        salt=b"portfolio-sync-pepper-fp",
        info=b"v1",
    ).derive(_pepper_bytes())
def wrap(user_id: int, inner_blob: bytes) -> tuple[bytes, bytes]:
"""Encrypt the client-side ciphertext (`inner_blob`) for storage.
Returns (outer_ct, outer_nonce). The nonce is random per write."""
@ -81,9 +105,15 @@ def wrap(user_id: int, inner_blob: bytes) -> tuple[bytes, bytes]:
def unwrap(user_id: int, outer_ct: bytes, outer_nonce: bytes) -> bytes:
    """Inverse of wrap(): decrypt the outer layer and return the inner blob.

    Args:
        user_id: Owner of the row; selects the per-user server key.
        outer_ct: Outer ciphertext as returned by wrap().
        outer_nonce: Nonce that was generated for that ciphertext.

    Returns:
        The client-side ciphertext (`inner_blob`) originally passed to wrap().

    Raises:
        SyncCryptoError: if decryption fails for any reason (GCM tag
            mismatch, malformed ciphertext, etc.).
    """
    try:
        # AESGCM.decrypt takes (nonce, data, associated_data), NOT
        # (data, nonce). Passing them swapped makes every decrypt fail,
        # even when the pepper is correct.
        return AESGCM(_server_key(user_id)).decrypt(outer_nonce, outer_ct, None)
    except Exception as exc:  # InvalidTag, malformed ciphertext, etc.
        raise SyncCryptoError("outer wrap unwrap failed") from exc
@ -91,6 +121,7 @@ def unwrap(user_id: int, outer_ct: bytes, outer_nonce: bytes) -> bytes:
async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> datetime:
"""Insert or replace this user's sync row. Returns the new updated_at."""
outer_ct, outer_nonce = wrap(user_id, inner_blob)
fp = current_pepper_fp()
now = _utcnow()
row = await session.get(PortfolioSync, user_id)
if row is None:
@ -101,6 +132,7 @@ async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> date
version=1,
created_at=now,
updated_at=now,
pepper_fp=fp,
)
session.add(row)
else:
@ -109,19 +141,34 @@ async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> date
row.updated_at = now
# Bump version field forward if we ever change the wrap scheme.
row.version = 1
row.pepper_fp = fp
await session.commit()
return now
def _is_orphaned(row: PortfolioSync) -> bool:
    """Report whether `row` was wrapped under a pepper other than the current one.

    A row is orphaned when its stored pepper fingerprint is present and
    differs from the current pepper's fingerprint.

    A NULL fingerprint (rows from before the pepper_fp column existed) is
    treated optimistically and reported as not orphaned: we don't know
    whether the pepper rotated, so the fetch path is left to probe with a
    real unwrap and self-heal on success. For status checks this means
    the user is offered the Restore form; if the unwrap then fails, the
    GET path returns 410 and the UI flips to the stale state.
    """
    stored_fp = row.pepper_fp
    if stored_fp is None:
        # Legacy row: nothing to compare against, so assume healthy.
        return False
    return stored_fp != current_pepper_fp()
async def fetch_status(
    session: AsyncSession, user_id: int,
) -> tuple[bool, bool, datetime | None]:
    """Cheap existence check; does NOT decrypt.

    Used by the dashboard to decide whether to show the restore prompt
    vs the "stale, re-upload" prompt.

    Returns:
        A tuple ``(exists, orphaned, updated_at)``. When the user has no
        sync row this is ``(False, False, None)``. Otherwise ``exists``
        is True, ``orphaned`` is the fingerprint comparison from
        `_is_orphaned` (False for rows without a stored fingerprint),
        and ``updated_at`` is the row's last update time.
    """
    row = await session.get(PortfolioSync, user_id)
    if row is None:
        return False, False, None
    return True, _is_orphaned(row), row.updated_at
async def fetch(
@ -129,13 +176,36 @@ async def fetch(
) -> tuple[bytes, datetime] | None:
"""Returns (inner_blob, updated_at) or None if sync disabled.
Raises SyncCryptoError if the row exists but the outer wrap is
unreadable (typically: pepper was rotated without re-encrypting).
Raises SyncOrphanedError if the row's pepper fingerprint mismatches
the current pepper, OR if a fingerprint-less legacy row fails to
unwrap (which can only mean a pepper rotation, since the arg-order
bug fix landed alongside the fingerprint column).
Raises SyncCryptoError if the fingerprint matched but the outer wrap
still failed (genuine corruption or tampering).
On a successful unwrap of a fingerprint-less legacy row, the current
pepper's fingerprint is backfilled so subsequent status checks
correctly report healthy (and future rotations are detectable).
"""
row = await session.get(PortfolioSync, user_id)
if row is None:
return None
inner = unwrap(user_id, row.outer_ciphertext, row.outer_nonce)
if _is_orphaned(row):
raise SyncOrphanedError("pepper fingerprint mismatch")
legacy = row.pepper_fp is None
try:
inner = unwrap(user_id, row.outer_ciphertext, row.outer_nonce)
except SyncCryptoError:
if legacy:
# Legacy row + decrypt fails = pepper rotated before the
# fingerprint column existed. Same observable state as a
# post-fingerprint orphan; report it that way.
raise SyncOrphanedError("legacy row, decrypt failed")
raise
if legacy:
row.pepper_fp = current_pepper_fp()
await session.commit()
return inner, row.updated_at