From 5c7cc4c6aad4721ab60f667b75354bb299a78e98 Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Mon, 25 May 2026 12:49:11 +0200 Subject: [PATCH] sync: detect orphaned blobs (pepper rotation) + fix AESGCM arg order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an 8-byte HKDF fingerprint of the current pepper to portfolio_sync rows. On fetch, a mismatch surfaces as 410 Gone (distinct from genuine GCM corruption → 500), and the UI silently cleans up the dead row and shows a soft "please re-import" notice instead of a confusing PIN re-prompt. Legacy rows (pepper_fp NULL) are probed optimistically and backfilled on success. Also fixes a latent bug in unwrap(): AESGCM.decrypt args were swapped (ct, nonce instead of nonce, ct), so restore-from-cloud always failed even when the pepper was correct. Co-Authored-By: Claude Opus 4.7 --- .../versions/0016_portfolio_sync_pepper_fp.py | 39 ++++++++ app/models.py | 4 + app/routers/sync.py | 14 ++- app/services/portfolio_sync.py | 94 ++++++++++++++++--- app/static/js/portfolio-sync.js | 23 ++++- app/static/js/portfolio.js | 53 ++++++++++- app/templates/settings.html | 13 ++- tests/test_portfolio_sync_api.py | 2 +- 8 files changed, 224 insertions(+), 18 deletions(-) create mode 100644 alembic/versions/0016_portfolio_sync_pepper_fp.py diff --git a/alembic/versions/0016_portfolio_sync_pepper_fp.py b/alembic/versions/0016_portfolio_sync_pepper_fp.py new file mode 100644 index 0000000..1a5f6c0 --- /dev/null +++ b/alembic/versions/0016_portfolio_sync_pepper_fp.py @@ -0,0 +1,39 @@ +"""portfolio_sync: add pepper_fp for orphan-blob detection. + +When PORTFOLIO_SYNC_PEPPER rotates (intentional or otherwise), any +existing wrapped blob becomes permanently unreadable. Today that +manifests as a GCM InvalidTag → 500 on the GET endpoint. We add a +short HKDF-derived fingerprint of the pepper so we can detect the +rotation case explicitly and surface it to the client as a clean +"stale" state (410), distinct from genuine corruption (500). + +Existing rows get pepper_fp=NULL on upgrade; the service treats NULL +as "orphaned" (always true: those rows were written before this +column existed, so we can't prove the pepper matches). The next +successful upsert refreshes the fingerprint. + +Revision ID: 0016 +Revises: 0015 +Create Date: 2026-05-25 +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + + +revision: str = "0016" +down_revision: Union[str, None] = "0015" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "portfolio_sync", + sa.Column("pepper_fp", sa.LargeBinary(length=8), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("portfolio_sync", "pepper_fp") diff --git a/app/models.py b/app/models.py index bdc884a..2d16d01 100644 --- a/app/models.py +++ b/app/models.py @@ -204,6 +204,10 @@ class PortfolioSync(Base): updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow) fetch_window_start: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) fetch_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + # 8-byte HKDF fingerprint of the pepper that wrapped this row. A + # mismatch against the current pepper means the row is orphaned + # (pepper was rotated) — distinct from genuine GCM corruption. + pepper_fp: Mapped[bytes | None] = mapped_column(LargeBinary(length=8)) class Referral(Base): diff --git a/app/routers/sync.py b/app/routers/sync.py index e6496e0..0fa1174 100644 --- a/app/routers/sync.py +++ b/app/routers/sync.py @@ -43,6 +43,7 @@ class SyncBlobOut(BaseModel): class SyncStatusOut(BaseModel): exists: bool + orphaned: bool = False updated_at: datetime | None = None @@ -73,8 +74,8 @@ async def get_status( principal: CurrentUser = Depends(require_paid), session: AsyncSession = Depends(get_session), ) -> SyncStatusOut: - exists, updated_at = await svc.fetch_status(session, principal.id) - return SyncStatusOut(exists=exists, updated_at=updated_at) + exists, orphaned, updated_at = await svc.fetch_status(session, principal.id) + return SyncStatusOut(exists=exists, orphaned=orphaned, updated_at=updated_at) @router.post("", response_model=SyncWriteOut) @@ -108,6 +109,15 @@ async def download_blob( ) try: result = await svc.fetch(session, principal.id) + except svc.SyncOrphanedError: + # Known state: pepper rotated. The frontend uses 410 to swap the + # restore form for a "stale — re-upload" CTA. Logged at INFO, + # not ERROR, because this isn't a server fault. + log.info("portfolio_sync.orphaned", user_id=principal.id) + raise HTTPException( + status_code=status.HTTP_410_GONE, + detail="stale_blob", + ) except svc.SyncCryptoError: log.error("portfolio_sync.unwrap_failed", user_id=principal.id) raise HTTPException( diff --git a/app/services/portfolio_sync.py b/app/services/portfolio_sync.py index 15c9c41..c0bbbe9 100644 --- a/app/services/portfolio_sync.py +++ b/app/services/portfolio_sync.py @@ -44,8 +44,16 @@ RATE_LIMIT_MAX = 6 class SyncCryptoError(Exception): - """Outer-wrap decryption failed — usually a pepper change or - bit-rotted row. The router maps this to a 500.""" + """Outer-wrap decryption failed even though the pepper fingerprint + matched — i.e. genuine corruption or tampering. The router maps this + to a 500.""" + + +class SyncOrphanedError(Exception): + """The row was wrapped with a different pepper than the one currently + configured (typically: dev-time pepper rotation). The data is + permanently unrecoverable, but this is a *known* state, not a server + fault — the router maps this to a 410 Gone.""" def _utcnow() -> datetime: @@ -72,6 +80,22 @@ def _server_key(user_id: int) -> bytes: ).derive(_pepper_bytes()) +_FP_LEN = 8 + + +def current_pepper_fp() -> bytes: + """8-byte HKDF-derived fingerprint of the current pepper. Doesn't + leak the pepper itself (HKDF is one-way) and is short enough to make + accidental collisions across rotations effectively zero (2^-32 birthday + floor — fine for a few-row dev install).""" + return HKDF( + algorithm=hashes.SHA256(), + length=_FP_LEN, + salt=b"portfolio-sync-pepper-fp", + info=b"v1", + ).derive(_pepper_bytes()) + + def wrap(user_id: int, inner_blob: bytes) -> tuple[bytes, bytes]: """Encrypt the client-side ciphertext (`inner_blob`) for storage. Returns (outer_ct, outer_nonce). The nonce is random per write.""" @@ -81,9 +105,15 @@ def wrap(user_id: int, inner_blob: bytes) -> tuple[bytes, bytes]: def unwrap(user_id: int, outer_ct: bytes, outer_nonce: bytes) -> bytes: - """Inverse of wrap(). Raises SyncCryptoError if the GCM tag fails.""" + """Inverse of wrap(). Raises SyncCryptoError if the GCM tag fails. + + AESGCM.decrypt takes (nonce, data, associated_data) — not + (data, nonce). The original implementation had the arguments + swapped, which meant restore-from-cloud always failed even when + the pepper was correct. + """ try: - return AESGCM(_server_key(user_id)).decrypt(outer_ct, outer_nonce, None) + return AESGCM(_server_key(user_id)).decrypt(outer_nonce, outer_ct, None) except Exception as exc: # InvalidTag, malformed ciphertext, etc. raise SyncCryptoError("outer wrap unwrap failed") from exc @@ -91,6 +121,7 @@ def unwrap(user_id: int, outer_ct: bytes, outer_nonce: bytes) -> bytes: async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> datetime: """Insert or replace this user's sync row. Returns the new updated_at.""" outer_ct, outer_nonce = wrap(user_id, inner_blob) + fp = current_pepper_fp() now = _utcnow() row = await session.get(PortfolioSync, user_id) if row is None: @@ -101,6 +132,7 @@ async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> date version=1, created_at=now, updated_at=now, + pepper_fp=fp, ) session.add(row) else: @@ -109,19 +141,34 @@ async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> date row.updated_at = now # Bump version field forward if we ever change the wrap scheme. row.version = 1 + row.pepper_fp = fp await session.commit() return now +def _is_orphaned(row: PortfolioSync) -> bool: + """A row is orphaned when its stored pepper fingerprint is present + and differs from the current pepper's fingerprint. NULL fingerprint + (rows from before the pepper_fp column existed) is treated + optimistically: we don't know whether the pepper rotated, so we let + the fetch path probe with a real unwrap and self-heal on success. + Status returns orphaned=False for NULL so the user is offered the + Restore form; if unwrap then fails, the GET path returns 410 and the + UI flips to the stale state.""" + return row.pepper_fp is not None and row.pepper_fp != current_pepper_fp() + + async def fetch_status( session: AsyncSession, user_id: int, -) -> tuple[bool, datetime | None]: - """Cheap existence check — does NOT decrypt. Used by the dashboard to - decide whether to show the restore prompt.""" +) -> tuple[bool, bool, datetime | None]: + """Cheap existence check — does NOT decrypt. Returns + (exists, orphaned, updated_at). Used by the dashboard to decide + whether to show the restore prompt vs the "stale, re-upload" prompt. + """ row = await session.get(PortfolioSync, user_id) if row is None: - return False, None - return True, row.updated_at + return False, False, None + return True, _is_orphaned(row), row.updated_at async def fetch( @@ -129,13 +176,36 @@ async def fetch( ) -> tuple[bytes, datetime] | None: """Returns (inner_blob, updated_at) or None if sync disabled. - Raises SyncCryptoError if the row exists but the outer wrap is - unreadable (typically: pepper was rotated without re-encrypting). + Raises SyncOrphanedError if the row's pepper fingerprint mismatches + the current pepper, OR if a fingerprint-less legacy row fails to + unwrap (which can only mean a pepper rotation, since the arg-order + bug fix landed alongside the fingerprint column). + + Raises SyncCryptoError if the fingerprint matched but the outer wrap + still failed (genuine corruption or tampering). + + On a successful unwrap of a fingerprint-less legacy row, the current + pepper's fingerprint is backfilled so subsequent status checks + correctly report healthy (and future rotations are detectable). """ row = await session.get(PortfolioSync, user_id) if row is None: return None - inner = unwrap(user_id, row.outer_ciphertext, row.outer_nonce) + if _is_orphaned(row): + raise SyncOrphanedError("pepper fingerprint mismatch") + legacy = row.pepper_fp is None + try: + inner = unwrap(user_id, row.outer_ciphertext, row.outer_nonce) + except SyncCryptoError: + if legacy: + # Legacy row + decrypt fails = pepper rotated before the + # fingerprint column existed. Same observable state as a + # post-fingerprint orphan; report it that way. + raise SyncOrphanedError("legacy row, decrypt failed") + raise + if legacy: + row.pepper_fp = current_pepper_fp() + await session.commit() return inner, row.updated_at diff --git a/app/static/js/portfolio-sync.js b/app/static/js/portfolio-sync.js index 6772dd9..de71c17 100644 --- a/app/static/js/portfolio-sync.js +++ b/app/static/js/portfolio-sync.js @@ -216,7 +216,23 @@ if (r.status === 402) return { exists: false, paid: false }; if (!r.ok) throw new Error('sync status: HTTP ' + r.status); const body = await r.json(); - return { exists: !!body.exists, updated_at: body.updated_at, paid: true }; + return { + exists: !!body.exists, + orphaned: !!body.orphaned, + updated_at: body.updated_at, + paid: true, + }; + } + + // Thrown by pullSync when the server reports the stored blob is + // wrapped with a different server key (pepper rotation). Distinct + // from BadPinError so the UI can swap the restore form for a + // re-upload CTA instead of asking again for the PIN. + class StaleBlobError extends Error { + constructor(msg) { + super(msg || 'Stored portfolio cannot be decrypted with the current server key.'); + this.name = 'StaleBlobError'; + } } async function pushSync(pie, pin) { @@ -245,6 +261,10 @@ headers: { 'Accept': 'application/json' }, }); if (r.status === 404) return null; + if (r.status === 410) { + const body = await r.json().catch(() => ({})); + throw new StaleBlobError(body.detail); + } if (!r.ok) { const body = await r.json().catch(() => ({})); // 429 → server already throttling; bubble the message up unchanged. @@ -273,6 +293,7 @@ disableSync, clearCachedKey, BadPinError, + StaleBlobError, // Exposed for tests / debugging: _packBlob: packBlob, _unpackBlob: unpackBlob, diff --git a/app/static/js/portfolio.js b/app/static/js/portfolio.js index d40dcde..bedea08 100644 --- a/app/static/js/portfolio.js +++ b/app/static/js/portfolio.js @@ -164,14 +164,52 @@ }[c])); } + // Tiny one-shot flag the orphan-cleanup path sets so renderEmpty can + // surface a plain-English "your previous backup needs to be re-uploaded" + // line. Read-once; cleared as soon as it's shown. + function consumeBackupExpiredNotice() { + try { + if (sessionStorage.getItem('cassandra.sync.backupExpired') === '1') { + sessionStorage.removeItem('cassandra.sync.backupExpired'); + return true; + } + } catch (e) { /* ignore */ } + return false; + } + function renderEmpty(mount) { + const expired = consumeBackupExpiredNotice(); + const notice = expired + ? '
' + + 'Your previous cloud backup couldn’t be restored on this server. ' + + 'Please re-upload your portfolio to refresh it.' + + '
' + : ''; mount.innerHTML = '
' + + notice + 'No portfolio loaded in this browser. ' + 'Import a T212 CSV →' + '
'; } + // Silently remove an unrecoverable cloud blob and re-render. The user + // sees the standard empty state with a soft one-liner — no jargon, no + // extra buttons. The decision to remove is safe: the blob is already + // permanently undecryptable, so we're cleaning up dead state, not + // discarding user data. + async function autoCleanStaleBlob(mount) { + try { + await window.CassandraSync.disableSync(); + } catch (e) { + console.warn('cassandra.sync: auto-clean of stale blob failed', e); + } + try { + sessionStorage.setItem('cassandra.sync.backupExpired', '1'); + } catch (e) { /* ignore */ } + renderEmpty(mount); + } + function renderRestoreFromCloud(mount, status) { const lastSynced = status.updated_at ? new Date(status.updated_at).toISOString().replace('T', ' ').slice(0, 16) + ' UTC' @@ -212,6 +250,12 @@ savePie(pie); mountAndRender(); } catch (e2) { + if (e2 && e2.name === 'StaleBlobError') { + // Pepper rotated since the blob was written — silently clean + // up and fall through to the empty state with a soft notice. + autoCleanStaleBlob(mount); + return; + } err.textContent = (e2 && e2.name === 'BadPinError') ? 'Incorrect PIN.' : (e2.message || 'Could not restore.'); @@ -417,7 +461,14 @@ catch (e) { console.warn('sync status check failed', e); } } if (status && status.paid && status.exists) { - renderRestoreFromCloud(mount, status); + if (status.orphaned) { + // Pepper rotated since the blob was written — clean up + // silently and show the standard empty state with a soft + // "please re-upload" notice. + autoCleanStaleBlob(mount); + } else { + renderRestoreFromCloud(mount, status); + } } else { renderEmpty(mount); } diff --git a/app/templates/settings.html b/app/templates/settings.html index ecdd25a..bab5cb9 100644 --- a/app/templates/settings.html +++ b/app/templates/settings.html @@ -256,7 +256,18 @@ } const valueEl = statusEl.querySelector('.settings-row__value'); actionsEl.innerHTML = ''; - if (status.exists) { + if (status.exists && status.orphaned) { + // The stored blob can no longer be decrypted (server key rotated + // since it was written). The data is permanently unrecoverable, + // so silently clean up the dead row and re-render in the + // standard "off" state — leaving a soft one-liner so the user + // knows why they need to re-import. + try { await window.CassandraSync.disableSync(); } + catch (e) { console.warn('auto-clear stale sync failed', e); } + setFeedback('Your previous cloud backup couldn’t be restored. Re-import your portfolio to enable cloud sync again.', true); + await refresh(); + return; + } else if (status.exists) { const when = status.updated_at ? new Date(status.updated_at).toISOString().replace('T', ' ').slice(0, 16) + ' UTC' : '—'; diff --git a/tests/test_portfolio_sync_api.py b/tests/test_portfolio_sync_api.py index 0a8fcc0..10c6a96 100644 --- a/tests/test_portfolio_sync_api.py +++ b/tests/test_portfolio_sync_api.py @@ -72,7 +72,7 @@ def test_paid_user_round_trip(tmp_path): # status before any upload r = client.get("/api/portfolio/sync/status", cookies=cookies) assert r.status_code == 200 - assert r.json() == {"exists": False, "updated_at": None} + assert r.json() == {"exists": False, "orphaned": False, "updated_at": None} # upload blob = os.urandom(512)