sync: detect orphaned blobs (pepper rotation) + fix AESGCM arg order
Adds an 8-byte HKDF fingerprint of the current pepper to portfolio_sync rows. On fetch, a mismatch surfaces as 410 Gone (distinct from genuine GCM corruption → 500), and the UI silently cleans up the dead row and shows a soft "please re-import" notice instead of a confusing PIN re-prompt. Legacy rows (pepper_fp NULL) are probed optimistically and backfilled on success. Also fixes a latent bug in unwrap(): AESGCM.decrypt args were swapped (ct, nonce instead of nonce, ct), so restore-from-cloud always failed even when the pepper was correct. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
f1903e1e61
commit
5c7cc4c6aa
8 changed files with 224 additions and 18 deletions
|
|
@ -204,6 +204,10 @@ class PortfolioSync(Base):
|
|||
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
|
||||
fetch_window_start: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
fetch_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
|
||||
# 8-byte HKDF fingerprint of the pepper that wrapped this row. A
|
||||
# mismatch against the current pepper means the row is orphaned
|
||||
# (pepper was rotated) — distinct from genuine GCM corruption.
|
||||
pepper_fp: Mapped[bytes | None] = mapped_column(LargeBinary(length=8))
|
||||
|
||||
|
||||
class Referral(Base):
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ class SyncBlobOut(BaseModel):
|
|||
|
||||
class SyncStatusOut(BaseModel):
|
||||
exists: bool
|
||||
orphaned: bool = False
|
||||
updated_at: datetime | None = None
|
||||
|
||||
|
||||
|
|
@ -73,8 +74,8 @@ async def get_status(
|
|||
principal: CurrentUser = Depends(require_paid),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> SyncStatusOut:
|
||||
exists, updated_at = await svc.fetch_status(session, principal.id)
|
||||
return SyncStatusOut(exists=exists, updated_at=updated_at)
|
||||
exists, orphaned, updated_at = await svc.fetch_status(session, principal.id)
|
||||
return SyncStatusOut(exists=exists, orphaned=orphaned, updated_at=updated_at)
|
||||
|
||||
|
||||
@router.post("", response_model=SyncWriteOut)
|
||||
|
|
@ -108,6 +109,15 @@ async def download_blob(
|
|||
)
|
||||
try:
|
||||
result = await svc.fetch(session, principal.id)
|
||||
except svc.SyncOrphanedError:
|
||||
# Known state: pepper rotated. The frontend uses 410 to swap the
|
||||
# restore form for a "stale — re-upload" CTA. Logged at INFO,
|
||||
# not ERROR, because this isn't a server fault.
|
||||
log.info("portfolio_sync.orphaned", user_id=principal.id)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_410_GONE,
|
||||
detail="stale_blob",
|
||||
)
|
||||
except svc.SyncCryptoError:
|
||||
log.error("portfolio_sync.unwrap_failed", user_id=principal.id)
|
||||
raise HTTPException(
|
||||
|
|
|
|||
|
|
@ -44,8 +44,16 @@ RATE_LIMIT_MAX = 6
|
|||
|
||||
|
||||
class SyncCryptoError(Exception):
|
||||
"""Outer-wrap decryption failed — usually a pepper change or
|
||||
bit-rotted row. The router maps this to a 500."""
|
||||
"""Outer-wrap decryption failed even though the pepper fingerprint
|
||||
matched — i.e. genuine corruption or tampering. The router maps this
|
||||
to a 500."""
|
||||
|
||||
|
||||
class SyncOrphanedError(Exception):
|
||||
"""The row was wrapped with a different pepper than the one currently
|
||||
configured (typically: dev-time pepper rotation). The data is
|
||||
permanently unrecoverable, but this is a *known* state, not a server
|
||||
fault — the router maps this to a 410 Gone."""
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
|
|
@ -72,6 +80,22 @@ def _server_key(user_id: int) -> bytes:
|
|||
).derive(_pepper_bytes())
|
||||
|
||||
|
||||
_FP_LEN = 8
|
||||
|
||||
|
||||
def current_pepper_fp() -> bytes:
|
||||
"""8-byte HKDF-derived fingerprint of the current pepper. Doesn't
|
||||
leak the pepper itself (HKDF is one-way) and is short enough to make
|
||||
accidental collisions across rotations effectively zero (2^-32 birthday
|
||||
floor — fine for a few-row dev install)."""
|
||||
return HKDF(
|
||||
algorithm=hashes.SHA256(),
|
||||
length=_FP_LEN,
|
||||
salt=b"portfolio-sync-pepper-fp",
|
||||
info=b"v1",
|
||||
).derive(_pepper_bytes())
|
||||
|
||||
|
||||
def wrap(user_id: int, inner_blob: bytes) -> tuple[bytes, bytes]:
|
||||
"""Encrypt the client-side ciphertext (`inner_blob`) for storage.
|
||||
Returns (outer_ct, outer_nonce). The nonce is random per write."""
|
||||
|
|
@ -81,9 +105,15 @@ def wrap(user_id: int, inner_blob: bytes) -> tuple[bytes, bytes]:
|
|||
|
||||
|
||||
def unwrap(user_id: int, outer_ct: bytes, outer_nonce: bytes) -> bytes:
|
||||
"""Inverse of wrap(). Raises SyncCryptoError if the GCM tag fails."""
|
||||
"""Inverse of wrap(). Raises SyncCryptoError if the GCM tag fails.
|
||||
|
||||
AESGCM.decrypt takes (nonce, data, associated_data) — not
|
||||
(data, nonce). The original implementation had the arguments
|
||||
swapped, which meant restore-from-cloud always failed even when
|
||||
the pepper was correct.
|
||||
"""
|
||||
try:
|
||||
return AESGCM(_server_key(user_id)).decrypt(outer_ct, outer_nonce, None)
|
||||
return AESGCM(_server_key(user_id)).decrypt(outer_nonce, outer_ct, None)
|
||||
except Exception as exc: # InvalidTag, malformed ciphertext, etc.
|
||||
raise SyncCryptoError("outer wrap unwrap failed") from exc
|
||||
|
||||
|
|
@ -91,6 +121,7 @@ def unwrap(user_id: int, outer_ct: bytes, outer_nonce: bytes) -> bytes:
|
|||
async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> datetime:
|
||||
"""Insert or replace this user's sync row. Returns the new updated_at."""
|
||||
outer_ct, outer_nonce = wrap(user_id, inner_blob)
|
||||
fp = current_pepper_fp()
|
||||
now = _utcnow()
|
||||
row = await session.get(PortfolioSync, user_id)
|
||||
if row is None:
|
||||
|
|
@ -101,6 +132,7 @@ async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> date
|
|||
version=1,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
pepper_fp=fp,
|
||||
)
|
||||
session.add(row)
|
||||
else:
|
||||
|
|
@ -109,19 +141,34 @@ async def upsert(session: AsyncSession, user_id: int, inner_blob: bytes) -> date
|
|||
row.updated_at = now
|
||||
# Bump version field forward if we ever change the wrap scheme.
|
||||
row.version = 1
|
||||
row.pepper_fp = fp
|
||||
await session.commit()
|
||||
return now
|
||||
|
||||
|
||||
def _is_orphaned(row: PortfolioSync) -> bool:
|
||||
"""A row is orphaned when its stored pepper fingerprint is present
|
||||
and differs from the current pepper's fingerprint. NULL fingerprint
|
||||
(rows from before the pepper_fp column existed) is treated
|
||||
optimistically: we don't know whether the pepper rotated, so we let
|
||||
the fetch path probe with a real unwrap and self-heal on success.
|
||||
Status returns orphaned=False for NULL so the user is offered the
|
||||
Restore form; if unwrap then fails, the GET path returns 410 and the
|
||||
UI flips to the stale state."""
|
||||
return row.pepper_fp is not None and row.pepper_fp != current_pepper_fp()
|
||||
|
||||
|
||||
async def fetch_status(
|
||||
session: AsyncSession, user_id: int,
|
||||
) -> tuple[bool, datetime | None]:
|
||||
"""Cheap existence check — does NOT decrypt. Used by the dashboard to
|
||||
decide whether to show the restore prompt."""
|
||||
) -> tuple[bool, bool, datetime | None]:
|
||||
"""Cheap existence check — does NOT decrypt. Returns
|
||||
(exists, orphaned, updated_at). Used by the dashboard to decide
|
||||
whether to show the restore prompt vs the "stale, re-upload" prompt.
|
||||
"""
|
||||
row = await session.get(PortfolioSync, user_id)
|
||||
if row is None:
|
||||
return False, None
|
||||
return True, row.updated_at
|
||||
return False, False, None
|
||||
return True, _is_orphaned(row), row.updated_at
|
||||
|
||||
|
||||
async def fetch(
|
||||
|
|
@ -129,13 +176,36 @@ async def fetch(
|
|||
) -> tuple[bytes, datetime] | None:
|
||||
"""Returns (inner_blob, updated_at) or None if sync disabled.
|
||||
|
||||
Raises SyncCryptoError if the row exists but the outer wrap is
|
||||
unreadable (typically: pepper was rotated without re-encrypting).
|
||||
Raises SyncOrphanedError if the row's pepper fingerprint mismatches
|
||||
the current pepper, OR if a fingerprint-less legacy row fails to
|
||||
unwrap (which can only mean a pepper rotation, since the arg-order
|
||||
bug fix landed alongside the fingerprint column).
|
||||
|
||||
Raises SyncCryptoError if the fingerprint matched but the outer wrap
|
||||
still failed (genuine corruption or tampering).
|
||||
|
||||
On a successful unwrap of a fingerprint-less legacy row, the current
|
||||
pepper's fingerprint is backfilled so subsequent status checks
|
||||
correctly report healthy (and future rotations are detectable).
|
||||
"""
|
||||
row = await session.get(PortfolioSync, user_id)
|
||||
if row is None:
|
||||
return None
|
||||
inner = unwrap(user_id, row.outer_ciphertext, row.outer_nonce)
|
||||
if _is_orphaned(row):
|
||||
raise SyncOrphanedError("pepper fingerprint mismatch")
|
||||
legacy = row.pepper_fp is None
|
||||
try:
|
||||
inner = unwrap(user_id, row.outer_ciphertext, row.outer_nonce)
|
||||
except SyncCryptoError:
|
||||
if legacy:
|
||||
# Legacy row + decrypt fails = pepper rotated before the
|
||||
# fingerprint column existed. Same observable state as a
|
||||
# post-fingerprint orphan; report it that way.
|
||||
raise SyncOrphanedError("legacy row, decrypt failed")
|
||||
raise
|
||||
if legacy:
|
||||
row.pepper_fp = current_pepper_fp()
|
||||
await session.commit()
|
||||
return inner, row.updated_at
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -216,7 +216,23 @@
|
|||
if (r.status === 402) return { exists: false, paid: false };
|
||||
if (!r.ok) throw new Error('sync status: HTTP ' + r.status);
|
||||
const body = await r.json();
|
||||
return { exists: !!body.exists, updated_at: body.updated_at, paid: true };
|
||||
return {
|
||||
exists: !!body.exists,
|
||||
orphaned: !!body.orphaned,
|
||||
updated_at: body.updated_at,
|
||||
paid: true,
|
||||
};
|
||||
}
|
||||
|
||||
// Thrown by pullSync when the server reports the stored blob is
|
||||
// wrapped with a different server key (pepper rotation). Distinct
|
||||
// from BadPinError so the UI can swap the restore form for a
|
||||
// re-upload CTA instead of asking again for the PIN.
|
||||
class StaleBlobError extends Error {
|
||||
constructor(msg) {
|
||||
super(msg || 'Stored portfolio cannot be decrypted with the current server key.');
|
||||
this.name = 'StaleBlobError';
|
||||
}
|
||||
}
|
||||
|
||||
async function pushSync(pie, pin) {
|
||||
|
|
@ -245,6 +261,10 @@
|
|||
headers: { 'Accept': 'application/json' },
|
||||
});
|
||||
if (r.status === 404) return null;
|
||||
if (r.status === 410) {
|
||||
const body = await r.json().catch(() => ({}));
|
||||
throw new StaleBlobError(body.detail);
|
||||
}
|
||||
if (!r.ok) {
|
||||
const body = await r.json().catch(() => ({}));
|
||||
// 429 → server already throttling; bubble the message up unchanged.
|
||||
|
|
@ -273,6 +293,7 @@
|
|||
disableSync,
|
||||
clearCachedKey,
|
||||
BadPinError,
|
||||
StaleBlobError,
|
||||
// Exposed for tests / debugging:
|
||||
_packBlob: packBlob,
|
||||
_unpackBlob: unpackBlob,
|
||||
|
|
|
|||
|
|
@ -164,14 +164,52 @@
|
|||
}[c]));
|
||||
}
|
||||
|
||||
// Tiny one-shot flag the orphan-cleanup path sets so renderEmpty can
|
||||
// surface a plain-English "your previous backup needs to be re-uploaded"
|
||||
// line. Read-once; cleared as soon as it's shown.
|
||||
function consumeBackupExpiredNotice() {
|
||||
try {
|
||||
if (sessionStorage.getItem('cassandra.sync.backupExpired') === '1') {
|
||||
sessionStorage.removeItem('cassandra.sync.backupExpired');
|
||||
return true;
|
||||
}
|
||||
} catch (e) { /* ignore */ }
|
||||
return false;
|
||||
}
|
||||
|
||||
function renderEmpty(mount) {
|
||||
const expired = consumeBackupExpiredNotice();
|
||||
const notice = expired
|
||||
? '<div class="settings-row__hint" style="margin-bottom:8px;">' +
|
||||
'Your previous cloud backup couldn’t be restored on this server. ' +
|
||||
'Please re-upload your portfolio to refresh it.' +
|
||||
'</div>'
|
||||
: '';
|
||||
mount.innerHTML =
|
||||
'<div class="empty" style="padding:16px;">' +
|
||||
notice +
|
||||
'No portfolio loaded in this browser. ' +
|
||||
'<a href="/settings#import">Import a T212 CSV →</a>' +
|
||||
'</div>';
|
||||
}
|
||||
|
||||
// Silently remove an unrecoverable cloud blob and re-render. The user
|
||||
// sees the standard empty state with a soft one-liner — no jargon, no
|
||||
// extra buttons. The decision to remove is safe: the blob is already
|
||||
// permanently undecryptable, so we're cleaning up dead state, not
|
||||
// discarding user data.
|
||||
async function autoCleanStaleBlob(mount) {
|
||||
try {
|
||||
await window.CassandraSync.disableSync();
|
||||
} catch (e) {
|
||||
console.warn('cassandra.sync: auto-clean of stale blob failed', e);
|
||||
}
|
||||
try {
|
||||
sessionStorage.setItem('cassandra.sync.backupExpired', '1');
|
||||
} catch (e) { /* ignore */ }
|
||||
renderEmpty(mount);
|
||||
}
|
||||
|
||||
function renderRestoreFromCloud(mount, status) {
|
||||
const lastSynced = status.updated_at
|
||||
? new Date(status.updated_at).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
|
||||
|
|
@ -212,6 +250,12 @@
|
|||
savePie(pie);
|
||||
mountAndRender();
|
||||
} catch (e2) {
|
||||
if (e2 && e2.name === 'StaleBlobError') {
|
||||
// Pepper rotated since the blob was written — silently clean
|
||||
// up and fall through to the empty state with a soft notice.
|
||||
autoCleanStaleBlob(mount);
|
||||
return;
|
||||
}
|
||||
err.textContent = (e2 && e2.name === 'BadPinError')
|
||||
? 'Incorrect PIN.'
|
||||
: (e2.message || 'Could not restore.');
|
||||
|
|
@ -417,7 +461,14 @@
|
|||
catch (e) { console.warn('sync status check failed', e); }
|
||||
}
|
||||
if (status && status.paid && status.exists) {
|
||||
renderRestoreFromCloud(mount, status);
|
||||
if (status.orphaned) {
|
||||
// Pepper rotated since the blob was written — clean up
|
||||
// silently and show the standard empty state with a soft
|
||||
// "please re-upload" notice.
|
||||
autoCleanStaleBlob(mount);
|
||||
} else {
|
||||
renderRestoreFromCloud(mount, status);
|
||||
}
|
||||
} else {
|
||||
renderEmpty(mount);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -256,7 +256,18 @@
|
|||
}
|
||||
const valueEl = statusEl.querySelector('.settings-row__value');
|
||||
actionsEl.innerHTML = '';
|
||||
if (status.exists) {
|
||||
if (status.exists && status.orphaned) {
|
||||
// The stored blob can no longer be decrypted (server key rotated
|
||||
// since it was written). The data is permanently unrecoverable,
|
||||
// so silently clean up the dead row and re-render in the
|
||||
// standard "off" state — leaving a soft one-liner so the user
|
||||
// knows why they need to re-import.
|
||||
try { await window.CassandraSync.disableSync(); }
|
||||
catch (e) { console.warn('auto-clear stale sync failed', e); }
|
||||
setFeedback('Your previous cloud backup couldn’t be restored. Re-import your portfolio to enable cloud sync again.', true);
|
||||
await refresh();
|
||||
return;
|
||||
} else if (status.exists) {
|
||||
const when = status.updated_at
|
||||
? new Date(status.updated_at).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
|
||||
: '—';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue