jobs: per-row savepoint + aggregate logging in translation fan-out

Previously translate_log_for_active_languages and
translate_summary_for_active_languages added every successful
translation to the session and called session.commit() once at the
end. A single bad row (DB error, constraint violation, encoding
mismatch) rolled back the whole batch — losing all the languages that
had succeeded.

Wrap each row in session.begin_nested() so a per-row failure only
loses that one row. Track succeeded/failed counts and log them at the
end — escalating to error if zero succeeded out of N attempted, so
total failure surfaces in monitoring instead of just N warning lines.
This commit is contained in:
Giorgio Gilestro 2026-05-28 12:37:06 +02:00
parent 7348055d72
commit c5fb4525f3
2 changed files with 59 additions and 25 deletions

View file

@ -47,8 +47,8 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
Mirrors ``ai_log_job.translate_log_for_active_languages``: reads the
distinct non-en ``users.lang`` set, translates the English content
once per active language in parallel via ``asyncio.gather``, and
persists each result as an ``IndicatorSummaryTranslation`` row.
Per-language failures are logged but never raise.
persists each result as an ``IndicatorSummaryTranslation`` row in
its own savepoint so one bad row doesn't lose the rest.
"""
target_langs = sorted({l for l in ACTIVE_LANGUAGES if l != "en"})
if not target_langs:
@ -70,23 +70,40 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
for lang in active_langs
], return_exceptions=True)
succeeded = 0
failed = 0
for lang, result in zip(active_langs, results):
if isinstance(result, Exception):
log.warning("ind_summary.translate.failed",
lang=lang, summary_id=summary_id,
error=str(result)[:200])
failed += 1
continue
translated_md, llm_result = result
session.add(IndicatorSummaryTranslation(
summary_id=summary_id, lang=lang,
content=translated_md,
generated_at=utcnow(),
model=llm_result.model,
prompt_tokens=llm_result.prompt_tokens,
completion_tokens=llm_result.completion_tokens,
cost_usd=llm_result.cost_usd,
))
await session.commit()
try:
async with session.begin_nested():
session.add(IndicatorSummaryTranslation(
summary_id=summary_id, lang=lang,
content=translated_md,
generated_at=utcnow(),
model=llm_result.model,
prompt_tokens=llm_result.prompt_tokens,
completion_tokens=llm_result.completion_tokens,
cost_usd=llm_result.cost_usd,
))
await session.commit()
succeeded += 1
except Exception as exc:
log.warning("ind_summary.translate.persist_failed",
lang=lang, summary_id=summary_id, error=str(exc)[:200])
failed += 1
if failed and succeeded == 0:
log.error("ind_summary.translate.all_failed",
summary_id=summary_id, attempted=len(active_langs))
else:
log.info("ind_summary.translate.done",
summary_id=summary_id, succeeded=succeeded, failed=failed)
# Strip known meta-commentary openers the model sometimes leaks despite the