jobs: per-row savepoint + aggregate logging in translation fan-out
Previously, translate_log_for_active_languages and translate_summary_for_active_languages added every successful translation to the session and called session.commit() once at the end. A single bad row (DB error, constraint violation, encoding mismatch) rolled back the whole batch, losing every language that had already succeeded. Each row is now wrapped in session.begin_nested() so that a per-row failure loses only that one row. Succeeded/failed counts are tracked and logged at the end, escalating to error level when zero out of N attempted rows succeeded, so that a total failure surfaces in monitoring rather than as just N warning lines.
This commit is contained in:
parent
7348055d72
commit
c5fb4525f3
2 changed files with 59 additions and 25 deletions
|
|
@ -40,9 +40,9 @@ async def translate_log_for_active_languages(session, log_id: int) -> None:
|
|||
Reads ``users.lang`` (deduplicated, restricted to ACTIVE_LANGUAGES
|
||||
minus English), one translation call per language in parallel via
|
||||
``asyncio.gather``, persists each successful result as a
|
||||
``StrategicLogTranslation`` row. Per-language failures are logged
|
||||
but never raise — the strategic log itself is already committed at
|
||||
this point and translation is a best-effort enhancement.
|
||||
``StrategicLogTranslation`` row. Each row is committed in its own
|
||||
savepoint so a per-language LLM error or DB error doesn't roll back
|
||||
the languages that already succeeded.
|
||||
|
||||
The job orchestrator calls this AFTER the English ``StrategicLog``
|
||||
row is committed; pass the row's ``id`` in.
|
||||
|
|
@ -68,22 +68,39 @@ async def translate_log_for_active_languages(session, log_id: int) -> None:
|
|||
for lang in active_langs
|
||||
], return_exceptions=True)
|
||||
|
||||
succeeded = 0
|
||||
failed = 0
|
||||
for lang, result in zip(active_langs, results):
|
||||
if isinstance(result, Exception):
|
||||
log.warning("log.translate.failed", lang=lang, log_id=log_id,
|
||||
error=str(result)[:200])
|
||||
failed += 1
|
||||
continue
|
||||
translated_md, llm_result = result
|
||||
session.add(StrategicLogTranslation(
|
||||
log_id=log_id, lang=lang,
|
||||
content=translated_md,
|
||||
generated_at=utcnow(),
|
||||
model=llm_result.model,
|
||||
prompt_tokens=llm_result.prompt_tokens,
|
||||
completion_tokens=llm_result.completion_tokens,
|
||||
cost_usd=llm_result.cost_usd,
|
||||
))
|
||||
await session.commit()
|
||||
try:
|
||||
async with session.begin_nested():
|
||||
session.add(StrategicLogTranslation(
|
||||
log_id=log_id, lang=lang,
|
||||
content=translated_md,
|
||||
generated_at=utcnow(),
|
||||
model=llm_result.model,
|
||||
prompt_tokens=llm_result.prompt_tokens,
|
||||
completion_tokens=llm_result.completion_tokens,
|
||||
cost_usd=llm_result.cost_usd,
|
||||
))
|
||||
await session.commit()
|
||||
succeeded += 1
|
||||
except Exception as exc:
|
||||
log.warning("log.translate.persist_failed",
|
||||
lang=lang, log_id=log_id, error=str(exc)[:200])
|
||||
failed += 1
|
||||
|
||||
if failed and succeeded == 0:
|
||||
log.error("log.translate.all_failed",
|
||||
log_id=log_id, attempted=len(active_langs))
|
||||
else:
|
||||
log.info("log.translate.done",
|
||||
log_id=log_id, succeeded=succeeded, failed=failed)
|
||||
|
||||
|
||||
async def run() -> None:
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
|
|||
Mirrors ``ai_log_job.translate_log_for_active_languages``: reads the
|
||||
distinct non-en ``users.lang`` set, translates the English content
|
||||
once per active language in parallel via ``asyncio.gather``, and
|
||||
persists each result as an ``IndicatorSummaryTranslation`` row.
|
||||
Per-language failures are logged but never raise.
|
||||
persists each result as an ``IndicatorSummaryTranslation`` row in
|
||||
its own savepoint so one bad row doesn't lose the rest.
|
||||
"""
|
||||
target_langs = sorted({l for l in ACTIVE_LANGUAGES if l != "en"})
|
||||
if not target_langs:
|
||||
|
|
@ -70,23 +70,40 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
|
|||
for lang in active_langs
|
||||
], return_exceptions=True)
|
||||
|
||||
succeeded = 0
|
||||
failed = 0
|
||||
for lang, result in zip(active_langs, results):
|
||||
if isinstance(result, Exception):
|
||||
log.warning("ind_summary.translate.failed",
|
||||
lang=lang, summary_id=summary_id,
|
||||
error=str(result)[:200])
|
||||
failed += 1
|
||||
continue
|
||||
translated_md, llm_result = result
|
||||
session.add(IndicatorSummaryTranslation(
|
||||
summary_id=summary_id, lang=lang,
|
||||
content=translated_md,
|
||||
generated_at=utcnow(),
|
||||
model=llm_result.model,
|
||||
prompt_tokens=llm_result.prompt_tokens,
|
||||
completion_tokens=llm_result.completion_tokens,
|
||||
cost_usd=llm_result.cost_usd,
|
||||
))
|
||||
await session.commit()
|
||||
try:
|
||||
async with session.begin_nested():
|
||||
session.add(IndicatorSummaryTranslation(
|
||||
summary_id=summary_id, lang=lang,
|
||||
content=translated_md,
|
||||
generated_at=utcnow(),
|
||||
model=llm_result.model,
|
||||
prompt_tokens=llm_result.prompt_tokens,
|
||||
completion_tokens=llm_result.completion_tokens,
|
||||
cost_usd=llm_result.cost_usd,
|
||||
))
|
||||
await session.commit()
|
||||
succeeded += 1
|
||||
except Exception as exc:
|
||||
log.warning("ind_summary.translate.persist_failed",
|
||||
lang=lang, summary_id=summary_id, error=str(exc)[:200])
|
||||
failed += 1
|
||||
|
||||
if failed and succeeded == 0:
|
||||
log.error("ind_summary.translate.all_failed",
|
||||
summary_id=summary_id, attempted=len(active_langs))
|
||||
else:
|
||||
log.info("ind_summary.translate.done",
|
||||
summary_id=summary_id, succeeded=succeeded, failed=failed)
|
||||
|
||||
|
||||
# Strip known meta-commentary openers the model sometimes leaks despite the
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue