diff --git a/app/jobs/ai_log_job.py b/app/jobs/ai_log_job.py index 2c0277e..9b5683e 100644 --- a/app/jobs/ai_log_job.py +++ b/app/jobs/ai_log_job.py @@ -40,9 +40,9 @@ async def translate_log_for_active_languages(session, log_id: int) -> None: Reads ``users.lang`` (deduplicated, restricted to ACTIVE_LANGUAGES minus English), one translation call per language in parallel via ``asyncio.gather``, persists each successful result as a - ``StrategicLogTranslation`` row. Per-language failures are logged - but never raise — the strategic log itself is already committed at - this point and translation is a best-effort enhancement. + ``StrategicLogTranslation`` row. Each row is committed in its own + savepoint so a per-language LLM error or DB error doesn't roll back + the languages that already succeeded. The job orchestrator calls this AFTER the English ``StrategicLog`` row is committed; pass the row's ``id`` in. @@ -68,22 +68,39 @@ async def translate_log_for_active_languages(session, log_id: int) -> None: for lang in active_langs ], return_exceptions=True) + succeeded = 0 + failed = 0 for lang, result in zip(active_langs, results): if isinstance(result, Exception): log.warning("log.translate.failed", lang=lang, log_id=log_id, error=str(result)[:200]) + failed += 1 continue translated_md, llm_result = result - session.add(StrategicLogTranslation( - log_id=log_id, lang=lang, - content=translated_md, - generated_at=utcnow(), - model=llm_result.model, - prompt_tokens=llm_result.prompt_tokens, - completion_tokens=llm_result.completion_tokens, - cost_usd=llm_result.cost_usd, - )) - await session.commit() + try: + async with session.begin_nested(): + session.add(StrategicLogTranslation( + log_id=log_id, lang=lang, + content=translated_md, + generated_at=utcnow(), + model=llm_result.model, + prompt_tokens=llm_result.prompt_tokens, + completion_tokens=llm_result.completion_tokens, + cost_usd=llm_result.cost_usd, + )) + await session.commit() + succeeded += 1 + except Exception as exc: + log.warning("log.translate.persist_failed", + lang=lang, log_id=log_id, error=str(exc)[:200]) + failed += 1 + + if failed and succeeded == 0: + log.error("log.translate.all_failed", + log_id=log_id, attempted=len(active_langs)) + else: + log.info("log.translate.done", + log_id=log_id, succeeded=succeeded, failed=failed) async def run() -> None: diff --git a/app/jobs/indicator_summary_job.py b/app/jobs/indicator_summary_job.py index fb21f24..97c5f80 100644 --- a/app/jobs/indicator_summary_job.py +++ b/app/jobs/indicator_summary_job.py @@ -47,8 +47,8 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No Mirrors ``ai_log_job.translate_log_for_active_languages``: reads the distinct non-en ``users.lang`` set, translates the English content once per active language in parallel via ``asyncio.gather``, and - persists each result as an ``IndicatorSummaryTranslation`` row. - Per-language failures are logged but never raise. + persists each result as an ``IndicatorSummaryTranslation`` row in + its own savepoint so one bad row doesn't lose the rest. """ target_langs = sorted({l for l in ACTIVE_LANGUAGES if l != "en"}) if not target_langs: @@ -70,23 +70,40 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No for lang in active_langs ], return_exceptions=True) + succeeded = 0 + failed = 0 for lang, result in zip(active_langs, results): if isinstance(result, Exception): log.warning("ind_summary.translate.failed", lang=lang, summary_id=summary_id, error=str(result)[:200]) + failed += 1 continue translated_md, llm_result = result - session.add(IndicatorSummaryTranslation( - summary_id=summary_id, lang=lang, - content=translated_md, - generated_at=utcnow(), - model=llm_result.model, - prompt_tokens=llm_result.prompt_tokens, - completion_tokens=llm_result.completion_tokens, - cost_usd=llm_result.cost_usd, - )) - await session.commit() + try: + async with session.begin_nested(): + session.add(IndicatorSummaryTranslation( + summary_id=summary_id, lang=lang, + content=translated_md, + generated_at=utcnow(), + model=llm_result.model, + prompt_tokens=llm_result.prompt_tokens, + completion_tokens=llm_result.completion_tokens, + cost_usd=llm_result.cost_usd, + )) + await session.commit() + succeeded += 1 + except Exception as exc: + log.warning("ind_summary.translate.persist_failed", + lang=lang, summary_id=summary_id, error=str(exc)[:200]) + failed += 1 + + if failed and succeeded == 0: + log.error("ind_summary.translate.all_failed", + summary_id=summary_id, attempted=len(active_langs)) + else: + log.info("ind_summary.translate.done", + summary_id=summary_id, succeeded=succeeded, failed=failed) # Strip known meta-commentary openers the model sometimes leaks despite the