Strip a markdown code fence from the reviewer reply before JSON parsing.

Summary of what this patch shows:
  * review_read(): result.content is stripped; if it starts with a code
    fence, everything up to and including the first newline is dropped and a
    trailing fence is removed. The cleaned text (raw) is what json.loads
    receives. The parse-failure log preview still uses result.content.
  * tests: a new case sends a json-fenced verdict through the
    _mock_post(handler) transport and asserts clean is True and
    reason == "polished read".

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts agree with the @@ headers,
but indentation is reconstructed from Python syntax. Assumed, please confirm
against blobs 401096d / c437678:
  - the closing-fence check is nested under the startswith check;
  - the alignment of the cost_usd=None continuation line;
  - the two asserts in the new test sit outside the "async with" block.

NOTE(review): a fenced reply that contains no newline keeps its opening
fence (first_nl == -1), so json.loads raises and the existing
"reviewer returned non-JSON" path runs. Presumably acceptable as fail-safe
behaviour; confirm.

diff --git a/app/services/output_review.py b/app/services/output_review.py
index fe22e6d..401096d 100644
--- a/app/services/output_review.py
+++ b/app/services/output_review.py
@@ -114,8 +114,22 @@ async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
         return Verdict(clean=False, reason=f"reviewer error: {str(e)[:80]}",
                        cost_usd=None)
 
+    # Haiku (and several other models) occasionally wrap their JSON
+    # output in a markdown code fence even with response_format set —
+    # ```json\n{...}\n``` — so strip a single leading/trailing fence
+    # before parsing. We do this defensively for any model; it's a
+    # no-op for callers that already emit bare JSON.
+    raw = result.content.strip()
+    if raw.startswith("```"):
+        first_nl = raw.find("\n")
+        if first_nl != -1:
+            raw = raw[first_nl + 1:]
+        if raw.rstrip().endswith("```"):
+            raw = raw.rstrip()[:-3].rstrip()
+    raw = raw.strip()
+
     try:
-        parsed = json.loads(result.content)
+        parsed = json.loads(raw)
     except json.JSONDecodeError:
         log.warning("review.parse_failed", preview=result.content[:200])
         return Verdict(clean=False, reason="reviewer returned non-JSON",
diff --git a/tests/test_output_review.py b/tests/test_output_review.py
index 4e6fa4b..c437678 100644
--- a/tests/test_output_review.py
+++ b/tests/test_output_review.py
@@ -109,6 +109,25 @@ async def test_review_unclean_verdict(monkeypatch):
     assert "chain of thought" in v.reason
 
 
+@pytest.mark.asyncio
+async def test_review_strips_markdown_fence_around_json(monkeypatch):
+    """Haiku (and friends) sometimes wrap JSON in ```json ... ``` even
+    when response_format is set. The parser needs to peel that off
+    before json.loads or it'll reject otherwise-valid verdicts."""
+    _configure(monkeypatch)
+    fenced = '```json\n{"clean": true, "reason": "polished read"}\n```'
+    def handler(_req):
+        return httpx.Response(200, json={
+            "choices": [{"message": {"content": fenced},
+                         "finish_reason": "stop"}],
+            "usage": {"prompt_tokens": 50, "completion_tokens": 18, "cost": 0.0006},
+        })
+    async with httpx.AsyncClient(transport=_mock_post(handler)) as client:
+        v = await review_read(client, "Markets are pricing tighter policy.")
+    assert v.clean is True
+    assert v.reason == "polished read"
+
+
 @pytest.mark.asyncio
 async def test_review_failsafe_on_malformed_json(monkeypatch):
     """Reviewer returned prose instead of JSON → conservative reject."""