"""Pure-function tests for app.services.news.""" from __future__ import annotations import pytest pytest.importorskip("httpx") from datetime import datetime, timezone from pathlib import Path from app.services.news import Headline, _parse_date, dedupe, parse_feed FIXTURE = Path(__file__).parent / "fixtures" / "rss_sample.xml" def test_parse_feed_returns_real_items_only(): items = parse_feed("Sample", "world", FIXTURE.read_bytes()) titles = [h.title for h in items] assert "Brent crude jumps on Hormuz uncertainty" in titles assert "Fed signals caution as inflation re-accelerates" in titles # Empty-title row is dropped. assert all(t for t in titles) def test_parse_feed_uses_rfc822_dates(): items = parse_feed("Sample", "world", FIXTURE.read_bytes()) when = items[0].when assert when.tzinfo is not None assert when.year == 2026 def test_parse_date_atom_iso(): d = _parse_date("2026-05-15T12:34:56Z") assert d == datetime(2026, 5, 15, 12, 34, 56, tzinfo=timezone.utc) def test_headline_fingerprint_is_normalised(): h1 = Headline(datetime.now(timezone.utc), "S1", "c", " Hello WORLD ", "u1") h2 = Headline(datetime.now(timezone.utc), "S2", "c", "hello world", "u2") assert h1.fingerprint == h2.fingerprint def test_dedupe_keeps_first_by_url_or_title(): t = datetime.now(timezone.utc) hs = [ Headline(t, "A", "c", "Same headline", "https://a.example/1"), Headline(t, "B", "c", "Same headline", "https://b.example/2"), # title dupe Headline(t, "C", "c", "Other", "https://a.example/1"), # url dupe Headline(t, "D", "c", "Fresh", "https://d.example"), ] out = dedupe(hs) assert [h.source for h in out] == ["A", "D"]