{"_id": {"$oid": "69d4eb12b877594bc5718d04"}, "test_id": "T0773", "benchmark": "magazine_pages", "date": "2026-03-23", "tags": {"century": [20], "document-type": ["newspaper-page"], "language": ["en"], "layout": ["prose", "columns"], "script": ["latin"], "task": ["document-understanding"], "writing": ["printed"]}, "contributors": ["lea_kasper", "sorin_marti", "arno_bosse"], "hidden": false, "config": {"provider": "openai", "model": "gpt-4.1-mini", "dataclass": "MagazinePage", "temperature": 1.0, "role_description": "You are a historian with expertise in analyzing and summarizing historical magazine pages", "prompt_file": "prompt.txt", "legacy_test": false}, "prompt": "Extract all advertisements and return their bounding boxes.\nThe original size of the page is {width} x {height} pixels.", "results": {"text": "{\"advertisements\":[{\"box\":[45.0,42.0,1241.0,282.0]},{\"box\":[57.0,411.0,664.0,997.0]},{\"box\":[713.0,409.0,1224.0,995.0]},{\"box\":[45.0,1025.0,680.0,1303.0]},{\"box\":[690.0,1027.0,1227.0,1307.0]}]}", "model": "gpt-4.1-mini-2025-04-14", "provider": "openai", "finish_reason": "stop", "usage": {"input_tokens": 2586, "output_tokens": 67, "total_tokens": 2653, "input_cost_usd": 0.0010344000000000002, "output_cost_usd": 0.0001072, "estimated_cost_usd": 0.0011416000000000002}, "duration": 3.93045711517334, "timestamp": "2026-03-23T22:29:20.388285", "parsed": {"advertisements": [{"box": [45.0, 42.0, 1241.0, 282.0]}, {"box": [57.0, 411.0, 664.0, 997.0]}, {"box": [713.0, 409.0, 1224.0, 995.0]}, {"box": [45.0, 1025.0, 680.0, 1303.0]}, {"box": [690.0, 1027.0, 1227.0, 1307.0]}]}, "conversation_id": "e5ffd6b2-ab22-4cc8-912e-dd8bb6110e8d", "score": {"f1": 0.0, "precision": 0.0, "recall": 0.0, "mean_iou": 0.0, "true_positives": 0, "false_positives": 5, "false_negatives": 5, "iou_threshold": 0.5}, "raw_response": "{\"id\":\"chatcmpl-DMh7m5zrD8R3VeTVbujpZay6HiUqX\",\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"{\\\"advertisements\\\":[{\\\"box\\\":[45,42,1241,282]},{\\\"box\\\":[57,411,664,997]},{\\\"box\\\":[713,409,1224,995]},{\\\"box\\\":[45,1025,680,1303]},{\\\"box\\\":[690,1027,1227,1307]}]}\",\"refusal\":null,\"role\":\"assistant\",\"annotations\":[],\"audio\":null,\"function_call\":null,\"tool_calls\":null,\"parsed\":{\"advertisements\":[{\"box\":[45.0,42.0,1241.0,282.0]},{\"box\":[57.0,411.0,664.0,997.0]},{\"box\":[713.0,409.0,1224.0,995.0]},{\"box\":[45.0,1025.0,680.0,1303.0]},{\"box\":[690.0,1027.0,1227.0,1307.0]}]}}}],\"created\":1774301358,\"model\":\"gpt-4.1-mini-2025-04-14\",\"object\":\"chat.completion\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_53108d00e8\",\"usage\":{\"completion_tokens\":67,\"prompt_tokens\":2586,\"total_tokens\":2653,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":0},\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0}}}"}, "scoring": {"f1": 0.199, "precision": 0.21, "recall": 0.195, "mean_iou": 0.171, "true_positives": 16, "false_positives": 101, "false_negatives": 110, "num_pages": 46, "cost_summary": {"total_input_tokens": 118956, "total_output_tokens": 1759, "total_tokens": 120715, "input_cost_usd": 0.04758239999999998, "output_cost_usd": 0.002814399999999999, "total_cost_usd": 0.050396800000000005}}, "normalized_score": 19.900000000000002}