{"_id": {"$oid": "69d4eb12b877594bc5718cd8"}, "test_id": "T0717", "benchmark": "magazine_pages", "date": "2026-03-21", "tags": {"century": [20], "document-type": ["newspaper-page"], "language": ["en"], "layout": ["prose", "columns"], "script": ["latin"], "task": ["document-understanding"], "writing": ["printed"]}, "contributors": ["lea_kasper", "sorin_marti", "arno_bosse"], "hidden": false, "config": {"provider": "openai", "model": "gpt-4o", "dataclass": "MagazinePage", "temperature": 1.0, "role_description": "You are a historian with expertise in analyzing and summarizing historical magazine pages", "prompt_file": "prompt.txt", "legacy_test": false}, "prompt": "Extract all advertisements and return their bounding boxes.\nThe original size of the page is {width} x {height} pixels.", "results": {"text": "{\"advertisements\":[{\"box\":[202.0,108.0,2272.0,1090.0]},{\"box\":[220.0,1112.0,1250.0,1312.0]},{\"box\":[1308.0,1110.0,2250.0,1314.0]},{\"box\":[208.0,1332.0,1256.0,1502.0]},{\"box\":[1296.0,1334.0,2244.0,1496.0]},{\"box\":[200.0,1518.0,2272.0,2150.0]}]}", "model": "gpt-4o-2024-08-06", "provider": "openai", "finish_reason": "stop", "usage": {"input_tokens": 1305, "output_tokens": 90, "total_tokens": 1395, "input_cost_usd": 0.0032624999999999998, "output_cost_usd": 0.0009000000000000001, "estimated_cost_usd": 0.0041624999999999995}, "duration": 5.963014125823975, "timestamp": "2026-03-21T21:50:45.536183", "parsed": {"advertisements": [{"box": [202.0, 108.0, 2272.0, 1090.0]}, {"box": [220.0, 1112.0, 1250.0, 1312.0]}, {"box": [1308.0, 1110.0, 2250.0, 1314.0]}, {"box": [208.0, 1332.0, 1256.0, 1502.0]}, {"box": [1296.0, 1334.0, 2244.0, 1496.0]}, {"box": [200.0, 1518.0, 2272.0, 2150.0]}]}, "conversation_id": "334ac4b3-2265-4aaf-a9d8-429201458f8f", "score": {"f1": 0.182, "precision": 0.167, "recall": 0.2, "mean_iou": 0.527, "true_positives": 1, "false_positives": 5, "false_negatives": 4, "iou_threshold": 0.5}, "raw_response": "{\"id\":\"chatcmpl-DLxZLAO3aSj0CbzAqTsvepQeILGin\",\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"{\\\"advertisements\\\":[{\\\"box\\\":[202,108,2272,1090]},{\\\"box\\\":[220,1112,1250,1312]},{\\\"box\\\":[1308,1110,2250,1314]},{\\\"box\\\":[208,1332,1256,1502]},{\\\"box\\\":[1296,1334,2244,1496]},{\\\"box\\\":[200,1518,2272,2150]}]}\",\"refusal\":null,\"role\":\"assistant\",\"annotations\":[],\"audio\":null,\"function_call\":null,\"tool_calls\":null,\"parsed\":{\"advertisements\":[{\"box\":[202.0,108.0,2272.0,1090.0]},{\"box\":[220.0,1112.0,1250.0,1312.0]},{\"box\":[1308.0,1110.0,2250.0,1314.0]},{\"box\":[208.0,1332.0,1256.0,1502.0]},{\"box\":[1296.0,1334.0,2244.0,1496.0]},{\"box\":[200.0,1518.0,2272.0,2150.0]}]}}}],\"created\":1774126243,\"model\":\"gpt-4o-2024-08-06\",\"object\":\"chat.completion\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_b920207102\",\"usage\":{\"completion_tokens\":90,\"prompt_tokens\":1305,\"total_tokens\":1395,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":0},\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0}}}"}, "scoring": {"f1": 0.398, "precision": 0.417, "recall": 0.388, "mean_iou": 0.368, "true_positives": 31, "false_positives": 80, "false_negatives": 95, "num_pages": 46, "cost_summary": {"total_input_tokens": 60030, "total_output_tokens": 1727, "total_tokens": 61757, "input_cost_usd": 0.15007500000000004, "output_cost_usd": 0.017269999999999994, "total_cost_usd": 0.167345}}, "normalized_score": 39.800000000000004}