{"_id": {"$oid": "6a26c67c3572c06bc1455d0b"}, "test_id": "T0779", "benchmark": "magazine_pages", "date": "2026-03-23", "tags": {"century": [20], "document-type": ["newspaper-page"], "language": ["en"], "layout": ["prose", "columns"], "script": ["latin"], "task": ["document-understanding"], "writing": ["printed"]}, "contributors": ["lea_kasper", "sorin_marti", "arno_bosse"], "hidden": false, "config": {"provider": "genai", "model": "gemini-2.5-pro", "dataclass": "MagazinePage", "temperature": 0.0, "role_description": "You are a historian with expertise in analyzing and summarizing historical magazine pages", "prompt_file": "prompt.txt", "legacy_test": false}, "prompt": "Extract all advertisements and return their bounding boxes.\nThe original size of the page is {width} x {height} pixels.", "results": {"text": "{\"advertisements\":[{\"box\":[297.0,161.0,2330.0,682.0]},{\"box\":[297.0,706.0,1301.0,1215.0]},{\"box\":[1326.0,706.0,2330.0,1215.0]},{\"box\":[297.0,1239.0,1301.0,1748.0]},{\"box\":[1326.0,1239.0,2330.0,1748.0]},{\"box\":[297.0,1772.0,1301.0,2281.0]},{\"box\":[1326.0,1772.0,2330.0,2281.0]}]}", "model": "gemini-2.5-pro", "provider": "genai", "finish_reason": "STOP", "usage": {"input_tokens": 303, "output_tokens": 186, "total_tokens": 4271, "input_cost_usd": 0.00037874999999999996, "output_cost_usd": 0.0018599999999999999, "estimated_cost_usd": 0.00223875}, "duration": 27.199869394302368, "timestamp": "2026-03-23T22:28:08.771898", "parsed": {"advertisements": [{"box": [297, 161, 2330, 682]}, {"box": [297, 706, 1301, 1215]}, {"box": [1326, 706, 2330, 1215]}, {"box": [297, 1239, 1301, 1748]}, {"box": [1326, 1239, 2330, 1748]}, {"box": [297, 1772, 1301, 2281]}, {"box": [1326, 1772, 2330, 2281]}]}, "conversation_id": "295a9bf4-b22a-4b04-bcaa-279a635803e5", "score": {"f1": 0.714, "precision": 0.714, "recall": 0.714, "mean_iou": 0.643, "true_positives": 5, "false_positives": 2, "false_negatives": 2, "iou_threshold": 0.5}, "raw_response": "{\"sdk_http_response\":{\"headers\":{\"x-gemini-service-tier\":\"standard\",\"content-type\":\"application/json; charset=UTF-8\",\"vary\":\"Origin, X-Origin, Referer\",\"content-encoding\":\"gzip\",\"date\":\"Mon, 23 Mar 2026 21:28:09 GMT\",\"server\":\"scaffolding on HTTPServer2\",\"x-xss-protection\":\"0\",\"x-frame-options\":\"SAMEORIGIN\",\"x-content-type-options\":\"nosniff\",\"server-timing\":\"gfet4t7; dur=27029\",\"alt-svc\":\"h3=\\\":443\\\"; ma=2592000,h3-29=\\\":443\\\"; ma=2592000\",\"transfer-encoding\":\"chunked\"},\"body\":null},\"candidates\":[{\"content\":{\"parts\":[{\"media_resolution\":null,\"code_execution_result\":null,\"executable_code\":null,\"file_data\":null,\"function_call\":null,\"function_response\":null,\"inline_data\":null,\"text\":\"{\\\"advertisements\\\": [{\\\"box\\\": [297, 161, 2330, 682]}, {\\\"box\\\": [297, 706, 1301, 1215]}, {\\\"box\\\": [1326, 706, 2330, 1215]}, {\\\"box\\\": [297, 1239, 1301, 1748]}, {\\\"box\\\": [1326, 1239, 2330, 1748]}, {\\\"box\\\": [297, 1772, 1301, 2281]}, {\\\"box\\\": [1326, 1772, 2330, 2281]}]}\",\"thought\":null,\"thought_signature\":null,\"video_metadata\":null}],\"role\":\"model\"},\"citation_metadata\":null,\"finish_message\":null,\"token_count\":null,\"finish_reason\":\"STOP\",\"grounding_metadata\":null,\"avg_logprobs\":null,\"index\":0,\"logprobs_result\":null,\"safety_ratings\":null,\"url_context_metadata\":null}],\"create_time\":null,\"model_version\":\"gemini-2.5-pro\",\"prompt_feedback\":null,\"response_id\":\"abDBaeLvEaCInsEPh-aYyQs\",\"usage_metadata\":{\"cache_tokens_details\":null,\"cached_content_token_count\":null,\"candidates_token_count\":186,\"candidates_tokens_details\":null,\"prompt_token_count\":303,\"prompt_tokens_details\":[{\"modality\":\"TEXT\",\"token_count\":45},{\"modality\":\"IMAGE\",\"token_count\":258}],\"thoughts_token_count\":3782,\"tool_use_prompt_token_count\":null,\"tool_use_prompt_tokens_details\":null,\"total_token_count\":4271,\"traffic_type\":null},\"automatic_function_calling_history\":[],\"parsed\":{\"advertisements\":[{\"box\":[297,161,2330,682]},{\"box\":[297,706,1301,1215]},{\"box\":[1326,706,2330,1215]},{\"box\":[297,1239,1301,1748]},{\"box\":[1326,1239,2330,1748]},{\"box\":[297,1772,1301,2281]},{\"box\":[1326,1772,2330,2281]}]}}"}, "scoring": {"f1": 0.222, "precision": 0.233, "recall": 0.218, "mean_iou": 0.188, "true_positives": 17, "false_positives": 108, "false_negatives": 109, "num_pages": 46, "cost_summary": {"total_input_tokens": 13938, "total_output_tokens": 3129, "total_tokens": 17067, "input_cost_usd": 0.01742250000000001, "output_cost_usd": 0.031290000000000005, "total_cost_usd": 0.04871249999999999}}, "normalized_score": 22.2}