{"_id": {"$oid": "6a26c67c3572c06bc1455cf1"}, "test_id": "T0784", "benchmark": "magazine_pages", "date": "2026-03-23", "tags": {"century": [20], "document-type": ["newspaper-page"], "language": ["en"], "layout": ["prose", "columns"], "script": ["latin"], "task": ["document-understanding"], "writing": ["printed"]}, "contributors": ["lea_kasper", "sorin_marti", "arno_bosse"], "hidden": false, "config": {"provider": "genai", "model": "gemini-2.5-flash", "dataclass": "MagazinePage", "temperature": 0.0, "role_description": "You are a historian with expertise in analyzing and summarizing historical magazine pages", "prompt_file": "prompt.txt", "legacy_test": false}, "prompt": "Extract all advertisements and return their bounding boxes.\nThe original size of the page is {width} x {height} pixels.", "results": {"text": "{\"advertisements\":[{\"box\":[78.0,70.0,2370.0,830.0]},{\"box\":[78.0,278.0,1200.0,680.0]},{\"box\":[1218.0,278.0,2370.0,680.0]},{\"box\":[78.0,698.0,2370.0,860.0]}]}", "model": "gemini-2.5-flash", "provider": "genai", "finish_reason": "STOP", "usage": {"input_tokens": 303, "output_tokens": 98, "total_tokens": 1486, "input_cost_usd": 9.09e-05, "output_cost_usd": 0.000245, "estimated_cost_usd": 0.0003359}, "duration": 6.657160043716431, "timestamp": "2026-03-23T22:31:32.423731", "parsed": {"advertisements": [{"box": [78, 70, 2370, 830]}, {"box": [78, 278, 1200, 680]}, {"box": [1218, 278, 2370, 680]}, {"box": [78, 698, 2370, 860]}]}, "conversation_id": "1f3ef46e-f67a-4775-baed-c970bd1009aa", "score": {"f1": 0.25, "precision": 0.25, "recall": 0.25, "mean_iou": 0.647, "true_positives": 1, "false_positives": 3, "false_negatives": 3, "iou_threshold": 0.5}, "raw_response": "{\"sdk_http_response\":{\"headers\":{\"x-gemini-service-tier\":\"standard\",\"content-type\":\"application/json; charset=UTF-8\",\"vary\":\"Origin, X-Origin, Referer\",\"content-encoding\":\"gzip\",\"date\":\"Mon, 23 Mar 2026 21:31:33 GMT\",\"server\":\"scaffolding on HTTPServer2\",\"x-xss-protection\":\"0\",\"x-frame-options\":\"SAMEORIGIN\",\"x-content-type-options\":\"nosniff\",\"server-timing\":\"gfet4t7; dur=6478\",\"alt-svc\":\"h3=\\\":443\\\"; ma=2592000,h3-29=\\\":443\\\"; ma=2592000\",\"transfer-encoding\":\"chunked\"},\"body\":null},\"candidates\":[{\"content\":{\"parts\":[{\"media_resolution\":null,\"code_execution_result\":null,\"executable_code\":null,\"file_data\":null,\"function_call\":null,\"function_response\":null,\"inline_data\":null,\"text\":\"{\\\"advertisements\\\": [{\\\"box\\\": [78, 70, 2370, 830]}, {\\\"box\\\": [78, 278, 1200, 680]}, {\\\"box\\\": [1218, 278, 2370, 680]}, {\\\"box\\\": [78, 698, 2370, 860]}]}\",\"thought\":null,\"thought_signature\":null,\"video_metadata\":null}],\"role\":\"model\"},\"citation_metadata\":null,\"finish_message\":null,\"token_count\":null,\"finish_reason\":\"STOP\",\"grounding_metadata\":null,\"avg_logprobs\":null,\"index\":0,\"logprobs_result\":null,\"safety_ratings\":null,\"url_context_metadata\":null}],\"create_time\":null,\"model_version\":\"gemini-2.5-flash\",\"prompt_feedback\":null,\"response_id\":\"LrHBaZvEJ_WhnsEPoY2DsQ0\",\"usage_metadata\":{\"cache_tokens_details\":null,\"cached_content_token_count\":null,\"candidates_token_count\":98,\"candidates_tokens_details\":null,\"prompt_token_count\":303,\"prompt_tokens_details\":[{\"modality\":\"TEXT\",\"token_count\":45},{\"modality\":\"IMAGE\",\"token_count\":258}],\"thoughts_token_count\":1085,\"tool_use_prompt_token_count\":null,\"tool_use_prompt_tokens_details\":null,\"total_token_count\":1486,\"traffic_type\":null},\"automatic_function_calling_history\":[],\"parsed\":{\"advertisements\":[{\"box\":[78,70,2370,830]},{\"box\":[78,278,1200,680]},{\"box\":[1218,278,2370,680]},{\"box\":[78,698,2370,860]}]}}"}, "scoring": {"f1": 0.016, "precision": 0.016, "recall": 0.016, "mean_iou": 0.027, "true_positives": 3, "false_positives": 127, "false_negatives": 123, "num_pages": 46, "cost_summary": {"total_input_tokens": 13938, "total_output_tokens": 3233, "total_tokens": 17171, "input_cost_usd": 0.004181400000000002, "output_cost_usd": 0.008082500000000001, "total_cost_usd": 0.012263900000000005}}, "normalized_score": 1.6}