{"_id": {"$oid": "69d4eb12b877594bc5718cff"}, "test_id": "T0792", "benchmark": "magazine_pages", "date": "2026-03-23", "tags": {"century": [20], "document-type": ["newspaper-page"], "language": ["en"], "layout": ["prose", "columns"], "script": ["latin"], "task": ["document-understanding"], "writing": ["printed"]}, "contributors": ["lea_kasper", "sorin_marti", "arno_bosse"], "hidden": false, "config": {"provider": "openrouter", "model": "qwen/qwen3-vl-8b-instruct", "dataclass": "MagazinePage", "temperature": 0.0, "role_description": "You are a historian with expertise in analyzing and summarizing historical magazine pages", "prompt_file": "prompt.txt", "legacy_test": false}, "prompt": "Extract all advertisements and return their bounding boxes.\nThe original size of the page is {width} x {height} pixels.", "results": {"text": "{\"advertisements\":[{\"box\":[73.0,69.0,482.0,493.0]},{\"box\":[502.0,69.0,902.0,272.0]},{\"box\":[502.0,288.0,902.0,493.0]},{\"box\":[73.0,507.0,482.0,705.0]},{\"box\":[502.0,507.0,902.0,705.0]},{\"box\":[73.0,719.0,482.0,925.0]},{\"box\":[502.0,719.0,902.0,925.0]}]}", "model": "qwen/qwen3-vl-8b-instruct", "provider": "openai", "finish_reason": "stop", "usage": {"input_tokens": 2793, "output_tokens": 252, "total_tokens": 3045}, "duration": 11.19090986251831, "timestamp": "2026-03-23T22:50:17.839588", "parsed": {"advertisements": [{"box": [73.0, 69.0, 482.0, 493.0]}, {"box": [502.0, 69.0, 902.0, 272.0]}, {"box": [502.0, 288.0, 902.0, 493.0]}, {"box": [73.0, 507.0, 482.0, 705.0]}, {"box": [502.0, 507.0, 902.0, 705.0]}, {"box": [73.0, 719.0, 482.0, 925.0]}, {"box": [502.0, 719.0, 902.0, 925.0]}]}, "conversation_id": "580b63bd-e1f0-435e-b970-0016a3c65bf5", "score": {"f1": 0.0, "precision": 0.0, "recall": 0.0, "mean_iou": 0.0, "true_positives": 0, "false_positives": 7, "false_negatives": 7, "iou_threshold": 0.5}, "raw_response": "{\"id\":\"gen-1774302608-ekeglsynG3Uht7B7q6ZH\",\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"{\\n  \\\"advertisements\\\": [\\n    {\\n      \\\"box\\\": [\\n        73,\\n        69,\\n        482,\\n        493\\n      ]\\n    },\\n    {\\n      \\\"box\\\": [\\n        502,\\n        69,\\n        902,\\n        272\\n      ]\\n    },\\n    {\\n      \\\"box\\\": [\\n        502,\\n        288,\\n        902,\\n        493\\n      ]\\n    },\\n    {\\n      \\\"box\\\": [\\n        73,\\n        507,\\n        482,\\n        705\\n      ]\\n    },\\n    {\\n      \\\"box\\\": [\\n        502,\\n        507,\\n        902,\\n        705\\n      ]\\n    },\\n    {\\n      \\\"box\\\": [\\n        73,\\n        719,\\n        482,\\n        925\\n      ]\\n    },\\n    {\\n      \\\"box\\\": [\\n        502,\\n        719,\\n        902,\\n        925\\n      ]\\n    }\\n  ]\\n}\",\"refusal\":null,\"role\":\"assistant\",\"annotations\":null,\"audio\":null,\"function_call\":null,\"tool_calls\":null,\"parsed\":{\"advertisements\":[{\"box\":[73.0,69.0,482.0,493.0]},{\"box\":[502.0,69.0,902.0,272.0]},{\"box\":[502.0,288.0,902.0,493.0]},{\"box\":[73.0,507.0,482.0,705.0]},{\"box\":[502.0,507.0,902.0,705.0]},{\"box\":[73.0,719.0,482.0,925.0]},{\"box\":[502.0,719.0,902.0,925.0]}]},\"reasoning\":null},\"native_finish_reason\":\"stop\"}],\"created\":1774302608,\"model\":\"qwen/qwen3-vl-8b-instruct\",\"object\":\"chat.completion\",\"service_tier\":null,\"system_fingerprint\":null,\"usage\":{\"completion_tokens\":252,\"prompt_tokens\":2793,\"total_tokens\":3045,\"completion_tokens_details\":{\"accepted_prediction_tokens\":null,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":null,\"image_tokens\":0},\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0,\"cache_write_tokens\":0,\"video_tokens\":0},\"cost\":0.00034944,\"is_byok\":false,\"cost_details\":{\"upstream_inference_cost\":0.00034944,\"upstream_inference_prompt_cost\":0.00022344,\"upstream_inference_completions_cost\":0.000126}},\"provider\":\"Novita\"}"}, "scoring": {"f1": 0.0, "precision": 0.0, "recall": 0.0, "mean_iou": 0.0, "true_positives": 0, "false_positives": 112, "false_negatives": 126, "num_pages": 46, "cost_summary": {"total_input_tokens": 131223, "total_output_tokens": 4413, "total_tokens": 135636, "input_cost_usd": 0.0, "output_cost_usd": 0.0, "total_cost_usd": 0.00387457}}, "normalized_score": 0}