{"_id": {"$oid": "6a26c67c3572c06bc1455d86"}, "test_id": "T0979", "benchmark": "company_lists", "date": "2026-04-21", "tags": {"document-type": ["book-page"], "writing": ["printed"], "century": [20], "language": ["en", "de"], "layout": ["list"], "entry-type": ["company"], "task": ["information-extraction"]}, "contributors": ["lea_kasper", "sorin_marti"], "hidden": false, "config": {"provider": "openrouter", "model": "qwen/qwen3.5-flash-02-23", "dataclass": "ListPage", "temperature": 0.5, "role_description": "You are a Historian", "prompt_file": "prompt_min.txt", "legacy_test": false}, "prompt": "- Answer in valid JSON.\n- The page ID is given as {page_id}.\n", "results": {"text": "{\"page_id\":\"89\",\"entries\":[{\"entry_id\":\"1\",\"company_name\":\"C. Troeber-Ruesch\",\"location\":\"Zurich\"},{\"entry_id\":\"2\",\"company_name\":\"Unitraco Ltd.\",\"location\":\"Zurich\"},{\"entry_id\":\"3\",\"company_name\":\"H. Ursprung\",\"location\":\"Zurich\"},{\"entry_id\":\"4\",\"company_name\":\"Wettler & Frey\",\"location\":\"Zurich\"},{\"entry_id\":\"5\",\"company_name\":\"Paul W\u00fcthrich\",\"location\":\"Lausanne\"},{\"entry_id\":\"6\",\"company_name\":\"Hultons Press Ltd.\",\"location\":\"London\"},{\"entry_id\":\"7\",\"company_name\":\"George Fischer Ltd.\",\"location\":\"Schaffhausen\"},{\"entry_id\":\"8\",\"company_name\":\"Louis Martin\",\"location\":\"Lausanne\"},{\"entry_id\":\"9\",\"company_name\":\"Techag Ltd.\",\"location\":\"Zurich\"},{\"entry_id\":\"10\",\"company_name\":\"Usine Fega\",\"location\":\"Zurich\"},{\"entry_id\":\"11\",\"company_name\":\"R. Amiguet-Peter\",\"location\":\"Lausanne\"}]}", "model": "qwen/qwen3.5-flash-20260224", "provider": "openrouter", "finish_reason": "stop", "usage": {"input_tokens": 2759, "output_tokens": 433, "total_tokens": 3192, "input_cost_usd": 0.00017933500000000003, "output_cost_usd": 0.00011258000000000001, "estimated_cost_usd": 0.000291915}, "duration": 4.283410549163818, "timestamp": "2026-04-21T13:42:38.782348", "parsed": {"page_id": "89", "entries": [{"entry_id": "1", "company_name": "C. Troeber-Ruesch", "location": "Zurich"}, {"entry_id": "2", "company_name": "Unitraco Ltd.", "location": "Zurich"}, {"entry_id": "3", "company_name": "H. Ursprung", "location": "Zurich"}, {"entry_id": "4", "company_name": "Wettler & Frey", "location": "Zurich"}, {"entry_id": "5", "company_name": "Paul W\u00fcthrich", "location": "Lausanne"}, {"entry_id": "6", "company_name": "Hultons Press Ltd.", "location": "London"}, {"entry_id": "7", "company_name": "George Fischer Ltd.", "location": "Schaffhausen"}, {"entry_id": "8", "company_name": "Louis Martin", "location": "Lausanne"}, {"entry_id": "9", "company_name": "Techag Ltd.", "location": "Zurich"}, {"entry_id": "10", "company_name": "Usine Fega", "location": "Zurich"}, {"entry_id": "11", "company_name": "R. Amiguet-Peter", "location": "Lausanne"}]}, "conversation_id": "7c224735-d5cb-4b49-903c-56401e6fd754", "score": {"f1_score": 0.56, "precision": 0.5588235294117647, "recall": 0.5588235294117647, "true_positives": 19, "false_positives": 15, "false_negatives": 15, "field_scores": {"entries[9].entry_id": {"response": "10", "ground_truth": "156089_1321097_125-10", "score": 0.17391304347826086}, "entries[5].location": {"response": "London", "ground_truth": "London E. C. 4", "score": 0.6}, "entries[7].location": {"response": "Lausanne", "ground_truth": "Lausanne", "score": 1.0}, "entries[10].company_name": {"response": "R. Amiguet-Peter", "ground_truth": "R. Amiguet-Peter", "score": 1.0}, "entries[0].company_name": {"response": "C. Troeber-Ruesch", "ground_truth": "C. Troeber-Ruesch", "score": 1.0}, "entries[3].company_name": {"response": "Wettler & Frey", "ground_truth": "Wettler & Frey", "score": 1.0}, "entries[0].entry_id": {"response": "1", "ground_truth": "156089_1321097_125-1", "score": 0.09523809523809523}, "page_id": {"response": "89", "ground_truth": "156089_1321097_125", "score": 0.19999999999999996}, "entries[4].entry_id": {"response": "5", "ground_truth": "156089_1321097_125-5", "score": 0.09523809523809523}, "entries[4].company_name": {"response": "Paul W\u00fcthrich", "ground_truth": "Paul W\u00fcthrich", "score": 1.0}, "entries[4].location": {"response": "Lausanne", "ground_truth": "Lausanne", "score": 1.0}, "entries[6].company_name": {"response": "George Fischer Ltd.", "ground_truth": "George Fischer Ltd.", "score": 1.0}, "entries[2].entry_id": {"response": "3", "ground_truth": "156089_1321097_125-3", "score": 0.09523809523809523}, "entries[9].company_name": {"response": "Usine Fega", "ground_truth": "Usine Fega", "score": 1.0}, "entries[9].location": {"response": "Zurich", "ground_truth": "Zurich", "score": 1.0}, "entries[10].entry_id": {"response": "11", "ground_truth": "156089_1321097_125-11", "score": 0.17391304347826086}, "entries[2].location": {"response": "Zurich", "ground_truth": "Zurich", "score": 1.0}, "entries[2].company_name": {"response": "H. Ursprung", "ground_truth": "H. Ursprung", "score": 1.0}, "entries[8].location": {"response": "Zurich", "ground_truth": "Zurich", "score": 1.0}, "entries[8].entry_id": {"response": "9", "ground_truth": "156089_1321097_125-9", "score": 0.09523809523809523}, "entries[5].entry_id": {"response": "6", "ground_truth": "156089_1321097_125-6", "score": 0.09523809523809523}, "entries[1].company_name": {"response": "Unitraco Ltd.", "ground_truth": "Unitraco Ltd.", "score": 1.0}, "entries[3].entry_id": {"response": "4", "ground_truth": "156089_1321097_125-4", "score": 0.09523809523809523}, "entries[3].location": {"response": "Zurich", "ground_truth": "Zurich 6", "score": 0.8571428571428572}, "entries[1].location": {"response": "Zurich", "ground_truth": "Zurich", "score": 1.0}, "entries[5].company_name": {"response": "Hultons Press Ltd.", "ground_truth": "Hultons Press Ltd.", "score": 1.0}, "entries[8].company_name": {"response": "Techag Ltd.", "ground_truth": "Techag Ltd.", "score": 1.0}, "entries[6].location": {"response": "Schaffhausen", "ground_truth": "Schaffhausen", "score": 1.0}, "entries[7].entry_id": {"response": "8", "ground_truth": "156089_1321097_125-8", "score": 0.09523809523809523}, "entries[6].entry_id": {"response": "7", "ground_truth": "156089_1321097_125-7", "score": 0.09523809523809523}, "entries[10].location": {"response": "Lausanne", "ground_truth": "Lausanne", "score": 1.0}, "entries[7].company_name": {"response": "Louis Martin", "ground_truth": "Louis Martin", "score": 1.0}, "entries[1].entry_id": {"response": "2", "ground_truth": "156089_1321097_125-2", "score": 0.09523809523809523}, "entries[0].location": {"response": "Zurich", "ground_truth": "K\u00fcsnacht (Zurich)", "score": 0.5217391304347826}}, "total_fields": 34}, "raw_response": "{\"id\":\"gen-1776771754-9KM98x4yuM2pSJ6Pp9qQ\",\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"{\\n  \\\"page_id\\\": \\\"89\\\",\\n  \\\"entries\\\": [\\n    {\\n      \\\"entry_id\\\": \\\"1\\\",\\n      \\\"company_name\\\": \\\"C. Troeber-Ruesch\\\",\\n      \\\"location\\\": \\\"Zurich\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"2\\\",\\n      \\\"company_name\\\": \\\"Unitraco Ltd.\\\",\\n      \\\"location\\\": \\\"Zurich\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"3\\\",\\n      \\\"company_name\\\": \\\"H. Ursprung\\\",\\n      \\\"location\\\": \\\"Zurich\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"4\\\",\\n      \\\"company_name\\\": \\\"Wettler & Frey\\\",\\n      \\\"location\\\": \\\"Zurich\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"5\\\",\\n      \\\"company_name\\\": \\\"Paul W\u00fcthrich\\\",\\n      \\\"location\\\": \\\"Lausanne\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"6\\\",\\n      \\\"company_name\\\": \\\"Hultons Press Ltd.\\\",\\n      \\\"location\\\": \\\"London\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"7\\\",\\n      \\\"company_name\\\": \\\"George Fischer Ltd.\\\",\\n      \\\"location\\\": \\\"Schaffhausen\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"8\\\",\\n      \\\"company_name\\\": \\\"Louis Martin\\\",\\n      \\\"location\\\": \\\"Lausanne\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"9\\\",\\n      \\\"company_name\\\": \\\"Techag Ltd.\\\",\\n      \\\"location\\\": \\\"Zurich\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"10\\\",\\n      \\\"company_name\\\": \\\"Usine Fega\\\",\\n      \\\"location\\\": \\\"Zurich\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"11\\\",\\n      \\\"company_name\\\": \\\"R. Amiguet-Peter\\\",\\n      \\\"location\\\": \\\"Lausanne\\\"\\n    }\\n  ]\\n}\",\"refusal\":null,\"role\":\"assistant\",\"annotations\":null,\"audio\":null,\"function_call\":null,\"tool_calls\":null,\"parsed\":{\"page_id\":\"89\",\"entries\":[{\"entry_id\":\"1\",\"company_name\":\"C. Troeber-Ruesch\",\"location\":\"Zurich\"},{\"entry_id\":\"2\",\"company_name\":\"Unitraco Ltd.\",\"location\":\"Zurich\"},{\"entry_id\":\"3\",\"company_name\":\"H. Ursprung\",\"location\":\"Zurich\"},{\"entry_id\":\"4\",\"company_name\":\"Wettler & Frey\",\"location\":\"Zurich\"},{\"entry_id\":\"5\",\"company_name\":\"Paul W\u00fcthrich\",\"location\":\"Lausanne\"},{\"entry_id\":\"6\",\"company_name\":\"Hultons Press Ltd.\",\"location\":\"London\"},{\"entry_id\":\"7\",\"company_name\":\"George Fischer Ltd.\",\"location\":\"Schaffhausen\"},{\"entry_id\":\"8\",\"company_name\":\"Louis Martin\",\"location\":\"Lausanne\"},{\"entry_id\":\"9\",\"company_name\":\"Techag Ltd.\",\"location\":\"Zurich\"},{\"entry_id\":\"10\",\"company_name\":\"Usine Fega\",\"location\":\"Zurich\"},{\"entry_id\":\"11\",\"company_name\":\"R. Amiguet-Peter\",\"location\":\"Lausanne\"}]},\"reasoning\":null},\"native_finish_reason\":\"stop\"}],\"created\":1776771754,\"model\":\"qwen/qwen3.5-flash-20260224\",\"object\":\"chat.completion\",\"service_tier\":null,\"system_fingerprint\":null,\"usage\":{\"completion_tokens\":433,\"prompt_tokens\":2759,\"total_tokens\":3192,\"completion_tokens_details\":{\"accepted_prediction_tokens\":null,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":null,\"image_tokens\":0},\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0,\"cache_write_tokens\":0,\"video_tokens\":0},\"cost\":0.000291915,\"is_byok\":false,\"cost_details\":{\"upstream_inference_cost\":0.000291915,\"upstream_inference_prompt_cost\":0.000179335,\"upstream_inference_completions_cost\":0.00011258}},\"provider\":\"Alibaba\"}"}, "scoring": {"f1_micro": 0.5066931085770947, "f1_macro": 0.498, "micro_precision": 0.49853658536585366, "micro_recall": 0.5151209677419355, "total_instances": 15, "total_tp": 511, "total_fp": 514, "total_fn": 481, "cost_summary": {"total_input_tokens": 41296, "total_output_tokens": 14317, "total_tokens": 55613, "input_cost_usd": 0.00268424, "output_cost_usd": 0.0037224199999999997, "total_cost_usd": 0.006406660000000001}}, "normalized_score": 49.8}