{"_id": {"$oid": "69d4eb12b877594bc5718d36"}, "test_id": "T0887", "benchmark": "company_lists", "date": "2026-03-25", "tags": {"document-type": ["book-page"], "writing": ["printed"], "century": [20], "language": ["en", "de"], "layout": ["list"], "entry-type": ["company"], "task": ["information-extraction"]}, "contributors": ["lea_kasper", "sorin_marti"], "hidden": false, "config": {"provider": "alibaba", "model": "qwen3.5-flash-2026-02-23", "dataclass": "ListPage", "temperature": 0.5, "role_description": "You are a Historian", "prompt_file": "prompt.txt", "rules": "{\"base_url\": \"https://ws-wkjpbv6ov7sqlgqe.ap-southeast-1.maas.aliyuncs.com/compatible-mode/v1\"}", "legacy_test": false}, "prompt": "The image you are presented with stems from a digitized book containing lists of companies.\nYour task is to extract structured information about each company listed on the page.\n\nAbout the source:\n- The image stems from a trade index of the British Swiss Chamber of Commerce.\n- The image can show an alphabetical or a thematic list of companies.\n- The companies are mostly located in Switzerland and the UK.\n- The image stems from a trade index between 1925 and 1958.\n- Most pages have one column but some years have two columns.\n- The source itself is in English and German but the company names can be in English, German, French or Italian.\n\nAbout the entries:\n- Each entry describes a single company or person.\n- Alphabetical entries have filling dots between the company name and the page number. Dots and page numbers are not part of the data and should be ignored.\n- Alphabetical entries seldom to never have locations.\n- Thematic entries often have locations.\n- Thematic entries are listed under headings that describe the type of business.\n- Some thematic headings are only references to other headings, e.g. \"X, s. Y\".\n\nAbout the output:\n- Answer in valid JSON. The JSON should be an array of objects with the following fields:\n- The page ID is given as {page_id}.\n- Do not add country information, if it is not directly written with the location.\n\n{\n  \"entry_id\": \"A unique identifier for the entry, e.g. '{page_id}-1'\",\n  \"company_name\": \"The name of the company or person\",\n  \"location\": \"The location of the company, e.g. 'Zurich' or 'London, UK'. If no location is given, set to null.\"\n  ]\n}\n", "results": {"text": "{\"page_id\":\"25\",\"entries\":[{\"entry_id\":\"25-1\",\"company_name\":\"Adolphe Saurer S.A.\",\"location\":\"Arbon\"},{\"entry_id\":\"25-2\",\"company_name\":\"The North British Rubber Co. (Continental) Ltd.\",\"location\":\"Zurich\"},{\"entry_id\":\"25-3\",\"company_name\":\"Church Street Manufacturing Co., Ltd.\",\"location\":\"Great Harwood\"},{\"entry_id\":\"25-4\",\"company_name\":\"Z\u00fcrcher, Tobler & Oertle\",\"location\":\"Teufen/St. Gallen\"},{\"entry_id\":\"25-5\",\"company_name\":\"Thomas Adams, Ltd.\",\"location\":\"Nottingham\"},{\"entry_id\":\"25-6\",\"company_name\":\"Brown Boveri & Cie., A.-G.\",\"location\":\"Baden, Switzerland\"},{\"entry_id\":\"25-7\",\"company_name\":\"Nova Knitting Works, Siegrist & Co.\",\"location\":\"Oftringen\"},{\"entry_id\":\"25-8\",\"company_name\":\"Bigar Fr\u00e8res (Grands Magasins Innovation S.A)\",\"location\":\"Lausanne\"},{\"entry_id\":\"25-9\",\"company_name\":\"Bonnard & Cie., S.A\",\"location\":\"Lausanne\"},{\"entry_id\":\"25-10\",\"company_name\":\"Fr. Sauter A.-G.\",\"location\":\"Basle\"},{\"entry_id\":\"25-11\",\"company_name\":\"John Layton & Co., Ltd.\",\"location\":\"Zurich\"},{\"entry_id\":\"25-12\",\"company_name\":\"Cunard Steam Ship Co., Ltd.\",\"location\":\"Lucerne\"}]}", "model": "qwen3.5-flash-2026-02-23", "provider": "alibaba", "finish_reason": "stop", "usage": {"input_tokens": 3125, "output_tokens": 559, "total_tokens": 3684, "input_cost_usd": 0.00031250000000000006, "output_cost_usd": 0.00022360000000000001, "estimated_cost_usd": 0.0005361000000000001}, "duration": 12.058691024780273, "timestamp": "2026-03-25T10:36:20.414361", "parsed": {"page_id": "25", "entries": [{"entry_id": "25-1", "company_name": "Adolphe Saurer S.A.", "location": "Arbon"}, {"entry_id": "25-2", "company_name": "The North British Rubber Co. (Continental) Ltd.", "location": "Zurich"}, {"entry_id": "25-3", "company_name": "Church Street Manufacturing Co., Ltd.", "location": "Great Harwood"}, {"entry_id": "25-4", "company_name": "Z\u00fcrcher, Tobler & Oertle", "location": "Teufen/St. Gallen"}, {"entry_id": "25-5", "company_name": "Thomas Adams, Ltd.", "location": "Nottingham"}, {"entry_id": "25-6", "company_name": "Brown Boveri & Cie., A.-G.", "location": "Baden, Switzerland"}, {"entry_id": "25-7", "company_name": "Nova Knitting Works, Siegrist & Co.", "location": "Oftringen"}, {"entry_id": "25-8", "company_name": "Bigar Fr\u00e8res (Grands Magasins Innovation S.A)", "location": "Lausanne"}, {"entry_id": "25-9", "company_name": "Bonnard & Cie., S.A", "location": "Lausanne"}, {"entry_id": "25-10", "company_name": "Fr. Sauter A.-G.", "location": "Basle"}, {"entry_id": "25-11", "company_name": "John Layton & Co., Ltd.", "location": "Zurich"}, {"entry_id": "25-12", "company_name": "Cunard Steam Ship Co., Ltd.", "location": "Lucerne"}]}, "conversation_id": "3bc35564-efcc-4fc5-a967-85fc7349603d", "score": {"f1_score": 0.62, "precision": 0.6216216216216216, "recall": 0.6216216216216216, "true_positives": 23, "false_positives": 14, "false_negatives": 14, "field_scores": {"entries[0].entry_id": {"response": "25-1", "ground_truth": "156089_1321091_28-1", "score": 0.26086956521739135}, "entries[6].location": {"response": "Oftringen", "ground_truth": "Oftringen", "score": 1.0}, "entries[2].location": {"response": "Great Harwood", "ground_truth": "Great Harwood", "score": 1.0}, "entries[3].company_name": {"response": "Z\u00fcrcher, Tobler & Oertle", "ground_truth": "Z\u00fcrcher, Tobler & Oertle", "score": 1.0}, "entries[6].entry_id": {"response": "25-7", "ground_truth": "156089_1321091_28-7", "score": 0.26086956521739135}, "entries[3].entry_id": {"response": "25-4", "ground_truth": "156089_1321091_28-4", "score": 0.26086956521739135}, "entries[10].entry_id": {"response": "25-11", "ground_truth": "156089_1321091_28-11", "score": 0.31999999999999995}, "entries[4].company_name": {"response": "Thomas Adams, Ltd.", "ground_truth": "Thomas Adams, Ltd.", "score": 1.0}, "entries[6].company_name": {"response": "Nova Knitting Works, Siegrist & Co.", "ground_truth": "Nova Knitting Works, Siegrist & Co.", "score": 1.0}, "entries[0].location": {"response": "Arbon", "ground_truth": "Arbon", "score": 1.0}, "entries[7].entry_id": {"response": "25-8", "ground_truth": "156089_1321091_28-8", "score": 0.26086956521739135}, "entries[5].location": {"response": "Baden, Switzerland", "ground_truth": "Baden, Switzerland", "score": 1.0}, "entries[1].company_name": {"response": "The North British Rubber Co. (Continental) Ltd.", "ground_truth": "The North British Rubber Co. (Continental) Ltd.", "score": 1.0}, "entries[4].location": {"response": "Nottingham", "ground_truth": "Nottingham", "score": 1.0}, "entries[8].company_name": {"response": "Bonnard & Cie., S.A", "ground_truth": "Bonnard & Cie., S. A.", "score": 0.95}, "entries[11].entry_id": {"response": "25-12", "ground_truth": "156089_1321091_28-12", "score": 0.31999999999999995}, "entries[9].company_name": {"response": "Fr. Sauter A.-G.", "ground_truth": "Fr. Sauter A.-G.", "score": 1.0}, "entries[2].entry_id": {"response": "25-3", "ground_truth": "156089_1321091_28-3", "score": 0.26086956521739135}, "page_id": {"response": "25", "ground_truth": "156089_1321091_28", "score": 0.10526315789473684}, "entries[1].entry_id": {"response": "25-2", "ground_truth": "156089_1321091_28-2", "score": 0.26086956521739135}, "entries[5].entry_id": {"response": "25-6", "ground_truth": "156089_1321091_28-6", "score": 0.26086956521739135}, "entries[3].location": {"response": "Teufen/St. Gallen", "ground_truth": "Teufen/St. Gallen", "score": 1.0}, "entries[7].company_name": {"response": "Bigar Fr\u00e8res (Grands Magasins Innovation S.A)", "ground_truth": "Bigar Fr\u00e8res (Grands Magasins Innovation S. A.)", "score": 0.9782608695652173}, "entries[2].company_name": {"response": "Church Street Manufacturing Co., Ltd.", "ground_truth": "Church Street Manufacturing Co., Ltd.", "score": 1.0}, "entries[7].location": {"response": "Lausanne", "ground_truth": "Lausanne", "score": 1.0}, "entries[11].location": {"response": "Lucerne", "ground_truth": "Lucerne", "score": 1.0}, "entries[8].location": {"response": "Lausanne", "ground_truth": "Lausanne", "score": 1.0}, "entries[5].company_name": {"response": "Brown Boveri & Cie., A.-G.", "ground_truth": "Brown Boveri & Cie, A.-G.", "score": 0.9803921568627451}, "entries[4].entry_id": {"response": "25-5", "ground_truth": "156089_1321091_28-5", "score": 0.26086956521739135}, "entries[1].location": {"response": "Zurich", "ground_truth": "Zurich", "score": 1.0}, "entries[10].company_name": {"response": "John Layton & Co., Ltd.", "ground_truth": "John Layton & Co., Ltd.", "score": 1.0}, "entries[10].location": {"response": "Zurich", "ground_truth": "Zurich 1", "score": 0.8571428571428572}, "entries[11].company_name": {"response": "Cunard Steam Ship Co., Ltd.", "ground_truth": "Cunard Steam Ship Co., Ltd.", "score": 1.0}, "entries[0].company_name": {"response": "Adolphe Saurer S.A.", "ground_truth": "Adolphe Saurer S. A.", "score": 0.9743589743589743}, "entries[8].entry_id": {"response": "25-9", "ground_truth": "156089_1321091_28-9", "score": 0.26086956521739135}, "entries[9].location": {"response": "Basle", "ground_truth": "Basle", "score": 1.0}, "entries[9].entry_id": {"response": "25-10", "ground_truth": "156089_1321091_28-10", "score": 0.31999999999999995}}, "total_fields": 37}, "raw_response": "{\"id\":\"chatcmpl-c726bbc9-2f04-93b7-87e1-85ec27b9fc72\",\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"{\\n  \\\"page_id\\\": \\\"25\\\",\\n  \\\"entries\\\": [\\n    {\\n      \\\"entry_id\\\": \\\"25-1\\\",\\n      \\\"company_name\\\": \\\"Adolphe Saurer S.A.\\\",\\n      \\\"location\\\": \\\"Arbon\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-2\\\",\\n      \\\"company_name\\\": \\\"The North British Rubber Co. (Continental) Ltd.\\\",\\n      \\\"location\\\": \\\"Zurich\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-3\\\",\\n      \\\"company_name\\\": \\\"Church Street Manufacturing Co., Ltd.\\\",\\n      \\\"location\\\": \\\"Great Harwood\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-4\\\",\\n      \\\"company_name\\\": \\\"Z\u00fcrcher, Tobler & Oertle\\\",\\n      \\\"location\\\": \\\"Teufen/St. Gallen\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-5\\\",\\n      \\\"company_name\\\": \\\"Thomas Adams, Ltd.\\\",\\n      \\\"location\\\": \\\"Nottingham\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-6\\\",\\n      \\\"company_name\\\": \\\"Brown Boveri & Cie., A.-G.\\\",\\n      \\\"location\\\": \\\"Baden, Switzerland\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-7\\\",\\n      \\\"company_name\\\": \\\"Nova Knitting Works, Siegrist & Co.\\\",\\n      \\\"location\\\": \\\"Oftringen\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-8\\\",\\n      \\\"company_name\\\": \\\"Bigar Fr\u00e8res (Grands Magasins Innovation S.A)\\\",\\n      \\\"location\\\": \\\"Lausanne\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-9\\\",\\n      \\\"company_name\\\": \\\"Bonnard & Cie., S.A\\\",\\n      \\\"location\\\": \\\"Lausanne\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-10\\\",\\n      \\\"company_name\\\": \\\"Fr. Sauter A.-G.\\\",\\n      \\\"location\\\": \\\"Basle\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-11\\\",\\n      \\\"company_name\\\": \\\"John Layton & Co., Ltd.\\\",\\n      \\\"location\\\": \\\"Zurich\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"25-12\\\",\\n      \\\"company_name\\\": \\\"Cunard Steam Ship Co., Ltd.\\\",\\n      \\\"location\\\": \\\"Lucerne\\\"\\n    }\\n  ]\\n}\",\"refusal\":null,\"role\":\"assistant\",\"annotations\":null,\"audio\":null,\"function_call\":null,\"tool_calls\":null,\"reasoning_content\":\"\"}}],\"created\":1774431380,\"model\":\"qwen3.5-flash-2026-02-23\",\"object\":\"chat.completion\",\"service_tier\":null,\"system_fingerprint\":null,\"usage\":{\"completion_tokens\":559,\"prompt_tokens\":3125,\"total_tokens\":3684,\"completion_tokens_details\":{\"accepted_prediction_tokens\":null,\"audio_tokens\":null,\"reasoning_tokens\":null,\"rejected_prediction_tokens\":null,\"text_tokens\":559},\"prompt_tokens_details\":{\"audio_tokens\":null,\"cached_tokens\":null,\"image_tokens\":2544,\"text_tokens\":581}}}"}, "scoring": {"f1_micro": 0.5239294710327456, "f1_macro": 0.524, "micro_precision": 0.5236656596173213, "micro_recall": 0.5241935483870968, "total_instances": 15, "total_tp": 520, "total_fp": 473, "total_fn": 472, "cost_summary": {"total_input_tokens": 46456, "total_output_tokens": 15713, "total_tokens": 62169, "input_cost_usd": 0.0046456000000000015, "output_cost_usd": 0.006285200000000001, "total_cost_usd": 0.0109308}}, "normalized_score": 52.400000000000006}