{"_id": {"$oid": "69d4eb12b877594bc5718d71"}, "test_id": "T0848", "benchmark": "company_lists", "date": "2026-03-25", "tags": {"document-type": ["book-page"], "writing": ["printed"], "century": [20], "language": ["en", "de"], "layout": ["list"], "entry-type": ["company"], "task": ["information-extraction"]}, "contributors": ["lea_kasper", "sorin_marti"], "hidden": false, "config": {"provider": "alibaba", "model": "qwen3.5-27b", "dataclass": "ListPage", "temperature": 0.5, "role_description": "You are a Historian", "prompt_file": "prompt.txt", "rules": "{\"base_url\": \"https://ws-wkjpbv6ov7sqlgqe.ap-southeast-1.maas.aliyuncs.com/compatible-mode/v1\"}", "legacy_test": false}, "prompt": "The image you are presented with stems from a digitized book containing lists of companies.\nYour task is to extract structured information about each company listed on the page.\n\nAbout the source:\n- The image stems from a trade index of the British Swiss Chamber of Commerce.\n- The image can show an alphabetical or a thematic list of companies.\n- The companies are mostly located in Switzerland and the UK.\n- The image stems from a trade index between 1925 and 1958.\n- Most pages have one column but some years have two columns.\n- The source itself is in English and German but the company names can be in English, German, French or Italian.\n\nAbout the entries:\n- Each entry describes a single company or person.\n- Alphabetical entries have filling dots between the company name and the page number. Dots and page numbers are not part of the data and should be ignored.\n- Alphabetical entries seldom to never have locations.\n- Thematic entries often have locations.\n- Thematic entries are listed under headings that describe the type of business.\n- Some thematic headings are only references to other headings, e.g. \"X, s. Y\".\n\nAbout the output:\n- Answer in valid JSON. The JSON should be an array of objects with the following fields:\n- The page ID is given as {page_id}.\n- Do not add country information, if it is not directly written with the location.\n\n{\n  \"entry_id\": \"A unique identifier for the entry, e.g. '{page_id}-1'\",\n  \"company_name\": \"The name of the company or person\",\n  \"location\": \"The location of the company, e.g. 'Zurich' or 'London, UK'. If no location is given, set to null.\"\n  ]\n}\n", "results": {"text": "{\n  \"$defs\": {\n    \"Entry\": {\n      \"properties\": {\n        \"entry_id\": {\n          \"title\": \"Entry Id\",\n          \"type\": \"string\"\n        },\n        \"company_name\": {\n          \"title\": \"Company Name\",\n          \"type\": \"string\"\n        },\n        \"location\": {\n          \"title\": \"Location\",\n          \"type\": \"string\"\n        }\n      },\n      \"required\": [\n        \"entry_id\",\n        \"company_name\",\n        \"location\"\n      ],\n      \"title\": \"Entry\",\n      \"type\": \"object\"\n    }\n  },\n  \"properties\": {\n    \"page_id\": {\n      \"title\": \"Page Id\",\n      \"type\": \"string\"\n    },\n    \"entries\": {\n      \"items\": {\n        \"$ref\": \"#/$defs/Entry\"\n      },\n      \"title\": \"Entries\",\n      \"type\": \"array\"\n    }\n  },\n  \"required\": [\n    \"page_id\",\n    \"entries\"\n  ],\n  \"title\": \"ListPage\",\n  \"type\": \"object\"\n}", "model": "qwen3.5-27b", "provider": "alibaba", "finish_reason": "stop", "usage": {"input_tokens": 3084, "output_tokens": 270, "total_tokens": 3354, "input_cost_usd": 0.0009251999999999999, "output_cost_usd": 0.000648, "estimated_cost_usd": 0.0015731999999999999}, "duration": 29.43880319595337, "timestamp": "2026-03-25T10:24:46.729326", "parsed": {"$defs": {"Entry": {"properties": {"entry_id": {"title": "Entry Id", "type": "string"}, "company_name": {"title": "Company Name", "type": "string"}, "location": {"title": "Location", "type": "string"}}, "required": ["entry_id", "company_name", "location"], "title": "Entry", "type": "object"}}, "properties": {"page_id": {"title": "Page Id", "type": "string"}, "entries": {"items": {"$ref": "#/$defs/Entry"}, "title": "Entries", "type": "array"}}, "required": ["page_id", "entries"], "title": "ListPage", "type": "object"}, "conversation_id": "d23361dd-c90b-4431-8539-5882f4621929", "score": {"f1_score": 0.0, "precision": 0.0, "recall": 0.0, "true_positives": 0, "false_positives": 20, "false_negatives": 68, "field_scores": {"entries[18].entry_id": {"response": "", "ground_truth": "156089_1321081_63-19", "score": 0.0}, "entries[7].company_name": {"response": "", "ground_truth": "Paillard & Benoit", "score": 0.0}, "entries[11].company_name": {"response": "", "ground_truth": "Peter, Cailler, Kohler", "score": 0.0}, "$defs.Entry.properties.location.title": {"response": "Location", "ground_truth": "", "score": 0.0}, "entries[0].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[7].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[27].entry_id": {"response": "", "ground_truth": "156089_1321081_63-28", "score": 0.0}, "entries[23].company_name": {"response": "", "ground_truth": "Rolo & Co.", "score": 0.0}, "$defs.Entry.properties.entry_id.title": {"response": "Entry Id", "ground_truth": "", "score": 0.0}, "entries[19].company_name": {"response": "", "ground_truth": "Reinhart & Co.", "score": 0.0}, "entries[5].company_name": {"response": "", "ground_truth": "Obrist & Co.", "score": 0.0}, "entries[8].company_name": {"response": "", "ground_truth": "Palace Hotel, Montana", "score": 0.0}, "entries[9].company_name": {"response": "", "ground_truth": "Pearse, A. C. & Co., Ltd.", "score": 0.0}, "entries[12].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[24].company_name": {"response": "", "ground_truth": "\"Roneo\" A. G.", "score": 0.0}, "entries[17].company_name": {"response": "", "ground_truth": "Record Dreadnought Watch Co., S. A.", "score": 0.0}, "entries[24].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[0].company_name": {"response": "", "ground_truth": "Nardin, Ulysse", "score": 0.0}, "entries[11].entry_id": {"response": "", "ground_truth": "156089_1321081_63-12", "score": 0.0}, "properties.entries.items.$ref": {"response": "#/$defs/Entry", "ground_truth": "", "score": 0.0}, "entries[1].company_name": {"response": "", "ground_truth": "Natural, Le Coultre & Cie., S. A.", "score": 0.0}, "$defs.Entry.properties.entry_id.type": {"response": "string", "ground_truth": "", "score": 0.0}, "entries[30].entry_id": {"response": "", "ground_truth": "156089_1321081_63-31", "score": 0.0}, "entries[10].entry_id": {"response": "", "ground_truth": "156089_1321081_63-11", "score": 0.0}, "entries[18].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[21].entry_id": {"response": "", "ground_truth": "156089_1321081_63-22", "score": 0.0}, "$defs.Entry.required[1]": {"response": "company_name", "ground_truth": "", "score": 0.0}, "entries[12].company_name": {"response": "", "ground_truth": "Philippi & Hermann", "score": 0.0}, "entries[20].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[5].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[20].company_name": {"response": "", "ground_truth": "Rinderknecht, J.", "score": 0.0}, "entries[26].company_name": {"response": "", "ground_truth": "R\u00fcegger & Co.", "score": 0.0}, "entries[29].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[16].company_name": {"response": "", "ground_truth": "Quasi-Arc Co., Ltd., The", "score": 0.0}, "entries[30].company_name": {"response": "", "ground_truth": "Sarasin, Rodolphe & Co., Ltd.", "score": 0.0}, "title": {"response": "ListPage", "ground_truth": "", "score": 0.0}, "entries[29].entry_id": {"response": "", "ground_truth": "156089_1321081_63-30", "score": 0.0}, "$defs.Entry.properties.company_name.type": {"response": "string", "ground_truth": "", "score": 0.0}, "entries[28].entry_id": {"response": "", "ground_truth": "156089_1321081_63-29", "score": 0.0}, "entries[9].entry_id": {"response": "", "ground_truth": "156089_1321081_63-10", "score": 0.0}, "entries[17].entry_id": {"response": "", "ground_truth": "156089_1321081_63-18", "score": 0.0}, "required[0]": {"response": "page_id", "ground_truth": "", "score": 0.0}, "entries[23].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[16].entry_id": {"response": "", "ground_truth": "156089_1321081_63-17", "score": 0.0}, "entries[14].entry_id": {"response": "", "ground_truth": "156089_1321081_63-15", "score": 0.0}, "$defs.Entry.properties.company_name.title": {"response": "Company Name", "ground_truth": "", "score": 0.0}, "entries[15].company_name": {"response": "", "ground_truth": "Preisig, Walter & Cie.", "score": 0.0}, "entries[27].company_name": {"response": "", "ground_truth": "Ryff & Co., Ltd.", "score": 0.0}, "entries[31].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[30].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[6].location": {"response": "", "ground_truth": "", "score": 1.0}, "type": {"response": "object", "ground_truth": "", "score": 0.0}, "entries[1].location": {"response": "", "ground_truth": "", "score": 1.0}, "$defs.Entry.title": {"response": "Entry", "ground_truth": "", "score": 0.0}, "entries[25].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[15].entry_id": {"response": "", "ground_truth": "156089_1321081_63-16", "score": 0.0}, "entries[32].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[31].company_name": {"response": "", "ground_truth": "Sarasin Sons Ltd.", "score": 0.0}, "properties.page_id.title": {"response": "Page Id", "ground_truth": "", "score": 0.0}, "entries[27].location": {"response": "", "ground_truth": "", "score": 1.0}, "properties.page_id.type": {"response": "string", "ground_truth": "", "score": 0.0}, "entries[9].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[8].entry_id": {"response": "", "ground_truth": "156089_1321081_63-9", "score": 0.0}, "entries[6].company_name": {"response": "", "ground_truth": "Paillard, E. & Cie., S. A.", "score": 0.0}, "entries[32].company_name": {"response": "", "ground_truth": "Sarasin, W. & Co.", "score": 0.0}, "entries[14].company_name": {"response": "", "ground_truth": "Porteous, James & Co., Ltd.", "score": 0.0}, "entries[31].entry_id": {"response": "", "ground_truth": "156089_1321081_63-32", "score": 0.0}, "entries[20].entry_id": {"response": "", "ground_truth": "156089_1321081_63-21", "score": 0.0}, "$defs.Entry.required[0]": {"response": "entry_id", "ground_truth": "", "score": 0.0}, "entries[2].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[19].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[22].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[18].company_name": {"response": "", "ground_truth": "Reichenbach & Cie.", "score": 0.0}, "page_id": {"response": "", "ground_truth": "156089_1321081_63", "score": 0.0}, "entries[23].entry_id": {"response": "", "ground_truth": "156089_1321081_63-24", "score": 0.0}, "entries[15].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[11].location": {"response": "", "ground_truth": "", "score": 1.0}, "$defs.Entry.properties.location.type": {"response": "string", "ground_truth": "", "score": 0.0}, "entries[2].entry_id": {"response": "", "ground_truth": "156089_1321081_63-3", "score": 0.0}, "entries[13].entry_id": {"response": "", "ground_truth": "156089_1321081_63-14", "score": 0.0}, "$defs.Entry.required[2]": {"response": "location", "ground_truth": "", "score": 0.0}, "entries[4].company_name": {"response": "", "ground_truth": "North British Rubber Co., Ltd., The", "score": 0.0}, "entries[29].company_name": {"response": "", "ground_truth": "Sandreuter & Co.", "score": 0.0}, "entries[13].company_name": {"response": "", "ground_truth": "Planta, J. & Co.", "score": 0.0}, "entries[8].location": {"response": "", "ground_truth": "Montana", "score": 0.0}, "entries[10].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[1].entry_id": {"response": "", "ground_truth": "156089_1321081_63-2", "score": 0.0}, "entries[0].entry_id": {"response": "", "ground_truth": "156089_1321081_63-1", "score": 0.0}, "entries[10].company_name": {"response": "", "ground_truth": "Perrin & Cie.", "score": 0.0}, "entries[21].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[25].entry_id": {"response": "", "ground_truth": "156089_1321081_63-26", "score": 0.0}, "entries[28].company_name": {"response": "", "ground_truth": "Sandoz Chemical Co., Ltd., The", "score": 0.0}, "entries[22].entry_id": {"response": "", "ground_truth": "156089_1321081_63-23", "score": 0.0}, "entries[7].entry_id": {"response": "", "ground_truth": "156089_1321081_63-8", "score": 0.0}, "required[1]": {"response": "entries", "ground_truth": "", "score": 0.0}, "entries[3].entry_id": {"response": "", "ground_truth": "156089_1321081_63-4", "score": 0.0}, "entries[14].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[21].company_name": {"response": "", "ground_truth": "Rolling & Drawing Mills Ltd.", "score": 0.0}, "$defs.Entry.type": {"response": "object", "ground_truth": "", "score": 0.0}, "entries[4].entry_id": {"response": "", "ground_truth": "156089_1321081_63-5", "score": 0.0}, "entries[2].company_name": {"response": "", "ground_truth": "Nestl\u00e9 & Anglo-Swiss Condensed Milk Co., Ltd.", "score": 0.0}, "entries[26].entry_id": {"response": "", "ground_truth": "156089_1321081_63-27", "score": 0.0}, "entries[3].company_name": {"response": "", "ground_truth": "Neuburger & Co.", "score": 0.0}, "entries[24].entry_id": {"response": "", "ground_truth": "156089_1321081_63-25", "score": 0.0}, "entries[16].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[25].company_name": {"response": "", "ground_truth": "Roussy, Th\u00e9odore", "score": 0.0}, "entries[26].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[19].entry_id": {"response": "", "ground_truth": "156089_1321081_63-20", "score": 0.0}, "entries[17].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[28].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[6].entry_id": {"response": "", "ground_truth": "156089_1321081_63-7", "score": 0.0}, "entries[13].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[22].company_name": {"response": "", "ground_truth": "Rolls, Frank V.", "score": 0.0}, "entries[5].entry_id": {"response": "", "ground_truth": "156089_1321081_63-6", "score": 0.0}, "properties.entries.title": {"response": "Entries", "ground_truth": "", "score": 0.0}, "entries[12].entry_id": {"response": "", "ground_truth": "156089_1321081_63-13", "score": 0.0}, "properties.entries.type": {"response": "array", "ground_truth": "", "score": 0.0}, "entries[3].location": {"response": "", "ground_truth": "", "score": 1.0}, "entries[32].entry_id": {"response": "", "ground_truth": "156089_1321081_63-33", "score": 0.0}, "entries[4].location": {"response": "", "ground_truth": "", "score": 1.0}}, "total_fields": 120}, "raw_response": "{\"id\":\"chatcmpl-4c27d6a3-16ef-951b-ab11-57774e3acc20\",\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"{\\n  \\\"$defs\\\": {\\n    \\\"Entry\\\": {\\n      \\\"properties\\\": {\\n        \\\"entry_id\\\": {\\n          \\\"title\\\": \\\"Entry Id\\\",\\n          \\\"type\\\": \\\"string\\\"\\n        },\\n        \\\"company_name\\\": {\\n          \\\"title\\\": \\\"Company Name\\\",\\n          \\\"type\\\": \\\"string\\\"\\n        },\\n        \\\"location\\\": {\\n          \\\"title\\\": \\\"Location\\\",\\n          \\\"type\\\": \\\"string\\\"\\n        }\\n      },\\n      \\\"required\\\": [\\n        \\\"entry_id\\\",\\n        \\\"company_name\\\",\\n        \\\"location\\\"\\n      ],\\n      \\\"title\\\": \\\"Entry\\\",\\n      \\\"type\\\": \\\"object\\\"\\n    }\\n  },\\n  \\\"properties\\\": {\\n    \\\"page_id\\\": {\\n      \\\"title\\\": \\\"Page Id\\\",\\n      \\\"type\\\": \\\"string\\\"\\n    },\\n    \\\"entries\\\": {\\n      \\\"items\\\": {\\n        \\\"$ref\\\": \\\"#/$defs/Entry\\\"\\n      },\\n      \\\"title\\\": \\\"Entries\\\",\\n      \\\"type\\\": \\\"array\\\"\\n    }\\n  },\\n  \\\"required\\\": [\\n    \\\"page_id\\\",\\n    \\\"entries\\\"\\n  ],\\n  \\\"title\\\": \\\"ListPage\\\",\\n  \\\"type\\\": \\\"object\\\"\\n}\",\"refusal\":null,\"role\":\"assistant\",\"annotations\":null,\"audio\":null,\"function_call\":null,\"tool_calls\":null,\"reasoning_content\":\"\"}}],\"created\":1774430686,\"model\":\"qwen3.5-27b\",\"object\":\"chat.completion\",\"service_tier\":null,\"system_fingerprint\":null,\"usage\":{\"completion_tokens\":270,\"prompt_tokens\":3084,\"total_tokens\":3354,\"completion_tokens_details\":{\"accepted_prediction_tokens\":null,\"audio_tokens\":null,\"reasoning_tokens\":null,\"rejected_prediction_tokens\":null,\"text_tokens\":270},\"prompt_tokens_details\":{\"audio_tokens\":null,\"cached_tokens\":null,\"image_tokens\":2503,\"text_tokens\":581}}}"}, "scoring": {"f1_micro": 0.0, "f1_macro": 0.0, "micro_precision": 0.0, "micro_recall": 0.0, "total_instances": 15, "total_tp": 0, "total_fp": 300, "total_fn": 992, "cost_summary": {"total_input_tokens": 46456, "total_output_tokens": 4050, "total_tokens": 50506, "input_cost_usd": 0.0139368, "output_cost_usd": 0.00972, "total_cost_usd": 0.023656800000000002}}, "normalized_score": 0}