{"_id": {"$oid": "6a2c86a59ede3a3eff37d4fd"}, "test_id": "T1110", "benchmark": "company_lists", "date": "2026-06-08", "tags": {"document-type": ["book-page"], "writing": ["printed"], "century": [20], "language": ["en", "de"], "layout": ["list"], "entry-type": ["company"], "task": ["information-extraction"]}, "contributors": ["lea_kasper", "sorin_marti"], "hidden": false, "config": {"provider": "x-ai", "model": "grok-4.3", "dataclass": "ListPage", "temperature": 0.5, "role_description": "You are a Historian", "prompt_file": "prompt.txt", "legacy_test": false}, "prompt": "The image you are presented with stems from a digitized book containing lists of companies.\nYour task is to extract structured information about each company listed on the page.\n\nAbout the source:\n- The image stems from a trade index of the British Swiss Chamber of Commerce.\n- The image can show an alphabetical or a thematic list of companies.\n- The companies are mostly located in Switzerland and the UK.\n- The image stems from a trade index between 1925 and 1958.\n- Most pages have one column but some years have two columns.\n- The source itself is in English and German but the company names can be in English, German, French or Italian.\n\nAbout the entries:\n- Each entry describes a single company or person.\n- Alphabetical entries have filling dots between the company name and the page number. Dots and page numbers are not part of the data and should be ignored.\n- Alphabetical entries seldom to never have locations.\n- Thematic entries often have locations.\n- Thematic entries are listed under headings that describe the type of business.\n- Some thematic headings are only references to other headings, e.g. \"X, s. Y\".\n\nAbout the output:\n- Answer in valid JSON. The JSON should be an array of objects with the following fields:\n- The page ID is given as {page_id}.\n- Do not add country information, if it is not directly written with the location.\n\n{\n  \"entry_id\": \"A unique identifier for the entry, e.g. '{page_id}-1'\",\n  \"company_name\": \"The name of the company or person\",\n  \"location\": \"The location of the company, e.g. 'Zurich' or 'London, UK'. If no location is given, set to null.\"\n  ]\n}\n", "results": {"text": "{\"page_id\":\"16\",\"entries\":[{\"entry_id\":\"16-1\",\"company_name\":\"F\u00e4rbereien Schetty, A.-G.\",\"location\":\"Basle\"},{\"entry_id\":\"16-2\",\"company_name\":\"G. Kappeler A.-G.\",\"location\":\"Zofingen\"},{\"entry_id\":\"16-3\",\"company_name\":\"J. A. Crabtree & Co., Ltd.\",\"location\":\"Walsall, England\"},{\"entry_id\":\"16-4\",\"company_name\":\"Brown Boveri & Cie., A.-G.\",\"location\":\"Baden (Switzerland)\"},{\"entry_id\":\"16-5\",\"company_name\":\"Magn\u00e9tos Lucifer S.A.\",\"location\":\"Carouge-Gen\u00e8ve\"}]}", "model": "grok-4.3", "provider": "x-ai", "finish_reason": "stop", "usage": {"input_tokens": 3033, "output_tokens": 215, "total_tokens": 4506, "cached_tokens": 128, "input_cost_usd": 0.0037912500000000003, "output_cost_usd": 0.0005375, "estimated_cost_usd": 0.00432875}, "duration": 18.46923327445984, "timestamp": "2026-06-08T14:16:06.102462", "parsed": {"page_id": "16", "entries": [{"entry_id": "16-1", "company_name": "F\u00e4rbereien Schetty, A.-G.", "location": "Basle"}, {"entry_id": "16-2", "company_name": "G. Kappeler A.-G.", "location": "Zofingen"}, {"entry_id": "16-3", "company_name": "J. A. Crabtree & Co., Ltd.", "location": "Walsall, England"}, {"entry_id": "16-4", "company_name": "Brown Boveri & Cie., A.-G.", "location": "Baden (Switzerland)"}, {"entry_id": "16-5", "company_name": "Magn\u00e9tos Lucifer S.A.", "location": "Carouge-Gen\u00e8ve"}]}, "conversation_id": "54bbfd78-82a3-478e-a229-bf75c34fb20e", "score": {"f1_score": 0.3, "precision": 0.5, "recall": 0.21621621621621623, "true_positives": 8, "false_positives": 8, "false_negatives": 29, "field_scores": {"entries[8].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[9].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[3].entry_id": {"response": "16-4", "ground_truth": "156089_1321092_18-4", "score": 0.34782608695652173}, "entries[0].company_name": {"response": "F\u00e4rbereien Schetty, A.-G.", "ground_truth": "F\u00e4rbereien Schetty, A.-G.", "score": 1.0}, "entries[3].location": {"response": "Baden (Switzerland)", "ground_truth": "Baden (Switzerland)", "score": 1.0}, "entries[4].company_name": {"response": "Magn\u00e9tos Lucifer S.A.", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.34042553191489366}, "entries[4].location": {"response": "Carouge-Gen\u00e8ve", "ground_truth": "Baden (Switzerland)", "score": 0.24242424242424243}, "entries[3].company_name": {"response": "Brown Boveri & Cie., A.-G.", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 1.0}, "entries[8].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[2].company_name": {"response": "J. A. Crabtree & Co., Ltd.", "ground_truth": "J. A. Crabtree & Co., Ltd.", "score": 1.0}, "entries[10].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[5].location": {"response": "", "ground_truth": "Carouge-Gen\u00e8ve", "score": 0.0}, "entries[6].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[9].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[1].location": {"response": "Zofingen", "ground_truth": "Zofingen", "score": 1.0}, "entries[2].entry_id": {"response": "16-3", "ground_truth": "156089_1321092_18-3", "score": 0.34782608695652173}, "entries[5].entry_id": {"response": "", "ground_truth": "156089_1321092_18-6", "score": 0.0}, "entries[6].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[7].entry_id": {"response": "", "ground_truth": "156089_1321092_18-8", "score": 0.0}, "entries[6].entry_id": {"response": "", "ground_truth": "156089_1321092_18-7", "score": 0.0}, "entries[8].entry_id": {"response": "", "ground_truth": "156089_1321092_18-9", "score": 0.0}, "entries[1].company_name": {"response": "G. Kappeler A.-G.", "ground_truth": "G. Kappeler A.-G.", "score": 1.0}, "entries[5].company_name": {"response": "", "ground_truth": "Magn\u00e9tos Lucifer S. A.", "score": 0.0}, "entries[10].entry_id": {"response": "", "ground_truth": "156089_1321092_18-11", "score": 0.0}, "entries[2].location": {"response": "Walsall, England", "ground_truth": "Walsall, England", "score": 1.0}, "entries[4].entry_id": {"response": "16-5", "ground_truth": "156089_1321092_18-5", "score": 0.34782608695652173}, "entries[7].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[0].location": {"response": "Basle", "ground_truth": "Basle", "score": 1.0}, "entries[1].entry_id": {"response": "16-2", "ground_truth": "156089_1321092_18-2", "score": 0.34782608695652173}, "entries[7].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[0].entry_id": {"response": "16-1", "ground_truth": "156089_1321092_18-1", "score": 0.34782608695652173}, "entries[10].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[11].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "page_id": {"response": "16", "ground_truth": "156089_1321092_18", "score": 0.21052631578947367}, "entries[9].entry_id": {"response": "", "ground_truth": "156089_1321092_18-10", "score": 0.0}, "entries[11].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[11].entry_id": {"response": "", "ground_truth": "156089_1321092_18-12", "score": 0.0}}, "total_fields": 37}, "raw_response": "{\"id\":\"3d2274bf-41eb-9d59-aa5d-0c77a311ea13\",\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"{\\n  \\\"page_id\\\": \\\"16\\\",\\n  \\\"entries\\\": [\\n    {\\n      \\\"entry_id\\\": \\\"16-1\\\",\\n      \\\"company_name\\\": \\\"F\u00e4rbereien Schetty, A.-G.\\\",\\n      \\\"location\\\": \\\"Basle\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"16-2\\\",\\n      \\\"company_name\\\": \\\"G. Kappeler A.-G.\\\",\\n      \\\"location\\\": \\\"Zofingen\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"16-3\\\",\\n      \\\"company_name\\\": \\\"J. A. Crabtree & Co., Ltd.\\\",\\n      \\\"location\\\": \\\"Walsall, England\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"16-4\\\",\\n      \\\"company_name\\\": \\\"Brown Boveri & Cie., A.-G.\\\",\\n      \\\"location\\\": \\\"Baden (Switzerland)\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"16-5\\\",\\n      \\\"company_name\\\": \\\"Magn\u00e9tos Lucifer S.A.\\\",\\n      \\\"location\\\": \\\"Carouge-Gen\u00e8ve\\\"\\n    }\\n  ]\\n}\",\"refusal\":null,\"role\":\"assistant\",\"annotations\":null,\"audio\":null,\"function_call\":null,\"tool_calls\":null,\"parsed\":{\"page_id\":\"16\",\"entries\":[{\"entry_id\":\"16-1\",\"company_name\":\"F\u00e4rbereien Schetty, A.-G.\",\"location\":\"Basle\"},{\"entry_id\":\"16-2\",\"company_name\":\"G. Kappeler A.-G.\",\"location\":\"Zofingen\"},{\"entry_id\":\"16-3\",\"company_name\":\"J. A. Crabtree & Co., Ltd.\",\"location\":\"Walsall, England\"},{\"entry_id\":\"16-4\",\"company_name\":\"Brown Boveri & Cie., A.-G.\",\"location\":\"Baden (Switzerland)\"},{\"entry_id\":\"16-5\",\"company_name\":\"Magn\u00e9tos Lucifer S.A.\",\"location\":\"Carouge-Gen\u00e8ve\"}]},\"reasoning_content\":\"The task is to extract structured information about each company listed on the page from a digitized book containing lists of companies. The output must be in a specific JSON format.\\n\"}}],\"created\":1780920949,\"model\":\"grok-4.3\",\"object\":\"chat.completion\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_eb3c003fc66c14ed\",\"usage\":{\"completion_tokens\":215,\"prompt_tokens\":3033,\"total_tokens\":4506,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":1258,\"rejected_prediction_tokens\":0},\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":128,\"text_tokens\":631,\"image_tokens\":2402},\"num_sources_used\":0,\"cost_in_usd_ticks\":73393500}}"}, "scoring": {"f1_micro": 0.5699693564862103, "f1_macro": 0.554, "micro_precision": 0.577639751552795, "micro_recall": 0.5625, "total_instances": 15, "total_tp": 558, "total_fp": 408, "total_fn": 434, "cost_summary": {"total_input_tokens": 45466, "total_output_tokens": 11166, "total_tokens": 56632, "input_cost_usd": 0.056832499999999994, "output_cost_usd": 0.027914999999999995, "total_cost_usd": 0.08474749999999999}}, "normalized_score": 55.400000000000006}