{"_id": {"$oid": "69d4eb12b877594bc5718ccd"}, "test_id": "T0702", "benchmark": "company_lists", "date": "2026-03-17", "tags": {"document-type": ["book-page"], "writing": ["printed"], "century": [20], "language": ["en", "de"], "layout": ["list"], "entry-type": ["company"], "task": ["information-extraction"]}, "contributors": ["lea_kasper", "sorin_marti"], "hidden": false, "config": {"provider": "openai", "model": "gpt-5.3-codex", "dataclass": "ListPage", "temperature": 0.5, "role_description": "You are a Historian", "prompt_file": "prompt_min.txt", "rules": "{\"api_style\":\"responses\"}", "legacy_test": false}, "prompt": "- Answer in valid JSON.\n- The page ID is given as {page_id}.\n", "results": {"text": "{\"page_id\":\"{page_id}\",\"entries\":[{\"entry_id\":\"1\",\"company_name\":\"Crowe & Co., Ltd.\",\"location\":\"67, Nauenstrasse, Basle\"},{\"entry_id\":\"2\",\"company_name\":\"Imperial Airways Ltd.\",\"location\":\"Airport of London, Croydon\"},{\"entry_id\":\"3\",\"company_name\":\"Metallwerke A.G.\",\"location\":\"Dornach (Solothurn)\"},{\"entry_id\":\"4\",\"company_name\":\"Aluminium-Industrie-Aktien-Gesellschaft\",\"location\":\"Neuhausen\"},{\"entry_id\":\"5\",\"company_name\":\"Chemische Fabrik vorm. Sandoz\",\"location\":\"Basle\"},{\"entry_id\":\"6\",\"company_name\":\"Clayton Aniline Co., Ltd.\",\"location\":\"Ashton New Road, Clayton, Manchester\"},{\"entry_id\":\"7\",\"company_name\":\"J. R. Geigy, A.-G.\",\"location\":\"Basle\"},{\"entry_id\":\"8\",\"company_name\":\"Society of Chemical Industry in Basle\",\"location\":\"Basle\"},{\"entry_id\":\"9\",\"company_name\":\"J. Hopf & Co. (Merchants)\",\"location\":\"12, Aeschengraben, Basle\"},{\"entry_id\":\"10\",\"company_name\":\"Rayon A.-G. (Merchants)\",\"location\":\"14, Stollenrain, Arlesheim\"},{\"entry_id\":\"11\",\"company_name\":\"Banque F\u00e9d\u00e9rale S.A.\",\"location\":\"Vevey\"},{\"entry_id\":\"12\",\"company_name\":\"Barclays Bank Ltd. (Foreign Branch)\",\"location\":\"168, Fenchurch Street, London E.C. 3\"},{\"entry_id\":\"13\",\"company_name\":\"Galland & Co.\",\"location\":\"3, Place St-Fran\u00e7ois, Lausanne\"},{\"entry_id\":\"14\",\"company_name\":\"Lloyds and National Provincial Foreign Bank, Ltd.\",\"location\":\"1, Place Bel-Air, Geneva\"},{\"entry_id\":\"15\",\"company_name\":\"Swiss Bank Corporation\",\"location\":\"1, Aeschenvorstadt, Basle\"},{\"entry_id\":\"16\",\"company_name\":\"Swiss Bank Corporation\",\"location\":\"Lausanne\"},{\"entry_id\":\"17\",\"company_name\":\"Wagons-Lits/Cook S.A.\",\"location\":\"4, Rue du Mont Blanc, Geneva\"}]}", "model": "gpt-5.3-codex", "provider": "openai", "finish_reason": "completed", "usage": {"input_tokens": 2007, "output_tokens": 458, "total_tokens": 2465, "input_cost_usd": 0.00351225, "output_cost_usd": 0.006412, "estimated_cost_usd": 0.00992425}, "duration": 6.3814122676849365, "timestamp": "2026-03-17T22:47:33.748804", "parsed": {"page_id": "{page_id}", "entries": [{"entry_id": "1", "company_name": "Crowe & Co., Ltd.", "location": "67, Nauenstrasse, Basle"}, {"entry_id": "2", "company_name": "Imperial Airways Ltd.", "location": "Airport of London, Croydon"}, {"entry_id": "3", "company_name": "Metallwerke A.G.", "location": "Dornach (Solothurn)"}, {"entry_id": "4", "company_name": "Aluminium-Industrie-Aktien-Gesellschaft", "location": "Neuhausen"}, {"entry_id": "5", "company_name": "Chemische Fabrik vorm. Sandoz", "location": "Basle"}, {"entry_id": "6", "company_name": "Clayton Aniline Co., Ltd.", "location": "Ashton New Road, Clayton, Manchester"}, {"entry_id": "7", "company_name": "J. R. Geigy, A.-G.", "location": "Basle"}, {"entry_id": "8", "company_name": "Society of Chemical Industry in Basle", "location": "Basle"}, {"entry_id": "9", "company_name": "J. Hopf & Co. (Merchants)", "location": "12, Aeschengraben, Basle"}, {"entry_id": "10", "company_name": "Rayon A.-G. (Merchants)", "location": "14, Stollenrain, Arlesheim"}, {"entry_id": "11", "company_name": "Banque F\u00e9d\u00e9rale S.A.", "location": "Vevey"}, {"entry_id": "12", "company_name": "Barclays Bank Ltd. (Foreign Branch)", "location": "168, Fenchurch Street, London E.C. 3"}, {"entry_id": "13", "company_name": "Galland & Co.", "location": "3, Place St-Fran\u00e7ois, Lausanne"}, {"entry_id": "14", "company_name": "Lloyds and National Provincial Foreign Bank, Ltd.", "location": "1, Place Bel-Air, Geneva"}, {"entry_id": "15", "company_name": "Swiss Bank Corporation", "location": "1, Aeschenvorstadt, Basle"}, {"entry_id": "16", "company_name": "Swiss Bank Corporation", "location": "Lausanne"}, {"entry_id": "17", "company_name": "Wagons-Lits/Cook S.A.", "location": "4, Rue du Mont Blanc, Geneva"}]}, "conversation_id": "16a0a1cc-658c-4791-9624-f168d45664ec", "score": {"f1_score": 0.46, "precision": 0.46153846153846156, "recall": 0.46153846153846156, "true_positives": 24, "false_positives": 28, "false_negatives": 28, "field_scores": {"entries[8].location": {"response": "12, Aeschengraben, Basle", "ground_truth": "Basle", "score": 0.3448275862068966}, "entries[11].entry_id": {"response": "12", "ground_truth": "156089_1321093_11-12", "score": 0.18181818181818177}, "entries[8].company_name": {"response": "J. Hopf & Co. (Merchants)", "ground_truth": "J. Hopf & Co. (Merchants)", "score": 1.0}, "entries[11].company_name": {"response": "Barclays Bank Ltd. (Foreign Branch)", "ground_truth": "Barclays Bank Ltd. (Foreign Branch)", "score": 1.0}, "entries[12].location": {"response": "3, Place St-Fran\u00e7ois, Lausanne", "ground_truth": "Lausanne", "score": 0.42105263157894735}, "entries[6].location": {"response": "Basle", "ground_truth": "Basle", "score": 1.0}, "entries[9].entry_id": {"response": "10", "ground_truth": "156089_1321093_11-10", "score": 0.18181818181818177}, "entries[14].location": {"response": "1, Aeschenvorstadt, Basle", "ground_truth": "Basle", "score": 0.33333333333333337}, "entries[1].company_name": {"response": "Imperial Airways Ltd.", "ground_truth": "Imperial Airways Ltd.", "score": 1.0}, "entries[7].company_name": {"response": "Society of Chemical Industry in Basle", "ground_truth": "Society of Chemical Industry in Basle", "score": 1.0}, "entries[7].location": {"response": "Basle", "ground_truth": "Basle", "score": 1.0}, "entries[6].company_name": {"response": "J. R. Geigy, A.-G.", "ground_truth": "J. R. Geigy, A.-G.", "score": 1.0}, "entries[3].location": {"response": "Neuhausen", "ground_truth": "Neuhausen", "score": 1.0}, "entries[16].company_name": {"response": "Wagons-Lits/Cook S.A.", "ground_truth": "Wagons-Lits/Cook S.A.", "score": 1.0}, "entries[9].company_name": {"response": "Rayon A.-G. (Merchants)", "ground_truth": "Rayon A.-G. (Merchants)", "score": 1.0}, "entries[15].location": {"response": "Lausanne", "ground_truth": "Lausanne", "score": 1.0}, "page_id": {"response": "{page_id}", "ground_truth": "156089_1321093_11", "score": 0.07692307692307687}, "entries[4].company_name": {"response": "Chemische Fabrik vorm. Sandoz", "ground_truth": "Chemische Fabrik vorm. Sandoz", "score": 1.0}, "entries[16].entry_id": {"response": "17", "ground_truth": "156089_1321093_11-17", "score": 0.18181818181818177}, "entries[12].entry_id": {"response": "13", "ground_truth": "156089_1321093_11-13", "score": 0.18181818181818177}, "entries[13].company_name": {"response": "Lloyds and National Provincial Foreign Bank, Ltd.", "ground_truth": "Lloyds and National Provincial Foreign Bank, Ltd.", "score": 1.0}, "entries[0].company_name": {"response": "Crowe & Co., Ltd.", "ground_truth": "Crowe & Co., Ltd.", "score": 1.0}, "entries[11].location": {"response": "168, Fenchurch Street, London E.C. 3", "ground_truth": "London E.C. 3.", "score": 0.52}, "entries[0].location": {"response": "67, Nauenstrasse, Basle", "ground_truth": "Basle", "score": 0.3571428571428571}, "entries[6].entry_id": {"response": "7", "ground_truth": "156089_1321093_11-7", "score": 0.09999999999999998}, "entries[4].location": {"response": "Basle", "ground_truth": "Basle", "score": 1.0}, "entries[4].entry_id": {"response": "5", "ground_truth": "156089_1321093_11-5", "score": 0.09999999999999998}, "entries[14].entry_id": {"response": "15", "ground_truth": "156089_1321093_11-15", "score": 0.18181818181818177}, "entries[5].location": {"response": "Ashton New Road, Clayton, Manchester", "ground_truth": "Manchester", "score": 0.43478260869565216}, "entries[10].entry_id": {"response": "11", "ground_truth": "156089_1321093_11-11", "score": 0.18181818181818177}, "entries[13].entry_id": {"response": "14", "ground_truth": "156089_1321093_11-14", "score": 0.18181818181818177}, "entries[15].entry_id": {"response": "16", "ground_truth": "156089_1321093_11-16", "score": 0.18181818181818177}, "entries[0].entry_id": {"response": "1", "ground_truth": "156089_1321093_11-1", "score": 0.09999999999999998}, "entries[10].company_name": {"response": "Banque F\u00e9d\u00e9rale S.A.", "ground_truth": "Banque F\u00e9d\u00e9rale S.A.", "score": 1.0}, "entries[13].location": {"response": "1, Place Bel-Air, Geneva", "ground_truth": "Geneva", "score": 0.4}, "entries[16].location": {"response": "4, Rue du Mont Blanc, Geneva", "ground_truth": "Geneva", "score": 0.3529411764705882}, "entries[10].location": {"response": "Vevey", "ground_truth": "Vevey", "score": 1.0}, "entries[8].entry_id": {"response": "9", "ground_truth": "156089_1321093_11-9", "score": 0.09999999999999998}, "entries[3].company_name": {"response": "Aluminium-Industrie-Aktien-Gesellschaft", "ground_truth": "Aluminium-Industrie-Aktien-Gesellschaft", "score": 1.0}, "entries[15].company_name": {"response": "Swiss Bank Corporation", "ground_truth": "Swiss Bank Corporation", "score": 1.0}, "entries[2].entry_id": {"response": "3", "ground_truth": "156089_1321093_11-3", "score": 0.09999999999999998}, "entries[7].entry_id": {"response": "8", "ground_truth": "156089_1321093_11-8", "score": 0.09999999999999998}, "entries[2].location": {"response": "Dornach (Solothurn)", "ground_truth": "Dornach (Solothurn)", "score": 1.0}, "entries[1].entry_id": {"response": "2", "ground_truth": "156089_1321093_11-2", "score": 0.09999999999999998}, "entries[9].location": {"response": "14, Stollenrain, Arlesheim", "ground_truth": "Arlesheim", "score": 0.5142857142857142}, "entries[5].entry_id": {"response": "6", "ground_truth": "156089_1321093_11-6", "score": 0.09999999999999998}, "entries[5].company_name": {"response": "Clayton Aniline Co., Ltd.", "ground_truth": "Clayton Aniline Co., Ltd.", "score": 1.0}, "entries[12].company_name": {"response": "Galland & Co.", "ground_truth": "Galland & Co.", "score": 1.0}, "entries[14].company_name": {"response": "Swiss Bank Corporation", "ground_truth": "Swiss Bank Corporation", "score": 1.0}, "entries[2].company_name": {"response": "Metallwerke A.G.", "ground_truth": "Metallwerke A.G.", "score": 1.0}, "entries[1].location": {"response": "Airport of London, Croydon", "ground_truth": "Croydon", "score": 0.4242424242424242}, "entries[3].entry_id": {"response": "4", "ground_truth": "156089_1321093_11-4", "score": 0.09999999999999998}}, "total_fields": 52}, "raw_response": "{\"id\":\"resp_07c959aca44f9b610069b9cbee7434819385c47fdcc620d69d\",\"created_at\":1773784046.0,\"error\":null,\"incomplete_details\":null,\"instructions\":\"You are a Historian\",\"metadata\":{},\"model\":\"gpt-5.3-codex\",\"object\":\"response\",\"output\":[{\"id\":\"msg_07c959aca44f9b610069b9cbef6408819384d5cd531624997c\",\"content\":[{\"annotations\":[],\"text\":\"{\\\"page_id\\\":\\\"{page_id}\\\",\\\"entries\\\":[{\\\"entry_id\\\":\\\"1\\\",\\\"company_name\\\":\\\"Crowe & Co., Ltd.\\\",\\\"location\\\":\\\"67, Nauenstrasse, Basle\\\"},{\\\"entry_id\\\":\\\"2\\\",\\\"company_name\\\":\\\"Imperial Airways Ltd.\\\",\\\"location\\\":\\\"Airport of London, Croydon\\\"},{\\\"entry_id\\\":\\\"3\\\",\\\"company_name\\\":\\\"Metallwerke A.G.\\\",\\\"location\\\":\\\"Dornach (Solothurn)\\\"},{\\\"entry_id\\\":\\\"4\\\",\\\"company_name\\\":\\\"Aluminium-Industrie-Aktien-Gesellschaft\\\",\\\"location\\\":\\\"Neuhausen\\\"},{\\\"entry_id\\\":\\\"5\\\",\\\"company_name\\\":\\\"Chemische Fabrik vorm. Sandoz\\\",\\\"location\\\":\\\"Basle\\\"},{\\\"entry_id\\\":\\\"6\\\",\\\"company_name\\\":\\\"Clayton Aniline Co., Ltd.\\\",\\\"location\\\":\\\"Ashton New Road, Clayton, Manchester\\\"},{\\\"entry_id\\\":\\\"7\\\",\\\"company_name\\\":\\\"J. R. Geigy, A.-G.\\\",\\\"location\\\":\\\"Basle\\\"},{\\\"entry_id\\\":\\\"8\\\",\\\"company_name\\\":\\\"Society of Chemical Industry in Basle\\\",\\\"location\\\":\\\"Basle\\\"},{\\\"entry_id\\\":\\\"9\\\",\\\"company_name\\\":\\\"J. Hopf & Co. (Merchants)\\\",\\\"location\\\":\\\"12, Aeschengraben, Basle\\\"},{\\\"entry_id\\\":\\\"10\\\",\\\"company_name\\\":\\\"Rayon A.-G. (Merchants)\\\",\\\"location\\\":\\\"14, Stollenrain, Arlesheim\\\"},{\\\"entry_id\\\":\\\"11\\\",\\\"company_name\\\":\\\"Banque F\u00e9d\u00e9rale S.A.\\\",\\\"location\\\":\\\"Vevey\\\"},{\\\"entry_id\\\":\\\"12\\\",\\\"company_name\\\":\\\"Barclays Bank Ltd. (Foreign Branch)\\\",\\\"location\\\":\\\"168, Fenchurch Street, London E.C. 3\\\"},{\\\"entry_id\\\":\\\"13\\\",\\\"company_name\\\":\\\"Galland & Co.\\\",\\\"location\\\":\\\"3, Place St-Fran\u00e7ois, Lausanne\\\"},{\\\"entry_id\\\":\\\"14\\\",\\\"company_name\\\":\\\"Lloyds and National Provincial Foreign Bank, Ltd.\\\",\\\"location\\\":\\\"1, Place Bel-Air, Geneva\\\"},{\\\"entry_id\\\":\\\"15\\\",\\\"company_name\\\":\\\"Swiss Bank Corporation\\\",\\\"location\\\":\\\"1, Aeschenvorstadt, Basle\\\"},{\\\"entry_id\\\":\\\"16\\\",\\\"company_name\\\":\\\"Swiss Bank Corporation\\\",\\\"location\\\":\\\"Lausanne\\\"},{\\\"entry_id\\\":\\\"17\\\",\\\"company_name\\\":\\\"Wagons-Lits/Cook S.A.\\\",\\\"location\\\":\\\"4, Rue du Mont Blanc, Geneva\\\"}]}\",\"type\":\"output_text\",\"logprobs\":[]}],\"role\":\"assistant\",\"status\":\"completed\",\"type\":\"message\",\"phase\":\"final_answer\"}],\"parallel_tool_calls\":true,\"temperature\":0.5,\"tool_choice\":\"auto\",\"tools\":[],\"top_p\":0.98,\"background\":false,\"completed_at\":1773784052.0,\"conversation\":null,\"max_output_tokens\":null,\"max_tool_calls\":null,\"previous_response_id\":null,\"prompt\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":null,\"reasoning\":{\"effort\":\"none\",\"generate_summary\":null,\"summary\":null},\"safety_identifier\":null,\"service_tier\":\"default\",\"status\":\"completed\",\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"medium\"},\"top_logprobs\":0,\"truncation\":\"disabled\",\"usage\":{\"input_tokens\":2007,\"input_tokens_details\":{\"cached_tokens\":0},\"output_tokens\":458,\"output_tokens_details\":{\"reasoning_tokens\":0},\"total_tokens\":2465},\"user\":null,\"billing\":{\"payer\":\"developer\"},\"frequency_penalty\":0.0,\"presence_penalty\":0.0,\"store\":true}"}, "scoring": {"f1_micro": 0.47931382441977793, "f1_macro": 0.47400000000000003, "micro_precision": 0.4797979797979798, "micro_recall": 0.4788306451612903, "total_instances": 15, "total_tp": 475, "total_fp": 515, "total_fn": 517, "cost_summary": {"total_input_tokens": 29878, "total_output_tokens": 8837, "total_tokens": 38715, "input_cost_usd": 0.05228650000000001, "output_cost_usd": 0.123718, "total_cost_usd": 0.1760045}}, "normalized_score": 47.400000000000006}