{"_id": {"$oid": "69d4eb12b877594bc5718a57"}, "test_id": "T0444", "benchmark": "company_lists", "date": "2025-11-25", "tags": {"document-type": ["book-page"], "writing": ["printed"], "century": [20], "language": ["en", "de"], "layout": ["list"], "entry-type": ["company"], "task": ["information-extraction"]}, "contributors": ["lea_kasper", "sorin_marti"], "hidden": false, "config": {"provider": "mistral", "model": "mistral-small-2506", "dataclass": "ListPage", "temperature": 0.5, "role_description": "You are a Historian", "prompt_file": "prompt_min.txt", "legacy_test": false}, "prompt": "- Answer in valid JSON.\n- The page ID is given as {page_id}.\n", "results": {"text": "{\"page_id\":\"{page_id}\",\"entries\":[{\"entry_id\":\"1\",\"company_name\":\"ABC Corp\",\"location\":\"New York\"},{\"entry_id\":\"2\",\"company_name\":\"XYZ Inc\",\"location\":\"San Francisco\"},{\"entry_id\":\"3\",\"company_name\":\"LMN Ltd\",\"location\":\"Chicago\"}]}", "model": "mistral-small-2506", "provider": "mistral", "finish_reason": "stop", "usage": {"input_tokens": 198, "output_tokens": 106, "total_tokens": 304, "input_cost_usd": 1.98e-05, "output_cost_usd": 3.18e-05, "estimated_cost_usd": 5.16e-05}, "duration": 1.4818446636199951, "timestamp": "2025-11-25T11:40:24.286056", "parsed": {"page_id": "{page_id}", "entries": [{"entry_id": "1", "company_name": "ABC Corp", "location": "New York"}, {"entry_id": "2", "company_name": "XYZ Inc", "location": "San Francisco"}, {"entry_id": "3", "company_name": "LMN Ltd", "location": "Chicago"}]}, "score": {"f1_score": 0.0, "precision": 0.0, "recall": 0.0, "true_positives": 0, "false_positives": 10, "false_negatives": 37, "field_scores": {"entries[5].entry_id": {"response": "", "ground_truth": "156089_1321092_18-6", "score": 0.0}, "entries[4].entry_id": {"response": "", "ground_truth": "156089_1321092_18-5", "score": 0.0}, "entries[8].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[6].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[11].entry_id": {"response": "", "ground_truth": "156089_1321092_18-12", "score": 0.0}, "entries[10].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[2].company_name": {"response": "LMN Ltd", "ground_truth": "J. A. Crabtree & Co., Ltd.", "score": 0.24242424242424243}, "entries[6].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[9].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[3].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[0].company_name": {"response": "ABC Corp", "ground_truth": "F\u00e4rbereien Schetty, A.-G.", "score": 0.06060606060606055}, "entries[1].company_name": {"response": "XYZ Inc", "ground_truth": "G. Kappeler A.-G.", "score": 0.08333333333333337}, "entries[11].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "page_id": {"response": "{page_id}", "ground_truth": "156089_1321092_18", "score": 0.07692307692307687}, "entries[2].entry_id": {"response": "3", "ground_truth": "156089_1321092_18-3", "score": 0.09999999999999998}, "entries[1].entry_id": {"response": "2", "ground_truth": "156089_1321092_18-2", "score": 0.09999999999999998}, "entries[0].entry_id": {"response": "1", "ground_truth": "156089_1321092_18-1", "score": 0.09999999999999998}, "entries[3].entry_id": {"response": "", "ground_truth": "156089_1321092_18-4", "score": 0.0}, "entries[8].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[0].location": {"response": "New York", "ground_truth": "Basle", "score": 0.15384615384615385}, "entries[9].entry_id": {"response": "", "ground_truth": "156089_1321092_18-10", "score": 0.0}, "entries[7].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[10].entry_id": {"response": "", "ground_truth": "156089_1321092_18-11", "score": 0.0}, "entries[1].location": {"response": "San Francisco", "ground_truth": "Zofingen", "score": 0.19047619047619047}, "entries[4].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[5].company_name": {"response": "", "ground_truth": "Magn\u00e9tos Lucifer S. A.", "score": 0.0}, "entries[9].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[11].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[4].company_name": {"response": "", "ground_truth": "Brown Boveri & Cie., A.-G.", "score": 0.0}, "entries[7].entry_id": {"response": "", "ground_truth": "156089_1321092_18-8", "score": 0.0}, "entries[6].entry_id": {"response": "", "ground_truth": "156089_1321092_18-7", "score": 0.0}, "entries[8].entry_id": {"response": "", "ground_truth": "156089_1321092_18-9", "score": 0.0}, "entries[10].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[5].location": {"response": "", "ground_truth": "Carouge-Gen\u00e8ve", "score": 0.0}, "entries[7].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[3].location": {"response": "", "ground_truth": "Baden (Switzerland)", "score": 0.0}, "entries[2].location": {"response": "Chicago", "ground_truth": "Walsall, England", "score": 0.17391304347826086}}, "total_fields": 37}, "raw_response": "{\"id\":\"911b58748e1c405985e20dc32114d646\",\"object\":\"chat.completion\",\"model\":\"mistral-small-2506\",\"usage\":{\"prompt_tokens\":198,\"completion_tokens\":106,\"total_tokens\":304},\"created\":1764067223,\"choices\":[{\"index\":0,\"message\":{\"content\":\"{\\n  \\\"page_id\\\": \\\"{page_id}\\\",\\n  \\\"entries\\\": [\\n    {\\n      \\\"entry_id\\\": \\\"1\\\",\\n      \\\"company_name\\\": \\\"ABC Corp\\\",\\n      \\\"location\\\": \\\"New York\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"2\\\",\\n      \\\"company_name\\\": \\\"XYZ Inc\\\",\\n      \\\"location\\\": \\\"San Francisco\\\"\\n    },\\n    {\\n      \\\"entry_id\\\": \\\"3\\\",\\n      \\\"company_name\\\": \\\"LMN Ltd\\\",\\n      \\\"location\\\": \\\"Chicago\\\"\\n    }\\n  ]\\n}\",\"tool_calls\":null,\"prefix\":false,\"role\":\"assistant\"},\"finish_reason\":\"stop\"}]}"}, "scoring": {"f1_micro": 0.0, "f1_macro": 0.0, "micro_precision": 0.0, "micro_recall": 0.0, "total_instances": 15, "total_tp": 0, "total_fp": 144, "total_fn": 992, "cost_summary": {"total_input_tokens": 2970, "total_output_tokens": 1605, "total_tokens": 4575, "input_cost_usd": 0.00029699999999999996, "output_cost_usd": 0.0004815, "total_cost_usd": 0.0007785000000000001}}, "normalized_score": 0}