Benchmark Run Details

Run Summary

Model phi4:14b:Q4_K_M
Benchmark 0020_definitions
Normed Score 100
Run Timestamp 2025-03-26 20:25:43

Question-Level Details

Question ID Score Evaluation Time (ms) Debug Info
0020_definitions:batch_gemma2_9b:0 100 7372 { "response": "Valley", "correct_answer": "valley", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:1 100 907 { "response": "opal", "correct_answer": "opal", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:10 100 1637 { "response": "echo", "correct_answer": "echo", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:11 100 1555 { "response": "strength", "correct_answer": "strength", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:12 100 1508 { "response": "swift", "correct_answer": "swift", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:13 100 1720 { "response": "Proud", "correct_answer": "proud", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:14 100 1865 { "response": "magic", "correct_answer": "magic", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:15 100 1943 { "response": "adventure", "correct_answer": "adventure", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:16 100 1975 { "response": "Zeal", "correct_answer": "zeal", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:17 100 3295 { "response": "monster", "correct_answer": "monster", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:18 100 3141 { "response": "energy", "correct_answer": "energy", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:19 100 3111 { "response": "justice", "correct_answer": "justice", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:2 100 2758 { "response": "gentle", "correct_answer": "gentle", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:20 100 3140 { "response": "knight", "correct_answer": "knight", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:21 100 3449 { "response": "honor", "correct_answer": "honor", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:22 100 2002 { "response": "queen", "correct_answer": "queen", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:23 100 1708 { "response": "ignite", "correct_answer": "ignite", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:24 100 2280 { "response": "zeal", "correct_answer": "zeal", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:3 100 2010 { "response": "glory", "correct_answer": "glory", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:4 100 1468 { "response": "fighter", "correct_answer": "fighter", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:5 100 1570 { "response": "belief", "correct_answer": "belief", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:6 100 1575 { "response": "justice", "correct_answer": "justice", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:7 100 1694 { "response": "scared", "correct_answer": "scared", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:8 100 1818 { "response": "honest", "correct_answer": "honest", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:9 100 1867 { "response": "village", "correct_answer": "village", "is_correct": true }
[+]