Benchmark Run Details

Run Summary

Model gemma2:9b:Q4_0
Benchmark 0020_definitions
Normed Score 100
Run Timestamp 2025-03-26 20:24:08

Question-Level Details

Question ID Score Evaluation Time (ms) Debug Info
0020_definitions:batch_gemma2_9b:0 100 1138 { "response": "valley \n", "correct_answer": "valley", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:1 100 677 { "response": "opal \n", "correct_answer": "opal", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:10 100 678 { "response": "echo \n", "correct_answer": "echo", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:11 100 694 { "response": "strength \n", "correct_answer": "strength", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:12 100 705 { "response": "swift \n", "correct_answer": "swift", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:13 100 722 { "response": "proud \n", "correct_answer": "proud", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:14 100 891 { "response": "magic \n", "correct_answer": "magic", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:15 100 1295 { "response": "adventure \n", "correct_answer": "adventure", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:16 100 1142 { "response": "zeal \n", "correct_answer": "zeal", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:17 100 944 { "response": "monster \n", "correct_answer": "monster", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:18 100 948 { "response": "energy \n", "correct_answer": "energy", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:19 100 988 { "response": "justice \n", "correct_answer": "justice", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:2 100 682 { "response": "gentle \n", "correct_answer": "gentle", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:20 100 998 { "response": "knight \n", "correct_answer": "knight", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:21 100 1002 { "response": "honor \n", "correct_answer": "honor", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:22 100 990 { "response": "queen \n", "correct_answer": "queen", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:23 100 1032 { "response": "ignite \n", "correct_answer": "ignite", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:24 100 1116 { "response": "zeal \n", "correct_answer": "zeal", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:3 100 674 { "response": "glory \n", "correct_answer": "glory", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:4 100 675 { "response": "fighter \n", "correct_answer": "fighter", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:5 100 672 { "response": "belief \n", "correct_answer": "belief", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:6 100 693 { "response": "justice \n", "correct_answer": "justice", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:7 100 705 { "response": "scared \n", "correct_answer": "scared", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:8 100 699 { "response": "honest \n", "correct_answer": "honest", "is_correct": true }
[+]
0020_definitions:batch_gemma2_9b:9 100 692 { "response": "village \n", "correct_answer": "village", "is_correct": true }
[+]