Benchmark Run Details

Run Summary

Model gemma2:9b:Q4_0
Benchmark 0050_translation_sw_ko
Normed Score 66
Run Timestamp 2025-03-26 20:02:59

Question-Level Details

Question ID Score Evaluation Time (ms) Debug Info
0050_translation_sw_ko:0 0 2839 { "response": "숨다", "expected": "꽃" }
[+]
0050_translation_sw_ko:1 100 1547 {}
[+]
0050_translation_sw_ko:10 100 1314 {}
[+]
0050_translation_sw_ko:11 100 1600 {}
[+]
0050_translation_sw_ko:12 100 1465 {}
[+]
0050_translation_sw_ko:13 100 1340 {}
[+]
0050_translation_sw_ko:14 100 1536 {}
[+]
0050_translation_sw_ko:15 0 1472 { "response": "화학", "expected": "조용한" }
[+]
0050_translation_sw_ko:16 100 1419 {}
[+]
0050_translation_sw_ko:17 100 1475 {}
[+]
0050_translation_sw_ko:18 0 1496 { "response": "바람", "expected": "하늘" }
[+]
0050_translation_sw_ko:19 0 1563 { "response": "춤추다", "expected": "신선한" }
[+]
0050_translation_sw_ko:2 0 1655 { "response": "가장강한", "expected": "가장 강한" }
[+]
0050_translation_sw_ko:20 100 1432 {}
[+]
0050_translation_sw_ko:21 100 1475 {}
[+]
0050_translation_sw_ko:22 0 1528 { "response": "매끄러운", "expected": "둥근" }
[+]
0050_translation_sw_ko:23 100 1356 {}
[+]
0050_translation_sw_ko:24 100 1593 {}
[+]
0050_translation_sw_ko:25 100 1223 {}
[+]
0050_translation_sw_ko:26 0 1417 { "response": "새", "expected": "깊은" }
[+]
0050_translation_sw_ko:27 100 1582 {}
[+]
0050_translation_sw_ko:28 100 1384 {}
[+]
0050_translation_sw_ko:29 0 1426 { "response": "춤추다", "expected": "달콤한" }
[+]
0050_translation_sw_ko:3 100 1383 {}
[+]
0050_translation_sw_ko:30 0 1370 { "response": "수영하다", "expected": "구름" }
[+]
0050_translation_sw_ko:31 100 1510 {}
[+]
0050_translation_sw_ko:32 0 1298 { "response": "친구", "expected": "부드러운" }
[+]
0050_translation_sw_ko:33 100 1223 {}
[+]
0050_translation_sw_ko:34 100 1431 {}
[+]
0050_translation_sw_ko:35 100 1289 {}
[+]
0050_translation_sw_ko:36 100 1224 {}
[+]
0050_translation_sw_ko:37 100 1362 {}
[+]
0050_translation_sw_ko:38 0 1298 { "response": "둥근", "expected": "무거운" }
[+]
0050_translation_sw_ko:39 0 1289 { "response": "오토", "expected": "불" }
[+]
0050_translation_sw_ko:4 0 1542 { "response": "구름", "expected": "먹다" }
[+]
0050_translation_sw_ko:40 100 1353 {}
[+]
0050_translation_sw_ko:41 0 1331 { "response": "꽃", "expected": "날카로운" }
[+]
0050_translation_sw_ko:42 100 1284 {}
[+]
0050_translation_sw_ko:43 100 1375 {}
[+]
0050_translation_sw_ko:44 100 1425 {}
[+]
0050_translation_sw_ko:45 100 1296 {}
[+]
0050_translation_sw_ko:46 0 1222 { "response": "달", "expected": "날다" }
[+]
0050_translation_sw_ko:47 100 1330 {}
[+]
0050_translation_sw_ko:48 0 1221 { "response": "책", "expected": "모래" }
[+]
0050_translation_sw_ko:49 0 1298 { "response": "웃다", "expected": "떨어지다" }
[+]
0050_translation_sw_ko:5 100 1684 {}
[+]
0050_translation_sw_ko:50 100 1435 {}
[+]
0050_translation_sw_ko:6 100 1361 {}
[+]
0050_translation_sw_ko:7 100 1401 {}
[+]
0050_translation_sw_ko:8 100 1277 {}
[+]
0050_translation_sw_ko:9 100 1478 {}
[+]