Benchmark Run Details

Model	gpt-4.1-mini-2025-04-14
Benchmark	0020_definitions
Normed Score	100
Run Timestamp	2025-04-24 17:56:39

Question-Level Details

Question ID	Score	Evaluation Time (ms)	Debug Info
0020_definitions:batch_gemma2_9b:0	100	1011	{ "response": "valley", "expected": "valley", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A low area of land bordered by hills or mountains.\n\nThe choices are: clever, enemy, kingdom, lightning, patient, peaceful, sincere, slumber, trouble, valley", "answer_type": "multiple_choice", "correct_answer": "valley", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "clever", "enemy", "kingdom", "lightning", "patient", "peaceful", "sincere", "slumber", "trouble", "valley" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:1	100	896	{ "response": "opal", "expected": "opal", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: It is a gemstone that often displays shifting colors when viewed from different angles.\n\nThe choices are: brother, cloudy, grave, heart, honest, iceberg, ignite, opal, sunset, voyage", "answer_type": "multiple_choice", "correct_answer": "opal", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "brother", "cloudy", "grave", "heart", "honest", "iceberg", "ignite", "opal", "sunset", "voyage" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:10	100	1022	{ "response": "echo", "expected": "echo", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A sound that is repeated back after being reflected.\n\nThe choices are: calm, danger, echo, grace, honest, ignite, memory, puzzle, sword, unique", "answer_type": "multiple_choice", "correct_answer": "echo", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "calm", "danger", "echo", "grace", "honest", "ignite", "memory", "puzzle", "sword", "unique" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:11	100	930	{ "response": "strength", "expected": "strength", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: The quality of being able to exert force or endure pressure.\n\nThe choices are: breeze, grave, honor, iceberg, maze, outcast, rebel, stealth, strength, tactic", "answer_type": "multiple_choice", "correct_answer": "strength", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "breeze", "grave", "honor", "iceberg", "maze", "outcast", "rebel", "stealth", "strength", "tactic" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:12	100	874	{ "response": "swift", "expected": "swift", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: Moving or capable of moving at high speed.\n\nThe choices are: amber, cloudy, cypress, duel, fable, greedy, noble, passion, swift, valley", "answer_type": "multiple_choice", "correct_answer": "swift", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "amber", "cloudy", "cypress", "duel", "fable", "greedy", "noble", "passion", "swift", "valley" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:13	100	873	{ "response": "proud", "expected": "proud", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: Feeling a deep sense of satisfaction and pleasure due to one's own achievements or qualities.\n\nThe choices are: gem, glitter, hidden, meadow, oracle, proud, sapphire, skyline, stealth, swift", "answer_type": "multiple_choice", "correct_answer": "proud", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "gem", "glitter", "hidden", "meadow", "oracle", "proud", "sapphire", "skyline", "stealth", "swift" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:14	100	957	{ "response": "magic", "expected": "magic", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: It refers to seemingly impossible feats or supernatural occurrences.\n\nThe choices are: brother, delight, grace, journey, magic, oracle, phantom, quest, savage, valley", "answer_type": "multiple_choice", "correct_answer": "magic", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "brother", "delight", "grace", "journey", "magic", "oracle", "phantom", "quest", "savage", "valley" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:15	100	963	{ "response": "adventure", "expected": "adventure", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A journey or experience that is exciting and often involves some danger.\n\nThe choices are: adventure, magic, passion, peaceful, proud, river, roaming, simple, sword, whistle", "answer_type": "multiple_choice", "correct_answer": "adventure", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "adventure", "magic", "passion", "peaceful", "proud", "river", "roaming", "simple", "sword", "whistle" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:16	100	481	{ "response": "zeal", "expected": "zeal", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A strong feeling of excitement and eagerness to do something.\n\nThe choices are: amber, ancient, dragon, energy, heart, splendor, tension, victory, youth, zeal", "answer_type": "multiple_choice", "correct_answer": "zeal", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "amber", "ancient", "dragon", "energy", "heart", "splendor", "tension", "victory", "youth", "zeal" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:17	100	924	{ "response": "monster", "expected": "monster", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A frightening or terrifying creature, often mythical or supernatural.\n\nThe choices are: crystal, daring, distant, kingdom, monster, proud, rebel, relic, tame, tower", "answer_type": "multiple_choice", "correct_answer": "monster", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "crystal", "daring", "distant", "kingdom", "monster", "proud", "rebel", "relic", "tame", "tower" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:18	100	850	{ "response": "energy", "expected": "energy", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: The ability to do work or cause change.\n\nThe choices are: cheerful, courage, energy, fragile, lightning, memory, puzzle, roaming, soldier, wisdom", "answer_type": "multiple_choice", "correct_answer": "energy", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "cheerful", "courage", "energy", "fragile", "lightning", "memory", "puzzle", "roaming", "soldier", "wisdom" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:19	100	1185	{ "response": "justice", "expected": "justice", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: The state of being fair and impartial, resulting in rightfulness or equity.\n\nThe choices are: ancient, ashes, cypress, energy, grave, justice, melody, phantom, remark, sword", "answer_type": "multiple_choice", "correct_answer": "justice", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "ancient", "ashes", "cypress", "energy", "grave", "justice", "melody", "phantom", "remark", "sword" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:2	100	870	{ "response": "gentle", "expected": "gentle", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: Describes someone or something that is soft, kind, and not harsh.\n\nThe choices are: charm, gentle, glitter, heart, hunter, mighty, peaceful, peril, universe, valor", "answer_type": "multiple_choice", "correct_answer": "gentle", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "charm", "gentle", "glitter", "heart", "hunter", "mighty", "peaceful", "peril", "universe", "valor" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:20	100	508	{ "response": "knight", "expected": "knight", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A mounted warrior who served a lord or king in medieval times.\n\nThe choices are: absent, frost, knight, loyal, peril, realm, sapphire, splendor, unique, void", "answer_type": "multiple_choice", "correct_answer": "knight", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "absent", "frost", "knight", "loyal", "peril", "realm", "sapphire", "splendor", "unique", "void" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:21	100	700	{ "response": "honor", "expected": "honor", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: It is a quality that commands respect and shows good character.\n\nThe choices are: cheerful, cypress, gentle, glitter, honor, lure, monster, mystery, survivor, tower", "answer_type": "multiple_choice", "correct_answer": "honor", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "cheerful", "cypress", "gentle", "glitter", "honor", "lure", "monster", "mystery", "survivor", "tower" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:22	100	668	{ "response": "queen", "expected": "queen", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: She is the female monarch of a country.\n\nThe choices are: bronze, delight, grave, keeper, mythic, passion, peril, queen, sword, warrior", "answer_type": "multiple_choice", "correct_answer": "queen", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "bronze", "delight", "grave", "keeper", "mythic", "passion", "peril", "queen", "sword", "warrior" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:23	100	662	{ "response": "ignite", "expected": "ignite", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: To cause something to start burning suddenly and brightly.\n\nThe choices are: brother, cypress, emblem, ignite, jewel, meadow, mighty, royalty, truth, village", "answer_type": "multiple_choice", "correct_answer": "ignite", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "brother", "cypress", "emblem", "ignite", "jewel", "meadow", "mighty", "royalty", "truth", "village" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:24	100	936	{ "response": "zeal", "expected": "zeal", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A strong feeling of enthusiasm and eagerness.\n\nThe choices are: bright, gem, melody, mighty, power, stealth, unique, universe, victory, zeal", "answer_type": "multiple_choice", "correct_answer": "zeal", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "bright", "gem", "melody", "mighty", "power", "stealth", "unique", "universe", "victory", "zeal" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:3	100	917	{ "response": "glory", "expected": "glory", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A feeling of great pride and honor.\n\nThe choices are: breeze, fragile, glory, iceberg, magic, outcast, rescue, savage, skeptic, wonder", "answer_type": "multiple_choice", "correct_answer": "glory", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "breeze", "fragile", "glory", "iceberg", "magic", "outcast", "rescue", "savage", "skeptic", "wonder" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:4	100	844	{ "response": "fighter", "expected": "fighter", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A person who engages in combat or contests.\n\nThe choices are: ashes, battle, bright, danger, dragon, fearful, fighter, proud, sacred, tension", "answer_type": "multiple_choice", "correct_answer": "fighter", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "ashes", "battle", "bright", "danger", "dragon", "fearful", "fighter", "proud", "sacred", "tension" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:5	100	973	{ "response": "belief", "expected": "belief", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A conviction about something, often without proof.\n\nThe choices are: belief, clever, danger, fearful, moonlit, precious, promise, quest, whistle, witty", "answer_type": "multiple_choice", "correct_answer": "belief", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "belief", "clever", "danger", "fearful", "moonlit", "precious", "promise", "quest", "whistle", "witty" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:6	100	923	{ "response": "justice", "expected": "justice", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: The fair and impartial treatment of people according to established rules and principles.\n\nThe choices are: animal, harmony, hidden, justice, noble, reckless, sapphire, signal, splendor, swift", "answer_type": "multiple_choice", "correct_answer": "justice", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "animal", "harmony", "hidden", "justice", "noble", "reckless", "sapphire", "signal", "splendor", "swift" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:7	100	928	{ "response": "scared", "expected": "scared", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: Experiencing intense fear or anxiety.\n\nThe choices are: clever, greedy, guardian, harmony, monster, quirk, scared, thunder, treasure, valley", "answer_type": "multiple_choice", "correct_answer": "scared", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "clever", "greedy", "guardian", "harmony", "monster", "quirk", "scared", "thunder", "treasure", "valley" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:8	100	899	{ "response": "honest", "expected": "honest", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: Someone who always tells the truth and acts with integrity.\n\nThe choices are: bronze, calm, honest, island, ivory, journey, reckless, sincere, tower, wild", "answer_type": "multiple_choice", "correct_answer": "honest", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "bronze", "calm", "honest", "island", "ivory", "journey", "reckless", "sincere", "tower", "wild" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }
0020_definitions:batch_gemma2_9b:9	100	727	{ "response": "village", "expected": "village", "is_correct": true }
[+] Question Information: { "question_text": "Which word has this definition: A small community of homes and residents, typically located in a rural area.\n\nThe choices are: adventure, guardian, heart, honest, loyal, meadow, quest, slumber, sunset, village", "answer_type": "multiple_choice", "correct_answer": "village", "category": "vocabulary", "difficulty": "medium", "tags": [ "vocabulary", "definitions" ], "choices": [ "adventure", "guardian", "heart", "honest", "loyal", "meadow", "quest", "slumber", "sunset", "village" ], "evaluation_criteria": { "exact_match": true, "case_sensitive": false, "contains": false, "required_fields": [], "tolerance": 0.0 } }

System Prompt

Run Summary

Question-Level Details