# Test variants across different scenarios
async def test_variant(variant, scenarios):
    """Return the mean score of ``variant`` across ``scenarios``.

    Scenarios are processed one after another. For each scenario the variant
    interacts with the scenario's adversary via
    ``evolution.run_interaction``; the resulting history is then scored
    against the scenario's judge via ``evolution.get_agent_score``.

    Args:
        variant: The variant under test; passed through unchanged to
            ``evolution.run_interaction``.
        scenarios: Iterable of mappings, each providing an ``'adversary'``
            and a ``'judge'`` entry.

    Returns:
        The arithmetic mean of the per-scenario scores, or ``0.0`` when
        ``scenarios`` yields no items.
    """
    scores = []
    for scenario in scenarios:
        # Run the interaction, then have the scenario's judge score it.
        history = await evolution.run_interaction(
            variant,
            scenario['adversary'],
        )
        score = await evolution.get_agent_score(
            history,
            scenario['judge'],
        )
        scores.append(score)

    # Guard the division: with no scenarios there is nothing to average and
    # the plain mean would raise ZeroDivisionError.
    if not scores:
        return 0.0
    return sum(scores) / len(scores)
# Define test scenarios
# Each row is (name, adversary, judge); rows are expanded into dicts with the
# keys 'name', 'adversary' and 'judge', in that order.
scenarios = [
    {'name': label, 'adversary': adversary, 'judge': judge}
    for label, adversary, judge in (
        ('Basic Algebra', basic_adversary, basic_judge),
        ('Complex Calculus', calculus_adversary, advanced_judge),
        ('Word Problems', word_problem_adversary, comprehension_judge),
    )
]
# Test all variants
for variant in variants:
score = await test_variant(variant, scenarios)
variant.update_score(score)