# Stage 1: Basic concepts
basic_adversary = await evolution.build_adversary(
domain="mathematics",
description="Generate basic algebra problems"
)
# Stage 2: Intermediate challenges
intermediate_adversary = await evolution.build_adversary(
domain="mathematics",
description="Generate problems combining algebra and calculus"
)
# Stage 3: Advanced scenarios
advanced_adversary = await evolution.build_adversary(
domain="mathematics",
description="Generate complex multi-step problems"
)
# Test variants against each stage
for variant in variants:
# Basic testing
history = await evolution.run_interaction(variant, basic_adversary)
basic_score = await evolution.get_agent_score(history, judge)
# Continue if basic score is good
if basic_score > 800:
history = await evolution.run_interaction(variant, intermediate_adversary)
intermediate_score = await evolution.get_agent_score(history, judge)
# Test advanced if intermediate score is good
if intermediate_score > 700:
history = await evolution.run_interaction(variant, advanced_adversary)
advanced_score = await evolution.get_agent_score(history, judge)