# Quick reference: managing an eval suite with the `erdo eval` subcommands.
# Presumably lists the existing eval suites — confirm with the CLI's help output.
erdo eval suites
# Create the suite "landing-variations" for agent erdo.artifact-builder and seed
# it with one case via --case. The case is a JSON object with "name", "input"
# and "rubric" keys; "input" is itself a JSON document carried as a string,
# which is why its inner double quotes are backslash-escaped. The outer single
# quotes stop the shell from interpreting the double quotes and backslashes.
# NOTE(review): the exact semantics of --evaluate-artifact and --no-cron are not
# shown here — check the CLI help before relying on them.
erdo eval create landing-variations --agent erdo.artifact-builder --evaluate-artifact --no-cron \
--case '{"name":"voice","input":"{\"artifact_kind\":\"landing_page\",\"description\":\"...\"}","rubric":[{"criterion":"voice widget loads","weight":2}]}'
# Re-point the existing suite at a different agent.
erdo eval update landing-variations --agent erdo.data-question-answerer # only passed flags change
# Run the suite with --watch.
erdo eval run landing-variations --watch # CI gate: non-zero if a case fails
# Show the results of one run. <run-id> is a placeholder: substitute a real run
# id before executing, because the shell parses an unquoted < or > as a
# redirection rather than as part of the argument.
erdo eval results <run-id>
# Examples of adding cases to an existing suite. <suite> is a placeholder:
# substitute the real suite name before executing, because the shell parses an
# unquoted < or > as a redirection rather than as part of the argument.
#
# Rubric-scored case: --rubric takes a JSON array of objects, each with a
# "criterion" string and a numeric "weight".
erdo eval case add <suite> --name x --input "..." --rubric '[{"criterion":"...","weight":1}]'
# Script-scored case: --evaluator takes a JSON object with "type":"script" and a
# "script" string holding the source of an evaluate(ctx) function that returns
# an object with score, passed and reasoning fields. The \" sequences are JSON
# escapes inside that string; the outer single quotes pass them to the CLI
# untouched. NOTE(review): the script looks like JavaScript — confirm the
# runtime and the allowed score range in the CLI documentation.
erdo eval case add <suite> --name y --input "..." --evaluator '{"type":"script","script":"function evaluate(ctx){return {score:5,passed:true,reasoning:\"ok\"}}"}'
# multi-step flow: --setup turns run first (same thread/agent), then --input is the evaluated turn
# In this example the --input prompt refers to "that concierge", i.e. the widget
# requested by the preceding --setup prompt.
erdo eval case add <suite> --name voice --setup "Create a voice concierge widget for Lumen Yoga" \
--input "Build the landing page wired to that concierge" --rubric '[{"criterion":"voice widget loads","weight":2}]'