Install
openclaw skills install yuyonghao-agent-eval-suite
Provides benchmark testing, A/B testing, performance regression detection, and simulation environment testing for agent evaluation.
openclaw skills install yuyonghao-agent-eval-suite
Agent 评估套件，提供基准测试、A/B测试、性能回归检测和模拟环境测试。
npm install
const { Benchmark } = require('./src');

// `iterations` is presumably the number of runs per test — confirm against
// the Benchmark implementation.
const benchmark = new Benchmark({ iterations: 100 });

// Register the workload to measure. `agent` and `task` are placeholders that
// are not defined in this example; supply your own.
benchmark.addTest('task-completion', {
  execute: async () => await agent.completeTask(task)
});

/**
 * Runs the registered benchmark tests and prints the results.
 *
 * Wrapped in an async function because `await` is not allowed at the top
 * level of a CommonJS script (this example loads the suite with `require`).
 *
 * @returns {Promise<void>} Settles once the results have been logged.
 */
async function runBenchmark() {
  const results = await benchmark.run();
  console.log(results);
}

// Report failures instead of leaving the promise rejection unhandled.
runBenchmark().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
const { ABTester } = require('./src');

// `confidenceLevel` is presumably the statistical confidence required to
// declare a winner — confirm against the ABTester implementation.
const ab = new ABTester({ confidenceLevel: 0.95 });

// Define the two variants to compare. `oldPrompt` and `newPrompt` are
// placeholders that are not defined in this example; supply your own.
ab.createExperiment('new-prompt', {
  control: async () => await oldPrompt(),
  treatment: async () => await newPrompt()
});

/**
 * Runs the 'new-prompt' experiment and prints its outcome.
 *
 * Wrapped in an async function because `await` is not allowed at the top
 * level of a CommonJS script (this example loads the suite with `require`).
 *
 * @returns {Promise<void>} Settles once the result has been logged.
 */
async function runExperiment() {
  const result = await ab.run('new-prompt', { sampleSize: 200 });
  console.log(result); // { winner: 'treatment', confidence: 0.97 }
}

// Report failures instead of leaving the promise rejection unhandled.
runExperiment().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
// Regression detection example: record a metric value, then ask the detector
// for regressions and print whatever it returns.
const { RegressionDetector } = require('./src');
// `threshold: 0.1` presumably flags changes of 10% or more — TODO confirm
// against the RegressionDetector implementation.
const detector = new RegressionDetector({ threshold: 0.1 });
// Record one 'response-time' measurement for version v1.1.0 (the unit of
// `value` is not stated in this example).
detector.record('response-time', { version: 'v1.1.0', value: 1200 });
// NOTE(review): only one version is recorded here; a baseline measurement is
// presumably needed before detect() can report anything — confirm.
const regressions = detector.detect();
console.log(regressions);
npm test
MIT