{"skill":{"slug":"skylv-evaluation-benchmark","displayName":"Evaluation Benchmark","summary":"Agent评估测试助手。设计评估指标、构建测试集、生成报告。使用场景：(1) 设计评估指标，(2) 构建测试集，(3) 执行评估测试，(4) 分析评估结果。","tags":{"latest":"1.0.0"},"stats":{"comments":0,"downloads":107,"installsAllTime":0,"installsCurrent":0,"stars":0,"versions":1},"createdAt":1775873376156,"updatedAt":1775873398745},"latestVersion":{"version":"1.0.0","createdAt":1775873376156,"changelog":"Auto-publish","license":"MIT-0"},"metadata":{"os":null,"systems":null},"owner":{"handle":"sky-lv","userId":"s17fgkeb63szvtadtmm753m0gd84e4vz","displayName":"SKY-lv","image":"https://avatars.githubusercontent.com/u/259750852?v=4"},"moderation":null}