Install
openclaw skills install openclaw-skill-tester
OpenClaw Skill Testing Framework - 自动化测试技能质量,验证触发时机、功能正确性、性能指标对比
openclaw skills install openclaw-skill-tester
自动化测试框架,确保技能质量与性能。
作者: Cao Xiaosi (曹小四)
版本: 1.0.0
许可证: MIT
验证技能在正确时机加载,测试应该触发和不应该触发的场景。
测试内容:
测试方法:
python3 scripts/test_trigger.py --skill stock-watcher --input "监控股票价格" --expected true
python3 scripts/test_trigger.py --skill stock-watcher --input "随便聊聊" --expected false
验证技能输出是否正确,测试各种输入场景。
测试内容:
测试方法:
python3 scripts/test_functionality.py --skill a-stock-monitor --test-case "market_sentiment"
python3 scripts/test_functionality.py --skill a-stock-monitor --test-case "stock_price_query"
证明技能比没有技能时更好,对比工具调用次数、token 消耗等。
测试内容:
测试方法:
python3 scripts/test_comparison.py --skill stock-watcher --baseline "no-skill" --metric "tool_calls"
python3 scripts/test_comparison.py --skill stock-watcher --baseline "no-skill" --metric "tokens"
skills/skill-tester/
├── SKILL.md                          # 本文件
├── scripts/
│   ├── test_trigger.py               # 触发测试脚本
│   ├── test_functionality.py         # 功能测试脚本
│   ├── test_comparison.py            # 对比测试脚本
│   ├── test_runner.sh                # 测试执行器
│   ├── utils/
│   │   ├── trigger_validator.py      # 触发验证工具
│   │   ├── output_validator.py       # 输出验证工具
│   │   └── metrics_collector.py      # 性能指标收集器
│   └── fixtures/
│       └── sample_inputs.json        # 测试用例数据
└── references/
    ├── TEST_GUIDELINES.md            # 测试指南
    └── METRICS_DEFINITION.md         # 指标定义
# 测试单个技能
bash scripts/test_runner.sh --skill stock-watcher
# 测试所有技能
bash scripts/test_runner.sh --all
# 详细模式
bash scripts/test_runner.sh --skill stock-watcher --verbose
# 测试应该触发的场景
python3 scripts/test_trigger.py --skill "stock-watcher" \
--inputs "监控股票价格" "查看A股" "实时行情" \
--expected true
# 测试不应该触发的场景
python3 scripts/test_trigger.py --skill "stock-watcher" \
--inputs "随便聊聊" "今天天气" "帮我写代码" \
--expected false
# 运行特定测试用例
python3 scripts/test_functionality.py --skill "a-stock-monitor" \
--test-case "market_sentiment"
# 运行所有测试用例
python3 scripts/test_functionality.py --skill "a-stock-monitor" \
--all-cases
# 对比工具调用次数
python3 scripts/test_comparison.py --skill "stock-watcher" \
--baseline "no-skill" \
--metric "tool_calls" \
--iterations 10
# 对比 Token 消耗
python3 scripts/test_comparison.py --skill "stock-watcher" \
--baseline "no-skill" \
--metric "tokens" \
--iterations 10
{
"skill_name": "stock-watcher",
"timestamp": "2026-04-03T12:00:00",
"test_summary": {
"total": 20,
"passed": 18,
"failed": 2,
"skipped": 0
},
"trigger_tests": {
"passed": 10,
"failed": 0
},
"functionality_tests": {
"passed": 8,
"failed": 2
},
"comparison_metrics": {
"tool_calls_reduction": "45%",
"token_savings": "32%",
"response_time_improvement": "28%"
},
"failed_tests": [
{
"test_name": "test_stock_price_accuracy",
"error": "Expected price 10.50, got 10.45",
"input": "600000"
}
]
}
# 生成 JSON 报告
bash scripts/test_runner.sh --skill stock-watcher --report json
# 生成 HTML 报告
bash scripts/test_runner.sh --skill stock-watcher --report html
# 生成 Markdown 报告
bash scripts/test_runner.sh --skill stock-watcher --report md
创建 test_config.json:
{
"skills_path": "/Users/mars/.openclaw/workspace/skills",
"test_iterations": 10,
"timeout_seconds": 30,
"verbose": false,
"save_reports": true,
"report_format": ["json", "md"],
"exclude_skills": ["skill-tester", "self-improving"]
}
export SKILL_TESTER_VERBOSE=true
export SKILL_TESTER_TIMEOUT=60
export SKILL_TESTER_REPORT_DIR="/tmp/skill-tests"
{
"skill": "stock-watcher",
"trigger_tests": {
"should_trigger": [
"监控股票价格",
"查看A股实时行情",
"A股今日走势"
],
"should_not_trigger": [
"帮我写代码",
"今天天气如何",
"随便聊聊"
]
}
}
{
"skill": "a-stock-monitor",
"functionality_tests": [
{
"name": "market_sentiment",
"input": {"action": "calculate_sentiment"},
"expected_output": {
"type": "json",
"fields": ["score", "level", "stats"]
}
},
{
"name": "stock_price_query",
"input": {"stock_code": "600000"},
"expected_output": {
"type": "json",
"fields": ["price", "change_pct", "volume"]
}
}
]
}
在 scripts/fixtures/sample_inputs.json 中添加测试用例。
创建新的测试模块:
# scripts/test_custom.py
from utils.metrics_collector import MetricsCollector
def test_custom_metric(skill_name):
    """Collect custom metrics for ``skill_name`` and return them.

    Creates a ``MetricsCollector`` and returns whatever its
    ``collect_custom_metrics(skill_name)`` call produces.

    Args:
        skill_name: Identifier of the skill to measure; passed through
            unchanged to the collector.

    Returns:
        The collector's result, unmodified. NOTE(review): its exact
        shape is defined by ``MetricsCollector`` -- confirm there.
    """
    collector = MetricsCollector()
    # Custom test logic goes here.
    results = collector.collect_custom_metrics(skill_name)
    return results
原因: 技能执行时间过长
解决: 增加超时配置 --timeout 60
原因: 报告目录不存在
解决: 创建目录 mkdir -p /tmp/skill-tests
原因: 未定义测试用例
解决: 在 scripts/fixtures/sample_inputs.json 中添加测试用例
MIT License
欢迎提交问题、建议和改进意见!