Install
openclaw skills install yuyonghao-evaluation-suite
Provides an API for evaluating RAG quality and logical reasoning, and for detecting hallucinations in AI-generated content, with batch support.
openclaw skills install yuyonghao-evaluation-suite
版本: 0.1.0
功能: RAG评估 + 推理评估 + 幻觉检测
cd skills/evaluation-suite
npm install
import { Evaluator } from './src/evaluator.js';
// 创建评估器
const evaluator = new Evaluator({
rag: { threshold: 0.7 },
reasoning: { threshold: 0.8 },
hallucination: { threshold: 0.5 }
});
// RAG 评估
const ragResult = await evaluator.evaluate('rag', {
query: '什么是 OpenClaw?',
retrievedDocs: ['OpenClaw 是一个 AI 助手框架...'],
generatedAnswer: 'OpenClaw 是一个 AI 助手框架'
});
console.log('RAG Score:', ragResult.score);
// 幻觉检测
const hallucinationResult = await evaluator.evaluate('hallucination', {
context: 'OpenClaw 是一个框架',
generatedText: 'OpenClaw 是一个编程语言'
});
console.log('Hallucination:', hallucinationResult.isHallucination);
new Evaluator(config)
参数:
config.rag.threshold - RAG 评估阈值 (默认: 0.7)
config.reasoning.threshold - 推理评估阈值 (默认: 0.8)
config.hallucination.threshold - 幻觉检测阈值 (默认: 0.5)
运行评估
// RAG 评估
await evaluator.evaluate('rag', {
query: '问题',
retrievedDocs: ['文档1', '文档2'],
generatedAnswer: '生成的答案'
});
// 推理评估
await evaluator.evaluate('reasoning', {
problem: '推理问题',
solution: '解决方案',
expectedAnswer: '期望答案'
});
// 幻觉检测
await evaluator.evaluate('hallucination', {
context: '上下文',
generatedText: '生成的文本'
});
批量评估
const results = await evaluator.batchEvaluate('rag', [
{ query: 'Q1', retrievedDocs: [...], generatedAnswer: 'A1' },
{ query: 'Q2', retrievedDocs: [...], generatedAnswer: 'A2' }
]);
评估 RAG 质量
import RAGEvaluator from './src/rag-eval.js';
const evaluator = new RAGEvaluator({ threshold: 0.7 });
const result = await evaluator.evaluate({
query: '问题',
retrievedDocs: ['文档1', '文档2'],
generatedAnswer: '答案'
});
// 返回: { score, relevanceScore, contextScore, passed }
评估推理能力
import ReasoningEvaluator from './src/reasoning-eval.js';
const evaluator = new ReasoningEvaluator({ threshold: 0.8 });
const result = await evaluator.evaluate({
problem: '逻辑问题',
solution: '解决步骤',
expectedAnswer: '期望答案'
});
// 返回: { score, logicScore, completenessScore, passed }
检测幻觉
import HallucinationDetector from './src/hallucination-detector.js';
const detector = new HallucinationDetector({ threshold: 0.5 });
const result = await detector.detect({
context: '原始上下文',
generatedText: '生成的文本'
});
// 返回: { isHallucination, score, indicators }
npm test
MIT