Install
openclaw skills install voice-chat-skill

语音对话集成技能,支持双向语音交流。使用 TTS 和 STT 实现完整的语音对话功能。
openclaw skills install voice-chat-skill

实现完整的双向语音对话功能,支持语音输入和语音输出。
文本转语音(TTS)
语音转文本(STT)
对话管理
语音输入 → STT转换 → 文本处理 → AI响应 → TTS转换 → 语音输出
# Install the Python libraries
pip install SpeechRecognition pyaudio
# Windows fallback if the pyaudio install above fails
pip install pipwin
pipwin install pyaudio
# voice_chat.py
import speech_recognition as sr
import subprocess
import tempfile
import os
class VoiceChat:
    """Minimal voice conversation loop: listen, build a reply, speak it.

    Speech is captured from the microphone and transcribed through the
    recognizer's ``recognize_google`` call; the reply is currently a simple
    echo of what was heard and ``speak`` is a print-only placeholder.
    """

    # Any utterance containing this keyword ends the conversation loop.
    EXIT_KEYWORD = "退出"
    # Status strings returned by listen() when transcription fails.
    MSG_UNRECOGNIZED = "无法识别语音"
    MSG_SERVICE_UNAVAILABLE = "语音识别服务不可用"

    def __init__(self):
        # The recognizer captures and transcribes audio; the microphone is
        # the audio source handed to it.
        self.recognizer = sr.Recognizer()
        self.microphone = sr.Microphone()

    def listen(self):
        """Capture one utterance from the microphone and return it as text.

        Returns:
            The transcribed text, or ``MSG_UNRECOGNIZED`` /
            ``MSG_SERVICE_UNAVAILABLE`` when the recognizer raises
            ``UnknownValueError`` / ``RequestError`` respectively.
        """
        with self.microphone as source:
            print("🎤 请说话...")
            audio = self.recognizer.listen(source)

        # Keep the try body to the single call that can raise.
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')
        except sr.UnknownValueError:
            return self.MSG_UNRECOGNIZED
        except sr.RequestError:
            return self.MSG_SERVICE_UNAVAILABLE

        print(f"📝 识别结果: {text}")
        return text

    def speak(self, text):
        """Announce *text*.

        Placeholder: only prints the text. The OpenClaw TTS tool is meant to
        be integrated here.
        """
        print(f"🗣️ 正在朗读: {text}")

    def conversation_loop(self):
        """Run listen -> reply -> speak until the user asks to exit.

        The loop ends when an utterance contains ``EXIT_KEYWORD`` or when the
        user presses Ctrl+C. Recognition failures are spoken as-is instead of
        being echoed back as if the user had said them.
        """
        print("🎧 语音对话已启动,按Ctrl+C退出")
        failure_messages = (self.MSG_UNRECOGNIZED, self.MSG_SERVICE_UNAVAILABLE)
        try:
            while True:
                user_input = self.listen()
                if not user_input:
                    continue
                if self.EXIT_KEYWORD in user_input:
                    # Previously this case only skipped the reply and kept
                    # looping forever; the keyword now really exits.
                    break
                if user_input in failure_messages:
                    self.speak(user_input)
                    continue
                # Build the reply (an AI model can be integrated here).
                response = f"我听到你说: {user_input}"
                self.speak(response)
        except KeyboardInterrupt:
            # Ctrl+C is the documented way to quit; exit without a traceback.
            pass
        print("👋 语音对话已结束")
# Script entry point: start an interactive voice conversation when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    VoiceChat().conversation_loop()
def _play_audio(audio_file):
    """Open *audio_file* with the platform's default handler (best effort)."""
    import subprocess
    import sys

    try:
        if sys.platform.startswith("win"):
            # Equivalent of the shell's `start`, without going through cmd
            # (where a quoted path would be taken as the window title).
            os.startfile(audio_file)
        elif sys.platform == "darwin":
            subprocess.run(["open", audio_file], check=False)
        else:
            subprocess.run(["xdg-open", audio_file], check=False)
    except OSError as exc:
        # Playback is a convenience; the audio file has already been written.
        print(f"⚠️ 无法播放音频: {exc}")


def openclaw_tts(text, output_file="output.mp3",
                 tts_tool_path="path/to/openclaw/tts-tool.js"):
    """Generate speech for *text* with the OpenClaw TTS tool and play it.

    The request is written to a temporary JSON file, the tool is run with
    ``node <tts_tool_path> --input <request> --output <output_file>``, and
    the temporary file is always removed afterwards.

    Args:
        text: Text to synthesize.
        output_file: Path the tool should write the audio to.
        tts_tool_path: Path to the tool's script. The default is a
            placeholder and must point at a real OpenClaw installation.

    Returns:
        ``output_file`` when the tool exits with status 0, otherwise ``None``
        (the tool's stderr is printed).

    Raises:
        FileNotFoundError: If the ``node`` executable cannot be found.
    """
    import subprocess
    import json

    # delete=False because the tool reads the file after this handle closes;
    # cleanup happens in the finally block below.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.json', delete=False, encoding='utf-8'
    ) as f:
        tts_request = {
            "text": text,
            "channel": "webchat",
        }
        json.dump(tts_request, f)
        request_file = f.name

    try:
        # Requires an OpenClaw environment with Node.js available.
        result = subprocess.run(
            [
                "node", tts_tool_path,
                "--input", request_file,
                "--output", output_file,
            ],
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            print(f"❌ TTS失败: {result.stderr}")
            return None

        print(f"✅ 语音文件已生成: {output_file}")
        _play_audio(output_file)
        return output_file
    finally:
        os.unlink(request_file)
# Loaded Whisper models keyed by model name, so repeated transcriptions
# reuse a model instead of loading it again on every call.
_WHISPER_MODELS = {}


def whisper_stt(audio_file, model_name="base", language="zh"):
    """Transcribe *audio_file* with Whisper and return the recognized text.

    Args:
        audio_file: Path of the audio file passed to ``model.transcribe``.
        model_name: Name passed to ``whisper.load_model``; each name is
            loaded once and then reused.
        language: Language hint passed to ``model.transcribe``.

    Returns:
        The ``"text"`` field of the transcription result.
    """
    import whisper

    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    result = model.transcribe(audio_file, language=language)
    return result["text"]
def elevenlabs_tts(text, voice_id="21m00Tcm4TlvDq8ikWAM", api_key=None,
                   output_file="output.mp3", timeout=30):
    """Synthesize *text* with the ElevenLabs text-to-speech API.

    Args:
        text: Text to synthesize.
        voice_id: ElevenLabs voice identifier used in the request URL.
        api_key: API key; falls back to the ``ELEVENLABS_API_KEY``
            environment variable when not given.
        output_file: Path the returned audio bytes are written to.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``output_file`` after the audio has been written.

    Raises:
        ValueError: If no API key is supplied or configured.
        RuntimeError: If the API responds with a status other than 200.
    """
    # Validate before doing anything else: a missing key would otherwise end
    # up as a None header value and fail obscurely inside the HTTP client.
    key = api_key or os.environ.get("ELEVENLABS_API_KEY")
    if not key:
        raise ValueError(
            "缺少 ElevenLabs API 密钥: 请传入 api_key 或设置 ELEVENLABS_API_KEY 环境变量"
        )

    import requests

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "xi-api-key": key,
        "Content-Type": "application/json",
    }
    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    # Without a timeout the call can block indefinitely on a stalled connection.
    response = requests.post(url, json=data, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"ElevenLabs TTS失败: {response.text}")

    with open(output_file, "wb") as f:
        f.write(response.content)
    return output_file
麦克风无法识别
pyaudio安装失败
Windows: pipwin install pyaudio
macOS: brew install portaudio
语音识别准确率低
技能版本: 1.0.0 最后更新: 2026-02-28