328 lines
13 KiB
Python
328 lines
13 KiB
Python
"""
|
|
CosyVoice API 服务类 - 核心语音克隆功能
|
|
通过HTTP调用独立的CosyVoice API服务器
|
|
"""
|
|
import os
|
|
import logging
|
|
import requests
|
|
import tempfile
|
|
from typing import Optional, Dict, Any, Tuple
|
|
logger = logging.getLogger(__name__)
|
|
class CosyVoiceService:
    """HTTP client for a standalone CosyVoice API server.

    Every public method is best-effort: failures (network errors, non-200
    responses, malformed payloads) are logged and reported through the
    return value (``False``, ``[]``, ``""`` or ``(None, None)``) instead of
    being raised to the caller.

    Generated audio arrives hex-encoded in the JSON response and is written
    to a local temporary ``.wav`` file; the caller owns that file and is
    responsible for deleting it.
    """

    # Request timeouts in seconds.
    STATUS_TIMEOUT = 5      # health / voices / random-seed
    GENERATE_TIMEOUT = 30   # recognize / preset / natural
    CLONE_TIMEOUT = 60      # cloning needs more time than the other calls

    def __init__(self, api_url: str = "http://127.0.0.1:8081"):
        """Store the server base URL; no request is sent here."""
        self.api_url = api_url
        self._service_status = "未连接"

    def _endpoint(self, path: str) -> str:
        """Join the base URL and ``path`` with exactly one slash between them."""
        return f"{self.api_url.rstrip('/')}/{path.lstrip('/')}"

    def connect(self) -> bool:
        """Probe ``/health`` and record the outcome in ``_service_status``.

        Returns:
            True when the server answers HTTP 200 with a JSON body,
            False otherwise.
        """
        try:
            response = requests.get(
                self._endpoint("health"), timeout=self.STATUS_TIMEOUT
            )
            if response.status_code != 200:
                self._service_status = f"连接失败: HTTP {response.status_code}"
                return False
            # Parsed only to verify that the body is JSON; the content itself
            # is not used.
            response.json()
            self._service_status = "连接正常"
            return True
        except Exception as e:
            self._service_status = f"连接失败: {str(e)}"
            return False

    def get_available_voices(self) -> list:
        """Return the ``voices`` list reported by ``/voices``.

        Returns:
            The list from the response, or ``[]`` on any failure or when the
            field is missing or null.
        """
        try:
            response = requests.get(
                self._endpoint("voices"), timeout=self.STATUS_TIMEOUT
            )
            if response.status_code != 200:
                logger.error("获取音色列表失败: HTTP %s", response.status_code)
                return []
            # `or []` keeps the declared return type when the field is null.
            return response.json().get('voices') or []
        except Exception as e:
            logger.error("获取音色列表失败: %s", e)
            return []

    def get_reference_audios(self) -> list:
        """Return a fixed list of reference audio names (kept for compatibility)."""
        return ["麦克阿瑟.mp3", "年轻人,不讲武德.mp3"]

    def recognize_audio(self, audio_file_path: str) -> str:
        """Transcribe an audio file through ``/recognize``.

        Args:
            audio_file_path: Path sent to the server as ``audio_path``; the
                file content itself is not uploaded by this method.

        Returns:
            The recognized text, or ``""`` on any failure.
        """
        try:
            logger.info("开始语音识别: %s", audio_file_path)
            response = requests.post(
                self._endpoint("recognize"),
                json={"audio_path": audio_file_path},
                timeout=self.GENERATE_TIMEOUT,
            )
            if response.status_code != 200:
                logger.error("语音识别API请求失败: HTTP %s", response.status_code)
                return ""

            result = response.json()
            if not result.get('success'):
                logger.error("语音识别失败: %s", result.get('message'))
                return ""

            # `or ''` keeps the declared return type when the field is null.
            text = result.get('text') or ''
            logger.info("语音识别成功: %s", text)
            return text
        except Exception as e:
            logger.error("语音识别失败: %s", e)
            return ""

    def _handle_generation_response(
        self,
        response,
        audio_type: str,
        success_label: str,
        failure_label: str,
    ) -> Tuple[Optional[str], Optional[str]]:
        """Turn a generation response into a ``(path, path)`` result.

        Shared by the three ``generate_speech_*`` methods so that they all
        treat HTTP errors, ``success: false`` payloads and missing audio the
        same way.

        Args:
            response: The ``requests`` response of a generation call.
            audio_type: Tag used in the temporary file name.
            success_label: Log prefix used when the audio was saved.
            failure_label: Log prefix used when the server reports failure.

        Returns:
            ``(local_path, local_path)`` on success, ``(None, None)`` otherwise.

        Raises:
            Exception: whatever ``response.json()`` raises on a malformed
                body; callers invoke this inside their own ``try`` block.
        """
        if response.status_code != 200:
            logger.error("API请求失败: HTTP %s", response.status_code)
            logger.error("Response Text: %s", response.text)
            return None, None

        result = response.json()
        if not result.get('success'):
            logger.error("%s: %s", failure_label, result.get('message'))
            return None, None

        # The audio is embedded in the JSON response itself.
        audio_data = result.get('audio_data')
        if not audio_data:
            logger.error("响应中没有音频数据")
            return None, None

        local_path = self._save_audio_data(audio_data, audio_type)
        if not local_path:
            return None, None

        logger.info("%s: %s", success_label, local_path)
        return local_path, local_path

    def generate_speech_with_preset_voice(
        self,
        text: str,
        voice: str = "中文女",
        seed: int = 42,
        speed: float = 1.0,
        stream: bool = False
    ) -> Tuple[Optional[str], Optional[str]]:
        """Synthesize ``text`` with a preset voice through ``/generate/preset``.

        Args:
            text: Text to synthesize.
            voice: Preset voice name.
            seed: Seed forwarded to the server.
            speed: Speed factor forwarded to the server.
            stream: Forwarded to the server as-is; the response is handled
                the same way regardless of its value.

        Returns:
            ``(path, path)`` — the same temporary ``.wav`` path twice — or
            ``(None, None)`` on any failure.
        """
        try:
            logger.info(
                "开始生成语音: 文本='%s...', 音色=%s, 种子=%s", text[:20], voice, seed
            )
            response = requests.post(
                self._endpoint("generate/preset"),
                json={
                    "text": text,
                    "voice": voice,
                    "seed": seed,
                    "speed": speed,
                    "stream": stream,
                },
                timeout=self.GENERATE_TIMEOUT,
            )
            return self._handle_generation_response(
                response, 'preset', "预训练语音生成成功", "语音生成失败"
            )
        except Exception as e:
            logger.error("预训练音色语音生成失败: %s", e)
            return None, None

    def generate_speech_with_voice_cloning(
        self,
        text: str,
        reference_audio_path: str,
        reference_text: str = "",
        seed: int = 42
    ) -> Tuple[Optional[str], Optional[str]]:
        """Synthesize ``text`` in a cloned voice through ``/generate/clone``.

        Args:
            text: Text to synthesize.
            reference_audio_path: Path sent to the server as
                ``reference_audio``; the file is not uploaded by this method.
            reference_text: Sent to the server as ``reference_text``.
            seed: Seed forwarded to the server.

        Returns:
            ``(path, path)`` — the same temporary ``.wav`` path twice — or
            ``(None, None)`` on any failure.
        """
        try:
            logger.info(
                "开始语音克隆: 文本='%s...', 参考音频=%s", text[:20], reference_audio_path
            )
            request_data = {
                "text": text,
                "reference_audio": reference_audio_path,
                "reference_text": reference_text,
                "seed": seed,
            }
            clone_url = self._endpoint("generate/clone")

            logger.info("=== 发送给CosyVoice API的数据 ===")
            logger.info("API URL: %s", clone_url)
            logger.info("Request Data: %s", request_data)

            response = requests.post(
                clone_url, json=request_data, timeout=self.CLONE_TIMEOUT
            )

            logger.info("=== CosyVoice API响应 ===")
            logger.info("Status Code: %s", response.status_code)
            logger.info("Response Headers: %s", dict(response.headers))

            return self._handle_generation_response(
                response, 'clone', "语音克隆成功", "语音克隆失败"
            )
        except Exception as e:
            logger.error("语音克隆失败: %s", e)
            return None, None

    def generate_speech_with_natural_control(
        self,
        text: str,
        instruction: str = "请用温柔甜美的女声朗读",
        seed: int = 42
    ) -> Tuple[Optional[str], Optional[str]]:
        """Synthesize ``text`` steered by an instruction via ``/generate/natural``.

        Args:
            text: Text to synthesize.
            instruction: Natural-language instruction sent to the server.
            seed: Seed forwarded to the server.

        Returns:
            ``(path, path)`` — the same temporary ``.wav`` path twice — or
            ``(None, None)`` on any failure.
        """
        try:
            logger.info(
                "开始自然语言控制生成: 文本='%s...', 指令='%s'", text[:20], instruction
            )
            response = requests.post(
                self._endpoint("generate/natural"),
                json={
                    "text": text,
                    "instruction": instruction,
                    "seed": seed,
                },
                timeout=self.GENERATE_TIMEOUT,
            )
            return self._handle_generation_response(
                response, 'natural', "自然语言控制生成成功", "自然语言控制生成失败"
            )
        except Exception as e:
            logger.error("自然语言控制生成失败: %s", e)
            return None, None

    def _save_audio_data(self, audio_data_hex: str, audio_type: str) -> Optional[str]:
        """Decode hex-encoded audio and write it to a new temporary ``.wav`` file.

        Args:
            audio_data_hex: Audio bytes encoded as a hexadecimal string.
            audio_type: Tag embedded in the temporary file name.

        Returns:
            The path of the written file, or ``None`` when decoding or
            writing fails. On failure no temporary file is left behind.
        """
        local_path = None
        try:
            # Decode first so that bad input never creates a file at all.
            audio_bytes = bytes.fromhex(audio_data_hex)

            temp_fd, local_path = tempfile.mkstemp(
                suffix='.wav', prefix=f'cosyvoice_{audio_type}_'
            )
            # Write through the descriptor mkstemp already opened instead of
            # closing it and reopening the path.
            with os.fdopen(temp_fd, 'wb') as f:
                f.write(audio_bytes)

            logger.info("音频保存成功: %s, 大小: %s 字节", local_path, len(audio_bytes))
            return local_path
        except Exception as e:
            logger.error("音频保存失败: %s", e)
            if local_path is not None:
                # Do not leave a partially written file behind.
                self._discard_files(local_path)
            return None

    @staticmethod
    def _discard_files(*paths: Optional[str]) -> None:
        """Delete each distinct non-empty path, logging (not raising) on failure."""
        for path in {p for p in paths if p}:
            try:
                os.remove(path)
            except OSError as e:
                logger.warning("临时文件删除失败: %s: %s", path, e)

    def generate_random_seed(self) -> int:
        """Return a seed from ``/random-seed``, falling back to a local one.

        Returns:
            The server's ``seed`` value (42 when the field is absent), or a
            locally drawn integer in ``[1, 999999]`` when the request fails.
        """
        # Imported locally, as the original code did, so the file-level
        # import block does not need to change.
        import random

        try:
            response = requests.get(
                self._endpoint("random-seed"), timeout=self.STATUS_TIMEOUT
            )
            if response.status_code == 200:
                return response.json().get('seed', 42)
            logger.error("生成随机种子失败: HTTP %s", response.status_code)
        except Exception as e:
            logger.error("生成随机种子失败: %s", e)
        return random.randint(1, 999999)

    def test_connection(self) -> Dict[str, Any]:
        """Run a connectivity check plus a small preset-voice generation probe.

        Returns:
            A status dict. ``success`` reflects connectivity only; the result
            of the generation probe is reported in ``generation_status`` and
            ``service_status``. The audio file produced by the probe is
            deleted before returning.
        """
        try:
            if not self.connect():
                return {
                    "success": False,
                    "message": "无法连接到CosyVoice API服务器",
                    "api_url": self.api_url,
                    "service_status": self._service_status,
                }

            voices = self.get_available_voices()

            # Probe speech generation with a preset voice.
            generation_status = "未测试"
            try:
                stream_audio, full_audio = self.generate_speech_with_preset_voice(
                    text="测试",
                    voice=voices[0] if voices else "中文女",
                    seed=42,
                )
                if stream_audio or full_audio:
                    generation_status = "正常"
                    self._service_status = "服务正常"
                    # The probe output is never returned to the caller, so
                    # remove it instead of leaking one file per call.
                    self._discard_files(stream_audio, full_audio)
                else:
                    generation_status = "生成失败"
                    self._service_status = "语音生成功能异常"
            except Exception as e:
                generation_status = f"测试失败: {str(e)}"
                self._service_status = f"语音生成测试失败: {str(e)}"

            return {
                "success": True,
                "message": "CosyVoice API服务连接成功",
                "api_url": self.api_url,
                "available_voices": voices,
                "reference_audios": self.get_reference_audios(),
                "generation_status": generation_status,
                "service_status": self._service_status,
                "core_features": {
                    "voice_cloning": True,
                    "speech_recognition": True,
                    "natural_control": True,
                },
            }
        except Exception as e:
            self._service_status = f"连接测试失败: {str(e)}"
            return {
                "success": False,
                "message": f"连接测试失败: {str(e)}",
                "api_url": self.api_url,
                "service_status": self._service_status,
            }
|
|
# Global service instance
|
|
# Shared instance created at import time with the default api_url
# (http://127.0.0.1:8081). The constructor only stores attributes, so no
# HTTP request is made here.
cosyvoice_service = CosyVoiceService()
|