345 lines
12 KiB
Python
345 lines
12 KiB
Python
"""
|
||
语音测试相关路由
|
||
"""
|
||
import os
|
||
import json
|
||
import tempfile
|
||
import subprocess
|
||
import uuid
|
||
import librosa
|
||
import soundfile as sf
|
||
from flask import Blueprint, request, jsonify, render_template, current_app, send_file
|
||
from flask_login import login_required, current_user
|
||
from app.services.cosyvoice_service import cosyvoice_service
|
||
from werkzeug.utils import secure_filename
|
||
import logging
|
||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
# Blueprint (named 'voice_test') on which the route handlers in this module are registered.
voice_test_bp = Blueprint('voice_test', __name__)
|
||
def convert_audio_format(input_path, output_path, target_sr=16000):
    """Convert an audio file to a mono, 16-bit PCM WAV file.

    librosa/soundfile is tried first. If that raises for any reason, the
    ``ffmpeg`` command-line tool is used as a fallback.

    Args:
        input_path: Path of the source audio file.
        output_path: Path the WAV file is written to (overwritten if present).
        target_sr: Sample rate of the output in Hz. Used by both the librosa
            path and the ffmpeg fallback.

    Returns:
        True if either conversion path succeeded, False otherwise. Errors are
        logged, never raised.
    """
    try:
        # Load with the requested sample rate and mono=True, then write PCM_16 WAV.
        audio, _ = librosa.load(input_path, sr=target_sr, mono=True)
        sf.write(output_path, audio, target_sr, format='WAV', subtype='PCM_16')
        logger.info(f"音频格式转换成功: {input_path} -> {output_path}")
        return True
    except Exception as e:
        logger.error(f"音频格式转换失败: {str(e)}")

    # Fallback: ffmpeg. The sample rate follows target_sr instead of a
    # hard-coded 16000 so both conversion paths produce the same format.
    cmd = [
        'ffmpeg', '-i', input_path,
        '-ar', str(target_sr),   # sample rate
        '-ac', '1',              # mono
        '-sample_fmt', 's16',    # 16-bit samples
        '-y', output_path,       # overwrite the output without prompting
    ]
    try:
        # errors='replace' keeps undecodable bytes in ffmpeg's output from
        # turning a successful conversion into a UnicodeDecodeError.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors='replace'
        )
    except Exception as fe:
        # Typically ffmpeg is not installed / not on PATH.
        logger.error(f"ffmpeg备用方案也失败: {str(fe)}")
        return False

    if result.returncode == 0:
        logger.info(f"使用ffmpeg转换成功: {input_path} -> {output_path}")
        return True

    logger.error(f"ffmpeg转换失败: {result.stderr}")
    return False
|
||
@voice_test_bp.route('/voice-test')
@login_required
def voice_test_page():
    """Render the voice-test page template (login required)."""
    page = render_template('voice_test/index.html')
    return page
|
||
@voice_test_bp.route('/api/voice-test/connection', methods=['POST'])
@login_required
def test_connection():
    """Check connectivity to the CosyVoice service.

    Returns:
        The service's own result serialized as JSON. If the check or the
        serialization raises, a ``success: False`` JSON payload carrying the
        error text is returned instead.
    """
    try:
        return jsonify(cosyvoice_service.test_connection())
    except Exception as exc:
        logger.error(f"连接测试失败: {str(exc)}")
        failure = {
            "success": False,
            "message": f"测试失败: {str(exc)}",
        }
        return jsonify(failure)
|
||
@voice_test_bp.route('/api/voice-test/voices', methods=['GET'])
@login_required
def get_voices():
    """Return the list of voices reported by the CosyVoice service.

    Returns:
        JSON ``{"success": True, "voices": ...}`` on success; on any exception
        a ``success: False`` JSON payload carrying the error text.
    """
    try:
        payload = {
            "success": True,
            "voices": cosyvoice_service.get_available_voices(),
        }
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"获取音色列表失败: {str(exc)}")
        failure = {
            "success": False,
            "message": f"获取失败: {str(exc)}",
        }
        return jsonify(failure)
|
||
@voice_test_bp.route('/api/voice-test/generate/preset', methods=['POST'])
@login_required
def generate_with_preset_voice():
    """Synthesize speech using one of the preset voices.

    Reads ``text``, ``voice``, ``seed`` and ``speed`` from the JSON request
    body (defaults: ``''``, ``'中文女'``, ``42``, ``1.0``) and forwards them to
    the CosyVoice service.

    Returns:
        JSON with ``success`` and ``message``; on success also ``audio_url``
        and ``stream_audio_url`` as returned by the service. A missing,
        malformed or non-object JSON body is treated as an empty request and
        yields the "text required" message.
    """
    try:
        # silent=True returns None instead of raising on a missing/invalid
        # JSON body; anything that is not a JSON object is treated as empty so
        # the client gets the normal validation message, not an internal error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        voice = data.get('voice', '中文女')
        seed = data.get('seed', 42)
        speed = data.get('speed', 1.0)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_preset_voice(
            text=text,
            voice=voice,
            seed=seed,
            speed=speed
        )

        # A falsy full_audio is how the service signals that nothing was produced.
        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音生成失败"
            })

        return jsonify({
            "success": True,
            "message": "语音生成成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })

    except Exception as e:
        logger.error(f"预训练音色生成失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"生成失败: {str(e)}"
        })
|
||
@voice_test_bp.route('/api/voice-test/generate/natural', methods=['POST'])
@login_required
def generate_with_natural_control():
    """Synthesize speech steered by a natural-language instruction.

    Reads ``text``, ``instruction`` and ``seed`` from the JSON request body
    (defaults: ``''``, ``'请用温柔甜美的女声朗读'``, ``42``) and forwards them to
    the CosyVoice service.

    Returns:
        JSON with ``success`` and ``message``; on success also ``audio_url``
        and ``stream_audio_url`` as returned by the service. A missing,
        malformed or non-object JSON body is treated as an empty request and
        yields the "text required" message.
    """
    try:
        # silent=True returns None instead of raising on a missing/invalid
        # JSON body; anything that is not a JSON object is treated as empty so
        # the client gets the normal validation message, not an internal error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        instruction = data.get('instruction', '请用温柔甜美的女声朗读')
        seed = data.get('seed', 42)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_natural_control(
            text=text,
            instruction=instruction,
            seed=seed
        )

        # A falsy full_audio is how the service signals that nothing was produced.
        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音生成失败"
            })

        return jsonify({
            "success": True,
            "message": "语音生成成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })

    except Exception as e:
        logger.error(f"自然语言控制生成失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"生成失败: {str(e)}"
        })
|
||
def _discard_temp_upload(path):
    """Best-effort removal of a temporary upload; failures are logged, not raised."""
    try:
        os.remove(path)
    except OSError as exc:
        logger.warning(f"临时文件删除失败: {path}: {str(exc)}")


@voice_test_bp.route('/api/voice-test/upload-audio', methods=['POST'])
@login_required
def upload_audio():
    """Accept an uploaded reference audio file for voice cloning.

    The file from the ``audio`` form field is saved to the system temp
    directory and converted to WAV with ``convert_audio_format``. The
    resulting file is then passed to the service's ``recognize_audio``. If
    conversion fails, the original upload is used as-is.

    Returns:
        JSON with ``success`` and ``message``; on success also ``file_path``
        (server-side path of the stored audio), ``recognized_text`` and
        ``file_info`` (``size`` in bytes and a ``format`` label).
    """
    try:
        upload = request.files.get('audio')
        if upload is None or upload.filename == '':
            return jsonify({
                "success": False,
                "message": "请选择音频文件"
            })

        unique_id = str(uuid.uuid4())[:8]
        # secure_filename() can return '' (e.g. a name made only of characters
        # it strips), so fall back to a generated name in that case as well as
        # when no filename was sent.
        original_filename = (
            secure_filename(upload.filename or '')
            or f"recording_{unique_id}.wav"
        )

        temp_dir = tempfile.gettempdir()
        original_path = os.path.join(temp_dir, f"original_{unique_id}_{original_filename}")
        upload.save(original_path)

        logger.info(f"原始音频文件保存: {original_path}, 大小: {os.path.getsize(original_path)} 字节")

        converted_filename = f"voice_clone_{current_user.id}_{unique_id}.wav"
        converted_path = os.path.join(temp_dir, converted_filename)

        if not convert_audio_format(original_path, converted_path):
            # Conversion failed: try to use the upload in its original format.
            logger.warning("音频格式转换失败,尝试直接使用原始文件")

            try:
                recognized_text = cosyvoice_service.recognize_audio(original_path)
                return jsonify({
                    "success": True,
                    "message": "音频上传成功(使用原始格式)",
                    "file_path": original_path,
                    "recognized_text": recognized_text,
                    "file_info": {
                        "size": os.path.getsize(original_path),
                        "format": "原始格式"
                    }
                })
            except Exception as e:
                logger.error(f"原始音频处理失败: {str(e)}")
                # The path is never handed to the client on this branch, so
                # the file would otherwise be orphaned in the temp directory.
                _discard_temp_upload(original_path)
                return jsonify({
                    "success": False,
                    "message": f"音频处理失败: {str(e)}"
                })

        # Conversion succeeded: the original upload is no longer needed.
        _discard_temp_upload(original_path)

        # Verify the converted file before using it.
        if not (os.path.exists(converted_path) and os.path.getsize(converted_path) > 0):
            if os.path.exists(converted_path):
                # Empty output is useless; do not leave it behind.
                _discard_temp_upload(converted_path)
            return jsonify({
                "success": False,
                "message": "音频文件转换后验证失败"
            })

        logger.info(f"音频文件转换并验证成功: {converted_path}")

        # Run recognition on the converted audio.
        recognized_text = cosyvoice_service.recognize_audio(converted_path)

        return jsonify({
            "success": True,
            "message": "音频上传和转换成功",
            "file_path": converted_path,
            "recognized_text": recognized_text,
            "file_info": {
                "size": os.path.getsize(converted_path),
                "format": "WAV 16kHz Mono"
            }
        })

    except Exception as e:
        logger.error(f"音频上传失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"上传失败: {str(e)}"
        })
|
||
def _resolve_reference_audio(path):
    """Return the real path of a reference audio file in the temp dir, or None.

    The path comes from the client, so only regular files that resolve (after
    following symlinks) to a location inside the system temp directory - where
    the upload route in this module stores audio - are accepted.
    """
    if not isinstance(path, str) or not path:
        return None
    try:
        temp_root = os.path.realpath(tempfile.gettempdir())
        resolved = os.path.realpath(path)
        inside_temp = os.path.commonpath([temp_root, resolved]) == temp_root
    except ValueError:
        # Raised e.g. for paths on different drives or with embedded NULs.
        return None
    if inside_temp and os.path.isfile(resolved):
        return resolved
    return None


@voice_test_bp.route('/api/voice-test/generate/clone', methods=['POST'])
@login_required
def generate_with_voice_cloning():
    """Synthesize speech that imitates an uploaded reference recording.

    Reads ``text``, ``reference_audio_path``, ``reference_text`` and ``seed``
    (default ``42``) from the JSON request body and forwards them to the
    CosyVoice service.

    Returns:
        JSON with ``success`` and ``message``; on success also ``audio_url``
        and ``stream_audio_url`` as returned by the service. A reference path
        that is missing, not a regular file, or outside the system temp
        directory yields the "upload reference audio first" message.
    """
    try:
        # silent=True returns None instead of raising on a missing/invalid
        # JSON body; anything that is not a JSON object is treated as empty so
        # the client gets the normal validation message, not an internal error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        reference_audio_path = data.get('reference_audio_path', '')
        reference_text = data.get('reference_text', '')
        seed = data.get('seed', 42)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        # Never hand an arbitrary client-chosen filesystem path to the service.
        safe_reference_path = _resolve_reference_audio(reference_audio_path)
        if safe_reference_path is None:
            return jsonify({
                "success": False,
                "message": "请先上传参考音频"
            })

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_voice_cloning(
            text=text,
            reference_audio_path=safe_reference_path,
            reference_text=reference_text,
            seed=seed
        )

        # A falsy full_audio is how the service signals that nothing was produced.
        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音克隆失败"
            })

        return jsonify({
            "success": True,
            "message": "语音克隆成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })

    except Exception as e:
        logger.error(f"语音克隆失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"克隆失败: {str(e)}"
        })
|
||
@voice_test_bp.route('/api/voice-test/random-seed', methods=['GET'])
@login_required
def get_random_seed():
    """Return a random seed obtained from the CosyVoice service.

    Returns:
        JSON ``{"success": True, "seed": ...}`` on success; on any exception
        a ``success: False`` JSON payload carrying the error text.
    """
    try:
        payload = {
            "success": True,
            "seed": cosyvoice_service.generate_random_seed(),
        }
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"获取随机种子失败: {str(exc)}")
        failure = {
            "success": False,
            "message": f"获取失败: {str(exc)}",
        }
        return jsonify(failure)
|
||
@voice_test_bp.route('/download-audio/<filename>', methods=['GET'])
@login_required
def download_temp_audio(filename):
    """Serve an audio file from the system temp directory.

    Args:
        filename: Name of the file inside the temp directory, taken from the URL.

    Returns:
        The file sent inline with mimetype ``audio/wav``; a 404 JSON response
        if the name does not resolve to a regular file inside the temp
        directory; a 500 JSON response on unexpected errors.
    """
    try:
        temp_dir = os.path.realpath(tempfile.gettempdir())

        # `filename` is client-controlled. os.path.join() discards temp_dir
        # when the second part is absolute, and symlinks or backslash
        # separators can point elsewhere, so resolve the path and require it
        # to stay inside temp_dir.
        file_path = None
        try:
            file_path = os.path.realpath(os.path.join(temp_dir, filename))
            contained = os.path.commonpath([temp_dir, file_path]) == temp_dir
        except ValueError:
            # Raised e.g. for paths on different drives or with embedded NULs.
            contained = False

        # isfile() (not exists()) so a directory name yields 404 instead of
        # an error from send_file.
        if contained and os.path.isfile(file_path):
            return send_file(file_path, as_attachment=False, mimetype='audio/wav')

        return jsonify({"success": False, "message": "音频文件不存在"}), 404

    except Exception as e:
        logger.error(f"音频下载失败: {str(e)}")
        return jsonify({"success": False, "message": "下载失败"}), 500
|