Happy_language/app/routes/voice_test.py
2025-09-15 02:45:50 +08:00

345 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
语音测试相关路由
"""
import os
import json
import tempfile
import subprocess
import uuid
import librosa
import soundfile as sf
from flask import Blueprint, request, jsonify, render_template, current_app, send_file
from flask_login import login_required, current_user
from app.services.cosyvoice_service import cosyvoice_service
from werkzeug.utils import secure_filename
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Blueprint on which the voice-test routes below are registered.
voice_test_bp = Blueprint('voice_test', __name__)
def convert_audio_format(input_path, output_path, target_sr=16000):
    """Convert an audio file to a mono, 16-bit PCM WAV file.

    The conversion is first attempted with librosa + soundfile. If that
    raises for any reason, the ``ffmpeg`` command-line tool is tried as a
    fallback. Exceptions from either path are caught and logged.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the converted WAV file is written to.
        target_sr: Sample rate (Hz) of the output; used by both the
            librosa path and the ffmpeg fallback.

    Returns:
        True if one of the two conversion paths succeeded, False otherwise.
    """
    try:
        # Load through librosa, requesting the target sample rate and mono.
        audio, _ = librosa.load(input_path, sr=target_sr, mono=True)
        # Write the samples out as 16-bit PCM WAV.
        sf.write(output_path, audio, target_sr, format='WAV', subtype='PCM_16')
        logger.info(f"音频格式转换成功: {input_path} -> {output_path}")
        return True
    except Exception as e:
        logger.error(f"音频格式转换失败: {str(e)}")

    # Fallback: let ffmpeg do the conversion. The sample rate must follow
    # target_sr so both paths produce the same output format.
    try:
        cmd = [
            'ffmpeg', '-i', input_path,
            '-ar', str(target_sr),   # sample rate
            '-ac', '1',              # mono
            '-sample_fmt', 's16',    # 16-bit samples
            '-y', output_path,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as fe:
        # Covers e.g. ffmpeg not being installed.
        logger.error(f"ffmpeg备用方案也失败: {str(fe)}")
        return False

    if result.returncode == 0:
        logger.info(f"使用ffmpeg转换成功: {input_path} -> {output_path}")
        return True

    logger.error(f"ffmpeg转换失败: {result.stderr}")
    return False
@voice_test_bp.route('/voice-test')
@login_required
def voice_test_page():
    """Render the voice test page (login required)."""
    template_name = 'voice_test/index.html'
    return render_template(template_name)
@voice_test_bp.route('/api/voice-test/connection', methods=['POST'])
@login_required
def test_connection():
    """Return the CosyVoice service's connection-test result as JSON.

    Whatever ``cosyvoice_service.test_connection()`` returns is serialized
    as the response. If anything raises, the error is logged and a JSON
    body with ``success: False`` and the error message is returned instead.
    """
    try:
        connection_status = cosyvoice_service.test_connection()
        response = jsonify(connection_status)
    except Exception as exc:
        logger.error(f"连接测试失败: {str(exc)}")
        failure = {
            "success": False,
            "message": f"测试失败: {str(exc)}",
        }
        response = jsonify(failure)
    return response
@voice_test_bp.route('/api/voice-test/voices', methods=['GET'])
@login_required
def get_voices():
    """Return the voices reported by the CosyVoice service as JSON.

    On success the body is ``{"success": True, "voices": ...}``. If
    anything raises, the error is logged and a body with
    ``success: False`` and the error message is returned.
    """
    try:
        available = cosyvoice_service.get_available_voices()
        payload = {"success": True, "voices": available}
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"获取音色列表失败: {str(exc)}")
        payload = {"success": False, "message": f"获取失败: {str(exc)}"}
        return jsonify(payload)
@voice_test_bp.route('/api/voice-test/generate/preset', methods=['POST'])
@login_required
def generate_with_preset_voice():
    """Synthesize speech with one of the preset voices.

    Reads a JSON body with the fields ``text`` (required), ``voice``
    (default ``'中文女'``), ``seed`` (default 42) and ``speed``
    (default 1.0), and forwards them to
    ``cosyvoice_service.generate_speech_with_preset_voice``.

    Returns:
        A JSON response. On success it carries ``audio_url`` and
        ``stream_audio_url``; on any failure it carries ``success: False``
        and a message. No explicit HTTP status code is set.
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead
        # of raising, so it is reported as "text required" rather than as a
        # leaked internal error message.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        voice = data.get('voice', '中文女')
        seed = data.get('seed', 42)
        speed = data.get('speed', 1.0)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        # The service returns a (streamed audio, full audio) pair.
        stream_audio, full_audio = cosyvoice_service.generate_speech_with_preset_voice(
            text=text,
            voice=voice,
            seed=seed,
            speed=speed
        )

        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音生成失败"
            })

        return jsonify({
            "success": True,
            "message": "语音生成成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })
    except Exception as e:
        logger.error(f"预训练音色生成失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"生成失败: {str(e)}"
        })
@voice_test_bp.route('/api/voice-test/generate/natural', methods=['POST'])
@login_required
def generate_with_natural_control():
    """Synthesize speech steered by a natural-language instruction.

    Reads a JSON body with the fields ``text`` (required), ``instruction``
    (default ``'请用温柔甜美的女声朗读'``) and ``seed`` (default 42), and
    forwards them to
    ``cosyvoice_service.generate_speech_with_natural_control``.

    Returns:
        A JSON response. On success it carries ``audio_url`` and
        ``stream_audio_url``; on any failure it carries ``success: False``
        and a message. No explicit HTTP status code is set.
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead
        # of raising, so it is reported as "text required" rather than as a
        # leaked internal error message.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        instruction = data.get('instruction', '请用温柔甜美的女声朗读')
        seed = data.get('seed', 42)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        # The service returns a (streamed audio, full audio) pair.
        stream_audio, full_audio = cosyvoice_service.generate_speech_with_natural_control(
            text=text,
            instruction=instruction,
            seed=seed
        )

        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音生成失败"
            })

        return jsonify({
            "success": True,
            "message": "语音生成成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })
    except Exception as e:
        logger.error(f"自然语言控制生成失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"生成失败: {str(e)}"
        })
@voice_test_bp.route('/api/voice-test/upload-audio', methods=['POST'])
@login_required
def upload_audio():
    """Receive a reference audio upload for voice cloning and normalize it.

    The uploaded file (form field ``audio``) is saved into the system temp
    directory, converted with ``convert_audio_format`` and passed to
    ``cosyvoice_service.recognize_audio``. If conversion fails, the
    original file is used as-is.

    Returns:
        A JSON response. On success it carries ``file_path`` (the stored
        file), ``recognized_text`` and ``file_info``; on failure it carries
        ``success: False`` and a message. No explicit HTTP status code is
        set.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({
                "success": False,
                "message": "请选择音频文件"
            })

        file = request.files['audio']
        if file.filename == '':
            return jsonify({
                "success": False,
                "message": "请选择音频文件"
            })

        unique_id = str(uuid.uuid4())[:8]
        # secure_filename() can reduce a name to '' (e.g. when no allowed
        # characters remain), so fall back to a generated name in that case
        # as well as when no filename was sent.
        original_filename = (
            secure_filename(file.filename or '')
            or f"recording_{unique_id}.wav"
        )

        # Store the raw upload.
        temp_dir = tempfile.gettempdir()
        original_path = os.path.join(temp_dir, f"original_{unique_id}_{original_filename}")
        file.save(original_path)
        logger.info(f"原始音频文件保存: {original_path}, 大小: {os.path.getsize(original_path)} 字节")

        # Target of the format conversion.
        converted_filename = f"voice_clone_{current_user.id}_{unique_id}.wav"
        converted_path = os.path.join(temp_dir, converted_filename)

        if not convert_audio_format(original_path, converted_path):
            # Conversion failed: try to work with the upload unchanged.
            logger.warning("音频格式转换失败,尝试直接使用原始文件")
            try:
                recognized_text = cosyvoice_service.recognize_audio(original_path)
                return jsonify({
                    "success": True,
                    "message": "音频上传成功(使用原始格式)",
                    "file_path": original_path,
                    "recognized_text": recognized_text,
                    "file_info": {
                        "size": os.path.getsize(original_path),
                        "format": "原始格式"
                    }
                })
            except Exception as e:
                logger.error(f"音频处理失败: {str(e)}")
                return jsonify({
                    "success": False,
                    "message": f"音频处理失败: {str(e)}"
                })

        # Conversion succeeded: the raw upload is no longer needed. Cleanup
        # is best-effort, but only filesystem errors are tolerated.
        try:
            os.remove(original_path)
        except OSError as cleanup_error:
            logger.warning(f"删除原始音频文件失败: {original_path}: {cleanup_error}")

        # Make sure the converter actually produced a non-empty file.
        if not (os.path.exists(converted_path) and os.path.getsize(converted_path) > 0):
            return jsonify({
                "success": False,
                "message": "音频文件转换后验证失败"
            })

        logger.info(f"音频文件转换并验证成功: {converted_path}")
        recognized_text = cosyvoice_service.recognize_audio(converted_path)
        return jsonify({
            "success": True,
            "message": "音频上传和转换成功",
            "file_path": converted_path,
            "recognized_text": recognized_text,
            "file_info": {
                "size": os.path.getsize(converted_path),
                "format": "WAV 16kHz Mono"
            }
        })
    except Exception as e:
        logger.error(f"音频上传失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"上传失败: {str(e)}"
        })
@voice_test_bp.route('/api/voice-test/generate/clone', methods=['POST'])
@login_required
def generate_with_voice_cloning():
    """Synthesize speech that imitates a previously uploaded reference audio.

    Reads a JSON body with the fields ``text`` (required),
    ``reference_audio_path`` (required), ``reference_text`` and ``seed``
    (default 42), and forwards them to
    ``cosyvoice_service.generate_speech_with_voice_cloning``.

    ``reference_audio_path`` comes from the client, so it is only accepted
    when it resolves to a regular file inside the system temp directory,
    which is where ``upload_audio`` stores its files.

    Returns:
        A JSON response. On success it carries ``audio_url`` and
        ``stream_audio_url``; on any failure it carries ``success: False``
        and a message. No explicit HTTP status code is set.
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead
        # of raising, so it is reported as "text required" rather than as a
        # leaked internal error message.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        reference_audio_path = data.get('reference_audio_path', '')
        reference_text = data.get('reference_text', '')
        seed = data.get('seed', 42)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        # Resolve symlinks and '..' before checking containment, so the
        # client cannot point the service at arbitrary server files.
        temp_root = os.path.realpath(tempfile.gettempdir())
        resolved_reference = ''
        reference_allowed = False
        if reference_audio_path and isinstance(reference_audio_path, str):
            resolved_reference = os.path.realpath(reference_audio_path)
            try:
                inside_temp = os.path.commonpath([temp_root, resolved_reference]) == temp_root
            except ValueError:
                # Raised e.g. for paths on different drives.
                inside_temp = False
            reference_allowed = inside_temp and os.path.isfile(resolved_reference)

        if not reference_allowed:
            return jsonify({
                "success": False,
                "message": "请先上传参考音频"
            })

        # The service returns a (streamed audio, full audio) pair.
        stream_audio, full_audio = cosyvoice_service.generate_speech_with_voice_cloning(
            text=text,
            reference_audio_path=resolved_reference,
            reference_text=reference_text,
            seed=seed
        )

        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音克隆失败"
            })

        return jsonify({
            "success": True,
            "message": "语音克隆成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })
    except Exception as e:
        logger.error(f"语音克隆失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"克隆失败: {str(e)}"
        })
@voice_test_bp.route('/api/voice-test/random-seed', methods=['GET'])
@login_required
def get_random_seed():
    """Return a seed produced by the CosyVoice service as JSON.

    On success the body is ``{"success": True, "seed": ...}``. If anything
    raises, the error is logged and a body with ``success: False`` and the
    error message is returned.
    """
    try:
        new_seed = cosyvoice_service.generate_random_seed()
        payload = {"success": True, "seed": new_seed}
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"获取随机种子失败: {str(exc)}")
        payload = {"success": False, "message": f"获取失败: {str(exc)}"}
        return jsonify(payload)
@voice_test_bp.route('/download-audio/<filename>', methods=['GET'])
@login_required
def download_temp_audio(filename):
    """Serve an audio file from the system temp directory.

    Args:
        filename: Name of the file inside the temp directory, taken from
            the URL.

    Returns:
        The file sent inline with mimetype ``audio/wav``; a 404 JSON
        response if the name does not resolve to a regular file inside the
        temp directory; a 500 JSON response if sending fails.
    """
    try:
        # Resolve symlinks and '..' first, then require the result to stay
        # inside the temp directory, so the URL cannot reach other files.
        temp_root = os.path.realpath(tempfile.gettempdir())
        file_path = os.path.realpath(os.path.join(temp_root, filename))
        try:
            inside_temp = os.path.commonpath([temp_root, file_path]) == temp_root
        except ValueError:
            # Raised e.g. for paths on different drives.
            inside_temp = False

        # isfile() also rejects directories, which send_file cannot serve.
        if inside_temp and os.path.isfile(file_path):
            return send_file(file_path, as_attachment=False, mimetype='audio/wav')

        return jsonify({"success": False, "message": "音频文件不存在"}), 404
    except Exception as e:
        logger.error(f"音频下载失败: {str(e)}")
        return jsonify({"success": False, "message": "下载失败"}), 500