"""Voice test routes.

Flask blueprint exposing the voice-test page plus JSON endpoints that proxy
to ``cosyvoice_service`` (connection test, voice listing, speech generation
with preset / natural-language / cloned voices, reference-audio upload and
temporary audio download).

All JSON endpoints report failures as ``{"success": False, "message": ...}``
with HTTP 200, except the download endpoint which uses 404 / 500.
"""
import json
import logging
import os
import subprocess
import tempfile
import uuid

import librosa
import soundfile as sf
from flask import Blueprint, request, jsonify, render_template, current_app, send_file
from flask_login import login_required, current_user
from werkzeug.utils import secure_filename

from app.services.cosyvoice_service import cosyvoice_service

logger = logging.getLogger(__name__)

voice_test_bp = Blueprint('voice_test', __name__)


def _convert_with_ffmpeg(input_path, output_path, target_sr):
    """Convert *input_path* to mono 16-bit WAV at *target_sr* using ffmpeg.

    Returns True when ffmpeg exits with status 0, False otherwise (including
    when ffmpeg cannot be launched at all).
    """
    try:
        cmd = [
            'ffmpeg', '-i', input_path,
            '-ar', str(target_sr),   # sample rate (honours the caller's target_sr)
            '-ac', '1',              # mono
            '-sample_fmt', 's16',    # 16-bit samples
            '-y', output_path,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            logger.info(f"使用ffmpeg转换成功: {input_path} -> {output_path}")
            return True
        logger.error(f"ffmpeg转换失败: {result.stderr}")
        return False
    except Exception as fe:
        logger.error(f"ffmpeg备用方案也失败: {str(fe)}")
        return False


def convert_audio_format(input_path, output_path, target_sr=16000):
    """Convert an audio file to a standard mono PCM-16 WAV file.

    The file is first decoded with librosa and written with soundfile; if
    that raises, ffmpeg is tried as a fallback.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path of the WAV file to write.
        target_sr: Target sample rate in Hz, used by both conversion paths.

    Returns:
        True if either conversion path succeeded, False otherwise.
    """
    try:
        # librosa resamples to target_sr and down-mixes to mono while loading.
        audio, _ = librosa.load(input_path, sr=target_sr, mono=True)
        sf.write(output_path, audio, target_sr, format='WAV', subtype='PCM_16')
        logger.info(f"音频格式转换成功: {input_path} -> {output_path}")
        return True
    except Exception as e:
        logger.error(f"音频格式转换失败: {str(e)}")

    # Fallback: let ffmpeg attempt the conversion.
    return _convert_with_ffmpeg(input_path, output_path, target_sr)


def _json_body():
    """Return the request's JSON object, or an empty dict if absent/invalid.

    ``request.get_json()`` may return None or raise on a non-JSON body; the
    handlers only need ``dict.get`` lookups, so a missing body is treated as
    an empty object and falls through to the normal validation messages.
    """
    data = request.get_json(silent=True)
    return data if isinstance(data, dict) else {}


def _generation_response(stream_audio, full_audio, ok_message, fail_message):
    """Build the JSON response shared by the speech-generation endpoints."""
    if full_audio:
        return jsonify({
            "success": True,
            "message": ok_message,
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })
    return jsonify({
        "success": False,
        "message": fail_message
    })


def _resolve_temp_path(filename):
    """Resolve *filename* inside the system temp dir.

    Returns the absolute path, or None when the resolved path would fall
    outside the temp directory (e.g. ``..`` components or absolute paths).
    """
    temp_dir = os.path.normcase(os.path.realpath(tempfile.gettempdir()))
    candidate = os.path.normcase(os.path.realpath(os.path.join(temp_dir, filename)))
    try:
        inside = os.path.commonpath([temp_dir, candidate]) == temp_dir
    except ValueError:
        # Raised e.g. for paths on different drives: definitely not inside.
        return None
    return candidate if inside else None


@voice_test_bp.route('/voice-test')
@login_required
def voice_test_page():
    """Render the voice test page."""
    return render_template('voice_test/index.html')


@voice_test_bp.route('/api/voice-test/connection', methods=['POST'])
@login_required
def test_connection():
    """Test the connection to the CosyVoice service and return its result."""
    try:
        result = cosyvoice_service.test_connection()
        return jsonify(result)
    except Exception as e:
        logger.error(f"连接测试失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"测试失败: {str(e)}"
        })


@voice_test_bp.route('/api/voice-test/voices', methods=['GET'])
@login_required
def get_voices():
    """Return the list of available voices."""
    try:
        voices = cosyvoice_service.get_available_voices()
        return jsonify({
            "success": True,
            "voices": voices
        })
    except Exception as e:
        logger.error(f"获取音色列表失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"获取失败: {str(e)}"
        })


@voice_test_bp.route('/api/voice-test/generate/preset', methods=['POST'])
@login_required
def generate_with_preset_voice():
    """Generate speech with a preset voice.

    Reads ``text``, ``voice``, ``seed`` and ``speed`` from the JSON body.
    """
    try:
        data = _json_body()
        text = data.get('text', '')
        voice = data.get('voice', '中文女')
        seed = data.get('seed', 42)
        speed = data.get('speed', 1.0)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_preset_voice(
            text=text,
            voice=voice,
            seed=seed,
            speed=speed
        )
        return _generation_response(stream_audio, full_audio, "语音生成成功", "语音生成失败")
    except Exception as e:
        logger.error(f"预训练音色生成失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"生成失败: {str(e)}"
        })


@voice_test_bp.route('/api/voice-test/generate/natural', methods=['POST'])
@login_required
def generate_with_natural_control():
    """Generate speech steered by a natural-language instruction.

    Reads ``text``, ``instruction`` and ``seed`` from the JSON body.
    """
    try:
        data = _json_body()
        text = data.get('text', '')
        instruction = data.get('instruction', '请用温柔甜美的女声朗读')
        seed = data.get('seed', 42)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_natural_control(
            text=text,
            instruction=instruction,
            seed=seed
        )
        return _generation_response(stream_audio, full_audio, "语音生成成功", "语音生成失败")
    except Exception as e:
        logger.error(f"自然语言控制生成失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"生成失败: {str(e)}"
        })


@voice_test_bp.route('/api/voice-test/upload-audio', methods=['POST'])
@login_required
def upload_audio():
    """Upload a reference audio file for voice cloning, converting its format.

    The uploaded file (form field ``audio``) is saved to the temp directory,
    converted to a standard WAV via ``convert_audio_format`` and passed to
    ``cosyvoice_service.recognize_audio``. If conversion fails, the original
    file is used as-is.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({
                "success": False,
                "message": "请选择音频文件"
            })

        file = request.files['audio']
        if file.filename == '':
            return jsonify({
                "success": False,
                "message": "请选择音频文件"
            })

        unique_id = str(uuid.uuid4())[:8]
        # secure_filename() can return '' (e.g. for fully non-ASCII names),
        # so the fallback must apply to its result, not to the raw name.
        original_filename = secure_filename(file.filename) or f"recording_{unique_id}.wav"

        # Save the raw upload.
        temp_dir = tempfile.gettempdir()
        original_path = os.path.join(temp_dir, f"original_{unique_id}_{original_filename}")
        file.save(original_path)
        logger.info(f"原始音频文件保存: {original_path}, 大小: {os.path.getsize(original_path)} 字节")

        # Target path for the normalized WAV.
        converted_filename = f"voice_clone_{current_user.id}_{unique_id}.wav"
        converted_path = os.path.join(temp_dir, converted_filename)

        if not convert_audio_format(original_path, converted_path):
            # Conversion failed: fall back to the original upload.
            logger.warning("音频格式转换失败,尝试直接使用原始文件")
            try:
                recognized_text = cosyvoice_service.recognize_audio(original_path)
                return jsonify({
                    "success": True,
                    "message": "音频上传成功(使用原始格式)",
                    "file_path": original_path,
                    "recognized_text": recognized_text,
                    "file_info": {
                        "size": os.path.getsize(original_path),
                        "format": "原始格式"
                    }
                })
            except Exception as e:
                return jsonify({
                    "success": False,
                    "message": f"音频处理失败: {str(e)}"
                })

        # Conversion succeeded: the raw upload is no longer needed. Removal
        # is best-effort, but a failure is logged instead of silently dropped.
        try:
            os.remove(original_path)
        except OSError as e:
            logger.warning(f"删除原始音频文件失败: {original_path}: {str(e)}")

        # Verify that the converted file exists and is non-empty.
        if not (os.path.exists(converted_path) and os.path.getsize(converted_path) > 0):
            return jsonify({
                "success": False,
                "message": "音频文件转换后验证失败"
            })

        logger.info(f"音频文件转换并验证成功: {converted_path}")

        # Try to recognize the spoken content of the reference audio.
        recognized_text = cosyvoice_service.recognize_audio(converted_path)

        return jsonify({
            "success": True,
            "message": "音频上传和转换成功",
            "file_path": converted_path,
            "recognized_text": recognized_text,
            "file_info": {
                "size": os.path.getsize(converted_path),
                "format": "WAV 16kHz Mono"
            }
        })
    except Exception as e:
        logger.error(f"音频上传失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"上传失败: {str(e)}"
        })


@voice_test_bp.route('/api/voice-test/generate/clone', methods=['POST'])
@login_required
def generate_with_voice_cloning():
    """Generate speech by cloning the voice of a reference audio file.

    Reads ``text``, ``reference_audio_path``, ``reference_text`` and ``seed``
    from the JSON body.
    """
    try:
        data = _json_body()
        text = data.get('text', '')
        reference_audio_path = data.get('reference_audio_path', '')
        reference_text = data.get('reference_text', '')
        seed = data.get('seed', 42)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        # NOTE(review): reference_audio_path is a client-supplied server path
        # and is passed to the service unrestricted. Consider limiting it to
        # files produced by the upload endpoint (temp directory) — confirm
        # that no caller relies on other locations before tightening.
        if not reference_audio_path or not os.path.exists(reference_audio_path):
            return jsonify({
                "success": False,
                "message": "请先上传参考音频"
            })

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_voice_cloning(
            text=text,
            reference_audio_path=reference_audio_path,
            reference_text=reference_text,
            seed=seed
        )
        return _generation_response(stream_audio, full_audio, "语音克隆成功", "语音克隆失败")
    except Exception as e:
        logger.error(f"语音克隆失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"克隆失败: {str(e)}"
        })


@voice_test_bp.route('/api/voice-test/random-seed', methods=['GET'])
@login_required
def get_random_seed():
    """Return a random seed produced by the CosyVoice service."""
    try:
        seed = cosyvoice_service.generate_random_seed()
        return jsonify({
            "success": True,
            "seed": seed
        })
    except Exception as e:
        logger.error(f"获取随机种子失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"获取失败: {str(e)}"
        })


# The URL rule must declare the ``filename`` variable the view requires;
# without it Flask calls the view with no arguments and every request fails.
@voice_test_bp.route('/download-audio/<path:filename>', methods=['GET'])
@login_required
def download_temp_audio(filename):
    """Serve a temporary audio file from the system temp directory.

    Args:
        filename: Name of the file, relative to the temp directory.

    Returns:
        The file as ``audio/wav``; a 404 JSON response if it does not exist
        or resolves outside the temp directory; a 500 JSON response on error.
    """
    try:
        file_path = _resolve_temp_path(filename)
        # Only regular files inside the temp directory may be served.
        if file_path is not None and os.path.isfile(file_path):
            return send_file(file_path, as_attachment=False, mimetype='audio/wav')
        return jsonify({"success": False, "message": "音频文件不存在"}), 404
    except Exception as e:
        logger.error(f"音频下载失败: {str(e)}")
        return jsonify({"success": False, "message": "下载失败"}), 500