logger = logging.getLogger(__name__)

# NOTE(review): app/__init__.py registers this blueprint with
# url_prefix='/voice-test', so the rules below resolve to
# /voice-test/voice-test and /voice-test/api/voice-test/... -- confirm that
# the doubled prefix is what the front-end actually requests.
voice_test_bp = Blueprint('voice_test', __name__)

# File extensions accepted by the reference-audio upload endpoint.
_ALLOWED_AUDIO_EXTENSIONS = frozenset({'wav', 'mp3', 'm4a', 'flac'})
# Uploaded reference clips are stored as <tmp>/voice_clone_<user id>_<name>.
_UPLOAD_PREFIX = 'voice_clone_'


def _fail(message):
    """Build the JSON failure envelope shared by every endpoint here."""
    return jsonify({
        "success": False,
        "message": message
    })


def _json_body():
    """Return the request's JSON object, or ``{}`` when absent or invalid.

    ``request.get_json()`` yields ``None`` (or aborts) for a missing or
    malformed body and may yield a list or scalar; the handlers only ever
    call ``.get`` on the result, so anything that is not a dict is treated
    as an empty payload.
    """
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


def _audio_result(stream_audio, full_audio, ok_message, fail_message):
    """Turn a ``(stream_audio, full_audio)`` pair into the JSON response.

    A falsy ``full_audio`` is reported as a failure with ``fail_message``.
    """
    if not full_audio:
        return _fail(fail_message)
    return jsonify({
        "success": True,
        "message": ok_message,
        "audio_url": full_audio,
        "stream_audio_url": stream_audio
    })


def _user_upload_prefix():
    """Return the filename prefix that marks the current user's uploads."""
    return f"{_UPLOAD_PREFIX}{current_user.id}_"


def _resolve_own_upload(path):
    """Return the real path of ``path`` if the current user uploaded it.

    The path comes back from the browser, so it is untrusted: it must
    resolve to a regular file that sits directly inside the temp directory
    and carries this user's upload prefix. Anything else yields ``None``.
    """
    if not isinstance(path, str) or not path:
        return None
    real_path = os.path.realpath(path)
    upload_dir = os.path.realpath(tempfile.gettempdir())
    if os.path.dirname(real_path) != upload_dir:
        return None
    if not os.path.basename(real_path).startswith(_user_upload_prefix()):
        return None
    return real_path if os.path.isfile(real_path) else None


@voice_test_bp.route('/voice-test')
@login_required
def voice_test_page():
    """Render the voice test page (``voice_test/index.html``)."""
    return render_template('voice_test/index.html')


@voice_test_bp.route('/api/voice-test/connection', methods=['POST'])
@login_required
def test_connection():
    """Return the service's connection test result as JSON."""
    try:
        return jsonify(cosyvoice_service.test_connection())
    except Exception as exc:
        logger.exception("连接测试失败: %s", exc)
        return _fail(f"测试失败: {str(exc)}")


@voice_test_bp.route('/api/voice-test/voices', methods=['GET'])
@login_required
def get_voices():
    """Return the list of available preset voices as JSON."""
    try:
        return jsonify({
            "success": True,
            "voices": cosyvoice_service.get_available_voices()
        })
    except Exception as exc:
        logger.exception("获取音色列表失败: %s", exc)
        return _fail(f"获取失败: {str(exc)}")


@voice_test_bp.route('/api/voice-test/generate/preset', methods=['POST'])
@login_required
def generate_with_preset_voice():
    """Synthesize speech with a preset voice.

    Reads ``text``, ``voice``, ``seed`` and ``speed`` from the JSON body;
    an empty ``text`` is rejected before the service is called.
    """
    try:
        data = _json_body()
        text = data.get('text', '')
        if not text:
            return _fail("请输入要合成的文本")

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_preset_voice(
            text=text,
            voice=data.get('voice', '中文女'),
            seed=data.get('seed', 42),
            speed=data.get('speed', 1.0)
        )
        return _audio_result(stream_audio, full_audio, "语音生成成功", "语音生成失败")
    except Exception as exc:
        logger.exception("预训练音色生成失败: %s", exc)
        return _fail(f"生成失败: {str(exc)}")


@voice_test_bp.route('/api/voice-test/generate/natural', methods=['POST'])
@login_required
def generate_with_natural_control():
    """Synthesize speech steered by a natural-language instruction.

    Reads ``text``, ``instruction`` and ``seed`` from the JSON body; an
    empty ``text`` is rejected before the service is called.
    """
    try:
        data = _json_body()
        text = data.get('text', '')
        if not text:
            return _fail("请输入要合成的文本")

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_natural_control(
            text=text,
            instruction=data.get('instruction', '请用温柔甜美的女声朗读'),
            seed=data.get('seed', 42)
        )
        return _audio_result(stream_audio, full_audio, "语音生成成功", "语音生成失败")
    except Exception as exc:
        logger.exception("自然语言控制生成失败: %s", exc)
        return _fail(f"生成失败: {str(exc)}")


@voice_test_bp.route('/api/voice-test/upload-audio', methods=['POST'])
@login_required
def upload_audio():
    """Store an uploaded reference clip and try to transcribe it.

    The clip is written to the temp directory under a name prefixed with
    the current user's id. The response carries that path (the client sends
    it back to the clone endpoint) and the recognized text.
    """
    try:
        file = request.files.get('audio')
        if file is None or not file.filename:
            return _fail("请选择音频文件")

        stem, dot, extension = file.filename.rpartition('.')
        extension = extension.lower()
        if not dot or extension not in _ALLOWED_AUDIO_EXTENSIONS:
            return _fail("不支持的音频格式,请使用WAV、MP3、M4A或FLAC格式")

        # secure_filename() may return '' (for example, an all non-ASCII
        # stem), so sanitize the stem alone and re-attach the already
        # validated extension; the stored file always keeps its type.
        safe_stem = secure_filename(stem) or "audio"
        file_path = os.path.join(
            tempfile.gettempdir(),
            f"{_user_upload_prefix()}{safe_stem}.{extension}"
        )
        file.save(file_path)

        recognized_text = cosyvoice_service.recognize_audio(file_path)
        return jsonify({
            "success": True,
            "message": "音频上传成功",
            "file_path": file_path,
            "recognized_text": recognized_text
        })
    except Exception as exc:
        logger.exception("音频上传失败: %s", exc)
        return _fail(f"上传失败: {str(exc)}")


@voice_test_bp.route('/api/voice-test/generate/clone', methods=['POST'])
@login_required
def generate_with_voice_cloning():
    """Synthesize speech that imitates a previously uploaded reference clip.

    Reads ``text``, ``reference_audio_path``, ``reference_text`` and
    ``seed`` from the JSON body. The reference path must point at a file
    the current user uploaded through the upload endpoint; any other path
    is rejected with the same message as a missing upload.
    """
    try:
        data = _json_body()
        text = data.get('text', '')
        if not text:
            return _fail("请输入要合成的文本")

        reference_audio_path = _resolve_own_upload(data.get('reference_audio_path', ''))
        if reference_audio_path is None:
            return _fail("请先上传参考音频")

        stream_audio, full_audio = cosyvoice_service.generate_speech_with_voice_cloning(
            text=text,
            reference_audio_path=reference_audio_path,
            reference_text=data.get('reference_text', ''),
            seed=data.get('seed', 42)
        )
        return _audio_result(stream_audio, full_audio, "语音克隆成功", "语音克隆失败")
    except Exception as exc:
        logger.exception("语音克隆失败: %s", exc)
        return _fail(f"克隆失败: {str(exc)}")


@voice_test_bp.route('/api/voice-test/random-seed', methods=['GET'])
@login_required
def get_random_seed():
    """Return a seed obtained from the service as JSON."""
    try:
        return jsonify({
            "success": True,
            "seed": cosyvoice_service.generate_random_seed()
        })
    except Exception as exc:
        logger.exception("获取随机种子失败: %s", exc)
        return _fail(f"获取失败: {str(exc)}")
logger = logging.getLogger(__name__)


class CosyVoiceService:
    """Wrapper around a CosyVoice endpoint reached through ``gradio_client``.

    The client is created lazily on first use. Every public method catches
    its own failures, logs them with a traceback, and returns a neutral
    value (``[]``, ``""``, ``(None, None)`` or the fallback seed) instead
    of raising.
    """

    # Seed handed back when the service cannot supply one.
    FALLBACK_SEED = 42

    def __init__(self, api_url: str = "http://127.0.0.1:8080/"):
        """Remember ``api_url``; no connection is opened here."""
        self.api_url = api_url
        self.client = None

    def connect(self) -> bool:
        """Create a new client for ``api_url``; return whether it worked."""
        try:
            self.client = Client(self.api_url)
            logger.info(f"成功连接到CosyVoice服务: {self.api_url}")
            return True
        except Exception as exc:
            logger.exception("连接CosyVoice服务失败: %s", exc)
            return False

    def _ensure_client(self) -> bool:
        """Return True when a client exists, connecting first if needed."""
        if self.client:
            return True
        return self.connect()

    @staticmethod
    def _split_result(result) -> Tuple[Any, Any]:
        """Normalize a ``/generate_audio`` result to ``(stream, full)``.

        A list or tuple with at least two items contributes its first two;
        anything else is used for both positions.
        """
        if isinstance(result, (list, tuple)) and len(result) >= 2:
            return result[0], result[1]
        return result, result

    def get_available_voices(self) -> list:
        """Return the voices from ``/refresh_sft_spk``, or ``[]`` on failure.

        The ``.ipynb_checkpoints`` entry is dropped from the list.
        """
        try:
            if not self._ensure_client():
                return []
            voices = self.client.predict(api_name="/refresh_sft_spk")
            return [voice for voice in voices if voice != '.ipynb_checkpoints']
        except Exception as exc:
            logger.exception("获取音色列表失败: %s", exc)
            return []

    def get_reference_audios(self) -> list:
        """Return the result of ``/refresh_prompt_wav``, or ``[]`` on failure."""
        try:
            if not self._ensure_client():
                return []
            return self.client.predict(api_name="/refresh_prompt_wav")
        except Exception as exc:
            logger.exception("获取参考音频列表失败: %s", exc)
            return []

    def recognize_audio(self, audio_file_path: str) -> str:
        """Transcribe the audio file via ``/prompt_wav_recognition``.

        Returns ``""`` when the service is unreachable or the call fails.
        """
        try:
            if not self._ensure_client():
                return ""
            return self.client.predict(
                prompt_wav=handle_file(audio_file_path),
                api_name="/prompt_wav_recognition"
            )
        except Exception as exc:
            logger.exception("语音识别失败: %s", exc)
            return ""

    def generate_speech_with_preset_voice(
        self,
        text: str,
        voice: str = "中文女",
        seed: int = 42,
        speed: float = 1.0,
        stream: bool = False
    ) -> Tuple[Optional[str], Optional[str]]:
        """Synthesize ``text`` with a preset voice.

        Returns ``(stream_audio, full_audio)`` as produced by
        ``/generate_audio``, or ``(None, None)`` on failure.
        """
        try:
            if not self._ensure_client():
                return None, None
            result = self.client.predict(
                tts_text=text,
                mode_checkbox_group="预训练音色",
                sft_dropdown=voice,
                seed=seed,
                speed=speed,
                # The flag is sent as the strings "true"/"false".
                stream="true" if stream else "false",
                api_name="/generate_audio"
            )
            return self._split_result(result)
        except Exception as exc:
            logger.exception("预训练音色语音生成失败: %s", exc)
            return None, None

    def generate_speech_with_voice_cloning(
        self,
        text: str,
        reference_audio_path: str,
        reference_text: str = "",
        seed: int = 42
    ) -> Tuple[Optional[str], Optional[str]]:
        """Synthesize ``text`` in the voice of a reference recording.

        When ``reference_text`` is empty the reference audio is transcribed
        first; if that also yields nothing, an empty prompt text is sent.
        Returns ``(stream_audio, full_audio)``, or ``(None, None)`` on
        failure.
        """
        try:
            if not self._ensure_client():
                return None, None

            if not reference_text:
                reference_text = self.recognize_audio(reference_audio_path)
                if not reference_text:
                    logger.warning("参考音频识别失败,使用空文本")
                    reference_text = ""

            result = self.client.predict(
                tts_text=text,
                mode_checkbox_group="3s极速复刻",
                prompt_text=reference_text,
                prompt_wav_upload=handle_file(reference_audio_path),
                seed=seed,
                api_name="/generate_audio"
            )
            return self._split_result(result)
        except Exception as exc:
            logger.exception("语音克隆生成失败: %s", exc)
            return None, None

    def generate_speech_with_natural_control(
        self,
        text: str,
        instruction: str = "请用温柔甜美的女声朗读",
        seed: int = 42
    ) -> Tuple[Optional[str], Optional[str]]:
        """Synthesize ``text`` following a natural-language instruction.

        Returns ``(stream_audio, full_audio)``, or ``(None, None)`` on
        failure.
        """
        try:
            if not self._ensure_client():
                return None, None
            result = self.client.predict(
                tts_text=text,
                mode_checkbox_group="自然语言控制",
                instruct_text=instruction,
                seed=seed,
                api_name="/generate_audio"
            )
            return self._split_result(result)
        except Exception as exc:
            logger.exception("自然语言控制语音生成失败: %s", exc)
            return None, None

    def generate_random_seed(self) -> int:
        """Return a seed from ``/generate_random_seed``.

        Falls back to ``FALLBACK_SEED`` when the service is unreachable,
        returns nothing, or returns something that is not an integer. A
        seed of ``0`` is a valid answer and is returned unchanged.
        """
        try:
            if not self._ensure_client():
                return self.FALLBACK_SEED
            seed = self.client.predict(api_name="/generate_random_seed")
            if seed is None or seed == "":
                return self.FALLBACK_SEED
            return int(seed)
        except Exception as exc:
            logger.exception("生成随机种子失败: %s", exc)
            return self.FALLBACK_SEED

    def test_connection(self) -> Dict[str, Any]:
        """Reconnect and report the outcome as a dict.

        The dict always has ``success``, ``message`` and ``api_url``; on
        success it also carries ``available_voices``.
        """
        try:
            # Always open a fresh client so a stale one is not reported as
            # healthy.
            if not self.connect():
                return {
                    "success": False,
                    "message": "无法连接到CosyVoice服务",
                    "api_url": self.api_url
                }

            return {
                "success": True,
                "message": "CosyVoice服务连接成功",
                "api_url": self.api_url,
                "available_voices": self.get_available_voices()
            }
        except Exception as exc:
            return {
                "success": False,
                "message": f"连接测试失败: {str(exc)}",
                "api_url": self.api_url
            }


# Module-wide service instance shared by the route handlers.
cosyvoice_service = CosyVoiceService()
/**
 * Create the Bootstrap loading modal and mirror the speed slider's value
 * into its read-out element.
 */
function initializeComponents() {
    loadingModal = new bootstrap.Modal(document.getElementById('loadingModal'));

    const speedSlider = document.getElementById('preset-speed');
    const speedValue = document.getElementById('preset-speed-value');
    speedSlider.addEventListener('input', function() {
        speedValue.textContent = this.value;
    });
}

/**
 * Attach the click/submit handlers for every control on the page.
 */
function bindEvents() {
    const on = (id, eventName, handler) =>
        document.getElementById(id).addEventListener(eventName, handler);

    // Connection test
    on('test-connection-btn', 'click', testConnection);

    // Preset voice
    on('preset-voice-form', 'submit', generatePresetVoice);
    on('preset-random-seed', 'click', () => getRandomSeed('preset-seed'));

    // Natural-language control
    on('natural-control-form', 'submit', generateNaturalControl);
    on('natural-random-seed', 'click', () => getRandomSeed('natural-seed'));

    // Voice cloning
    on('audio-upload-form', 'submit', uploadReferenceAudio);
    on('voice-clone-form', 'submit', generateVoiceClone);
    on('clone-random-seed', 'click', () => getRandomSeed('clone-seed'));

    // Log panel
    on('clear-log', 'click', clearLog);
}

/**
 * Show the loading modal with the given message.
 */
function showLoading(message = '正在处理中...') {
    document.getElementById('loading-message').textContent = message;
    loadingModal.show();
}

/**
 * Hide the loading modal.
 */
function hideLoading() {
    loadingModal.hide();
}

/**
 * Append a timestamped entry to the log panel and scroll it into view.
 *
 * The entry is written with textContent rather than innerHTML, so text
 * that originates from the server or the user can never be parsed as
 * markup.
 * NOTE(review): the callers are not visible in this chunk; if any of them
 * passes HTML on purpose it will now show up as literal text.
 *
 * @param {string} message - text to log
 * @param {string} [type='info'] - one of info / success / error / warning
 */
function addLog(message, type = 'info') {
    const logContainer = document.getElementById('test-log');
    const timestamp = new Date().toLocaleTimeString();

    const colors = {
        'info': 'text-primary',
        'success': 'text-success',
        'error': 'text-danger',
        'warning': 'text-warning'
    };

    const logEntry = document.createElement('div');
    logEntry.className = `mb-2 ${colors[type] || 'text-primary'}`;
    logEntry.textContent = `[${timestamp}] ${message}`;

    logContainer.appendChild(logEntry);
    logContainer.scrollTop = logContainer.scrollHeight;
}
测试记录将显示在这里...
'; +} + +/** + * 显示错误信息 + */ +function showError(message) { + const toast = document.createElement('div'); + toast.className = 'toast align-items-center text-white bg-danger border-0 position-fixed top-0 end-0 m-3'; + toast.style.zIndex = '9999'; + toast.innerHTML = ` +测试语音合成的各种功能
+测试记录将显示在这里...
+