345 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			345 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""
 | 
						||
语音测试相关路由
 | 
						||
"""
 | 
						||
import os
 | 
						||
import json
 | 
						||
import tempfile
 | 
						||
import subprocess
 | 
						||
import uuid
 | 
						||
import librosa
 | 
						||
import soundfile as sf
 | 
						||
from flask import Blueprint, request, jsonify, render_template, current_app, send_file
 | 
						||
from flask_login import login_required, current_user
 | 
						||
from app.services.cosyvoice_service import cosyvoice_service
 | 
						||
from werkzeug.utils import secure_filename
 | 
						||
import logging
 | 
						||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
 | 
						||
# Flask blueprint named 'voice_test'; the route decorators in this module attach to it.
voice_test_bp = Blueprint('voice_test', __name__)
 | 
						||
def convert_audio_format(input_path, output_path, target_sr=16000):
    """Convert an audio file to a mono, 16-bit PCM WAV file.

    The conversion is first attempted with librosa + soundfile. If that
    raises, the ``ffmpeg`` command-line tool is tried as a fallback. Both
    paths honor ``target_sr``.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the converted WAV file is written to.
        target_sr: Sample rate (Hz) of the converted audio.

    Returns:
        True if either conversion path succeeded, False if both failed.
        Exceptions raised by the conversion calls are caught and logged.
    """
    try:
        # librosa resamples to target_sr and downmixes to mono while loading.
        audio, _ = librosa.load(input_path, sr=target_sr, mono=True)

        # Write the samples out as a 16-bit PCM WAV file.
        sf.write(output_path, audio, target_sr, format='WAV', subtype='PCM_16')

        logger.info(f"音频格式转换成功: {input_path} -> {output_path}")
        return True
    except Exception as e:
        logger.error(f"音频格式转换失败: {str(e)}")

    # Fallback: let ffmpeg do the conversion.
    try:
        cmd = [
            'ffmpeg', '-i', input_path,
            # Use the requested rate so both conversion paths produce the
            # same output format.
            '-ar', str(target_sr),
            '-ac', '1',            # mono
            '-sample_fmt', 's16',  # 16-bit samples
            '-y', output_path,     # overwrite the output without prompting
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as fe:
        # Reached, for example, when the ffmpeg executable cannot be started.
        logger.error(f"ffmpeg备用方案也失败: {str(fe)}")
        return False

    if result.returncode == 0:
        logger.info(f"使用ffmpeg转换成功: {input_path} -> {output_path}")
        return True

    logger.error(f"ffmpeg转换失败: {result.stderr}")
    return False
 | 
						||
@voice_test_bp.route('/voice-test')
@login_required
def voice_test_page():
    """Render the voice test page (login required)."""
    page_template = 'voice_test/index.html'
    return render_template(page_template)
 | 
						||
@voice_test_bp.route('/api/voice-test/connection', methods=['POST'])
@login_required
def test_connection():
    """Check the connection to the CosyVoice service.

    Returns:
        The JSON-encoded result of ``cosyvoice_service.test_connection()``,
        or ``{"success": False, "message": ...}`` if an exception is raised.
    """
    try:
        return jsonify(cosyvoice_service.test_connection())
    except Exception as exc:
        logger.error(f"连接测试失败: {str(exc)}")
        failure = {
            "success": False,
            "message": f"测试失败: {str(exc)}",
        }
        return jsonify(failure)
 | 
						||
@voice_test_bp.route('/api/voice-test/voices', methods=['GET'])
@login_required
def get_voices():
    """Return the voices reported by the CosyVoice service.

    Returns:
        JSON ``{"success": True, "voices": ...}`` on success, or
        ``{"success": False, "message": ...}`` if an exception is raised.
    """
    try:
        payload = {
            "success": True,
            "voices": cosyvoice_service.get_available_voices(),
        }
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"获取音色列表失败: {str(exc)}")
        failure = {
            "success": False,
            "message": f"获取失败: {str(exc)}",
        }
        return jsonify(failure)
 | 
						||
@voice_test_bp.route('/api/voice-test/generate/preset', methods=['POST'])
@login_required
def generate_with_preset_voice():
    """Generate speech with one of the preset voices.

    Reads a JSON object from the request body with these keys:
        text: Text to synthesize (required, must be non-empty).
        voice: Preset voice name, default ``'中文女'``.
        seed: Seed passed to the service, default ``42``.
        speed: Speed passed to the service, default ``1.0``.

    Returns:
        JSON with ``success`` and ``message``; on success also ``audio_url``
        and ``stream_audio_url``, taken from the two values returned by
        ``cosyvoice_service.generate_speech_with_preset_voice``.
    """
    try:
        # silent=True makes a missing or unparsable body come back as None
        # instead of raising; anything that is not a JSON object is treated
        # as an empty payload so the client gets the normal "text required"
        # response rather than an internal error string.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        voice = data.get('voice', '中文女')
        seed = data.get('seed', 42)
        speed = data.get('speed', 1.0)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        # Ask the service to synthesize the speech.
        stream_audio, full_audio = cosyvoice_service.generate_speech_with_preset_voice(
            text=text,
            voice=voice,
            seed=seed,
            speed=speed
        )

        # A falsy full_audio is treated as a failed generation.
        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音生成失败"
            })

        return jsonify({
            "success": True,
            "message": "语音生成成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })

    except Exception as e:
        logger.error(f"预训练音色生成失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"生成失败: {str(e)}"
        })
 | 
						||
@voice_test_bp.route('/api/voice-test/generate/natural', methods=['POST'])
@login_required
def generate_with_natural_control():
    """Generate speech steered by a natural-language instruction.

    Reads a JSON object from the request body with these keys:
        text: Text to synthesize (required, must be non-empty).
        instruction: Instruction passed to the service,
            default ``'请用温柔甜美的女声朗读'``.
        seed: Seed passed to the service, default ``42``.

    Returns:
        JSON with ``success`` and ``message``; on success also ``audio_url``
        and ``stream_audio_url``, taken from the two values returned by
        ``cosyvoice_service.generate_speech_with_natural_control``.
    """
    try:
        # silent=True makes a missing or unparsable body come back as None
        # instead of raising; anything that is not a JSON object is treated
        # as an empty payload so the client gets the normal "text required"
        # response rather than an internal error string.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        instruction = data.get('instruction', '请用温柔甜美的女声朗读')
        seed = data.get('seed', 42)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        # Ask the service to synthesize the speech.
        stream_audio, full_audio = cosyvoice_service.generate_speech_with_natural_control(
            text=text,
            instruction=instruction,
            seed=seed
        )

        # A falsy full_audio is treated as a failed generation.
        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音生成失败"
            })

        return jsonify({
            "success": True,
            "message": "语音生成成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })

    except Exception as e:
        logger.error(f"自然语言控制生成失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"生成失败: {str(e)}"
        })
 | 
						||
@voice_test_bp.route('/api/voice-test/upload-audio', methods=['POST'])
@login_required
def upload_audio():
    """Accept an uploaded reference audio file for voice cloning.

    The file is read from the ``audio`` field of the multipart form, saved
    to the system temp directory, and converted with
    ``convert_audio_format``. The converted file (or, if conversion fails,
    the raw upload) is then passed to ``cosyvoice_service.recognize_audio``.

    Returns:
        JSON with ``success`` and ``message``; on success also ``file_path``
        (server-side path of the stored audio), ``recognized_text`` and
        ``file_info`` (``size`` in bytes and a ``format`` label).
    """
    try:
        upload = request.files.get('audio')
        if upload is None or upload.filename == '':
            return jsonify({
                "success": False,
                "message": "请选择音频文件"
            })

        # Short random id keeps the stored file names unique.
        unique_id = str(uuid.uuid4())[:8]

        # secure_filename() may return an empty string once unsafe characters
        # are stripped, and the upload may carry no filename at all; fall
        # back to a generated name in both cases.
        original_filename = (
            secure_filename(upload.filename or '')
            or f"recording_{unique_id}.wav"
        )

        # Store the raw upload.
        temp_dir = tempfile.gettempdir()
        original_path = os.path.join(temp_dir, f"original_{unique_id}_{original_filename}")
        upload.save(original_path)

        logger.info(f"原始音频文件保存: {original_path}, 大小: {os.path.getsize(original_path)} 字节")

        # Target path of the converted file.
        converted_filename = f"voice_clone_{current_user.id}_{unique_id}.wav"
        converted_path = os.path.join(temp_dir, converted_filename)

        if not convert_audio_format(original_path, converted_path):
            # Conversion failed: try to use the raw upload as-is.
            logger.warning("音频格式转换失败,尝试直接使用原始文件")

            try:
                recognized_text = cosyvoice_service.recognize_audio(original_path)
                return jsonify({
                    "success": True,
                    "message": "音频上传成功(使用原始格式)",
                    "file_path": original_path,
                    "recognized_text": recognized_text,
                    "file_info": {
                        "size": os.path.getsize(original_path),
                        "format": "原始格式"
                    }
                })
            except Exception as e:
                logger.error(f"音频处理失败: {str(e)}")
                return jsonify({
                    "success": False,
                    "message": f"音频处理失败: {str(e)}"
                })

        # Conversion succeeded: the raw upload is no longer needed. Removal
        # is best-effort, but a failure is logged rather than hidden.
        try:
            os.remove(original_path)
        except OSError as e:
            logger.warning(f"删除原始音频文件失败: {original_path}: {str(e)}")

        # Make sure the converter actually produced a non-empty file.
        if not (os.path.exists(converted_path) and os.path.getsize(converted_path) > 0):
            return jsonify({
                "success": False,
                "message": "音频文件转换后验证失败"
            })

        logger.info(f"音频文件转换并验证成功: {converted_path}")

        # Try to recognize the spoken content of the audio.
        recognized_text = cosyvoice_service.recognize_audio(converted_path)

        return jsonify({
            "success": True,
            "message": "音频上传和转换成功",
            "file_path": converted_path,
            "recognized_text": recognized_text,
            "file_info": {
                "size": os.path.getsize(converted_path),
                "format": "WAV 16kHz Mono"
            }
        })

    except Exception as e:
        logger.error(f"音频上传失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"上传失败: {str(e)}"
        })
 | 
						||
@voice_test_bp.route('/api/voice-test/generate/clone', methods=['POST'])
@login_required
def generate_with_voice_cloning():
    """Generate speech that imitates an uploaded reference audio.

    Reads a JSON object from the request body with these keys:
        text: Text to synthesize (required, must be non-empty).
        reference_audio_path: Server-side path returned by the upload-audio
            endpoint (required).
        reference_text: Reference text passed to the service, default ``''``.
        seed: Seed passed to the service, default ``42``.

    Returns:
        JSON with ``success`` and ``message``; on success also ``audio_url``
        and ``stream_audio_url``, taken from the two values returned by
        ``cosyvoice_service.generate_speech_with_voice_cloning``.
    """
    try:
        # silent=True makes a missing or unparsable body come back as None
        # instead of raising; anything that is not a JSON object is treated
        # as an empty payload so the client gets a normal validation response.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        reference_audio_path = data.get('reference_audio_path', '')
        reference_text = data.get('reference_text', '')
        seed = data.get('seed', 42)

        if not text:
            return jsonify({
                "success": False,
                "message": "请输入要合成的文本"
            })

        # The path comes from the client, so it must not be trusted: accept
        # only a regular file that sits directly in the temp directory and
        # carries one of the name prefixes upload_audio gives its files
        # (converted files are additionally tied to the current user's id).
        resolved_reference = None
        if isinstance(reference_audio_path, str) and reference_audio_path:
            upload_root = os.path.realpath(tempfile.gettempdir())
            candidate = os.path.realpath(reference_audio_path)
            allowed_prefixes = (f"voice_clone_{current_user.id}_", "original_")
            in_upload_root = (
                os.path.normcase(os.path.dirname(candidate))
                == os.path.normcase(upload_root)
            )
            if (
                in_upload_root
                and os.path.basename(candidate).startswith(allowed_prefixes)
                and os.path.isfile(candidate)
            ):
                resolved_reference = candidate

        if resolved_reference is None:
            return jsonify({
                "success": False,
                "message": "请先上传参考音频"
            })

        # Hand the validated, resolved path to the service so it reads the
        # same file that was just checked.
        stream_audio, full_audio = cosyvoice_service.generate_speech_with_voice_cloning(
            text=text,
            reference_audio_path=resolved_reference,
            reference_text=reference_text,
            seed=seed
        )

        # A falsy full_audio is treated as a failed generation.
        if not full_audio:
            return jsonify({
                "success": False,
                "message": "语音克隆失败"
            })

        return jsonify({
            "success": True,
            "message": "语音克隆成功",
            "audio_url": full_audio,
            "stream_audio_url": stream_audio
        })

    except Exception as e:
        logger.error(f"语音克隆失败: {str(e)}")
        return jsonify({
            "success": False,
            "message": f"克隆失败: {str(e)}"
        })
 | 
						||
@voice_test_bp.route('/api/voice-test/random-seed', methods=['GET'])
@login_required
def get_random_seed():
    """Return a seed produced by ``cosyvoice_service.generate_random_seed``.

    Returns:
        JSON ``{"success": True, "seed": ...}`` on success, or
        ``{"success": False, "message": ...}`` if an exception is raised.
    """
    try:
        payload = {
            "success": True,
            "seed": cosyvoice_service.generate_random_seed(),
        }
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"获取随机种子失败: {str(exc)}")
        failure = {
            "success": False,
            "message": f"获取失败: {str(exc)}",
        }
        return jsonify(failure)
 | 
						||
@voice_test_bp.route('/download-audio/<filename>', methods=['GET'])
@login_required
def download_temp_audio(filename):
    """Serve an audio file stored in the system temp directory.

    Args:
        filename: Name of the file inside the temp directory, taken from
            the URL.

    Returns:
        The file sent inline with mimetype ``audio/wav``; a JSON error with
        status 404 when the name does not resolve to a regular file directly
        inside the temp directory; a JSON error with status 500 on any
        unexpected exception.
    """
    try:
        temp_root = os.path.realpath(tempfile.gettempdir())
        file_path = os.path.realpath(os.path.join(temp_root, filename))

        # The name comes from the URL: resolve it and require the result to
        # sit directly in the temp directory, so names such as '..', names
        # containing a path separator, or symlinks pointing elsewhere cannot
        # escape it.
        inside_temp_root = (
            os.path.normcase(os.path.dirname(file_path))
            == os.path.normcase(temp_root)
        )

        # isfile (not exists) so a directory yields 404 rather than an
        # exception from send_file.
        if inside_temp_root and os.path.isfile(file_path):
            return send_file(file_path, as_attachment=False, mimetype='audio/wav')

        return jsonify({"success": False, "message": "音频文件不存在"}), 404

    except Exception as e:
        logger.error(f"音频下载失败: {str(e)}")
        return jsonify({"success": False, "message": "下载失败"}), 500
 |