diff --git a/app/routes/voice_clone.py b/app/routes/voice_clone.py
index da00a95..f10d22d 100644
--- a/app/routes/voice_clone.py
+++ b/app/routes/voice_clone.py
@@ -4,6 +4,10 @@
 import os
 import tempfile
 import uuid
+import re
+import subprocess
+import librosa
+import soundfile as sf
 from flask import Blueprint, request, jsonify, render_template, current_app
 from flask_login import login_required, current_user
 from app.services.cosyvoice_service import cosyvoice_service
@@ -14,6 +18,55 @@ import logging
 logger = logging.getLogger(__name__)
 voice_clone_bp = Blueprint('voice_clone', __name__)
 
+
+def convert_audio_format(input_path, output_path, target_sr=16000):
+    """Convert an audio file to a mono, 16-bit PCM WAV file.
+
+    librosa and soundfile are tried first; if they raise, the ffmpeg
+    command-line tool is used as a fallback.
+
+    Args:
+        input_path: Path of the audio file to read.
+        output_path: Path of the WAV file to write.
+        target_sr: Output sample rate in Hz, honoured by both code paths.
+
+    Returns:
+        True if one of the two methods succeeded, False otherwise.
+    """
+    try:
+        # sr= and mono= are passed so the audio is loaded at target_sr in mono.
+        audio, _ = librosa.load(input_path, sr=target_sr, mono=True)
+        sf.write(output_path, audio, target_sr, format='WAV', subtype='PCM_16')
+    except Exception as e:
+        # Not fatal: fall through to the ffmpeg fallback below.
+        logger.error(f"音频格式转换失败: {str(e)}")
+    else:
+        logger.info(f"音频格式转换成功: {input_path} -> {output_path}")
+        return True
+
+    # Fallback: ffmpeg, using the same sample rate as the librosa path.
+    cmd = [
+        'ffmpeg', '-i', input_path,
+        '-ar', str(target_sr),  # sample rate
+        '-ac', '1',  # mono
+        '-sample_fmt', 's16',  # 16-bit samples
+        '-y', output_path,
+    ]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True)
+    except Exception as fe:
+        # e.g. the ffmpeg executable is not installed.
+        logger.error(f"ffmpeg备用方案也失败: {str(fe)}")
+        return False
+
+    if result.returncode != 0:
+        logger.error(f"ffmpeg转换失败: {result.stderr}")
+        return False
+
+    logger.info(f"使用ffmpeg转换成功: {input_path} -> {output_path}")
+    return True
+
+
 @voice_clone_bp.route('/voice-clone')
 @login_required
 def voice_clone_page():
@@ -45,8 +98,21 @@ def upload_voice_sample():
 
         # 保存到临时目录
         temp_dir = tempfile.gettempdir()
+        original_path = os.path.join(temp_dir, f"original_{filename}")
+        file.save(original_path)
+
+        # Normalize the upload to standard WAV before recognition.
         file_path = os.path.join(temp_dir, filename)
-        file.save(file_path)
+        if convert_audio_format(original_path, file_path):
+            # Conversion succeeded; the raw upload is no longer needed.
+            try:
+                os.remove(original_path)
+            except OSError as e:
+                logger.warning(f"删除原始文件失败: {str(e)}")
+        else:
+            # Conversion failed: use the raw upload; os.replace overwrites partial output.
+            logger.warning("音频格式转换失败,使用原始文件")
+            os.replace(original_path, file_path)
 
         # 识别语音内容
         recognized_text = cosyvoice_service.recognize_audio(file_path)