api-test-end

This commit is contained in:
root 2025-09-15 02:45:50 +08:00
parent 28b23647e6
commit 5271267edf
5 changed files with 1065 additions and 461 deletions

View File

@ -4,22 +4,56 @@
import os import os
import json import json
import tempfile import tempfile
from flask import Blueprint, request, jsonify, render_template, current_app import subprocess
import uuid
import librosa
import soundfile as sf
from flask import Blueprint, request, jsonify, render_template, current_app, send_file
from flask_login import login_required, current_user from flask_login import login_required, current_user
from app.services.cosyvoice_service import cosyvoice_service from app.services.cosyvoice_service import cosyvoice_service
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
voice_test_bp = Blueprint('voice_test', __name__) voice_test_bp = Blueprint('voice_test', __name__)
def convert_audio_format(input_path, output_path, target_sr=16000):
    """Convert an audio file to a mono, 16-bit PCM WAV file.

    The conversion is attempted with librosa/soundfile first. If that raises
    for any reason, the ``ffmpeg`` command-line tool is tried as a fallback.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path of the WAV file to write (overwritten if present).
        target_sr: Sample rate, in Hz, of the produced file. Used by both the
            librosa path and the ffmpeg fallback.

    Returns:
        True if either conversion path succeeded, False otherwise. Failures
        are logged and never raised to the caller.
    """
    try:
        # librosa resamples to target_sr and down-mixes to mono while loading.
        audio, _ = librosa.load(input_path, sr=target_sr, mono=True)
        # Write the samples as a standard 16-bit PCM WAV file.
        sf.write(output_path, audio, target_sr, format='WAV', subtype='PCM_16')
        logger.info(f"音频格式转换成功: {input_path} -> {output_path}")
        return True
    except Exception as e:
        logger.error(f"音频格式转换失败: {str(e)}")

    # Fallback: let the ffmpeg CLI do the conversion.
    try:
        cmd = [
            'ffmpeg', '-i', input_path,
            '-ar', str(target_sr),   # sample rate (honours target_sr)
            '-ac', '1',              # mono
            '-sample_fmt', 's16',    # 16-bit samples
            '-y', output_path,
        ]
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # ffmpeg may echo non-UTF-8 metadata on stderr; a decode error
            # must not turn a successful conversion into a failure.
            errors='replace',
            # Bound the run so a stuck ffmpeg cannot block the request worker;
            # TimeoutExpired is handled by the except clause below.
            timeout=120,
        )
        if result.returncode == 0:
            logger.info(f"使用ffmpeg转换成功: {input_path} -> {output_path}")
            return True
        logger.error(f"ffmpeg转换失败: {result.stderr}")
        return False
    except Exception as fe:
        # Covers a missing ffmpeg binary, a timeout, and any other OS error.
        logger.error(f"ffmpeg备用方案也失败: {str(fe)}")
        return False
@voice_test_bp.route('/voice-test') @voice_test_bp.route('/voice-test')
@login_required @login_required
def voice_test_page(): def voice_test_page():
"""语音测试页面""" """语音测试页面"""
return render_template('voice_test/index.html') return render_template('voice_test/index.html')
@voice_test_bp.route('/api/voice-test/connection', methods=['POST']) @voice_test_bp.route('/api/voice-test/connection', methods=['POST'])
@login_required @login_required
def test_connection(): def test_connection():
@ -33,7 +67,6 @@ def test_connection():
"success": False, "success": False,
"message": f"测试失败: {str(e)}" "message": f"测试失败: {str(e)}"
}) })
@voice_test_bp.route('/api/voice-test/voices', methods=['GET']) @voice_test_bp.route('/api/voice-test/voices', methods=['GET'])
@login_required @login_required
def get_voices(): def get_voices():
@ -50,7 +83,6 @@ def get_voices():
"success": False, "success": False,
"message": f"获取失败: {str(e)}" "message": f"获取失败: {str(e)}"
}) })
@voice_test_bp.route('/api/voice-test/generate/preset', methods=['POST']) @voice_test_bp.route('/api/voice-test/generate/preset', methods=['POST'])
@login_required @login_required
def generate_with_preset_voice(): def generate_with_preset_voice():
@ -95,7 +127,6 @@ def generate_with_preset_voice():
"success": False, "success": False,
"message": f"生成失败: {str(e)}" "message": f"生成失败: {str(e)}"
}) })
@voice_test_bp.route('/api/voice-test/generate/natural', methods=['POST']) @voice_test_bp.route('/api/voice-test/generate/natural', methods=['POST'])
@login_required @login_required
def generate_with_natural_control(): def generate_with_natural_control():
@ -138,11 +169,10 @@ def generate_with_natural_control():
"success": False, "success": False,
"message": f"生成失败: {str(e)}" "message": f"生成失败: {str(e)}"
}) })
@voice_test_bp.route('/api/voice-test/upload-audio', methods=['POST']) @voice_test_bp.route('/api/voice-test/upload-audio', methods=['POST'])
@login_required @login_required
def upload_audio(): def upload_audio():
"""上传音频文件用于语音克隆""" """上传音频文件用于语音克隆 - 带格式转换功能"""
try: try:
if 'audio' not in request.files: if 'audio' not in request.files:
return jsonify({ return jsonify({
@ -157,29 +187,71 @@ def upload_audio():
"message": "请选择音频文件" "message": "请选择音频文件"
}) })
# 检查文件类型 # 生成安全的文件名
allowed_extensions = {'wav', 'mp3', 'm4a', 'flac'} unique_id = str(uuid.uuid4())[:8]
if not ('.' in file.filename and original_filename = secure_filename(file.filename) if file.filename else f"recording_{unique_id}.wav"
file.filename.rsplit('.', 1)[1].lower() in allowed_extensions):
return jsonify({
"success": False,
"message": "不支持的音频格式请使用WAV、MP3、M4A或FLAC格式"
})
# 保存文件到临时目录 # 保存原始文件
filename = secure_filename(file.filename)
temp_dir = tempfile.gettempdir() temp_dir = tempfile.gettempdir()
file_path = os.path.join(temp_dir, f"voice_clone_{current_user.id}_{filename}") original_path = os.path.join(temp_dir, f"original_{unique_id}_{original_filename}")
file.save(file_path) file.save(original_path)
logger.info(f"原始音频文件保存: {original_path}, 大小: {os.path.getsize(original_path)} 字节")
# 转换为标准格式
converted_filename = f"voice_clone_{current_user.id}_{unique_id}.wav"
converted_path = os.path.join(temp_dir, converted_filename)
# 进行格式转换
if convert_audio_format(original_path, converted_path):
# 转换成功,删除原始文件
try:
os.remove(original_path)
except:
pass
# 验证转换后的文件
if os.path.exists(converted_path) and os.path.getsize(converted_path) > 0:
logger.info(f"音频文件转换并验证成功: {converted_path}")
# 尝试识别音频内容 # 尝试识别音频内容
recognized_text = cosyvoice_service.recognize_audio(file_path) recognized_text = cosyvoice_service.recognize_audio(converted_path)
return jsonify({ return jsonify({
"success": True, "success": True,
"message": "音频上传成功", "message": "音频上传和转换成功",
"file_path": file_path, "file_path": converted_path,
"recognized_text": recognized_text "recognized_text": recognized_text,
"file_info": {
"size": os.path.getsize(converted_path),
"format": "WAV 16kHz Mono"
}
})
else:
return jsonify({
"success": False,
"message": "音频文件转换后验证失败"
})
else:
# 转换失败,尝试直接使用原始文件
logger.warning("音频格式转换失败,尝试直接使用原始文件")
try:
recognized_text = cosyvoice_service.recognize_audio(original_path)
return jsonify({
"success": True,
"message": "音频上传成功(使用原始格式)",
"file_path": original_path,
"recognized_text": recognized_text,
"file_info": {
"size": os.path.getsize(original_path),
"format": "原始格式"
}
})
except Exception as e:
return jsonify({
"success": False,
"message": f"音频处理失败: {str(e)}"
}) })
except Exception as e: except Exception as e:
@ -188,7 +260,6 @@ def upload_audio():
"success": False, "success": False,
"message": f"上传失败: {str(e)}" "message": f"上传失败: {str(e)}"
}) })
@voice_test_bp.route('/api/voice-test/generate/clone', methods=['POST']) @voice_test_bp.route('/api/voice-test/generate/clone', methods=['POST'])
@login_required @login_required
def generate_with_voice_cloning(): def generate_with_voice_cloning():
@ -239,7 +310,6 @@ def generate_with_voice_cloning():
"success": False, "success": False,
"message": f"克隆失败: {str(e)}" "message": f"克隆失败: {str(e)}"
}) })
@voice_test_bp.route('/api/voice-test/random-seed', methods=['GET']) @voice_test_bp.route('/api/voice-test/random-seed', methods=['GET'])
@login_required @login_required
def get_random_seed(): def get_random_seed():
@ -256,3 +326,19 @@ def get_random_seed():
"success": False, "success": False,
"message": f"获取失败: {str(e)}" "message": f"获取失败: {str(e)}"
}) })
@voice_test_bp.route('/download-audio/<filename>', methods=['GET'])
@login_required
def download_temp_audio(filename):
    """Serve an audio file from the system temp directory for playback.

    Args:
        filename: Name of a file located directly inside the directory
            returned by ``tempfile.gettempdir()``.

    Returns:
        The file with mimetype ``audio/wav`` (inline, not as an attachment),
        a 404 JSON response when the name does not resolve to a regular file
        directly inside the temp directory, or a 500 JSON response on an
        unexpected error.
    """
    try:
        temp_dir = os.path.realpath(tempfile.gettempdir())
        file_path = os.path.realpath(os.path.join(temp_dir, filename))

        # The name comes from the URL, so it is untrusted: only serve regular
        # files whose resolved location is the temp directory itself. This
        # rejects "..", backslash traversal on Windows, symlinks that point
        # elsewhere, and directories.
        # NOTE(review): any logged-in user can still fetch any file in the
        # temp directory by name; consider a per-user ownership check.
        inside_temp_dir = os.path.dirname(file_path) == temp_dir
        if inside_temp_dir and os.path.isfile(file_path):
            return send_file(file_path, as_attachment=False, mimetype='audio/wav')

        return jsonify({"success": False, "message": "音频文件不存在"}), 404
    except Exception as e:
        logger.error(f"音频下载失败: {str(e)}")
        return jsonify({"success": False, "message": "下载失败"}), 500

View File

@ -1,71 +1,77 @@
""" """
CosyVoice API 服务类 CosyVoice API 服务类 - 核心语音克隆功能
负责与CosyVoice API的交互 通过HTTP调用独立的CosyVoice API服务器
""" """
import os import os
import logging import logging
import requests
import tempfile
from typing import Optional, Dict, Any, Tuple from typing import Optional, Dict, Any, Tuple
from gradio_client import Client, handle_file
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class CosyVoiceService: class CosyVoiceService:
"""CosyVoice API服务类""" """CosyVoice API服务类"""
def __init__(self, api_url: str = "http://127.0.0.1:8080/"): def __init__(self, api_url: str = "http://127.0.0.1:8081"):
self.api_url = api_url self.api_url = api_url
self.client = None self._service_status = "未连接"
def connect(self) -> bool: def connect(self) -> bool:
"""连接到CosyVoice服务""" """检查连接状态"""
try: try:
self.client = Client(self.api_url) response = requests.get(f"{self.api_url}/health", timeout=5)
logger.info(f"成功连接到CosyVoice服务: {self.api_url}") if response.status_code == 200:
result = response.json()
self._service_status = "连接正常"
return True return True
else:
self._service_status = f"连接失败: HTTP {response.status_code}"
return False
except Exception as e: except Exception as e:
logger.error(f"连接CosyVoice服务失败: {str(e)}") self._service_status = f"连接失败: {str(e)}"
return False return False
def get_available_voices(self) -> list: def get_available_voices(self) -> list:
"""获取可用的音色列表""" """获取可用的音色列表"""
try: try:
if not self.client: response = requests.get(f"{self.api_url}/voices", timeout=5)
if not self.connect(): if response.status_code == 200:
result = response.json()
return result.get('voices', [])
else:
logger.error(f"获取音色列表失败: HTTP {response.status_code}")
return [] return []
voices = self.client.predict(api_name="/refresh_sft_spk")
# 过滤掉不需要的音色
filtered_voices = [voice for voice in voices if voice != '.ipynb_checkpoints']
return filtered_voices
except Exception as e: except Exception as e:
logger.error(f"获取音色列表失败: {str(e)}") logger.error(f"获取音色列表失败: {str(e)}")
return [] return []
def get_reference_audios(self) -> list: def get_reference_audios(self) -> list:
"""获取参考音频列表""" """获取参考音频列表(用于兼容)"""
try: return ["麦克阿瑟.mp3", "年轻人,不讲武德.mp3"]
if not self.client:
if not self.connect():
return []
audio_files = self.client.predict(api_name="/refresh_prompt_wav")
return audio_files
except Exception as e:
logger.error(f"获取参考音频列表失败: {str(e)}")
return []
def recognize_audio(self, audio_file_path: str) -> str: def recognize_audio(self, audio_file_path: str) -> str:
"""语音识别:将音频转换为文本""" """语音识别:将音频转换为文本 - 核心功能"""
try: try:
if not self.client: logger.info(f"开始语音识别: {audio_file_path}")
if not self.connect():
response = requests.post(
f"{self.api_url}/recognize",
json={"audio_path": audio_file_path},
timeout=30
)
if response.status_code == 200:
result = response.json()
if result.get('success'):
text = result.get('text', '')
logger.info(f"语音识别成功: {text}")
return text
else:
logger.error(f"语音识别失败: {result.get('message')}")
return ""
else:
logger.error(f"语音识别API请求失败: HTTP {response.status_code}")
return "" return ""
text = self.client.predict(
prompt_wav=handle_file(audio_file_path),
api_name="/prompt_wav_recognition"
)
return text
except Exception as e: except Exception as e:
logger.error(f"语音识别失败: {str(e)}") logger.error(f"语音识别失败: {str(e)}")
return "" return ""
@ -80,25 +86,42 @@ class CosyVoiceService:
) -> Tuple[Optional[str], Optional[str]]: ) -> Tuple[Optional[str], Optional[str]]:
"""使用预训练音色生成语音""" """使用预训练音色生成语音"""
try: try:
if not self.client: logger.info(f"开始生成语音: 文本='{text[:20]}...', 音色={voice}, 种子={seed}")
if not self.connect():
return None, None
result = self.client.predict( # 发送生成请求
tts_text=text, response = requests.post(
mode_checkbox_group="预训练音色", f"{self.api_url}/generate/preset",
sft_dropdown=voice, json={
seed=seed, "text": text,
speed=speed, "voice": voice,
stream="true" if stream else "false", "seed": seed,
api_name="/generate_audio" "speed": speed,
"stream": stream
},
timeout=30
) )
# result是一个元组 [流式音频路径, 完整音频路径] if response.status_code == 200:
if isinstance(result, (list, tuple)) and len(result) >= 2: result = response.json()
return result[0], result[1] if result.get('success'):
# 直接从响应中获取音频数据
audio_data = result.get('audio_data')
if audio_data:
local_path = self._save_audio_data(audio_data, 'preset')
if local_path:
logger.info(f"预训练语音生成成功: {local_path}")
return local_path, local_path
else: else:
return result, result return None, None
else:
logger.error("响应中没有音频数据")
return None, None
else:
logger.error(f"语音生成失败: {result.get('message')}")
return None, None
else:
logger.error(f"API请求失败: HTTP {response.status_code}")
return None, None
except Exception as e: except Exception as e:
logger.error(f"预训练音色语音生成失败: {str(e)}") logger.error(f"预训练音色语音生成失败: {str(e)}")
@ -111,35 +134,46 @@ class CosyVoiceService:
reference_text: str = "", reference_text: str = "",
seed: int = 42 seed: int = 42
) -> Tuple[Optional[str], Optional[str]]: ) -> Tuple[Optional[str], Optional[str]]:
"""使用语音克隆生成语音""" """使用语音克隆生成语音 - 核心功能"""
try: try:
if not self.client: logger.info(f"开始语音克隆: 文本='{text[:20]}...', 参考音频={reference_audio_path}")
if not self.connect():
return None, None
# 如果没有提供参考文本,先进行语音识别 # 发送克隆请求
if not reference_text: response = requests.post(
reference_text = self.recognize_audio(reference_audio_path) f"{self.api_url}/generate/clone",
if not reference_text: json={
logger.warning("参考音频识别失败,使用空文本") "text": text,
reference_text = "" "reference_audio": reference_audio_path,
"reference_text": reference_text,
result = self.client.predict( "seed": seed
tts_text=text, },
mode_checkbox_group="3s极速复刻", timeout=60 # 克隆需要更长时间
prompt_text=reference_text,
prompt_wav_upload=handle_file(reference_audio_path),
seed=seed,
api_name="/generate_audio"
) )
if isinstance(result, (list, tuple)) and len(result) >= 2: if response.status_code == 200:
return result[0], result[1] result = response.json()
if result.get('success'):
# 直接从响应中获取音频数据
audio_data = result.get('audio_data')
if audio_data:
local_path = self._save_audio_data(audio_data, 'clone')
if local_path:
logger.info(f"语音克隆成功: {local_path}")
return local_path, local_path
else: else:
return result, result return None, None
else:
logger.error("响应中没有音频数据")
return None, None
else:
logger.error(f"语音克隆失败: {result.get('message')}")
return None, None
else:
logger.error(f"API请求失败: HTTP {response.status_code}")
return None, None
except Exception as e: except Exception as e:
logger.error(f"语音克隆生成失败: {str(e)}") logger.error(f"语音克隆失败: {str(e)}")
return None, None return None, None
def generate_speech_with_natural_control( def generate_speech_with_natural_control(
@ -148,41 +182,81 @@ class CosyVoiceService:
instruction: str = "请用温柔甜美的女声朗读", instruction: str = "请用温柔甜美的女声朗读",
seed: int = 42 seed: int = 42
) -> Tuple[Optional[str], Optional[str]]: ) -> Tuple[Optional[str], Optional[str]]:
"""使用自然语言控制生成语音""" """使用自然语言控制生成语音 - 核心功能"""
try: try:
if not self.client: logger.info(f"开始自然语言控制生成: 文本='{text[:20]}...', 指令='{instruction}'")
if not self.connect():
return None, None
result = self.client.predict( # 发送生成请求
tts_text=text, response = requests.post(
mode_checkbox_group="自然语言控制", f"{self.api_url}/generate/natural",
instruct_text=instruction, json={
seed=seed, "text": text,
api_name="/generate_audio" "instruction": instruction,
"seed": seed
},
timeout=30
) )
if isinstance(result, (list, tuple)) and len(result) >= 2: if response.status_code == 200:
return result[0], result[1] result = response.json()
if result.get('success'):
# 直接从响应中获取音频数据
audio_data = result.get('audio_data')
if audio_data:
local_path = self._save_audio_data(audio_data, 'natural')
if local_path:
logger.info(f"自然语言控制生成成功: {local_path}")
return local_path, local_path
else: else:
return result, result return None, None
else:
logger.error("响应中没有音频数据")
return None, None
else:
logger.error(f"自然语言控制生成失败: {result.get('message')}")
return None, None
else:
logger.error(f"API请求失败: HTTP {response.status_code}")
return None, None
except Exception as e: except Exception as e:
logger.error(f"自然语言控制语音生成失败: {str(e)}") logger.error(f"自然语言控制生成失败: {str(e)}")
return None, None return None, None
def _save_audio_data(self, audio_data_hex: str, audio_type: str) -> Optional[str]:
    """Decode hex-encoded audio and store it in a new temporary WAV file.

    Args:
        audio_data_hex: Audio bytes encoded as a hexadecimal string.
        audio_type: Short label embedded in the temp file name prefix
            (``cosyvoice_<audio_type>_``).

    Returns:
        The path of the written file, or None if decoding or writing failed.
        Failures are logged and never raised to the caller.
    """
    local_path = None
    try:
        # Decode first so malformed input never creates a file at all.
        audio_bytes = bytes.fromhex(audio_data_hex)

        temp_fd, local_path = tempfile.mkstemp(
            suffix='.wav', prefix=f'cosyvoice_{audio_type}_'
        )
        # Write through the descriptor mkstemp already opened instead of
        # closing it and reopening the path.
        with os.fdopen(temp_fd, 'wb') as f:
            f.write(audio_bytes)

        logger.info(f"音频保存成功: {local_path}, 大小: {len(audio_bytes)} 字节")
        return local_path

    except Exception as e:
        logger.error(f"音频保存失败: {str(e)}")
        # Do not leave an empty or partial temp file behind on failure.
        if local_path is not None:
            try:
                os.remove(local_path)
            except OSError:
                pass
        return None
def generate_random_seed(self) -> int: def generate_random_seed(self) -> int:
"""生成随机种子""" """生成随机种子"""
try: try:
if not self.client: response = requests.get(f"{self.api_url}/random-seed", timeout=5)
if not self.connect(): if response.status_code == 200:
return 42 result = response.json()
return result.get('seed', 42)
seed = self.client.predict(api_name="/generate_random_seed") else:
return int(seed) if seed else 42 import random
return random.randint(1, 999999)
except Exception as e: except Exception as e:
logger.error(f"生成随机种子失败: {str(e)}") logger.error(f"生成随机种子失败: {str(e)}")
return 42 import random
return random.randint(1, 999999)
def test_connection(self) -> Dict[str, Any]: def test_connection(self) -> Dict[str, Any]:
"""测试与CosyVoice服务的连接""" """测试与CosyVoice服务的连接"""
@ -190,26 +264,55 @@ class CosyVoiceService:
if not self.connect(): if not self.connect():
return { return {
"success": False, "success": False,
"message": "无法连接到CosyVoice服务", "message": "无法连接到CosyVoice API服务器",
"api_url": self.api_url "api_url": self.api_url,
"service_status": self._service_status
} }
# 尝试获取音色列表来测试连接 # 获取音色列表
voices = self.get_available_voices() voices = self.get_available_voices()
# 测试语音生成功能(预训练音色)
generation_status = "未测试"
try:
stream_audio, full_audio = self.generate_speech_with_preset_voice(
text="测试",
voice=voices[0] if voices else "中文女",
seed=42
)
if stream_audio or full_audio:
generation_status = "正常"
self._service_status = "服务正常"
else:
generation_status = "生成失败"
self._service_status = "语音生成功能异常"
except Exception as e:
generation_status = f"测试失败: {str(e)}"
self._service_status = f"语音生成测试失败: {str(e)}"
return { return {
"success": True, "success": True,
"message": "CosyVoice服务连接成功", "message": "CosyVoice API服务连接成功",
"api_url": self.api_url, "api_url": self.api_url,
"available_voices": voices "available_voices": voices,
"reference_audios": self.get_reference_audios(),
"generation_status": generation_status,
"service_status": self._service_status,
"core_features": {
"voice_cloning": True,
"speech_recognition": True,
"natural_control": True
}
} }
except Exception as e: except Exception as e:
self._service_status = f"连接测试失败: {str(e)}"
return { return {
"success": False, "success": False,
"message": f"连接测试失败: {str(e)}", "message": f"连接测试失败: {str(e)}",
"api_url": self.api_url "api_url": self.api_url,
"service_status": self._service_status
} }
# 全局服务实例 # 全局服务实例
cosyvoice_service = CosyVoiceService() cosyvoice_service = CosyVoiceService()

View File

@ -1,16 +1,26 @@
/** /**
* CosyVoice API 测试页面 JavaScript * 语音克隆测试页面 JavaScript
* 核心功能语音样本采集 识别 克隆 对比
*/ */
// 全局变量 // 全局变量
let uploadedAudioPath = null;
let loadingModal = null; let loadingModal = null;
let mediaRecorder = null;
let audioChunks = [];
let uploadedAudioPath = null;
let recognizedText = "";
let sampleAudioUrl = null;
let recordedAudioBlob = null;
// 当前工作流程状态
let currentStep = 1;
// DOM加载完成后初始化 // DOM加载完成后初始化
document.addEventListener('DOMContentLoaded', function() { document.addEventListener('DOMContentLoaded', function() {
initializeComponents(); initializeComponents();
bindEvents(); bindEvents();
loadAvailableVoices(); loadAvailableVoices();
updateStepIndicators();
}); });
/** /**
@ -18,13 +28,6 @@ document.addEventListener('DOMContentLoaded', function() {
*/ */
function initializeComponents() { function initializeComponents() {
loadingModal = new bootstrap.Modal(document.getElementById('loadingModal')); loadingModal = new bootstrap.Modal(document.getElementById('loadingModal'));
// 语速滑块显示
const speedSlider = document.getElementById('preset-speed');
const speedValue = document.getElementById('preset-speed-value');
speedSlider.addEventListener('input', function() {
speedValue.textContent = this.value;
});
} }
/** /**
@ -34,28 +37,98 @@ function bindEvents() {
// 连接测试 // 连接测试
document.getElementById('test-connection-btn').addEventListener('click', testConnection); document.getElementById('test-connection-btn').addEventListener('click', testConnection);
// 预训练音色测试 // 语音样本采集
document.getElementById('preset-voice-form').addEventListener('submit', generatePresetVoice); document.getElementById('voice-sample-form').addEventListener('submit', uploadVoiceSample);
document.getElementById('preset-random-seed').addEventListener('click', () => getRandomSeed('preset-seed')); document.getElementById('voice-sample-upload').addEventListener('change', handleFileSelect);
document.getElementById('start-recording').addEventListener('click', startRecording);
document.getElementById('stop-recording').addEventListener('click', stopRecording);
// 自然语言控制测试 // 语音克隆生成
document.getElementById('natural-control-form').addEventListener('submit', generateNaturalControl); document.getElementById('clone-generation-form').addEventListener('submit', generateClonedVoice);
document.getElementById('natural-random-seed').addEventListener('click', () => getRandomSeed('natural-seed'));
// 语音克隆测试
document.getElementById('audio-upload-form').addEventListener('submit', uploadReferenceAudio);
document.getElementById('voice-clone-form').addEventListener('submit', generateVoiceClone);
document.getElementById('clone-random-seed').addEventListener('click', () => getRandomSeed('clone-seed')); document.getElementById('clone-random-seed').addEventListener('click', () => getRandomSeed('clone-seed'));
// 清空日志 // 高级功能
document.getElementById('preset-voice-form').addEventListener('submit', generatePresetVoice);
document.getElementById('natural-control-form').addEventListener('submit', generateNaturalControl);
// 其他
document.getElementById('clear-log').addEventListener('click', clearLog); document.getElementById('clear-log').addEventListener('click', clearLog);
} }
/**
 * Refresh the workflow step indicators and the connector lines between them.
 *
 * Steps before `currentStep` are marked `completed`, the step equal to
 * `currentStep` is marked `active`, and later steps carry neither class.
 * A connector line is marked `completed` when the step it follows is done.
 */
function updateStepIndicators() {
    const TOTAL_STEPS = 4;

    for (let step = 1; step <= TOTAL_STEPS; step++) {
        const indicator = document.getElementById(`step-${step}-indicator`);
        if (!indicator) {
            // A missing indicator must not abort the remaining UI updates.
            continue;
        }

        indicator.classList.remove('active', 'completed');
        if (step < currentStep) {
            indicator.classList.add('completed');
        } else if (step === currentStep) {
            indicator.classList.add('active');
        }
    }

    // Connector lines: the line at `index` follows step `index + 1`.
    document.querySelectorAll('.step-line').forEach((line, index) => {
        line.classList.toggle('completed', index + 1 < currentStep);
    });
}
/**
 * Move the workflow to the given step and refresh the step indicators.
 *
 * From step 3 onwards the clone-generation button is enabled.
 *
 * @param {number} step - Step number to make current.
 */
function goToStep(step) {
    currentStep = step;
    updateStepIndicators();

    // Nothing to unlock before step 3.
    if (step < 3) {
        return;
    }

    const cloneButton = document.getElementById('generate-clone-btn');
    cloneButton.disabled = false;
}
/**
 * Reset the workflow to step 1: clear the collected sample state, hide the
 * result panels, disable the action buttons and clear the file input.
 */
function resetWorkflow() {
    currentStep = 1;
    updateStepIndicators();

    // Release the blob URL created for the recording preview; dropping the
    // reference alone keeps the recorded audio alive until the page unloads.
    if (sampleAudioUrl) {
        URL.revokeObjectURL(sampleAudioUrl);
    }

    // Clear collected data.
    uploadedAudioPath = null;
    recognizedText = "";
    sampleAudioUrl = null;
    recordedAudioBlob = null;

    // Restore the panels to their initial visibility.
    document.getElementById('sample-player').style.display = 'none';
    document.getElementById('recognition-result').style.display = 'none';
    document.getElementById('recognition-waiting').style.display = 'block';
    document.getElementById('comparison-result').style.display = 'none';
    document.getElementById('comparison-waiting').style.display = 'block';

    // Both actions require a fresh sample, so disable them again.
    document.getElementById('upload-sample-btn').disabled = true;
    document.getElementById('generate-clone-btn').disabled = true;

    // Clear the file selection.
    document.getElementById('voice-sample-upload').value = '';

    addLog('工作流程已重置,可以重新开始', 'info');
}
/** /**
* 显示加载状态 * 显示加载状态
*/ */
function showLoading(message = '正在处理中...') { function showLoading(message = '正在处理中...', detail = '请稍候...') {
document.getElementById('loading-message').textContent = message; document.getElementById('loading-message').textContent = message;
document.getElementById('loading-detail').textContent = detail;
loadingModal.show(); loadingModal.show();
} }
@ -93,7 +166,7 @@ function addLog(message, type = 'info') {
*/ */
function clearLog() { function clearLog() {
const logContainer = document.getElementById('test-log'); const logContainer = document.getElementById('test-log');
logContainer.innerHTML = '<p class="text-muted">测试记录将显示在这里...</p>'; logContainer.innerHTML = '<p class="text-muted">操作记录将显示在这里...</p>';
} }
/** /**
@ -116,7 +189,6 @@ function showError(message) {
const bsToast = new bootstrap.Toast(toast); const bsToast = new bootstrap.Toast(toast);
bsToast.show(); bsToast.show();
// 自动移除
setTimeout(() => { setTimeout(() => {
if (toast.parentNode) { if (toast.parentNode) {
toast.parentNode.removeChild(toast); toast.parentNode.removeChild(toast);
@ -144,7 +216,6 @@ function showSuccess(message) {
const bsToast = new bootstrap.Toast(toast); const bsToast = new bootstrap.Toast(toast);
bsToast.show(); bsToast.show();
// 自动移除
setTimeout(() => { setTimeout(() => {
if (toast.parentNode) { if (toast.parentNode) {
toast.parentNode.removeChild(toast); toast.parentNode.removeChild(toast);
@ -152,6 +223,14 @@ function showSuccess(message) {
}, 3000); }, 3000);
} }
/**
 * Build the playback URL for an audio file stored on the server.
 *
 * Only the final path component is used. Both `/` and `\` are treated as
 * separators so paths reported by a Windows server work too, and the name is
 * URL-encoded so spaces or other reserved characters cannot break the URL.
 *
 * @param {string} audioPath - Server-side path of the audio file.
 * @returns {string} URL under `/voice-test/download-audio/`.
 */
function createAudioUrl(audioPath) {
    const filename = audioPath.split(/[\\/]/).pop();
    return `/voice-test/download-audio/${encodeURIComponent(filename)}`;
}
/** /**
* 测试连接 * 测试连接
*/ */
@ -179,10 +258,10 @@ async function testConnection() {
statusDiv.innerHTML = ` statusDiv.innerHTML = `
<span class="text-success"> <span class="text-success">
<i class="fas fa-check-circle me-2"></i> <i class="fas fa-check-circle me-2"></i>
<small class="text-muted">${result.api_url}</small> <small class="text-muted">支持语音克隆识别自然控制</small>
</span> </span>
`; `;
addLog(`连接成功!可用音色数量: ${result.available_voices ? result.available_voices.length : 0}`, 'success'); addLog(`连接成功!核心功能可用:语音克隆、识别、自然控制`, 'success');
// 更新音色列表 // 更新音色列表
if (result.available_voices) { if (result.available_voices) {
@ -259,147 +338,155 @@ async function getRandomSeed(inputId) {
} }
/** /**
* 预训练音色语音生成 * 处理文件选择
*/ */
async function generatePresetVoice(e) { function handleFileSelect(e) {
e.preventDefault(); const file = e.target.files[0];
if (file) {
// 重置录音状态
recordedAudioBlob = null;
sampleAudioUrl = null;
const text = document.getElementById('preset-text').value.trim(); addLog(`选择了音频文件: ${file.name} (${(file.size/1024/1024).toFixed(2)} MB)`);
const voice = document.getElementById('preset-voice').value; document.getElementById('upload-sample-btn').disabled = false;
const seed = parseInt(document.getElementById('preset-seed').value); document.getElementById('upload-sample-btn').innerHTML = '<i class="fas fa-upload me-2"></i>上传并识别语音';
const speed = parseFloat(document.getElementById('preset-speed').value);
if (!text) {
showError('请输入要合成的文本');
return;
}
showLoading('正在生成语音...');
addLog(`开始预训练音色生成 - 音色: ${voice}, 种子: ${seed}, 语速: ${speed}x`);
try {
const response = await fetch('/voice-test/api/voice-test/generate/preset', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
text: text,
voice: voice,
seed: seed,
speed: speed
})
});
const result = await response.json();
if (result.success) {
// 显示音频播放器
const audioSource = document.getElementById('preset-audio-source');
const resultDiv = document.getElementById('preset-result');
audioSource.src = result.audio_url;
audioSource.parentElement.load();
resultDiv.style.display = 'block';
addLog(`预训练音色生成成功!音频地址: ${result.audio_url}`, 'success');
showSuccess('语音生成成功!');
} else {
addLog(`预训练音色生成失败: ${result.message}`, 'error');
showError(result.message);
}
} catch (error) {
addLog(`预训练音色生成出错: ${error.message}`, 'error');
showError('生成失败,请检查网络连接');
} finally {
hideLoading();
} }
} }
/** /**
* 自然语言控制语音生成 * 开始录音
*/ */
async function generateNaturalControl(e) { async function startRecording() {
e.preventDefault();
const text = document.getElementById('natural-text').value.trim();
const instruction = document.getElementById('natural-instruction').value.trim();
const seed = parseInt(document.getElementById('natural-seed').value);
if (!text) {
showError('请输入要合成的文本');
return;
}
if (!instruction) {
showError('请输入语音指令');
return;
}
showLoading('正在生成语音...');
addLog(`开始自然语言控制生成 - 指令: ${instruction}, 种子: ${seed}`);
try { try {
const response = await fetch('/voice-test/api/voice-test/generate/natural', { // 重置文件选择
method: 'POST', document.getElementById('voice-sample-upload').value = '';
headers: {
'Content-Type': 'application/json', const stream = await navigator.mediaDevices.getUserMedia({
}, audio: {
body: JSON.stringify({ sampleRate: 16000, // 设置采样率为16kHz
text: text, channelCount: 1, // 单声道
instruction: instruction, echoCancellation: true,
seed: seed noiseSuppression: true
}) }
}); });
const result = await response.json(); // 创建MediaRecorder明确指定格式
const options = {
mimeType: 'audio/webm;codecs=opus' // 使用webm格式
};
if (result.success) { // 检查浏览器支持的格式
// 显示音频播放器 if (!MediaRecorder.isTypeSupported(options.mimeType)) {
const audioSource = document.getElementById('natural-audio-source'); if (MediaRecorder.isTypeSupported('audio/webm')) {
const resultDiv = document.getElementById('natural-result'); options.mimeType = 'audio/webm';
} else if (MediaRecorder.isTypeSupported('audio/wav')) {
audioSource.src = result.audio_url; options.mimeType = 'audio/wav';
audioSource.parentElement.load();
resultDiv.style.display = 'block';
addLog(`自然语言控制生成成功!音频地址: ${result.audio_url}`, 'success');
showSuccess('语音生成成功!');
} else { } else {
addLog(`自然语言控制生成失败: ${result.message}`, 'error'); // 使用默认格式
showError(result.message); delete options.mimeType;
}
} }
mediaRecorder = new MediaRecorder(stream, options);
audioChunks = [];
mediaRecorder.ondataavailable = function(event) {
if (event.data.size > 0) {
audioChunks.push(event.data);
}
};
mediaRecorder.onstop = function() {
// 创建音频Blob
recordedAudioBlob = new Blob(audioChunks, {
type: mediaRecorder.mimeType || 'audio/webm'
});
const audioUrl = URL.createObjectURL(recordedAudioBlob);
// 显示录音预览
const sampleAudio = document.getElementById('sample-audio');
const sampleSource = document.getElementById('sample-audio-source');
sampleSource.src = audioUrl;
sampleAudio.load();
document.getElementById('sample-player').style.display = 'block';
// 启用上传按钮
document.getElementById('upload-sample-btn').disabled = false;
document.getElementById('upload-sample-btn').innerHTML = '<i class="fas fa-upload me-2"></i>上传并识别语音';
// 保存录音数据
sampleAudioUrl = audioUrl;
addLog(`录音完成,格式: ${mediaRecorder.mimeType || 'default'}, 大小: ${(recordedAudioBlob.size/1024).toFixed(1)} KB`, 'success');
};
mediaRecorder.start(100); // 每100ms收集一次数据
// 更新UI
document.getElementById('start-recording').disabled = true;
document.getElementById('stop-recording').disabled = false;
document.getElementById('recording-status').textContent = '正在录音...';
document.getElementById('recording-status').className = 'text-danger';
addLog('开始录音...', 'info');
} catch (error) { } catch (error) {
addLog(`自然语言控制生成出错: ${error.message}`, 'error'); addLog(`录音失败: ${error.message}`, 'error');
showError('生成失败,请检查网络连接'); showError('录音失败,请检查麦克风权限');
} finally {
hideLoading();
} }
} }
/** /**
* 上传参考音频 * 停止录音
*/ */
async function uploadReferenceAudio(e) { function stopRecording() {
if (mediaRecorder && mediaRecorder.state !== 'inactive') {
mediaRecorder.stop();
mediaRecorder.stream.getTracks().forEach(track => track.stop());
}
// 更新UI
document.getElementById('start-recording').disabled = false;
document.getElementById('stop-recording').disabled = true;
document.getElementById('recording-status').textContent = '录音已完成';
document.getElementById('recording-status').className = 'text-success';
addLog('录音停止', 'info');
}
/**
* 上传语音样本并进行识别
*/
async function uploadVoiceSample(e) {
e.preventDefault(); e.preventDefault();
const fileInput = document.getElementById('reference-audio'); showLoading('正在上传和识别语音...', '包括格式转换和语音识别,请稍候');
addLog('开始上传语音样本进行识别...');
try {
const fileInput = document.getElementById('voice-sample-upload');
const file = fileInput.files[0]; const file = fileInput.files[0];
if (!file) { let formData = new FormData();
showError('请选择音频文件');
if (file) {
// 上传文件
formData.append('audio', file);
addLog(`上传文件: ${file.name}`);
} else if (recordedAudioBlob) {
// 上传录音 - 使用正确的文件名和类型
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
const filename = `recording_${timestamp}.webm`;
formData.append('audio', recordedAudioBlob, filename);
addLog(`上传录音: ${filename}, 大小: ${(recordedAudioBlob.size/1024).toFixed(1)} KB`);
} else {
showError('请选择音频文件或先录音');
hideLoading();
return; return;
} }
showLoading('正在上传并识别音频...'); // 上传并识别
addLog(`开始上传音频文件: ${file.name} (${(file.size/1024/1024).toFixed(2)} MB)`);
const formData = new FormData();
formData.append('audio', file);
try {
const response = await fetch('/voice-test/api/voice-test/upload-audio', { const response = await fetch('/voice-test/api/voice-test/upload-audio', {
method: 'POST', method: 'POST',
body: formData body: formData
@ -410,27 +497,34 @@ async function uploadReferenceAudio(e) {
if (result.success) { if (result.success) {
// 保存音频路径 // 保存音频路径
uploadedAudioPath = result.file_path; uploadedAudioPath = result.file_path;
recognizedText = result.recognized_text || '';
// 显示识别结果 // 显示识别结果
const resultDiv = document.getElementById('upload-result'); document.getElementById('recognized-text').value = recognizedText;
const recognizedText = document.getElementById('recognized-text'); document.getElementById('recognition-result').style.display = 'block';
document.getElementById('recognition-waiting').style.display = 'none';
recognizedText.value = result.recognized_text || ''; // 更新步骤
resultDiv.style.display = 'block'; goToStep(2);
setTimeout(() => goToStep(3), 1000);
// 启用克隆按钮 addLog(`语音识别成功: "${recognizedText}"`, 'success');
const cloneBtn = document.querySelector('#voice-clone-form button[type="submit"]'); addLog(`音频处理信息: ${result.file_info?.format || '已转换格式'}`, 'info');
cloneBtn.disabled = false; showSuccess('语音样本上传成功AI已识别出内容');
// 保存原始音频用于对比
const originalAudio = document.getElementById('original-audio');
const originalSource = document.getElementById('original-audio-source');
originalSource.src = createAudioUrl(uploadedAudioPath);
originalAudio.load();
addLog(`音频上传成功!识别文本: ${result.recognized_text || '(无内容)'}`, 'success');
showSuccess('音频上传成功!');
} else { } else {
addLog(`音频上传失败: ${result.message}`, 'error'); addLog(`语音识别失败: ${result.message}`, 'error');
showError(result.message); showError(result.message);
} }
} catch (error) { } catch (error) {
addLog(`音频上传出错: ${error.message}`, 'error'); addLog(`上传出错: ${error.message}`, 'error');
showError('上传失败,请检查网络连接'); showError('上传失败,请检查网络连接');
} finally { } finally {
hideLoading(); hideLoading();
@ -438,27 +532,28 @@ async function uploadReferenceAudio(e) {
} }
/** /**
* 语音克隆生成 * 生成克隆语音
*/ */
async function generateVoiceClone(e) { async function generateClonedVoice(e) {
e.preventDefault(); e.preventDefault();
if (!uploadedAudioPath) { if (!uploadedAudioPath) {
showError('请先上传参考音频'); showError('请先上传语音样本');
return; return;
} }
const text = document.getElementById('clone-text').value.trim(); const text = document.getElementById('clone-text').value.trim();
const referenceText = document.getElementById('recognized-text').value.trim();
const seed = parseInt(document.getElementById('clone-seed').value); const seed = parseInt(document.getElementById('clone-seed').value);
const referenceText = document.getElementById('recognized-text').value.trim();
if (!text) { if (!text) {
showError('请输入要合成的文本'); showError('请输入要合成的文本');
return; return;
} }
showLoading('正在进行语音克隆...'); showLoading('正在克隆你的声音...', '这是最复杂的步骤,请耐心等待');
addLog(`开始语音克隆 - 种子: ${seed}`); addLog(`开始语音克隆 - 目标文本: "${text.substring(0, 20)}..."`);
addLog(`使用音频文件: ${uploadedAudioPath}`);
try { try {
const response = await fetch('/voice-test/api/voice-test/generate/clone', { const response = await fetch('/voice-test/api/voice-test/generate/clone', {
@ -477,25 +572,143 @@ async function generateVoiceClone(e) {
const result = await response.json(); const result = await response.json();
if (result.success) { if (result.success) {
// 显示音频播放器 // 显示克隆语音
const audioSource = document.getElementById('clone-audio-source'); const clonedAudio = document.getElementById('cloned-audio');
const resultDiv = document.getElementById('clone-result'); const clonedSource = document.getElementById('cloned-audio-source');
clonedSource.src = createAudioUrl(result.audio_url);
clonedAudio.load();
audioSource.src = result.audio_url; // 显示对比界面
audioSource.parentElement.load(); document.getElementById('comparison-result').style.display = 'block';
resultDiv.style.display = 'block'; document.getElementById('comparison-waiting').style.display = 'none';
// 更新到最后步骤
goToStep(4);
addLog(`🎉 语音克隆成功!请对比原声和克隆效果`, 'success');
showSuccess('语音克隆完成!请播放音频对比效果');
addLog(`语音克隆成功!音频地址: ${result.audio_url}`, 'success');
showSuccess('语音克隆成功!');
} else { } else {
addLog(`语音克隆失败: ${result.message}`, 'error'); addLog(`语音克隆失败: ${result.message}`, 'error');
showError(result.message); showError(result.message || '语音克隆失败,请重试');
} }
} catch (error) { } catch (error) {
addLog(`语音克隆出错: ${error.message}`, 'error'); addLog(`克隆出错: ${error.message}`, 'error');
showError('克隆失败,请检查网络连接'); showError('克隆失败,请检查网络连接');
} finally { } finally {
hideLoading(); hideLoading();
} }
} }
/**
 * Generate speech with a pretrained voice (advanced feature).
 *
 * Submit handler for the preset-voice form: posts the entered text and the
 * selected voice to the preset generation endpoint and, on success, loads
 * the returned audio into the preset player and reveals it.
 *
 * @param {Event} e - Form submit event; its default action is cancelled.
 * @returns {Promise<void>} Resolves once the request has been handled.
 */
async function generatePresetVoice(e) {
    e.preventDefault();

    const text = document.getElementById('preset-text').value.trim();
    const voice = document.getElementById('preset-voice').value;

    // Nothing to synthesize without text.
    if (!text) {
        showError('请输入要合成的文本');
        return;
    }

    showLoading('正在生成预训练音色语音...');

    try {
        const response = await fetch('/voice-test/api/voice-test/generate/preset', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            // Seed and speed are sent as fixed values; this handler reads
            // no form control for them.
            body: JSON.stringify({
                text: text,
                voice: voice,
                seed: 42,
                speed: 1.0
            })
        });
        const result = await response.json();

        if (result.success) {
            const audioSource = document.getElementById('preset-audio-source');
            const resultDiv = document.getElementById('preset-result');

            audioSource.src = createAudioUrl(result.audio_url);
            // The <source> element's parent is the <audio> player; load()
            // makes it pick up the new src.
            audioSource.parentElement.load();
            resultDiv.style.display = 'block';

            addLog(`预训练音色生成成功 - ${voice}`, 'success');
        } else {
            // Fall back to a generic message so a failure response without
            // `message` is not shown to the user as "undefined".
            const message = result.message || '预训练音色生成失败,请重试';
            addLog(`预训练音色生成失败: ${message}`, 'error');
            showError(message);
        }
    } catch (error) {
        // Reached on network failure or when the response body is not JSON.
        addLog(`生成出错: ${error.message}`, 'error');
        showError('生成失败,请检查网络连接');
    } finally {
        // Always dismiss the loading indicator, whatever the outcome.
        hideLoading();
    }
}
/**
 * Generate speech steered by a natural-language instruction (advanced feature).
 *
 * Submit handler for the natural-control form: posts the text and the voice
 * instruction to the natural-control endpoint and, on success, loads the
 * returned audio into the natural-control player and reveals it.
 *
 * @param {Event} e - Form submit event; its default action is cancelled.
 * @returns {Promise<void>} Resolves once the request has been handled.
 */
async function generateNaturalControl(e) {
    e.preventDefault();

    const text = document.getElementById('natural-text').value.trim();
    const instruction = document.getElementById('natural-instruction').value.trim();

    // Both the text and the instruction are required.
    if (!text) {
        showError('请输入要合成的文本');
        return;
    }
    if (!instruction) {
        showError('请输入语音指令');
        return;
    }

    showLoading('正在生成自然语言控制语音...');

    try {
        const response = await fetch('/voice-test/api/voice-test/generate/natural', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            // The seed is sent as a fixed value; this handler reads no form
            // control for it.
            body: JSON.stringify({
                text: text,
                instruction: instruction,
                seed: 42
            })
        });
        const result = await response.json();

        if (result.success) {
            const audioSource = document.getElementById('natural-audio-source');
            const resultDiv = document.getElementById('natural-result');

            audioSource.src = createAudioUrl(result.audio_url);
            // The <source> element's parent is the <audio> player; load()
            // makes it pick up the new src.
            audioSource.parentElement.load();
            resultDiv.style.display = 'block';

            addLog(`自然语言控制生成成功`, 'success');
        } else {
            // Fall back to a generic message so a failure response without
            // `message` is not shown to the user as "undefined".
            const message = result.message || '自然语言控制生成失败,请重试';
            addLog(`自然语言控制生成失败: ${message}`, 'error');
            showError(message);
        }
    } catch (error) {
        // Reached on network failure or when the response body is not JSON.
        addLog(`生成出错: ${error.message}`, 'error');
        showError('生成失败,请检查网络连接');
    } finally {
        // Always dismiss the loading indicator, whatever the outcome.
        hideLoading();
    }
}

View File

@ -1,6 +1,6 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block title %}CosyVoice API 测试 - 儿童语言学习系统{% endblock %} {% block title %}语音克隆测试 - 儿童语言学习系统{% endblock %}
{% block content %} {% block content %}
<div class="container py-4"> <div class="container py-4">
@ -13,10 +13,10 @@
</a> </a>
<div> <div>
<h2 class="fw-bold mb-1"> <h2 class="fw-bold mb-1">
<i class="fas fa-microphone-alt text-primary me-2"></i> <i class="fas fa-magic text-primary me-2"></i>
CosyVoice API 测试 语音克隆技术测试
</h2> </h2>
<p class="text-muted mb-0">测试语音合成的各种功能</p> <p class="text-muted mb-0">体验"听自己说"的神奇技术 - 让AI学会你的声音</p>
</div> </div>
</div> </div>
</div> </div>
@ -43,150 +43,147 @@
</div> </div>
</div> </div>
<!-- 功能测试区域 --> <!-- 核心功能:语音克隆工作流程 -->
<div class="row"> <div class="row mb-4">
<!-- 预训练音色测试 --> <div class="col-12">
<div class="col-lg-6 mb-4"> <div class="card border-0 shadow-sm border-primary" style="border-width: 2px !important;">
<div class="card border-0 shadow-sm h-100">
<div class="card-body"> <div class="card-body">
<h5 class="card-title text-primary"> <h4 class="card-title text-primary">
<i class="fas fa-user-tie me-2"></i>预训练音色测试 <i class="fas fa-clone me-2"></i>语音克隆工作流程
<span class="badge bg-primary ms-2">核心功能</span>
</h4>
<p class="text-muted mb-4">按照步骤操作,体验完整的语音克隆过程</p>
<!-- 步骤指示器 -->
<div class="row mb-4">
<div class="col-12">
<div class="d-flex justify-content-between position-relative">
<div class="step-indicator active" id="step-1-indicator">
<div class="step-circle">1</div>
<div class="step-label">录制声音</div>
</div>
<div class="step-line"></div>
<div class="step-indicator" id="step-2-indicator">
<div class="step-circle">2</div>
<div class="step-label">识别文字</div>
</div>
<div class="step-line"></div>
<div class="step-indicator" id="step-3-indicator">
<div class="step-circle">3</div>
<div class="step-label">克隆生成</div>
</div>
<div class="step-line"></div>
<div class="step-indicator" id="step-4-indicator">
<div class="step-circle">4</div>
<div class="step-label">效果对比</div>
</div>
</div>
</div>
</div>
<div class="row">
<!-- 步骤1语音样本采集 -->
<div class="col-lg-6 mb-4">
<div class="step-content" id="step-1-content">
<h5 class="text-success">
<i class="fas fa-microphone me-2"></i>步骤1录制你的声音样本
</h5> </h5>
<form id="preset-voice-form"> <p class="text-muted">录制一段3-10秒的清晰语音作为语音克隆的模板</p>
<form id="voice-sample-form" enctype="multipart/form-data">
<div class="mb-3"> <div class="mb-3">
<label class="form-label">要合成的文本</label> <label class="form-label">方式一:上传音频文件</label>
<textarea class="form-control" id="preset-text" rows="3" <input type="file" class="form-control" id="voice-sample-upload"
placeholder="输入要转换成语音的文字...">今天天气真不错,适合出门散步。</textarea> accept=".wav,.mp3,.m4a,.flac">
<div class="form-text">建议WAV格式,16kHz采样率,3-10秒时长</div>
</div> </div>
<div class="mb-3 text-center">
<span class="text-muted"></span>
</div>
<div class="mb-3"> <div class="mb-3">
<label class="form-label">选择音色</label> <label class="form-label">方式二:在线录音</label>
<select class="form-select" id="preset-voice"> <div class="text-center">
<option value="中文女">中文女</option> <button type="button" id="start-recording" class="btn btn-danger me-2">
<option value="中文男">中文男</option> <i class="fas fa-record-vinyl me-2"></i>开始录音
<option value="播报女">播报女</option> </button>
<option value="新闻播报男">新闻播报男</option> <button type="button" id="stop-recording" class="btn btn-secondary" disabled>
<option value="英文女">英文女</option> <i class="fas fa-stop me-2"></i>停止录音
<option value="英文男">英文男</option> </button>
</select> <div class="mt-2">
<small id="recording-status" class="text-muted">点击开始录音</small>
</div> </div>
</div>
</div>
<button type="submit" class="btn btn-success w-100" disabled id="upload-sample-btn">
<i class="fas fa-upload me-2"></i>上传语音样本
</button>
</form>
<!-- 样本播放区域 -->
<div id="sample-player" class="mt-3" style="display: none;">
<div class="alert alert-info">
<h6><i class="fas fa-headphones me-2"></i>你的语音样本:</h6>
<audio controls class="w-100 mt-2" id="sample-audio">
<source id="sample-audio-source" type="audio/wav">
</audio>
</div>
</div>
</div>
</div>
<!-- 步骤2语音识别 -->
<div class="col-lg-6 mb-4">
<div class="step-content" id="step-2-content">
<h5 class="text-info">
<i class="fas fa-text-width me-2"></i>步骤2语音识别结果
</h5>
<p class="text-muted">AI识别出你说的内容,这将用于语音克隆训练</p>
<div id="recognition-result" style="display: none;">
<div class="mb-3">
<label class="form-label">识别的文本内容:</label>
<textarea class="form-control" id="recognized-text" rows="3"
placeholder="语音识别结果将显示在这里..."></textarea>
<div class="form-text">你可以手动修正识别错误的文字</div>
</div>
<div class="d-flex justify-content-between">
<small class="text-success">
<i class="fas fa-check-circle me-1"></i>识别完成
</small>
<button class="btn btn-outline-primary btn-sm" onclick="goToStep(3)">
下一步:克隆生成 <i class="fas fa-arrow-right ms-1"></i>
</button>
</div>
</div>
<div id="recognition-waiting" class="text-center text-muted">
<i class="fas fa-clock fa-2x mb-2"></i>
<p>等待上传语音样本...</p>
</div>
</div>
</div>
<!-- 步骤3语音克隆生成 -->
<div class="col-lg-6 mb-4">
<div class="step-content" id="step-3-content">
<h5 class="text-warning">
<i class="fas fa-magic me-2"></i>步骤3克隆你的声音
</h5>
<p class="text-muted">让AI用你的声音说新的话</p>
<form id="clone-generation-form">
<div class="mb-3">
<label class="form-label">想让AI用你的声音说什么</label>
<textarea class="form-control" id="clone-text" rows="3"
placeholder="例如你好我是AI克隆的声音听起来像真的一样">你好我是AI克隆的声音听起来像真的一样</textarea>
</div>
<div class="row mb-3"> <div class="row mb-3">
<div class="col-6"> <div class="col-6">
<label class="form-label">随机种子</label>
<div class="input-group">
<input type="number" class="form-control" id="preset-seed" value="42">
<button type="button" class="btn btn-outline-secondary" id="preset-random-seed">
<i class="fas fa-dice"></i>
</button>
</div>
</div>
<div class="col-6">
<label class="form-label">语速</label>
<input type="range" class="form-range" id="preset-speed" min="0.5" max="2" step="0.1" value="1">
<small class="text-muted">当前: <span id="preset-speed-value">1.0</span>x</small>
</div>
</div>
<button type="submit" class="btn btn-primary w-100">
<i class="fas fa-play me-2"></i>生成语音
</button>
</form>
<div id="preset-result" class="mt-3" style="display: none;">
<div class="alert alert-success">
<i class="fas fa-check-circle me-2"></i>生成成功!
<audio controls class="d-block mt-2 w-100">
<source id="preset-audio-source" type="audio/wav">
您的浏览器不支持音频播放。
</audio>
</div>
</div>
</div>
</div>
</div>
<!-- 自然语言控制测试 -->
<div class="col-lg-6 mb-4">
<div class="card border-0 shadow-sm h-100">
<div class="card-body">
<h5 class="card-title text-success">
<i class="fas fa-comments me-2"></i>自然语言控制测试
</h5>
<form id="natural-control-form">
<div class="mb-3">
<label class="form-label">要合成的文本</label>
<textarea class="form-control" id="natural-text" rows="3"
placeholder="输入要转换成语音的文字...">这是一段测试文本,用于验证自然语言控制功能。</textarea>
</div>
<div class="mb-3">
<label class="form-label">语音指令</label>
<textarea class="form-control" id="natural-instruction" rows="2"
placeholder="描述你想要的语音风格...">请用温柔甜美的女声朗读</textarea>
<div class="form-text">
示例:请用活泼开朗的语调、请用严肃正式的男声、请用轻柔的语气等
</div>
</div>
<div class="mb-3">
<label class="form-label">随机种子</label>
<div class="input-group">
<input type="number" class="form-control" id="natural-seed" value="42">
<button type="button" class="btn btn-outline-secondary" id="natural-random-seed">
<i class="fas fa-dice"></i>
</button>
</div>
</div>
<button type="submit" class="btn btn-success w-100">
<i class="fas fa-magic me-2"></i>生成语音
</button>
</form>
<div id="natural-result" class="mt-3" style="display: none;">
<div class="alert alert-success">
<i class="fas fa-check-circle me-2"></i>生成成功!
<audio controls class="d-block mt-2 w-100">
<source id="natural-audio-source" type="audio/wav">
您的浏览器不支持音频播放。
</audio>
</div>
</div>
</div>
</div>
</div>
<!-- 语音克隆测试 -->
<div class="col-12 mb-4">
<div class="card border-0 shadow-sm">
<div class="card-body">
<h5 class="card-title text-warning">
<i class="fas fa-clone me-2"></i>语音克隆测试
</h5>
<div class="row">
<div class="col-lg-6">
<h6 class="fw-bold mb-3">1. 上传参考音频</h6>
<form id="audio-upload-form" enctype="multipart/form-data">
<div class="mb-3">
<input type="file" class="form-control" id="reference-audio"
accept=".wav,.mp3,.m4a,.flac" required>
<div class="form-text">
支持格式WAV、MP3、M4A、FLAC建议3-10秒
</div>
</div>
<button type="submit" class="btn btn-outline-warning">
<i class="fas fa-upload me-2"></i>上传并识别
</button>
</form>
<div id="upload-result" class="mt-3" style="display: none;">
<div class="alert alert-info">
<h6><i class="fas fa-text-width me-2"></i>识别的文本内容:</h6>
<textarea class="form-control mt-2" id="recognized-text" rows="3"></textarea>
</div>
</div>
</div>
<div class="col-lg-6">
<h6 class="fw-bold mb-3">2. 生成克隆语音</h6>
<form id="voice-clone-form">
<div class="mb-3">
<label class="form-label">要合成的文本</label>
<textarea class="form-control" id="clone-text" rows="3"
placeholder="输入要用克隆声音说的话...">这是使用克隆声音合成的新内容。</textarea>
</div>
<div class="mb-3">
<label class="form-label">随机种子</label> <label class="form-label">随机种子</label>
<div class="input-group"> <div class="input-group">
<input type="number" class="form-control" id="clone-seed" value="42"> <input type="number" class="form-control" id="clone-seed" value="42">
@ -195,16 +192,141 @@
</button> </button>
</div> </div>
</div> </div>
<button type="submit" class="btn btn-warning w-100" disabled> <div class="col-6">
<i class="fas fa-clone me-2"></i>生成克隆语音 <label class="form-label">克隆质量</label>
<select class="form-select">
<option>高质量(推荐)</option>
</select>
</div>
</div>
<button type="submit" class="btn btn-warning w-100" disabled id="generate-clone-btn">
<i class="fas fa-magic me-2"></i>生成克隆语音
</button> </button>
</form> </form>
<div id="clone-result" class="mt-3" style="display: none;"> </div>
<div class="alert alert-success"> </div>
<i class="fas fa-check-circle me-2"></i>克隆成功!
<audio controls class="d-block mt-2 w-100"> <!-- 步骤4效果对比 -->
<source id="clone-audio-source" type="audio/wav"> <div class="col-lg-6 mb-4">
您的浏览器不支持音频播放。 <div class="step-content" id="step-4-content">
<h5 class="text-success">
<i class="fas fa-balance-scale me-2"></i>步骤4效果对比
</h5>
<p class="text-muted">对比原声和克隆声音的效果</p>
<div id="comparison-result" style="display: none;">
<div class="row">
<div class="col-6">
<h6><i class="fas fa-user me-2"></i>原声样本</h6>
<audio controls class="w-100" id="original-audio">
<source id="original-audio-source" type="audio/wav">
</audio>
<small class="text-muted">你录制的原始声音</small>
</div>
<div class="col-6">
<h6><i class="fas fa-robot me-2"></i>克隆声音</h6>
<audio controls class="w-100" id="cloned-audio">
<source id="cloned-audio-source" type="audio/wav">
</audio>
<small class="text-muted">AI克隆的声音</small>
</div>
</div>
<div class="mt-3 p-3 bg-light rounded">
<h6><i class="fas fa-star me-2"></i>给克隆效果打分:</h6>
<div class="btn-group" role="group">
<input type="radio" class="btn-check" name="rating" id="rating1">
<label class="btn btn-outline-warning" for="rating1">1★</label>
<input type="radio" class="btn-check" name="rating" id="rating2">
<label class="btn btn-outline-warning" for="rating2">2★</label>
<input type="radio" class="btn-check" name="rating" id="rating3">
<label class="btn btn-outline-warning" for="rating3">3★</label>
<input type="radio" class="btn-check" name="rating" id="rating4">
<label class="btn btn-outline-warning" for="rating4">4★</label>
<input type="radio" class="btn-check" name="rating" id="rating5">
<label class="btn btn-outline-warning" for="rating5">5★</label>
</div>
</div>
</div>
<div id="comparison-waiting" class="text-center text-muted">
<i class="fas fa-hourglass-half fa-2x mb-2"></i>
<p>等待生成克隆语音...</p>
</div>
</div>
</div>
</div>
<!-- 重新开始按钮 -->
<div class="text-center mt-4">
<button class="btn btn-outline-primary" onclick="resetWorkflow()">
<i class="fas fa-redo me-2"></i>重新开始克隆流程
</button>
</div>
</div>
</div>
</div>
</div>
<!-- 高级功能区域 -->
<div class="row mb-4">
<div class="col-12">
<div class="card border-0 shadow-sm">
<div class="card-body">
<h5 class="card-title">
<i class="fas fa-cogs me-2"></i>高级功能
<button class="btn btn-outline-secondary btn-sm float-end" type="button" data-bs-toggle="collapse" data-bs-target="#advanced-features">
<i class="fas fa-chevron-down"></i>
</button>
</h5>
<div class="collapse" id="advanced-features">
<div class="row">
<!-- 预训练音色测试 -->
<div class="col-lg-6 mb-4">
<h6 class="text-secondary">预训练音色测试</h6>
<form id="preset-voice-form">
<div class="mb-3">
<textarea class="form-control" id="preset-text" rows="2"
placeholder="输入要转换成语音的文字...">今天天气真不错,适合出门散步。</textarea>
</div>
<div class="mb-3">
<select class="form-select" id="preset-voice">
<option value="中文女">中文女</option>
</select>
</div>
<button type="submit" class="btn btn-secondary btn-sm">
<i class="fas fa-play me-2"></i>生成语音
</button>
</form>
<div id="preset-result" class="mt-2" style="display: none;">
<audio controls class="w-100">
<source id="preset-audio-source" type="audio/wav">
</audio>
</div>
</div>
<!-- 自然语言控制测试 -->
<div class="col-lg-6 mb-4">
<h6 class="text-secondary">自然语言控制</h6>
<form id="natural-control-form">
<div class="mb-3">
<textarea class="form-control" id="natural-text" rows="2"
placeholder="输入要合成的文字...">这是自然语言控制测试。</textarea>
</div>
<div class="mb-3">
<input class="form-control" id="natural-instruction"
placeholder="语音指令,如:请用温柔甜美的女声朗读"
value="请用温柔甜美的女声朗读">
</div>
<button type="submit" class="btn btn-secondary btn-sm">
<i class="fas fa-magic me-2"></i>生成语音
</button>
</form>
<div id="natural-result" class="mt-2" style="display: none;">
<audio controls class="w-100">
<source id="natural-audio-source" type="audio/wav">
</audio> </audio>
</div> </div>
</div> </div>
@ -215,19 +337,19 @@
</div> </div>
</div> </div>
<!-- 测试记录区域 --> <!-- 操作日志 -->
<div class="row"> <div class="row">
<div class="col-12"> <div class="col-12">
<div class="card border-0 shadow-sm"> <div class="card border-0 shadow-sm">
<div class="card-body"> <div class="card-body">
<h5 class="card-title"> <h5 class="card-title">
<i class="fas fa-history me-2"></i>测试记录 <i class="fas fa-history me-2"></i>操作日志
<button class="btn btn-outline-secondary btn-sm float-end" id="clear-log"> <button class="btn btn-outline-secondary btn-sm float-end" id="clear-log">
<i class="fas fa-trash me-1"></i>清空 <i class="fas fa-trash me-1"></i>清空
</button> </button>
</h5> </h5>
<div id="test-log" class="border rounded p-3" style="height: 300px; overflow-y: auto; background-color: #f8f9fa;"> <div id="test-log" class="border rounded p-3" style="height: 300px; overflow-y: auto; background-color: #f8f9fa;">
<p class="text-muted">测试记录将显示在这里...</p> <p class="text-muted">操作记录将显示在这里...</p>
</div> </div>
</div> </div>
</div> </div>
@ -242,10 +364,88 @@
<div class="modal-body text-center py-4"> <div class="modal-body text-center py-4">
<div class="spinner-border text-primary mb-3" role="status"></div> <div class="spinner-border text-primary mb-3" role="status"></div>
<h5 id="loading-message">正在处理中...</h5> <h5 id="loading-message">正在处理中...</h5>
<p id="loading-detail" class="text-muted">请稍候...</p>
</div> </div>
</div> </div>
</div> </div>
</div> </div>
<!-- Step indicator styles -->
<style>
/* One step of the workflow indicator: a circle stacked above its label.
   z-index 2 keeps it above the connector lines (z-index 1). */
.step-indicator {
display: flex;
flex-direction: column;
align-items: center;
position: relative;
z-index: 2;
}
/* Numbered circle; grey by default, with its content centered. */
.step-circle {
width: 40px;
height: 40px;
border-radius: 50%;
background-color: #e9ecef;
color: #6c757d;
display: flex;
align-items: center;
justify-content: center;
font-weight: bold;
margin-bottom: 8px;
transition: all 0.3s ease;
}
/* Active step: blue circle with white number. */
.step-indicator.active .step-circle {
background-color: #0d6efd;
color: white;
}
/* Completed step: green circle with white number. */
.step-indicator.completed .step-circle {
background-color: #198754;
color: white;
}
/* Caption under the circle; grey by default. */
.step-label {
font-size: 0.9rem;
text-align: center;
color: #6c757d;
}
/* Label colours follow the circle state (blue = active, green = completed). */
.step-indicator.active .step-label {
color: #0d6efd;
font-weight: 600;
}
.step-indicator.completed .step-label {
color: #198754;
font-weight: 600;
}
/* Connector between two steps; flex: 1 lets it fill the space between them.
   The 20px vertical margin is half the 40px circle height, presumably to
   align the line with the circle centers. */
.step-line {
flex: 1;
height: 2px;
background-color: #e9ecef;
margin: 20px 0;
position: relative;
z-index: 1;
}
/* Connector turns green when marked completed. */
.step-line.completed {
background-color: #198754;
}
/* At widths up to 768px: lay each step out as a row, with the circle's
   spacing moved from below it to its right. */
@media (max-width: 768px) {
.step-indicator {
flex-direction: row;
align-items: center;
margin-bottom: 10px;
}
.step-circle {
margin-bottom: 0;
margin-right: 10px;
}
}
</style>
{% endblock %} {% endblock %}
{% block scripts %} {% block scripts %}

View File

@ -10,3 +10,5 @@ email-validator==2.0.0
cryptography==41.0.4 cryptography==41.0.4
Werkzeug==2.3.7 Werkzeug==2.3.7
gradio_client==0.8.1 gradio_client==0.8.1
librosa
soundfile