From c66a3a42b4a98c5a78ef83d81ddba0464b91f168 Mon Sep 17 00:00:00 2001 From: superlishunqin <852326703@qq.com> Date: Mon, 15 Sep 2025 00:49:39 +0800 Subject: [PATCH] test-api-fix --- app/services/cosyvoice_service.py | 116 ++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 14 deletions(-) diff --git a/app/services/cosyvoice_service.py b/app/services/cosyvoice_service.py index 41a2077..b7e3603 100644 --- a/app/services/cosyvoice_service.py +++ b/app/services/cosyvoice_service.py @@ -33,10 +33,20 @@ class CosyVoiceService: if not self.connect(): return [] - voices = self.client.predict(api_name="/refresh_sft_spk") - # 过滤掉不需要的音色 - filtered_voices = [voice for voice in voices if voice != '.ipynb_checkpoints'] - return filtered_voices + result = self.client.predict(api_name="/refresh_sft_spk") + + # 处理返回的字典格式 + if isinstance(result, dict) and 'choices' in result: + # 从choices中提取音色名称 + voices = [choice[0] for choice in result['choices'] if choice[0] != '.ipynb_checkpoints'] + return voices + elif isinstance(result, list): + # 直接是列表格式 + return [voice for voice in result if voice != '.ipynb_checkpoints'] + else: + logger.error(f"未知的音色列表格式: {result}") + return [] + except Exception as e: logger.error(f"获取音色列表失败: {str(e)}") return [] @@ -48,8 +58,17 @@ class CosyVoiceService: if not self.connect(): return [] - audio_files = self.client.predict(api_name="/refresh_prompt_wav") - return audio_files + result = self.client.predict(api_name="/refresh_prompt_wav") + + # 处理返回的字典格式 + if isinstance(result, dict) and 'choices' in result: + audios = [choice[0] for choice in result['choices']] + return audios + elif isinstance(result, list): + return result + else: + return [] + except Exception as e: logger.error(f"获取参考音频列表失败: {str(e)}") return [] @@ -65,7 +84,7 @@ class CosyVoiceService: prompt_wav=handle_file(audio_file_path), api_name="/prompt_wav_recognition" ) - return text + return text if isinstance(text, str) else "" except Exception as e: logger.error(f"语音识别失败: {str(e)}") return "" @@ -84,16 +103,41 @@ class CosyVoiceService: if not self.connect(): return None, None + # 创建临时空音频文件用于占位 + import tempfile + import wave + import numpy as np + + # 创建一个短的静音音频作为占位符 + temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) + with wave.open(temp_audio.name, 'w') as wav_file: + wav_file.setnchannels(1) # 单声道 + wav_file.setsampwidth(2) # 16位 + wav_file.setframerate(16000) # 16kHz采样率 + # 写入很短的静音(0.1秒) + silence = np.zeros(1600, dtype=np.int16) + wav_file.writeframes(silence.tobytes()) + result = self.client.predict( tts_text=text, mode_checkbox_group="预训练音色", sft_dropdown=voice, - seed=seed, - speed=speed, - stream="true" if stream else "false", + prompt_text="", + prompt_wav_upload=handle_file(temp_audio.name), + prompt_wav_record=handle_file(temp_audio.name), + instruct_text="", + seed=float(seed), + stream="True" if stream else "False", + speed=float(speed), api_name="/generate_audio" ) + # 清理临时文件 + try: + os.unlink(temp_audio.name) + except: + pass + # result是一个元组 [流式音频路径, 完整音频路径] if isinstance(result, (list, tuple)) and len(result) >= 2: return result[0], result[1] @@ -127,9 +171,14 @@ class CosyVoiceService: result = self.client.predict( tts_text=text, mode_checkbox_group="3s极速复刻", + sft_dropdown="中文女", prompt_text=reference_text, prompt_wav_upload=handle_file(reference_audio_path), - seed=seed, + prompt_wav_record=handle_file(reference_audio_path), + instruct_text="", + seed=float(seed), + stream="False", + speed=1.0, api_name="/generate_audio" ) @@ -154,14 +203,41 @@ class CosyVoiceService: if not self.connect(): return None, None + # 创建临时空音频文件用于占位 + import tempfile + import wave + import numpy as np + + # 创建一个短的静音音频作为占位符 + temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) + with wave.open(temp_audio.name, 'w') as wav_file: + wav_file.setnchannels(1) # 单声道 + wav_file.setsampwidth(2) # 16位 + wav_file.setframerate(16000) # 16kHz采样率 + # 写入很短的静音(0.1秒) + silence = np.zeros(1600, dtype=np.int16) + wav_file.writeframes(silence.tobytes()) + result = self.client.predict( tts_text=text, mode_checkbox_group="自然语言控制", + sft_dropdown="中文女", + prompt_text="", + prompt_wav_upload=handle_file(temp_audio.name), + prompt_wav_record=handle_file(temp_audio.name), instruct_text=instruction, - seed=seed, + seed=float(seed), + stream="False", + speed=1.0, api_name="/generate_audio" ) + # 清理临时文件 + try: + os.unlink(temp_audio.name) + except: + pass + if isinstance(result, (list, tuple)) and len(result) >= 2: return result[0], result[1] else: @@ -178,8 +254,20 @@ class CosyVoiceService: if not self.connect(): return 42 - seed = self.client.predict(api_name="/generate_random_seed") - return int(seed) if seed else 42 + result = self.client.predict(api_name="/generate_random_seed") + + # 处理返回的字典格式 + if isinstance(result, dict) and 'value' in result: + seed = int(result['value']) + elif isinstance(result, (int, float)): + seed = int(result) + elif isinstance(result, str) and result.isdigit(): + seed = int(result) + else: + logger.warning(f"未知的随机种子格式: {result}") + seed = 42 + + return seed except Exception as e: logger.error(f"生成随机种子失败: {str(e)}") return 42