From cfa15f2c5c7b5ad1bc2aa9bb39dc652fb9527857 Mon Sep 17 00:00:00 2001 From: superlishunqin <852326703@qq.com> Date: Mon, 15 Sep 2025 00:51:19 +0800 Subject: [PATCH] test-api-fix3 --- app/services/cosyvoice_service.py | 112 ++++++++++++++++++------------ 1 file changed, 66 insertions(+), 46 deletions(-) diff --git a/app/services/cosyvoice_service.py b/app/services/cosyvoice_service.py index b7e3603..5a09434 100644 --- a/app/services/cosyvoice_service.py +++ b/app/services/cosyvoice_service.py @@ -34,11 +34,17 @@ class CosyVoiceService: return [] result = self.client.predict(api_name="/refresh_sft_spk") + logger.info(f"音色列表原始返回: {result}") # 处理返回的字典格式 if isinstance(result, dict) and 'choices' in result: - # 从choices中提取音色名称 - voices = [choice[0] for choice in result['choices'] if choice[0] != '.ipynb_checkpoints'] + # 从choices中提取音色名称,格式是 [['name', 'name'], ...] + voices = [] + for choice in result['choices']: + if isinstance(choice, list) and len(choice) > 0: + voice_name = choice[0] + if voice_name != '.ipynb_checkpoints': + voices.append(voice_name) return voices elif isinstance(result, list): # 直接是列表格式 @@ -62,7 +68,10 @@ class CosyVoiceService: # 处理返回的字典格式 if isinstance(result, dict) and 'choices' in result: - audios = [choice[0] for choice in result['choices']] + audios = [] + for choice in result['choices']: + if isinstance(choice, list) and len(choice) > 0: + audios.append(choice[0]) return audios elif isinstance(result, list): return result @@ -89,6 +98,35 @@ class CosyVoiceService: logger.error(f"语音识别失败: {str(e)}") return "" + def _create_empty_audio_file(self) -> str: + """创建临时的空音频文件""" + import tempfile + import wave + import numpy as np + + # 创建临时文件 + temp_fd, temp_path = tempfile.mkstemp(suffix='.wav') + os.close(temp_fd) # 关闭文件描述符 + + try: + # 创建一个很短的静音音频 + with wave.open(temp_path, 'w') as wav_file: + wav_file.setnchannels(1) # 单声道 + wav_file.setsampwidth(2) # 16位 + wav_file.setframerate(16000) # 16kHz采样率 + # 写入0.01秒的静音 + silence = np.zeros(160, dtype=np.int16) + wav_file.writeframes(silence.tobytes()) + + return temp_path + except Exception as e: + # 如果创建失败,删除临时文件 + try: + os.unlink(temp_path) + except: + pass + raise e + def generate_speech_with_preset_voice( self, text: str, @@ -98,33 +136,22 @@ class CosyVoiceService: stream: bool = False ) -> Tuple[Optional[str], Optional[str]]: """使用预训练音色生成语音""" + temp_audio_path = None try: if not self.client: if not self.connect(): return None, None - # 创建临时空音频文件用于占位 - import tempfile - import wave - import numpy as np - - # 创建一个短的静音音频作为占位符 - temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) - with wave.open(temp_audio.name, 'w') as wav_file: - wav_file.setnchannels(1) # 单声道 - wav_file.setsampwidth(2) # 16位 - wav_file.setframerate(16000) # 16kHz采样率 - # 写入很短的静音(0.1秒) - silence = np.zeros(1600, dtype=np.int16) - wav_file.writeframes(silence.tobytes()) + # 创建临时空音频文件 + temp_audio_path = self._create_empty_audio_file() result = self.client.predict( tts_text=text, mode_checkbox_group="预训练音色", sft_dropdown=voice, prompt_text="", - prompt_wav_upload=handle_file(temp_audio.name), - prompt_wav_record=handle_file(temp_audio.name), + prompt_wav_upload=handle_file(temp_audio_path), + prompt_wav_record=handle_file(temp_audio_path), instruct_text="", seed=float(seed), stream="True" if stream else "False", @@ -132,11 +159,7 @@ class CosyVoiceService: api_name="/generate_audio" ) - # 清理临时文件 - try: - os.unlink(temp_audio.name) - except: - pass + logger.info(f"预训练音色生成结果: {result}") # result是一个元组 [流式音频路径, 完整音频路径] if isinstance(result, (list, tuple)) and len(result) >= 2: @@ -147,6 +170,13 @@ class CosyVoiceService: except Exception as e: logger.error(f"预训练音色语音生成失败: {str(e)}") return None, None + finally: + # 清理临时文件 + if temp_audio_path and os.path.exists(temp_audio_path): + try: + os.unlink(temp_audio_path) + except: + pass def generate_speech_with_voice_cloning( self, @@ -198,33 +228,22 @@ class CosyVoiceService: seed: int = 42 ) -> Tuple[Optional[str], Optional[str]]: """使用自然语言控制生成语音""" + temp_audio_path = None try: if not self.client: if not self.connect(): return None, None - # 创建临时空音频文件用于占位 - import tempfile - import wave - import numpy as np - - # 创建一个短的静音音频作为占位符 - temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False) - with wave.open(temp_audio.name, 'w') as wav_file: - wav_file.setnchannels(1) # 单声道 - wav_file.setsampwidth(2) # 16位 - wav_file.setframerate(16000) # 16kHz采样率 - # 写入很短的静音(0.1秒) - silence = np.zeros(1600, dtype=np.int16) - wav_file.writeframes(silence.tobytes()) + # 创建临时空音频文件 + temp_audio_path = self._create_empty_audio_file() result = self.client.predict( tts_text=text, mode_checkbox_group="自然语言控制", sft_dropdown="中文女", prompt_text="", - prompt_wav_upload=handle_file(temp_audio.name), - prompt_wav_record=handle_file(temp_audio.name), + prompt_wav_upload=handle_file(temp_audio_path), + prompt_wav_record=handle_file(temp_audio_path), instruct_text=instruction, seed=float(seed), stream="False", @@ -232,12 +251,6 @@ class CosyVoiceService: api_name="/generate_audio" ) - # 清理临时文件 - try: - os.unlink(temp_audio.name) - except: - pass - if isinstance(result, (list, tuple)) and len(result) >= 2: return result[0], result[1] else: @@ -246,6 +259,13 @@ class CosyVoiceService: except Exception as e: logger.error(f"自然语言控制语音生成失败: {str(e)}") return None, None + finally: + # 清理临时文件 + if temp_audio_path and os.path.exists(temp_audio_path): + try: + os.unlink(temp_audio_path) + except: + pass def generate_random_seed(self) -> int: """生成随机种子"""