"""Text-to-speech backends: Edge TTS (online) and Piper (local executable).

The active backend is chosen once at import time from the ``TTS_ENGINE``
environment variable; ``synthesize`` is the single public entry point.
"""

import asyncio
import os
import shutil
import subprocess
import tempfile

import edge_tts

# TTS_ENGINE: "edge" (default) or "piper"
TTS_ENGINE = os.getenv("TTS_ENGINE", "edge")

# --- Edge TTS config ---
# Romanian voices: ro-RO-EmilNeural (male), ro-RO-AlinaNeural (female)
EDGE_VOICE = os.getenv("TTS_VOICE", "ro-RO-EmilNeural")

# --- Piper TTS config ---
# Path to the Piper voice model; stays None until set_voice() is called.
PIPER_VOICE_MODEL = None


def _find_piper_exe() -> str:
    """Locate the Piper executable.

    Lookup order:
      1. ``piper`` on PATH.
      2. ``venv/Scripts/piper.exe`` two directory levels above this file.
      3. The bare name ``"piper"`` as a last resort (resolution is then left
         to the OS when the process is spawned).
    """
    piper_path = shutil.which("piper")
    if piper_path:
        return piper_path

    project_root = os.path.dirname(os.path.dirname(__file__))
    venv_piper = os.path.join(project_root, "venv", "Scripts", "piper.exe")
    if os.path.exists(venv_piper):
        return venv_piper

    return "piper"


PIPER_EXE = _find_piper_exe()


def set_voice(model_path: str) -> None:
    """Set Piper voice model path (called from main.py startup)."""
    global PIPER_VOICE_MODEL
    PIPER_VOICE_MODEL = model_path


async def synthesize(text: str) -> bytes:
    """Synthesize *text* with the configured engine and return the audio bytes.

    With ``TTS_ENGINE == "piper"`` the result is the content of the ``.wav``
    file written by Piper; otherwise it is the content of the ``.mp3`` file
    written by Edge TTS.

    Raises:
        RuntimeError: Piper is selected but no voice model is configured, or
            the Piper process exits with a non-zero status.
        subprocess.TimeoutExpired: the Piper process exceeds its timeout.
    """
    if TTS_ENGINE == "piper":
        # _synthesize_piper blocks on a subprocess (up to 30 s); run it in the
        # default executor so the event loop keeps serving other coroutines.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _synthesize_piper, text)
    return await _synthesize_edge(text)


# --- Edge TTS ---

async def _synthesize_edge(text: str) -> bytes:
    """Render *text* with Edge TTS using ``EDGE_VOICE`` and return the bytes."""
    # delete=False: only the unique path is needed here. The handle is closed
    # on leaving the with-block, then edge_tts writes to that path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        tmp_path = f.name
    try:
        communicate = edge_tts.Communicate(text, EDGE_VOICE)
        await communicate.save(tmp_path)
        with open(tmp_path, "rb") as f:
            return f.read()
    finally:
        # Always remove the temp file, even when synthesis fails.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)


# --- Piper TTS ---

def _synthesize_piper(text: str) -> bytes:
    """Render *text* with the Piper executable and return the output bytes.

    The text is sent to Piper on stdin as UTF-8, and Piper writes its output
    to a temporary ``.wav`` file that is read back and then deleted.

    Raises:
        RuntimeError: no voice model is configured, or Piper exits non-zero.
        subprocess.TimeoutExpired: Piper does not finish within 30 seconds.
    """
    if PIPER_VOICE_MODEL is None:
        raise RuntimeError("No Piper voice model configured. Place .onnx files in backend/voices/")

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        out_path = f.name
    try:
        process = subprocess.run(
            [PIPER_EXE, "--model", PIPER_VOICE_MODEL, "--output_file", out_path],
            input=text.encode("utf-8"),
            capture_output=True,
            timeout=30,
        )
        if process.returncode != 0:
            # errors="replace": stderr may not be valid UTF-8 (e.g. a Windows
            # console code page); a decode error must not hide the failure.
            stderr_text = process.stderr.decode("utf-8", errors="replace")
            raise RuntimeError(f"Piper TTS failed: {stderr_text}")
        with open(out_path, "rb") as f:
            return f.read()
    finally:
        # Always remove the temp file, even when Piper fails or times out.
        if os.path.exists(out_path):
            os.unlink(out_path)