"""Speech-to-text helper built on a lazily loaded faster-whisper model.

Configuration is read from environment variables once, at import time.
"""

import os
import tempfile

from faster_whisper import WhisperModel

# Model settings; each can be overridden through the environment variable of
# the same name.
WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small")
WHISPER_DEVICE = os.getenv("WHISPER_DEVICE", "cuda")
WHISPER_COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "float16")
# Language code forwarded to every ``transcribe`` call.
WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", "ro")

# Process-wide model instance; stays ``None`` until ``load_model`` is called.
model = None


def load_model() -> WhisperModel:
    """Return the shared ``WhisperModel``, creating it on first use.

    The instance is stored in the module-level ``model`` global so later
    calls reuse it instead of constructing a new one.
    """
    global model
    if model is None:
        model = WhisperModel(
            WHISPER_MODEL,
            device=WHISPER_DEVICE,
            compute_type=WHISPER_COMPUTE_TYPE,
        )
    return model


def transcribe(audio_bytes: bytes) -> str:
    """Transcribe an in-memory audio payload and return the recognised text.

    The payload is written to a temporary ``.wav``-suffixed file, passed to
    the model with ``WHISPER_LANGUAGE``, and the file is removed afterwards
    whether or not transcription succeeds.

    Args:
        audio_bytes: Raw contents of the audio file to transcribe.

    Returns:
        The segment texts, each stripped of surrounding whitespace and joined
        by single spaces. Empty input yields ``""`` without loading the model
        or touching the filesystem.
    """
    if not audio_bytes:
        return ""

    whisper = load_model()

    # delete=False because the model opens the file by path after we close it;
    # removal is handled explicitly in the ``finally`` below.
    tmp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_path = tmp_file.name
    try:
        # The write lives inside the try so a failed write (e.g. disk full)
        # does not leave the temporary file behind.
        with tmp_file:
            tmp_file.write(audio_bytes)

        segments, _ = whisper.transcribe(tmp_path, language=WHISPER_LANGUAGE)

        # ``segments`` is consumed here, before the file is deleted:
        # faster-whisper yields segments lazily, so decoding may still be
        # reading the file while we iterate.
        # Strip each piece individually so segments that carry their own
        # leading/trailing whitespace do not produce doubled spaces.
        pieces = (seg.text.strip() for seg in segments)
        return " ".join(piece for piece in pieces if piece)
    finally:
        os.unlink(tmp_path)