Ecobot_Scoala_Verde/backend/stt.py
Stefan Caramizoiu d7a7d2cafd Initial commit
2026-04-01 11:14:26 +03:00

32 lines
898 B
Python

import os
import tempfile
from faster_whisper import WhisperModel
# Runtime configuration. Every value can be overridden through an environment
# variable of the same name; the second argument is the fallback used when the
# variable is unset.
WHISPER_MODEL = os.environ.get("WHISPER_MODEL", "small")  # first argument given to WhisperModel
WHISPER_DEVICE = os.environ.get("WHISPER_DEVICE", "cuda")  # WhisperModel ``device``
WHISPER_COMPUTE_TYPE = os.environ.get("WHISPER_COMPUTE_TYPE", "float16")  # WhisperModel ``compute_type``
WHISPER_LANGUAGE = os.environ.get("WHISPER_LANGUAGE", "ro")  # ``language`` passed when transcribing

# Module-wide WhisperModel instance; remains None until load_model() builds it.
model = None
def load_model():
    """Return the module-wide Whisper model, building it on first use.

    The instance is stored in the module global ``model``. While that global
    is ``None`` a new ``WhisperModel`` is constructed from ``WHISPER_MODEL``,
    ``WHISPER_DEVICE`` and ``WHISPER_COMPUTE_TYPE``; afterwards the stored
    instance is returned unchanged.
    """
    global model

    # Already built by an earlier call: hand back the cached instance.
    if model is not None:
        return model

    model = WhisperModel(
        WHISPER_MODEL,
        device=WHISPER_DEVICE,
        compute_type=WHISPER_COMPUTE_TYPE,
    )
    return model
def transcribe(audio_bytes: bytes) -> str:
    """Transcribe an audio clip to text with the shared Whisper model.

    The bytes are written to a temporary ``.wav`` file whose path is passed to
    the model's ``transcribe`` method together with ``WHISPER_LANGUAGE``. The
    text of every returned segment is joined with single spaces and the result
    is stripped of surrounding whitespace.

    Args:
        audio_bytes: Raw contents of the audio file to transcribe.

    Returns:
        The transcript, or ``""`` when ``audio_bytes`` is empty.
    """
    # Nothing to decode: skip the model load and the temp file altogether.
    if not audio_bytes:
        return ""

    whisper = load_model()

    # delete=False keeps the file on disk after the handle is closed so it can
    # be opened again by path; it is removed explicitly in the finally below.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_path = tmp.name
    try:
        # The write sits inside the try so a failed write still cleans up.
        with tmp:
            tmp.write(audio_bytes)
        segments, _ = whisper.transcribe(tmp_path, language=WHISPER_LANGUAGE)
        # Consume the segments before the file is removed.
        return " ".join(seg.text for seg in segments).strip()
    finally:
        os.unlink(tmp_path)