32 lines
898 B
Python
32 lines
898 B
Python
import os
|
|
import tempfile
|
|
from faster_whisper import WhisperModel
|
|
|
|
# Runtime configuration, read once at import time from the environment.
# Each setting falls back to the default given here when the variable is unset.
WHISPER_MODEL = os.getenv("WHISPER_MODEL", "small")
WHISPER_DEVICE = os.getenv("WHISPER_DEVICE", "cuda")
WHISPER_COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "float16")
WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", "ro")

# Process-wide model cache; stays None until load_model() builds the model.
model = None
|
|
|
|
|
|
def load_model() -> "WhisperModel":
    """Return the shared Whisper model, constructing it on first use.

    The model is built from ``WHISPER_MODEL``, ``WHISPER_DEVICE`` and
    ``WHISPER_COMPUTE_TYPE`` and stored in the module-level ``model``
    variable, so every later call returns the same object without
    constructing it again.

    Returns:
        The cached ``WhisperModel`` instance.
    """
    global model
    if model is None:
        # First call: build the model once and keep it for the process.
        model = WhisperModel(
            WHISPER_MODEL,
            device=WHISPER_DEVICE,
            compute_type=WHISPER_COMPUTE_TYPE,
        )
    return model
|
|
|
|
|
|
def transcribe(audio_bytes: bytes) -> str:
    """Transcribe an in-memory audio clip and return the recognised text.

    The bytes are written to a temporary file with a ``.wav`` suffix, the
    file path is passed to the shared Whisper model together with
    ``language=WHISPER_LANGUAGE``, and the file is removed before this
    function returns, whether or not writing or transcription succeeded.

    Args:
        audio_bytes: Raw contents of the audio file to transcribe.

    Returns:
        The text of all segments, each stripped of surrounding whitespace
        and joined with single spaces. An empty string when no segment
        carries any text.

    Raises:
        Whatever the file write or the model's ``transcribe`` raises; the
        temporary file is still cleaned up in that case.
    """
    whisper = load_model()

    # delete=False because the model opens the file by path after we close it;
    # removal is therefore our responsibility (see the finally clause).
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_path = tmp.name
    try:
        # The write lives inside the try so that a failed write (bad payload,
        # disk full, ...) cannot leave the temporary file behind.
        with tmp:
            tmp.write(audio_bytes)

        segments, _ = whisper.transcribe(tmp_path, language=WHISPER_LANGUAGE)

        # Consume the segments before the file is unlinked: faster-whisper
        # documents `segments` as a generator that does the work lazily.
        # Segment texts usually carry their own leading space, so strip each
        # one (and skip blanks) to avoid doubled spaces in the joined result.
        pieces = (seg.text.strip() for seg in segments)
        return " ".join(piece for piece in pieces if piece)
    finally:
        os.unlink(tmp_path)
|