# Ecobot_Scoala_Verde/backend/main.py

import os
import json
import base64
from pathlib import Path
from contextlib import asynccontextmanager
from dotenv import load_dotenv

# Load .env before importing modules that use env vars
load_dotenv()

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse

from stt import transcribe, load_model as load_whisper
from llm import get_response
from tts import synthesize, set_voice, TTS_ENGINE

# Paths
# Resolve this file's location to an absolute path first: if __file__ is
# relative (e.g. "main.py"), .parent.parent would collapse to "." and the
# directories below would depend on the current working directory.
BASE_DIR = Path(__file__).resolve().parent.parent
FRONTEND_DIR = BASE_DIR / "frontend"
VOICES_DIR = BASE_DIR / "backend" / "voices"


@asynccontextmanager
async def lifespan(app):
    """Run startup and shutdown work around the application's lifetime.

    Startup loads the Whisper model and, if any ``*.onnx`` file exists in
    ``VOICES_DIR``, registers the first one (in sorted order) as the TTS
    voice. When no voice file is found, download instructions are printed
    and startup continues without calling ``set_voice``.

    Args:
        app: The application instance this lifespan is attached to (unused).
    """
    # Startup
    print("Loading Whisper model...")
    load_whisper()
    print("Whisper model loaded.")

    # Sort the matches so the same voice is selected on every start when
    # several models are installed, instead of relying on directory order.
    voice_files = sorted(VOICES_DIR.glob("*.onnx")) if VOICES_DIR.exists() else []
    if voice_files:
        selected_voice = voice_files[0]
        set_voice(str(selected_voice))
        print(f"TTS voice loaded: {selected_voice.name}")
    else:
        print("WARNING: No voice model found in backend/voices/")
        print("Download a voice from: https://github.com/rhasspy/piper/blob/master/VOICES.md")
        print("Place the .onnx and .onnx.json files in backend/voices/")

    yield
    # Shutdown
    print("Server shutting down.")


# Application instance; startup/shutdown work is delegated to `lifespan`.
app = FastAPI(
    title="EcoBot - Scoala Verde",
    lifespan=lifespan,
)


# Expose the files in the frontend directory under the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory=str(FRONTEND_DIR)),
    name="static",
)


@app.get("/")
async def index():
    """Return the frontend's ``index.html`` as the response for the root URL."""
    index_page = FRONTEND_DIR / "index.html"
    return FileResponse(str(index_page))


# Lower-case phrases that count as the wake word when found in a transcript.
_WAKE_WORDS = ("ecobot", "eco bot", "eco-bot", "hello bot", "helo bot")

# Status text sent when a request could not be understood.
_NOT_UNDERSTOOD = "Nu am inteles. Incearca din nou."


async def _send_json(ws: WebSocket, payload: dict) -> None:
    """Serialize *payload* to JSON and send it to the client as a text frame."""
    await ws.send_text(json.dumps(payload))


def _contains_wake_word(text: str) -> bool:
    """Return True if any wake phrase occurs in *text*, ignoring case."""
    lowered = text.lower()
    return any(phrase in lowered for phrase in _WAKE_WORDS)


@app.websocket("/ws")
async def websocket_endpoint(ws: WebSocket):
    """Handle one client session of the speech -> LLM -> speech pipeline.

    Incoming frames are JSON text. Only objects with ``"type": "audio"`` are
    processed; they carry base64 audio in ``"data"`` and an optional
    ``"mode"``. Other well-formed message types are ignored. Malformed
    frames (invalid JSON, non-object JSON, missing ``"data"``, bad base64)
    are logged and answered with the "not understood" status instead of
    terminating the session.

    Outgoing message types: ``status``, ``wake_detected`` /
    ``wake_not_detected`` (when ``mode == "wake"``), ``user_text``,
    ``bot_text``, and finally ``audio_response`` or, if speech synthesis
    fails, ``text_only_response``.

    Args:
        ws: The WebSocket connection for this client.
    """
    await ws.accept()
    print("Client connected")

    try:
        while True:
            # Receive audio data from browser
            raw = await ws.receive_text()
            try:
                message = json.loads(raw)
                if not isinstance(message, dict):
                    raise ValueError("message is not a JSON object")
                if message.get("type") != "audio":
                    continue
                audio_bytes = base64.b64decode(message["data"])
            except (ValueError, KeyError, TypeError) as exc:
                # json.JSONDecodeError and binascii.Error are ValueErrors;
                # KeyError/TypeError cover a missing or non-string "data".
                print(f"[WS] Ignoring malformed message: {type(exc).__name__}")
                await _send_json(ws, {"type": "status", "text": _NOT_UNDERSTOOD})
                continue

            is_wake_mode = message.get("mode") == "wake"

            # Step 1: Speech to Text
            # NOTE(review): transcribe() and get_response() below are called
            # without await, so they run inline in this coroutine — confirm
            # whether they should be offloaded to a worker thread.
            await _send_json(ws, {"type": "status", "text": "Ascult..."})
            text = transcribe(audio_bytes)

            if not text:
                await _send_json(ws, {"type": "status", "text": _NOT_UNDERSTOOD})
                continue

            # Wake word detection: report the outcome and stop; no LLM/TTS.
            if is_wake_mode:
                detected = _contains_wake_word(text)
                await _send_json(ws, {
                    "type": "wake_detected" if detected else "wake_not_detected",
                    "text": text,
                })
                continue

            await _send_json(ws, {"type": "user_text", "text": text})

            # Step 2: Get AI response
            await _send_json(ws, {"type": "status", "text": "Ma gandesc..."})
            response_text = get_response(text)
            await _send_json(ws, {"type": "bot_text", "text": response_text})

            # Step 3: Text to Speech
            await _send_json(ws, {"type": "status", "text": "Pregatesc raspunsul..."})
            print(f"[TTS] Engine: {TTS_ENGINE}")
            # Non-ASCII characters are replaced so the log line prints on
            # consoles that cannot encode them.
            print(f"[TTS] Text to synthesize: {response_text.encode('ascii', 'replace').decode()}")
            try:
                # Only synthesis and encoding are guarded; a failure while
                # sending to the client is not a TTS error and must not
                # trigger the text-only fallback.
                audio_response = await synthesize(response_text)
                print(f"[TTS] OK - {len(audio_response)} bytes")
                audio_b64 = base64.b64encode(audio_response).decode("utf-8")
            except Exception as e:
                err_msg = str(e).encode('ascii', 'replace').decode()
                print(f"[TTS] ERROR: {type(e).__name__}: {err_msg}")
                import traceback
                traceback.print_exc()
                # Fall back to delivering the answer as text only.
                await _send_json(ws, {
                    "type": "text_only_response",
                    "text": response_text,
                })
            else:
                audio_mime = "audio/mpeg" if TTS_ENGINE == "edge" else "audio/wav"
                await _send_json(ws, {
                    "type": "audio_response",
                    "data": audio_b64,
                    "text": response_text,
                    "mime": audio_mime,
                })

    except WebSocketDisconnect:
        print("Client disconnected")


if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn, taking the bind address
    # and port from SERVER_HOST / SERVER_PORT when they are set.
    import uvicorn

    uvicorn.run(
        app,
        host=os.environ.get("SERVER_HOST", "0.0.0.0"),
        port=int(os.environ.get("SERVER_PORT", "8000")),
    )