Items completed: 1. Merged PR #2 (starlette/httpx deps) 2. Fixed async race condition in multimodal_ui.py 3. Wired TTSAdapter (ElevenLabs, Azure) in API routes 4. Moved super_big_brain.py from core/ to reasoning/ (backward compat shim) 5. Added API authentication middleware (Bearer token via FUSIONAGI_API_KEY) 6. Added async adapter interface (acomplete/acomplete_structured) 7. Migrated FastAPI on_event to lifespan (fixes 20 deprecation warnings) 8. Liquid Neural Networks (continuous-time adaptive weights) 9. Quantum-AI Hybrid compute backend (simulator + optimization) 10. Embodied Intelligence / Robotics bridge (actuator + sensor protocols) 11. Consciousness Engineering (formal self-model with introspection) 12. ASI Scoring Rubric (C/A/L/N/R self-assessment harness) 13. GPU integration tests for TensorFlow backend 14. Multi-stage production Dockerfile 15. Gitea CI/CD pipeline (lint, test matrix, Docker build) 16. API rate limiting middleware (per-IP sliding window) 17. OpenAPI docs cleanup (auth + rate limiting descriptions) 18. Benchmarking suite (decomposition, multi-path, recomposition, e2e) 19. Plugin system (head registry for custom heads) 427 tests passing, 0 ruff errors, 0 mypy errors. Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
70 lines
1.9 KiB
Python
"""TTS synthesis routes for per-head voice output."""
|
|
|
|
from __future__ import annotations

import base64
from typing import Any

from fastapi import APIRouter, HTTPException

from fusionagi.api.dependencies import get_session_store
from fusionagi.config.head_voices import get_voice_id_for_head
from fusionagi.schemas.head import HeadId
|
|
|
|
router = APIRouter()

# Process-wide TTS adapter, injected at startup via set_tts_adapter().
# None means TTS is not configured; synthesis routes then return a null
# audio_base64 instead of failing.
_tts_adapter: Any = None
|
|
|
|
|
|
def set_tts_adapter(adapter: Any) -> None:
|
|
"""Set the global TTS adapter for synthesis routes."""
|
|
global _tts_adapter # noqa: PLW0603
|
|
_tts_adapter = adapter
|
|
|
|
|
|
def get_tts_adapter() -> Any:
    """Return the globally registered TTS adapter (``None`` when unset)."""
    return _tts_adapter
|
|
|
|
|
|
@router.post("/{session_id}/synthesize")
async def synthesize(
    session_id: str,
    body: dict[str, Any],
) -> dict[str, Any]:
    """Synthesize text to audio for a head.

    Body: ``{ "text": "...", "head_id": "logic" }``

    Returns: ``{ "audio_base64": "...", "voice_id": "..." }`` —
    ``audio_base64`` is ``null`` when no TTS adapter is configured or the
    adapter produced no audio; ``voice_id`` is always present.

    Raises:
        HTTPException: 503 if the session store is not initialized,
            404 if the session is unknown, 400 if ``text`` is missing/empty.
    """
    store = get_session_store()
    if not store:
        raise HTTPException(status_code=503, detail="Service not initialized")
    sess = store.get(session_id)
    if not sess:
        raise HTTPException(status_code=404, detail="Session not found")

    text = body.get("text", "")
    head_id_str = body.get("head_id", "")
    if not text:
        raise HTTPException(status_code=400, detail="text is required")

    # Unknown head ids fall back to LOGIC. TypeError is caught alongside
    # ValueError so a non-string head_id (e.g. a JSON list) degrades the
    # same way instead of surfacing as a 500.
    try:
        head_id = HeadId(head_id_str)
    except (ValueError, TypeError):
        head_id = HeadId.LOGIC

    voice_id = get_voice_id_for_head(head_id)
    audio_base64: str | None = None

    adapter = get_tts_adapter()
    if adapter is not None:
        audio_bytes = await adapter.synthesize(text, voice_id=voice_id)
        if audio_bytes:
            # base64 is imported at module level; encode raw audio for JSON.
            audio_base64 = base64.b64encode(audio_bytes).decode()

    return {"audio_base64": audio_base64, "voice_id": voice_id}
|