"""TTS adapter protocol and implementations for speech synthesis.""" from __future__ import annotations import base64 from abc import ABC, abstractmethod from typing import Any from fusionagi._logger import logger class TTSAdapter(ABC): """Abstract adapter for text-to-speech synthesis. Implementations handle provider-specific API calls (ElevenLabs, Azure Cognitive Services, Google Cloud TTS, etc.). """ @abstractmethod async def synthesize( self, text: str, *, voice_id: str | None = None, language: str = "en", **kwargs: Any, ) -> bytes | None: """Synthesize text to audio bytes. Args: text: Text to synthesize. voice_id: Provider-specific voice identifier. language: Language code (BCP-47). **kwargs: Provider-specific options. Returns: Raw audio bytes (mp3/wav) or None on failure. """ ... class StubTTSAdapter(TTSAdapter): """Stub TTS adapter for testing; returns empty audio.""" async def synthesize( self, text: str, *, voice_id: str | None = None, language: str = "en", **kwargs: Any, ) -> bytes | None: """Return empty bytes for testing.""" logger.debug("StubTTS: synthesize called", extra={"text": text[:50], "voice_id": voice_id}) return b"" class ElevenLabsTTSAdapter(TTSAdapter): """ElevenLabs TTS adapter. Requires the ``httpx`` package and an ElevenLabs API key. """ API_BASE = "https://api.elevenlabs.io/v1" DEFAULT_VOICE = "21m00Tcm4TlvDq8ikWAM" # Rachel def __init__( self, api_key: str, *, default_voice_id: str | None = None, model_id: str = "eleven_monolingual_v1", ) -> None: self._api_key = api_key self._default_voice = default_voice_id or self.DEFAULT_VOICE self._model_id = model_id async def synthesize( self, text: str, *, voice_id: str | None = None, language: str = "en", **kwargs: Any, ) -> bytes | None: """Call ElevenLabs TTS API.""" try: import httpx except ImportError: logger.error("httpx not installed; pip install httpx") return None vid = voice_id or self._default_voice url = f"{self.API_BASE}/text-to-speech/{vid}" headers = {"xi-api-key": self._api_key, "Content-Type": "application/json"} payload = { "text": text, "model_id": self._model_id, "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}, } try: async with httpx.AsyncClient() as client: resp = await client.post(url, json=payload, headers=headers, timeout=30.0) resp.raise_for_status() return resp.content except Exception as e: logger.error("ElevenLabs TTS failed", extra={"error": str(e)}) return None def audio_to_base64(audio_bytes: bytes) -> str: """Encode raw audio bytes to base64 string.""" return base64.b64encode(audio_bytes).decode() __all__ = [ "TTSAdapter", "StubTTSAdapter", "ElevenLabsTTSAdapter", "audio_to_base64", ]