Items completed: 1. Merged PR #2 (starlette/httpx deps) 2. Fixed async race condition in multimodal_ui.py 3. Wired TTSAdapter (ElevenLabs, Azure) in API routes 4. Moved super_big_brain.py from core/ to reasoning/ (backward compat shim) 5. Added API authentication middleware (Bearer token via FUSIONAGI_API_KEY) 6. Added async adapter interface (acomplete/acomplete_structured) 7. Migrated FastAPI on_event to lifespan (fixes 20 deprecation warnings) 8. Liquid Neural Networks (continuous-time adaptive weights) 9. Quantum-AI Hybrid compute backend (simulator + optimization) 10. Embodied Intelligence / Robotics bridge (actuator + sensor protocols) 11. Consciousness Engineering (formal self-model with introspection) 12. ASI Scoring Rubric (C/A/L/N/R self-assessment harness) 13. GPU integration tests for TensorFlow backend 14. Multi-stage production Dockerfile 15. Gitea CI/CD pipeline (lint, test matrix, Docker build) 16. API rate limiting middleware (per-IP sliding window) 17. OpenAPI docs cleanup (auth + rate limiting descriptions) 18. Benchmarking suite (decomposition, multi-path, recomposition, e2e) 19. Plugin system (head registry for custom heads) 427 tests passing, 0 ruff errors, 0 mypy errors. Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
123 lines
3.3 KiB
Python
123 lines
3.3 KiB
Python
"""TTS adapter protocol and implementations for speech synthesis."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
from abc import ABC, abstractmethod
|
|
from typing import Any
|
|
|
|
from fusionagi._logger import logger
|
|
|
|
|
|
class TTSAdapter(ABC):
    """Provider-agnostic interface for text-to-speech backends.

    A concrete subclass wraps a single speech provider (for example
    ElevenLabs, Azure Cognitive Services or Google Cloud TTS) and exposes
    it through the one coroutine defined here, ``synthesize``.
    """

    @abstractmethod
    async def synthesize(
        self,
        text: str,
        *,
        voice_id: str | None = None,
        language: str = "en",
        **kwargs: Any,
    ) -> bytes | None:
        """Convert ``text`` into spoken audio.

        Args:
            text: The text to be spoken.
            voice_id: Voice identifier; its meaning is defined by the provider.
            language: BCP-47 language code. Defaults to ``"en"``.
            **kwargs: Additional options understood by the concrete provider.

        Returns:
            The raw audio data (mp3/wav) as bytes, or ``None`` when
            synthesis failed.
        """
        ...
|
|
|
|
|
|
class StubTTSAdapter(TTSAdapter):
    """No-op TTS backend intended for tests; always yields zero-length audio."""

    async def synthesize(
        self,
        text: str,
        *,
        voice_id: str | None = None,
        language: str = "en",
        **kwargs: Any,
    ) -> bytes | None:
        """Log the call at debug level and return an empty byte string.

        ``language`` and ``**kwargs`` are accepted so the signature matches
        the adapter interface; this stub does not use them.
        """
        # Only the first 50 characters of the text are put in the log record.
        log_fields = {"text": text[:50], "voice_id": voice_id}
        logger.debug("StubTTS: synthesize called", extra=log_fields)
        return bytes()
|
|
|
|
|
|
class ElevenLabsTTSAdapter(TTSAdapter):
    """ElevenLabs TTS adapter.

    Posts text to the ElevenLabs ``text-to-speech`` HTTP endpoint and
    returns the response body as audio bytes.

    Requires the ``httpx`` package (imported lazily inside ``synthesize``)
    and an ElevenLabs API key.
    """

    API_BASE = "https://api.elevenlabs.io/v1"
    DEFAULT_VOICE = "21m00Tcm4TlvDq8ikWAM"  # Rachel

    def __init__(
        self,
        api_key: str,
        *,
        default_voice_id: str | None = None,
        model_id: str = "eleven_monolingual_v1",
    ) -> None:
        """Store the credentials and defaults used for every request.

        Args:
            api_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
            default_voice_id: Voice used when ``synthesize`` is called without
                a ``voice_id``; falls back to ``DEFAULT_VOICE`` when omitted
                or empty.
            model_id: Value sent as ``model_id`` in the request payload.
        """
        self._api_key = api_key
        self._default_voice = default_voice_id or self.DEFAULT_VOICE
        self._model_id = model_id

    async def synthesize(
        self,
        text: str,
        *,
        voice_id: str | None = None,
        language: str = "en",
        **kwargs: Any,
    ) -> bytes | None:
        """Call the ElevenLabs TTS API.

        Args:
            text: Text to synthesize.
            voice_id: Voice to use; the adapter's default voice is used when
                this is ``None`` or empty.
            language: Accepted for interface compatibility; it is not
                forwarded to the API by this adapter.
            **kwargs: Accepted for interface compatibility; currently ignored.

        Returns:
            The response body (audio bytes) on success, or ``None`` when
            ``httpx`` is unavailable or the request fails for any reason.
            Failures are logged, never raised.
        """
        try:
            import httpx
        except ImportError:
            logger.error("httpx not installed; pip install httpx")
            return None

        from urllib.parse import quote

        vid = voice_id or self._default_voice
        # The voice id may originate from a caller-supplied request value.
        # Percent-encode it (including "/") so it can only ever be a single
        # path segment and cannot alter the endpoint or add a query string.
        safe_vid = quote(vid, safe="")
        url = f"{self.API_BASE}/text-to-speech/{safe_vid}"
        headers = {"xi-api-key": self._api_key, "Content-Type": "application/json"}
        payload = {
            "text": text,
            "model_id": self._model_id,
            "voice_settings": {"stability": 0.5, "similarity_boost": 0.75},
        }

        try:
            async with httpx.AsyncClient() as client:
                resp = await client.post(url, json=payload, headers=headers, timeout=30.0)
                resp.raise_for_status()
                return resp.content
        except httpx.HTTPStatusError as e:
            # The API answered with a 4xx/5xx status: record it so auth,
            # quota and bad-voice errors can be told apart in the logs.
            logger.error(
                "ElevenLabs TTS failed",
                extra={
                    "error": str(e),
                    "error_type": type(e).__name__,
                    "status_code": e.response.status_code,
                },
            )
            return None
        except Exception as e:
            # Deliberately broad: this adapter reports failure by returning
            # None. Some exceptions stringify to an empty message, so fall
            # back to repr() and always include the exception type.
            logger.error(
                "ElevenLabs TTS failed",
                extra={"error": str(e) or repr(e), "error_type": type(e).__name__},
            )
            return None
|
|
|
|
|
|
def audio_to_base64(audio_bytes: bytes) -> str:
    """Return the base64 text representation of ``audio_bytes``."""
    encoded = base64.b64encode(audio_bytes)
    # b64encode yields bytes; convert them to a str for text transports.
    return encoded.decode()
|
|
|
|
|
|
# Names exported by a star-import of this module.
__all__ = ["TTSAdapter", "StubTTSAdapter", "ElevenLabsTTSAdapter", "audio_to_base64"]
|