Frontend (items 1-10):
- WebSocket streaming integration with useWebSocket hook
- Admin Dashboard UI (status, voices, agents, governance tabs)
- Voice playback UI (TTS/STT integration)
- Settings/Preferences page (conversation style, sliders)
- Responsive/mobile layout (breakpoints at 480px, 768px)
- Dark/light theme with CSS variables and localStorage
- Error handling & loading states (retry, empty state, disabled input)
- Authentication UI (login page, Bearer token, logout)
- Head visualization improvements (active/speaking states, animations)
- Consequence/Ethics dashboard (lessons, consequences, insights tabs)
Backend stubs (items 11-21):
- Tool connectors: DocsConnector (text/md/PDF), DBConnector (SQLite/Postgres), CodeRunnerConnector (Python/JS/Bash/Ruby sandboxed)
- STT adapter: WhisperSTTAdapter, AzureSTTAdapter
- Multi-modal interface adapters: Visual, Haptic, Gesture, Biometric
- SSE streaming endpoint (/v1/sessions/{id}/stream/sse)
- Multi-tenant support (X-Tenant-ID header, tenant CRUD)
- Plugin marketplace/registry (register, install, list)
- Backup/restore endpoints
- Versioned API negotiation (Accept-Version header, deprecation)
Infrastructure (items 22-26):
- docker-compose.yml (API + Postgres + Redis + frontend)
- .env.example with all configurable vars
- gunicorn.conf.py production ASGI config
- Prometheus metrics collector and /metrics endpoint
- Structured JSON logging configuration
Documentation (items 27-29):
- Architecture docs with module layout and subsystem descriptions
- Quickstart guide with setup, API tour, and test instructions
Tests (items 30-32):
- Integration tests: 25 end-to-end API tests
- Frontend tests: 10 Vitest tests for hooks (useTheme, useAuth)
- Load/performance tests: latency and throughput benchmarks
- Connector tests: 16 tests for Docs, DB, CodeRunner
- Multi-modal adapter tests: 9 tests
- Metrics collector tests: 5 tests
- STT adapter tests: 2 tests
511 Python tests passing, 10 frontend tests passing, 0 ruff errors.
Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
139 lines
3.8 KiB
Python
139 lines
3.8 KiB
Python
"""STT adapter: speech-to-text with Whisper, Azure, and stub implementations."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import Any
|
|
|
|
from fusionagi._logger import logger
|
|
|
|
|
|
class STTAdapter(ABC):
    """Base interface that every speech-to-text backend implements."""

    @abstractmethod
    async def transcribe(
        self, audio_data: bytes, *, language: str = "en", **kwargs: Any
    ) -> str | None:
        """Convert a buffer of audio into text.

        Args:
            audio_data: Raw audio bytes (wav/mp3/ogg).
            language: BCP-47 language code hint.
            **kwargs: Provider-specific options.

        Returns:
            The transcribed text, or None on failure.
        """
|
|
|
|
|
|
class StubSTTAdapter(STTAdapter):
    """Test double for STT: ignores the audio content and returns fixed text."""

    async def transcribe(
        self, audio_data: bytes, *, language: str = "en", **kwargs: Any
    ) -> str | None:
        """Log the payload size at debug level and return a canned transcription.

        Args:
            audio_data: Audio bytes; only their length is used (for the log record).
            language: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            Always the literal placeholder ``"[stub transcription]"``.
        """
        payload_size = len(audio_data)
        logger.debug("StubSTT: transcribe called", extra={"audio_size": payload_size})
        return "[stub transcription]"
|
|
|
|
|
|
class WhisperSTTAdapter(STTAdapter):
    """OpenAI Whisper STT adapter.

    Requires the ``openai`` package and an OpenAI API key.
    """

    def __init__(self, api_key: str | None = None, model: str = "whisper-1") -> None:
        """Remember the credentials and model; no client is created here.

        Args:
            api_key: OpenAI API key handed to the client constructor unchanged.
                May be None, in which case the client is presumably left to
                resolve credentials itself (verify against the openai docs).
            model: Transcription model identifier sent with every request.
        """
        self._api_key = api_key
        self._model = model

    async def transcribe(
        self,
        audio_data: bytes,
        *,
        language: str = "en",
        **kwargs: Any,
    ) -> str | None:
        """Transcribe audio bytes through the OpenAI transcription endpoint.

        The request is made with the asynchronous OpenAI client so the event
        loop is not blocked while waiting on the network, and the client is
        closed once the request finishes.

        Args:
            audio_data: Raw audio bytes. They are uploaded under the filename
                ``audio.wav`` regardless of the actual container format.
            language: Language hint forwarded to the API as-is.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            The transcribed text, or None if ``openai`` is not installed or
            the request fails for any reason (the failure is logged).
        """
        # Optional dependency: import lazily and keep the guard narrow so that
        # only a genuinely missing package yields the "not installed" message.
        try:
            import openai
        except ImportError:
            logger.error("openai not installed; pip install fusionagi[openai]")
            return None

        import io

        # The upload is built from an in-memory file object; it needs a name
        # so the multipart request carries a filename.
        audio_file = io.BytesIO(audio_data)
        audio_file.name = "audio.wav"

        try:
            # AsyncOpenAI keeps this coroutine non-blocking; ``async with``
            # releases the underlying HTTP connections when done.
            async with openai.AsyncOpenAI(api_key=self._api_key) as client:
                transcript = await client.audio.transcriptions.create(
                    model=self._model,
                    file=audio_file,
                    language=language,
                )
            return transcript.text
        except Exception as e:
            # Best-effort adapter: callers get None rather than an exception.
            logger.error("Whisper STT failed", extra={"error": str(e)})
            return None
|
|
|
|
|
|
class AzureSTTAdapter(STTAdapter):
    """Azure Cognitive Services STT adapter.

    Requires ``httpx`` and an Azure Speech Services key.
    """

    def __init__(self, api_key: str, region: str = "eastus") -> None:
        """Store the subscription key and derive the regional endpoint URL.

        Args:
            api_key: Azure Speech subscription key, sent in the
                ``Ocp-Apim-Subscription-Key`` header.
            region: Azure region name used as the endpoint host prefix.
        """
        self._api_key = api_key
        self._region = region
        self._endpoint = f"https://{region}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1"

    async def transcribe(
        self,
        audio_data: bytes,
        *,
        language: str = "en-US",
        **kwargs: Any,
    ) -> str | None:
        """Transcribe audio bytes by POSTing them to the Azure endpoint.

        Args:
            audio_data: Raw audio bytes, sent as the request body with
                ``Content-Type: audio/wav``.
            language: Language/locale sent as the ``language`` query parameter.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            The ``DisplayText`` field of the JSON response, or None when
            ``httpx`` is missing, the request fails, or the response carries
            no recognized text (the failure is logged).
        """
        # Optional dependency: import lazily and keep the guard narrow so that
        # only a genuinely missing package yields the "not installed" message.
        try:
            import httpx
        except ImportError:
            logger.error("httpx not installed; pip install httpx")
            return None

        headers = {
            "Ocp-Apim-Subscription-Key": self._api_key,
            "Content-Type": "audio/wav",
        }
        params = {"language": language}

        try:
            async with httpx.AsyncClient() as client:
                resp = await client.post(
                    self._endpoint,
                    headers=headers,
                    params=params,
                    content=audio_data,
                    timeout=30.0,
                )
                resp.raise_for_status()
                data = resp.json()
        except Exception as e:
            # Best-effort adapter: callers get None rather than an exception.
            logger.error("Azure STT failed", extra={"error": str(e)})
            return None

        text = data.get("DisplayText")
        if text:
            return text

        # No text was recognized. The status field is a diagnostic code, not a
        # transcript, so it is logged instead of being returned to the caller.
        logger.warning(
            "Azure STT returned no text",
            extra={"status": data.get("RecognitionStatus")},
        )
        return None
|
|
|
|
|
|
# Names exported by a star-import of this module.
__all__ = ["STTAdapter", "StubSTTAdapter", "WhisperSTTAdapter", "AzureSTTAdapter"]
|