Some checks failed
- New fusionagi/gpu/ module with TensorBackend protocol abstraction - TensorFlowBackend: GPU-accelerated ops with TensorCore mixed-precision - NumPyBackend: CPU fallback (always available, no extra deps) - Auto-selects best available backend at runtime - GPU-accelerated operations: - Cosine similarity matrix (batched, XLA-compiled) - Multi-head attention for consensus scoring - Batch hypothesis scoring on GPU - Semantic similarity search (pairwise, nearest-neighbor, deduplication) - New TensorFlowAdapter (fusionagi/adapters/): - LLMAdapter for local TF/Keras model inference - TensorCore mixed-precision support - GPU-accelerated embedding synthesis fallback - Reasoning pipeline integration: - gpu_scoring.py: drop-in GPU replacement for multi_path scoring - Super Big Brain: use_gpu config flag, GPU scoring when available - Memory integration: - gpu_search.py: GPU-accelerated semantic search for SemanticGraphMemory - Self-improvement integration: - gpu_training.py: gradient-based heuristic weight optimization - Reflective memory training loop with loss tracking - Dependencies: gpu extra (tensorflow>=2.16, numpy>=1.26) - 64 new tests (276 total), all passing - Architecture spec: docs/gpu_tensorcore_integration.md Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
284 lines
8.3 KiB
Python
284 lines
8.3 KiB
Python
"""TensorBackend protocol and backend registry for GPU-accelerated compute.

Abstracts TensorFlow, JAX, and pure-NumPy backends behind a single protocol.
The system auto-selects the best available backend at import time.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from abc import ABC, abstractmethod
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
from fusionagi._logger import logger
|
|
|
|
|
|
class DeviceType(str, Enum):
    """Enumeration of the hardware targets a backend can execute on."""

    CPU = "cpu"  # always available fallback
    GPU = "gpu"
    TPU = "tpu"
|
|
|
|
|
|
class TensorBackend(ABC):
    """Common interface for tensor compute backends.

    Concrete backends (TensorFlow, NumPy, ...) implement this small surface
    so FusionAGI's reasoning pipeline stays backend-agnostic:

    - ``embed_texts``: text -> dense vector batch
    - ``cosine_similarity_matrix``: batched pairwise similarity
    - ``multi_head_attention``: attention used for consensus scoring
    - ``batch_score``: parallel hypothesis evaluation
    - ``to_numpy`` / ``from_numpy``: tensor conversion helpers
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short backend identifier such as ``'tensorflow'`` or ``'numpy'``."""

    @property
    @abstractmethod
    def device(self) -> DeviceType:
        """Device this backend currently executes on."""

    @abstractmethod
    def embed_texts(self, texts: list[str], model_name: str | None = None) -> Any:
        """Turn a batch of strings into dense embedding vectors.

        Args:
            texts: Strings to embed.
            model_name: Optional identifier of the embedding model to use.

        Returns:
            A 2D tensor shaped ``(len(texts), embedding_dim)``.
        """

    @abstractmethod
    def cosine_similarity_matrix(self, embeddings_a: Any, embeddings_b: Any) -> Any:
        """Pairwise cosine similarity of two embedding matrices.

        Args:
            embeddings_a: Tensor shaped ``(M, D)``.
            embeddings_b: Tensor shaped ``(N, D)``.

        Returns:
            An ``(M, N)`` similarity matrix with entries in ``[-1, 1]``.
        """

    @abstractmethod
    def batch_score(
        self,
        hypotheses: Any,
        reference: Any,
        weights: Any | None = None,
    ) -> Any:
        """Score each hypothesis against a reference via weighted dot-product.

        Args:
            hypotheses: ``(K, D)`` tensor of hypothesis embeddings.
            reference: ``(1, D)`` or ``(D,)`` reference embedding.
            weights: Optional ``(D,)`` per-dimension weights.

        Returns:
            A 1D tensor of ``K`` scores.
        """

    @abstractmethod
    def multi_head_attention(
        self,
        queries: Any,
        keys: Any,
        values: Any,
        num_heads: int = 4,
    ) -> Any:
        """Apply multi-head attention, used for consensus scoring.

        Args:
            queries: ``(seq_len_q, D)`` tensor.
            keys: ``(seq_len_k, D)`` tensor.
            values: ``(seq_len_k, D)`` tensor.
            num_heads: How many attention heads to split ``D`` across.

        Returns:
            Attended output shaped ``(seq_len_q, D)``.
        """

    @abstractmethod
    def to_numpy(self, tensor: Any) -> Any:
        """Materialize a backend tensor as a NumPy array."""

    @abstractmethod
    def from_numpy(self, array: Any) -> Any:
        """Wrap a NumPy array as a backend-native tensor."""

    def gpu_available(self) -> bool:
        """Whether this backend runs on an accelerator rather than the CPU."""
        return self.device != DeviceType.CPU

    def enable_mixed_precision(self) -> None:
        """Turn on FP16/BF16 compute for TensorCore acceleration.

        The base implementation is a deliberate no-op; the TensorFlow
        backend overrides it.
        """

    def device_summary(self) -> dict[str, Any]:
        """Describe the active backend and device as a plain dict."""
        summary: dict[str, Any] = {"backend": self.name}
        summary["device"] = self.device.value
        return summary
|
|
|
|
|
|
class NumPyBackend(TensorBackend):
    """Pure-NumPy fallback backend for CPU-only environments.

    Provides the same API as GPU backends but runs on CPU with NumPy.
    Used when TensorFlow is not installed.
    """

    def __init__(self) -> None:
        # Imported lazily so importing this module never hard-requires numpy.
        import numpy as np

        self._np = np
        logger.info("NumPyBackend initialized (CPU fallback)")

    @property
    def name(self) -> str:
        return "numpy"

    @property
    def device(self) -> DeviceType:
        return DeviceType.CPU

    @staticmethod
    def _stable_hash(word: str) -> int:
        """Deterministic 32-bit polynomial hash of *word*.

        Built-in ``hash()`` on str is salted per process (PYTHONHASHSEED),
        so embeddings derived from it differed between runs, contradicting
        the documented determinism. This hash is process-independent.
        """
        h = 0
        for ch in word:
            h = (h * 31 + ord(ch)) & 0xFFFFFFFF
        return h

    def embed_texts(self, texts: list[str], model_name: str | None = None) -> Any:
        """Hash-based embedding for CPU fallback.

        Produces deterministic dense vectors from text using character-level
        hashing. Not semantically meaningful — use the TensorFlow backend
        for real embeddings.

        Args:
            texts: Texts to embed.
            model_name: Ignored; accepted for interface compatibility.

        Returns:
            float32 array of shape (len(texts), 256); non-zero rows are
            L2-normalized.
        """
        dim = 256
        embeddings = self._np.zeros((len(texts), dim), dtype=self._np.float32)
        for i, text in enumerate(texts):
            words = text.lower().split()
            for j, word in enumerate(words):
                # Hoisted out of the char loop — constant per word.
                base = self._stable_hash(word)
                for k, ch in enumerate(word):
                    idx = (base + k * 31 + j * 7) % dim
                    embeddings[i, idx] += ord(ch) / 128.0
            norm = self._np.linalg.norm(embeddings[i])
            if norm > 0:
                embeddings[i] /= norm
        return embeddings

    def cosine_similarity_matrix(self, embeddings_a: Any, embeddings_b: Any) -> Any:
        """Pairwise cosine similarity between rows of two matrices.

        Args:
            embeddings_a: (M, D) array.
            embeddings_b: (N, D) array.

        Returns:
            (M, N) similarity matrix; the 1e-8 epsilon guards zero rows.
        """
        a_norm = embeddings_a / (
            self._np.linalg.norm(embeddings_a, axis=1, keepdims=True) + 1e-8
        )
        b_norm = embeddings_b / (
            self._np.linalg.norm(embeddings_b, axis=1, keepdims=True) + 1e-8
        )
        return a_norm @ b_norm.T

    def batch_score(
        self,
        hypotheses: Any,
        reference: Any,
        weights: Any | None = None,
    ) -> Any:
        """Cosine-score each hypothesis row against the reference.

        Args:
            hypotheses: (K, D) array of hypothesis embeddings.
            reference: (1, D) or (D,) reference embedding.
            weights: Optional (D,) weights applied to both sides.

        Returns:
            1D array of shape (K,). The previous bare ``squeeze()``
            collapsed K == 1 to a 0-d scalar, violating this contract.
        """
        ref = reference.reshape(1, -1) if reference.ndim == 1 else reference
        if weights is not None:
            hypotheses = hypotheses * weights
            ref = ref * weights
        h_norm = hypotheses / (
            self._np.linalg.norm(hypotheses, axis=1, keepdims=True) + 1e-8
        )
        r_norm = ref / (self._np.linalg.norm(ref, axis=1, keepdims=True) + 1e-8)
        # reshape(-1) keeps a (K,) vector even when K == 1, unlike squeeze().
        return (h_norm @ r_norm.T).reshape(-1)

    def multi_head_attention(
        self,
        queries: Any,
        keys: Any,
        values: Any,
        num_heads: int = 4,
    ) -> Any:
        """Scaled dot-product multi-head attention over 2D inputs.

        Args:
            queries: (seq_len_q, D) array.
            keys: (seq_len_k, D) array.
            values: (seq_len_k, D) array.
            num_heads: Number of heads to split D across.

        Returns:
            (seq_len_q, D) attended output. When D is not divisible by
            num_heads the last head now absorbs the remainder columns;
            the old code silently dropped them and returned a tensor
            narrower than D.
        """
        d_model = queries.shape[-1]
        d_head = d_model // num_heads
        if d_head == 0:
            # Fewer features than heads: attention degenerates; pass through.
            return queries

        outputs = []
        for h in range(num_heads):
            start = h * d_head
            # Last head extends to d_model so no feature columns are lost.
            end = d_model if h == num_heads - 1 else start + d_head
            q = queries[:, start:end]
            k = keys[:, start:end]
            v = values[:, start:end]
            scale = self._np.sqrt(self._np.float32(end - start))
            attn_weights = (q @ k.T) / scale
            attn_weights = self._softmax(attn_weights)
            outputs.append(attn_weights @ v)

        return self._np.concatenate(outputs, axis=-1)

    def to_numpy(self, tensor: Any) -> Any:
        """Return *tensor* as a NumPy array (no copy if it already is one)."""
        return self._np.asarray(tensor)

    def from_numpy(self, array: Any) -> Any:
        """NumPy arrays are this backend's native format; pass through."""
        return self._np.asarray(array)

    def _softmax(self, x: Any) -> Any:
        """Row-wise softmax, shifted by the row max for numerical stability."""
        exp_x = self._np.exp(x - self._np.max(x, axis=-1, keepdims=True))
        return exp_x / (self._np.sum(exp_x, axis=-1, keepdims=True) + 1e-8)
|
|
|
|
|
|
# Backend registry — process-wide singleton, created lazily by get_backend().
_BACKEND_INSTANCE: TensorBackend | None = None


def get_backend(force: str | None = None) -> TensorBackend:
    """Return the best available tensor backend (cached singleton).

    Args:
        force: Force a specific backend ('tensorflow' or 'numpy').
            If None, auto-selects: TensorFlow > NumPy. Passing ``force``
            always rebuilds and replaces the cached singleton.

    Returns:
        TensorBackend instance.

    Raises:
        ValueError: If ``force`` is not None, 'tensorflow', or 'numpy'.
        ImportError: If ``force='tensorflow'`` but TensorFlow is unavailable.
    """
    global _BACKEND_INSTANCE

    # Previously any unrecognized value (e.g. 'jax') silently fell through
    # to the NumPy backend; fail loudly instead.
    if force not in (None, "tensorflow", "numpy"):
        raise ValueError(
            f"Unknown backend {force!r}; expected 'tensorflow', 'numpy', or None"
        )

    if _BACKEND_INSTANCE is not None and force is None:
        return _BACKEND_INSTANCE

    if force == "numpy":
        _BACKEND_INSTANCE = NumPyBackend()
        return _BACKEND_INSTANCE

    # force is 'tensorflow' or None at this point.
    try:
        from fusionagi.gpu.tensorflow_ops import TensorFlowBackend

        _BACKEND_INSTANCE = TensorFlowBackend()
        return _BACKEND_INSTANCE
    except ImportError:
        if force == "tensorflow":
            raise
        logger.info("TensorFlow not available, falling back to NumPy backend")

    _BACKEND_INSTANCE = NumPyBackend()
    return _BACKEND_INSTANCE
|
|
|
|
|
|
def reset_backend() -> None:
    """Drop the cached backend singleton.

    The next call to ``get_backend()`` will re-run backend auto-selection.
    Primarily useful in tests that exercise selection logic repeatedly.
    """
    global _BACKEND_INSTANCE
    _BACKEND_INSTANCE = None
|