feat: GPU/TensorCore integration — TensorFlow backend, GPU-accelerated reasoning, training, and memory

- New fusionagi/gpu/ module with TensorBackend protocol abstraction - TensorFlowBackend: GPU-accelerated ops with TensorCore mixed-precision - NumPyBackend: CPU fallback (always available, no extra deps) - Auto-selects best available backend at runtime - GPU-accelerated operations: - Cosine similarity matrix (batched, XLA-compiled) - Multi-head attention for consensus scoring - Batch hypothesis scoring on GPU - Semantic similarity search (pairwise, nearest-neighbor, deduplication) - New TensorFlowAdapter (fusionagi/adapters/): - LLMAdapter for local TF/Keras model inference - TensorCore mixed-precision support - GPU-accelerated embedding synthesis fallback - Reasoning pipeline integration: - gpu_scoring.py: drop-in GPU replacement for multi_path scoring - Super Big Brain: use_gpu config flag, GPU scoring when available - Memory integration: - gpu_search.py: GPU-accelerated semantic search for SemanticGraphMemory - Self-improvement integration: - gpu_training.py: gradient-based heuristic weight optimization - Reflective memory training loop with loss tracking - Dependencies: gpu extra (tensorflow>=2.16, numpy>=1.26) - 64 new tests (276 total), all passing - Architecture spec: docs/gpu_tensorcore_integration.md Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
2026-04-28 05:05:50 +00:00
parent c052b07662
commit fa71f973a6
22 changed files with 2448 additions and 3 deletions
--- a/fusionagi/gpu/backend.py
+++ b/fusionagi/gpu/backend.py
@@ -0,0 +1,283 @@
+"""TensorBackend protocol and backend registry for GPU-accelerated compute.
+
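+Abstracts tensor backends behind a single abstract base class. TensorFlow and
+pure-NumPy backends are wired into ``get_backend`` in this module; the interface
+is intended to accommodate JAX as well. The best available backend is selected
+lazily on the first ``get_backend()`` call and then cached.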
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Any
+
+from fusionagi._logger import logger
+
+
+class DeviceType(str, Enum):
+    """Available compute device types.
+
+    The ``str`` mixin makes each member compare equal to its lowercase string
+    value; ``.value`` yields that plain string (used by ``device_summary``).
+    """
+
+    # Reported by the NumPy fallback backend; treated as "no GPU acceleration".
+    CPU = "cpu"
+    # Any member other than CPU counts as accelerated in ``gpu_available``.
+    GPU = "gpu"
+    # NOTE(review): no backend visible in this module reports TPU — confirm usage.
+    TPU = "tpu"
+
+
+class TensorBackend(ABC):
+    """Common interface for the tensor engines behind FusionAGI's reasoning pipeline.
+
+    Concrete backends must supply:
+    - ``name`` / ``device``: identification of the engine and where it runs
+    - ``embed_texts``: text -> dense vector
+    - ``cosine_similarity_matrix``: batched pairwise similarity
+    - ``batch_score``: parallel hypothesis evaluation
+    - ``multi_head_attention``: attention for consensus
+    - ``to_numpy`` / ``from_numpy``: conversion to and from NumPy arrays
+
+    ``gpu_available``, ``enable_mixed_precision`` and ``device_summary`` come
+    with default implementations that subclasses may override.
+    """
+
+    # ------------------------------------------------------------------
+    # Identification
+    # ------------------------------------------------------------------
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Short identifier of the backend (e.g. 'tensorflow', 'numpy')."""
+
+    @property
+    @abstractmethod
+    def device(self) -> DeviceType:
+        """Device type this backend currently computes on."""
+
+    # ------------------------------------------------------------------
+    # Tensor operations every backend has to implement
+    # ------------------------------------------------------------------
+
+    @abstractmethod
+    def embed_texts(self, texts: list[str], model_name: str | None = None) -> Any:
+        """Turn a batch of texts into dense vectors.
+
+        Args:
+            texts: Strings to embed.
+            model_name: Optional identifier of the embedding model to use.
+
+        Returns:
+            2D tensor of shape (len(texts), embedding_dim).
+        """
+
+    @abstractmethod
+    def cosine_similarity_matrix(self, embeddings_a: Any, embeddings_b: Any) -> Any:
+        """Pairwise cosine similarity between the rows of two embedding matrices.
+
+        Args:
+            embeddings_a: Tensor of shape (M, D).
+            embeddings_b: Tensor of shape (N, D).
+
+        Returns:
+            Tensor of shape (M, N); entries lie in [-1, 1].
+        """
+
+    @abstractmethod
+    def batch_score(
+        self,
+        hypotheses: Any,
+        reference: Any,
+        weights: Any | None = None,
+    ) -> Any:
+        """Score every hypothesis against one reference via weighted dot-product.
+
+        Args:
+            hypotheses: Hypothesis embeddings, shape (K, D).
+            reference: Reference embedding, shape (1, D) or (D,).
+            weights: Optional per-dimension weights, shape (D,).
+
+        Returns:
+            1D tensor of shape (K,) holding one score per hypothesis.
+        """
+
+    @abstractmethod
+    def multi_head_attention(
+        self,
+        queries: Any,
+        keys: Any,
+        values: Any,
+        num_heads: int = 4,
+    ) -> Any:
+        """Apply multi-head attention (used for consensus scoring).
+
+        Args:
+            queries: Tensor of shape (seq_len_q, D).
+            keys: Tensor of shape (seq_len_k, D).
+            values: Tensor of shape (seq_len_k, D).
+            num_heads: How many attention heads to use.
+
+        Returns:
+            Attended output of shape (seq_len_q, D).
+        """
+
+    @abstractmethod
+    def to_numpy(self, tensor: Any) -> Any:
+        """Return ``tensor`` as a NumPy array."""
+
+    @abstractmethod
+    def from_numpy(self, array: Any) -> Any:
+        """Return the NumPy ``array`` as a tensor native to this backend."""
+
+    # ------------------------------------------------------------------
+    # Defaults shared by all backends
+    # ------------------------------------------------------------------
+
+    def gpu_available(self) -> bool:
+        """Report whether this backend runs on anything other than the CPU."""
+        current = self.device
+        return current != DeviceType.CPU
+
+    def enable_mixed_precision(self) -> None:
+        """Switch on FP16/BF16 mixed precision for TensorCore acceleration.
+
+        Does nothing by default; the TensorFlow backend overrides it.
+        """
+        return None
+
+    def device_summary(self) -> dict[str, Any]:
+        """Describe the backend as ``{"backend": <name>, "device": <device value>}``."""
+        return dict(backend=self.name, device=self.device.value)
+
+
+class NumPyBackend(TensorBackend):
+    """Pure-NumPy fallback backend for CPU-only environments.
+
+    Provides the same API as GPU backends but runs on CPU with NumPy.
+    Used when TensorFlow is not installed.
+    """
+
+    # Width of the vectors produced by ``embed_texts``.
+    _EMBED_DIM = 256
+
+    def __init__(self) -> None:
+        # NumPy is imported on construction rather than at module import.
+        import numpy as np
+
+        self._np = np
+        logger.info("NumPyBackend initialized (CPU fallback)")
+
+    @property
+    def name(self) -> str:
+        """Backend identifier; always ``"numpy"``."""
+        return "numpy"
+
+    @property
+    def device(self) -> DeviceType:
+        """Compute device; always ``DeviceType.CPU``."""
+        return DeviceType.CPU
+
+    def embed_texts(self, texts: list[str], model_name: str | None = None) -> Any:
+        """Hash-based embedding for CPU fallback.
+
+        Produces deterministic dense vectors from text using character-level
+        hashing. Not semantically meaningful — use TensorFlow backend for real
+        embeddings.
+
+        Args:
+            texts: List of text strings to embed.
+            model_name: Accepted for interface compatibility; ignored here.
+
+        Returns:
+            float32 array of shape (len(texts), 256). Each non-empty row is
+            L2-normalised; a text without any words yields an all-zero row.
+        """
+        import zlib
+
+        np = self._np
+        dim = self._EMBED_DIM
+        embeddings = np.zeros((len(texts), dim), dtype=np.float32)
+        for i, text in enumerate(texts):
+            for j, word in enumerate(text.lower().split()):
+                # CRC32 instead of built-in hash(): str hashes are salted per
+                # process, which made the vectors differ between runs and broke
+                # the determinism this method promises.
+                word_key = zlib.crc32(word.encode("utf-8", "surrogatepass"))
+                base = word_key + j * 7
+                for k, ch in enumerate(word):
+                    embeddings[i, (base + k * 31) % dim] += ord(ch) / 128.0
+            norm = np.linalg.norm(embeddings[i])
+            if norm > 0:
+                embeddings[i] /= norm
+        return embeddings
+
+    def cosine_similarity_matrix(self, embeddings_a: Any, embeddings_b: Any) -> Any:
+        """Compute pairwise cosine similarity between two embedding matrices.
+
+        Args:
+            embeddings_a: Array of shape (M, D).
+            embeddings_b: Array of shape (N, D).
+
+        Returns:
+            Array of shape (M, N). All-zero rows produce zero similarity
+            rather than NaN because of the epsilon used when normalising.
+        """
+        return self._normalize_rows(embeddings_a) @ self._normalize_rows(embeddings_b).T
+
+    def batch_score(
+        self,
+        hypotheses: Any,
+        reference: Any,
+        weights: Any | None = None,
+    ) -> Any:
+        """Score hypotheses by (optionally weighted) cosine similarity to a reference.
+
+        Args:
+            hypotheses: Array-like of shape (K, D).
+            reference: Array-like of shape (1, D) or (D,).
+            weights: Optional array-like of shape (D,); multiplied into both
+                hypotheses and reference before normalisation.
+
+        Returns:
+            Array of shape (K,) — also when K == 1.
+        """
+        np = self._np
+        hyp = np.asarray(hypotheses)
+        ref = np.asarray(reference)
+        if ref.ndim == 1:
+            ref = ref.reshape(1, -1)
+        if weights is not None:
+            w = np.asarray(weights)
+            hyp = hyp * w
+            ref = ref * w
+        scores = self._normalize_rows(hyp) @ self._normalize_rows(ref).T
+        if scores.shape[1] == 1:
+            # Select the single reference column explicitly: a bare squeeze()
+            # would also drop the K axis when K == 1 and return a 0-d array.
+            return scores[:, 0]
+        # Outside the documented contract (several reference rows): keep the
+        # historical squeeze() result.
+        return scores.squeeze()
+
+    def multi_head_attention(
+        self,
+        queries: Any,
+        keys: Any,
+        values: Any,
+        num_heads: int = 4,
+    ) -> Any:
+        """Scaled dot-product attention computed independently per feature slice.
+
+        The feature axis is partitioned into ``num_heads`` contiguous slices.
+        When D is not divisible by ``num_heads`` the first ``D % num_heads``
+        heads are one column wider, so every input column is used and the
+        output keeps width D.
+
+        Args:
+            queries: Array of shape (seq_len_q, D).
+            keys: Array of shape (seq_len_k, D).
+            values: Array of shape (seq_len_k, D).
+            num_heads: Number of attention heads; must be >= 1.
+
+        Returns:
+            Array of shape (seq_len_q, D). If ``num_heads`` exceeds D,
+            ``queries`` is returned unchanged.
+
+        Raises:
+            ValueError: If ``num_heads`` is smaller than 1.
+        """
+        if num_heads < 1:
+            raise ValueError(f"num_heads must be >= 1, got {num_heads}")
+
+        np = self._np
+        d_model = queries.shape[-1]
+        base_width, extra = divmod(d_model, num_heads)
+        if base_width == 0:
+            # More heads than features: no per-head slice exists.
+            return queries
+
+        outputs = []
+        start = 0
+        for head in range(num_heads):
+            width = (base_width + 1) if head < extra else base_width
+            end = start + width
+            q = queries[:, start:end]
+            k = keys[:, start:end]
+            v = values[:, start:end]
+            scale = np.sqrt(np.float32(width))
+            attn_weights = self._softmax((q @ k.T) / scale)
+            outputs.append(attn_weights @ v)
+            start = end
+
+        return np.concatenate(outputs, axis=-1)
+
+    def to_numpy(self, tensor: Any) -> Any:
+        """Return ``tensor`` as a NumPy array (via ``numpy.asarray``)."""
+        return self._np.asarray(tensor)
+
+    def from_numpy(self, array: Any) -> Any:
+        """Return ``array`` as a NumPy array; this backend's tensors are NumPy arrays."""
+        return self._np.asarray(array)
+
+    def _normalize_rows(self, matrix: Any) -> Any:
+        """Divide each row of a 2D array by its L2 norm (plus 1e-8 against zero rows)."""
+        norms = self._np.linalg.norm(matrix, axis=1, keepdims=True)
+        return matrix / (norms + 1e-8)
+
+    def _softmax(self, x: Any) -> Any:
+        """Softmax over the last axis; the row maximum is subtracted first for stability."""
+        exp_x = self._np.exp(x - self._np.max(x, axis=-1, keepdims=True))
+        return exp_x / (self._np.sum(exp_x, axis=-1, keepdims=True) + 1e-8)
+
+
+# Backend registry: module-level cache for the backend chosen by ``get_backend``.
+# ``None`` until the first selection, and again after ``reset_backend()``.
+_BACKEND_INSTANCE: TensorBackend | None = None
+
+
+def get_backend(force: str | None = None) -> TensorBackend:
+    """Return the best available tensor backend (cached singleton).
+
+    Args:
+        force: Force a specific backend ('tensorflow' or 'numpy'); matched
+            case-insensitively. If None, auto-selects: TensorFlow > NumPy.
+            An unrecognised value is logged and falls back to NumPy.
+
+    Returns:
+        TensorBackend instance. The instance is cached at module level and
+        reused when no backend is forced or when the forced backend is the
+        one already cached.
+
+    Raises:
+        ImportError: If ``force='tensorflow'`` and the TensorFlow backend
+            cannot be imported.
+    """
+    global _BACKEND_INSTANCE
+
+    requested = None if force is None else force.strip().lower()
+
+    cached = _BACKEND_INSTANCE
+    if cached is not None and (requested is None or requested == cached.name):
+        # Already holding what was asked for — do not rebuild the backend.
+        return cached
+
+    if requested not in (None, "tensorflow", "numpy"):
+        logger.warning(
+            f"Unknown tensor backend {force!r} requested, using NumPy backend"
+        )
+
+    if requested is None or requested == "tensorflow":
+        try:
+            # Construction stays inside the try: an ImportError raised while
+            # the backend initialises must trigger the same fallback as a
+            # failed module import.
+            from fusionagi.gpu.tensorflow_ops import TensorFlowBackend
+
+            _BACKEND_INSTANCE = TensorFlowBackend()
+            return _BACKEND_INSTANCE
+        except ImportError:
+            if requested == "tensorflow":
+                raise
+            logger.info("TensorFlow not available, falling back to NumPy backend")
+
+    _BACKEND_INSTANCE = NumPyBackend()
+    return _BACKEND_INSTANCE
+
+
+def reset_backend() -> None:
+    """Reset the cached backend (for testing).
+
+    Sets the module-level ``_BACKEND_INSTANCE`` back to ``None`` so that the
+    next ``get_backend()`` call performs backend selection again.
+    """
+    global _BACKEND_INSTANCE
+    _BACKEND_INSTANCE = None