Some checks failed
- New fusionagi/gpu/ module with TensorBackend protocol abstraction - TensorFlowBackend: GPU-accelerated ops with TensorCore mixed-precision - NumPyBackend: CPU fallback (always available, no extra deps) - Auto-selects best available backend at runtime - GPU-accelerated operations: - Cosine similarity matrix (batched, XLA-compiled) - Multi-head attention for consensus scoring - Batch hypothesis scoring on GPU - Semantic similarity search (pairwise, nearest-neighbor, deduplication) - New TensorFlowAdapter (fusionagi/adapters/): - LLMAdapter for local TF/Keras model inference - TensorCore mixed-precision support - GPU-accelerated embedding synthesis fallback - Reasoning pipeline integration: - gpu_scoring.py: drop-in GPU replacement for multi_path scoring - Super Big Brain: use_gpu config flag, GPU scoring when available - Memory integration: - gpu_search.py: GPU-accelerated semantic search for SemanticGraphMemory - Self-improvement integration: - gpu_training.py: gradient-based heuristic weight optimization - Reflective memory training loop with loss tracking - Dependencies: gpu extra (tensorflow>=2.16, numpy>=1.26) - 64 new tests (276 total), all passing - Architecture spec: docs/gpu_tensorcore_integration.md Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
284 lines
8.3 KiB
Python
284 lines
8.3 KiB
Python
"""TensorBackend protocol and backend registry for GPU-accelerated compute.

Abstracts TensorFlow, JAX, and pure-NumPy backends behind a single protocol.
The system auto-selects the best available backend at import time.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from abc import ABC, abstractmethod
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
from fusionagi._logger import logger
|
|
|
|
|
|
class DeviceType(str, Enum):
    """Enumeration of the hardware targets a backend can execute on."""

    CPU = "cpu"  # always available fallback
    GPU = "gpu"
    TPU = "tpu"
|
|
|
|
|
|
class TensorBackend(ABC):
    """Common interface for tensor compute backends.

    Concrete backends (TensorFlow, NumPy, ...) implement this small surface
    so FusionAGI's reasoning pipeline stays backend-agnostic:

    - ``embed_texts``: text -> dense vector batch
    - ``cosine_similarity_matrix``: batched pairwise similarity
    - ``multi_head_attention``: attention used for consensus scoring
    - ``batch_score``: parallel hypothesis evaluation
    - ``to_numpy`` / ``from_numpy``: tensor conversion helpers
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short backend identifier such as ``'tensorflow'`` or ``'numpy'``."""

    @property
    @abstractmethod
    def device(self) -> DeviceType:
        """Device this backend currently executes on."""

    @abstractmethod
    def embed_texts(self, texts: list[str], model_name: str | None = None) -> Any:
        """Turn a batch of strings into dense embedding vectors.

        Args:
            texts: Strings to embed.
            model_name: Optional identifier of the embedding model to use.

        Returns:
            A 2D tensor shaped ``(len(texts), embedding_dim)``.
        """

    @abstractmethod
    def cosine_similarity_matrix(self, embeddings_a: Any, embeddings_b: Any) -> Any:
        """Pairwise cosine similarity of two embedding matrices.

        Args:
            embeddings_a: Tensor shaped ``(M, D)``.
            embeddings_b: Tensor shaped ``(N, D)``.

        Returns:
            An ``(M, N)`` similarity matrix with entries in ``[-1, 1]``.
        """

    @abstractmethod
    def batch_score(
        self,
        hypotheses: Any,
        reference: Any,
        weights: Any | None = None,
    ) -> Any:
        """Score each hypothesis against a reference via weighted dot-product.

        Args:
            hypotheses: ``(K, D)`` tensor of hypothesis embeddings.
            reference: ``(1, D)`` or ``(D,)`` reference embedding.
            weights: Optional ``(D,)`` per-dimension weights.

        Returns:
            A 1D tensor of ``K`` scores.
        """

    @abstractmethod
    def multi_head_attention(
        self,
        queries: Any,
        keys: Any,
        values: Any,
        num_heads: int = 4,
    ) -> Any:
        """Apply multi-head attention, used for consensus scoring.

        Args:
            queries: ``(seq_len_q, D)`` tensor.
            keys: ``(seq_len_k, D)`` tensor.
            values: ``(seq_len_k, D)`` tensor.
            num_heads: How many attention heads to split ``D`` across.

        Returns:
            Attended output shaped ``(seq_len_q, D)``.
        """

    @abstractmethod
    def to_numpy(self, tensor: Any) -> Any:
        """Materialize a backend tensor as a NumPy array."""

    @abstractmethod
    def from_numpy(self, array: Any) -> Any:
        """Wrap a NumPy array as a backend-native tensor."""

    def gpu_available(self) -> bool:
        """Whether this backend runs on an accelerator rather than the CPU."""
        return self.device != DeviceType.CPU

    def enable_mixed_precision(self) -> None:
        """Turn on FP16/BF16 compute for TensorCore acceleration.

        The base implementation is a deliberate no-op; the TensorFlow
        backend overrides it.
        """

    def device_summary(self) -> dict[str, Any]:
        """Describe the active backend and device as a plain dict."""
        summary: dict[str, Any] = {"backend": self.name}
        summary["device"] = self.device.value
        return summary
|
|
|
|
|
|
class NumPyBackend(TensorBackend):
    """Pure-NumPy fallback backend for CPU-only environments.

    Provides the same API as GPU backends but runs on CPU with NumPy.
    Used when TensorFlow is not installed.
    """

    def __init__(self) -> None:
        # Imported lazily so importing this module never hard-requires numpy.
        import numpy as np

        self._np = np
        logger.info("NumPyBackend initialized (CPU fallback)")

    @property
    def name(self) -> str:
        return "numpy"

    @property
    def device(self) -> DeviceType:
        return DeviceType.CPU

    @staticmethod
    def _stable_hash(word: str) -> int:
        """Deterministic 32-bit polynomial hash of *word*.

        Built-in ``hash()`` on str is salted per process (PYTHONHASHSEED),
        so embeddings derived from it differed between runs, contradicting
        the documented determinism. This hash is process-independent.
        """
        h = 0
        for ch in word:
            h = (h * 31 + ord(ch)) & 0xFFFFFFFF
        return h

    def embed_texts(self, texts: list[str], model_name: str | None = None) -> Any:
        """Hash-based embedding for CPU fallback.

        Produces deterministic dense vectors from text using character-level
        hashing. Not semantically meaningful — use the TensorFlow backend
        for real embeddings.

        Args:
            texts: Texts to embed.
            model_name: Ignored; accepted for interface compatibility.

        Returns:
            float32 array of shape (len(texts), 256); non-zero rows are
            L2-normalized.
        """
        dim = 256
        embeddings = self._np.zeros((len(texts), dim), dtype=self._np.float32)
        for i, text in enumerate(texts):
            words = text.lower().split()
            for j, word in enumerate(words):
                # Hoisted out of the char loop — constant per word.
                base = self._stable_hash(word)
                for k, ch in enumerate(word):
                    idx = (base + k * 31 + j * 7) % dim
                    embeddings[i, idx] += ord(ch) / 128.0
            norm = self._np.linalg.norm(embeddings[i])
            if norm > 0:
                embeddings[i] /= norm
        return embeddings

    def cosine_similarity_matrix(self, embeddings_a: Any, embeddings_b: Any) -> Any:
        """Pairwise cosine similarity between rows of two matrices.

        Args:
            embeddings_a: (M, D) array.
            embeddings_b: (N, D) array.

        Returns:
            (M, N) similarity matrix; the 1e-8 epsilon guards zero rows.
        """
        a_norm = embeddings_a / (
            self._np.linalg.norm(embeddings_a, axis=1, keepdims=True) + 1e-8
        )
        b_norm = embeddings_b / (
            self._np.linalg.norm(embeddings_b, axis=1, keepdims=True) + 1e-8
        )
        return a_norm @ b_norm.T

    def batch_score(
        self,
        hypotheses: Any,
        reference: Any,
        weights: Any | None = None,
    ) -> Any:
        """Cosine-score each hypothesis row against the reference.

        Args:
            hypotheses: (K, D) array of hypothesis embeddings.
            reference: (1, D) or (D,) reference embedding.
            weights: Optional (D,) weights applied to both sides.

        Returns:
            1D array of shape (K,). The previous bare ``squeeze()``
            collapsed K == 1 to a 0-d scalar, violating this contract.
        """
        ref = reference.reshape(1, -1) if reference.ndim == 1 else reference
        if weights is not None:
            hypotheses = hypotheses * weights
            ref = ref * weights
        h_norm = hypotheses / (
            self._np.linalg.norm(hypotheses, axis=1, keepdims=True) + 1e-8
        )
        r_norm = ref / (self._np.linalg.norm(ref, axis=1, keepdims=True) + 1e-8)
        # reshape(-1) keeps a (K,) vector even when K == 1, unlike squeeze().
        return (h_norm @ r_norm.T).reshape(-1)

    def multi_head_attention(
        self,
        queries: Any,
        keys: Any,
        values: Any,
        num_heads: int = 4,
    ) -> Any:
        """Scaled dot-product multi-head attention over 2D inputs.

        Args:
            queries: (seq_len_q, D) array.
            keys: (seq_len_k, D) array.
            values: (seq_len_k, D) array.
            num_heads: Number of heads to split D across.

        Returns:
            (seq_len_q, D) attended output. When D is not divisible by
            num_heads the last head now absorbs the remainder columns;
            the old code silently dropped them and returned a tensor
            narrower than D.
        """
        d_model = queries.shape[-1]
        d_head = d_model // num_heads
        if d_head == 0:
            # Fewer features than heads: attention degenerates; pass through.
            return queries

        outputs = []
        for h in range(num_heads):
            start = h * d_head
            # Last head extends to d_model so no feature columns are lost.
            end = d_model if h == num_heads - 1 else start + d_head
            q = queries[:, start:end]
            k = keys[:, start:end]
            v = values[:, start:end]
            scale = self._np.sqrt(self._np.float32(end - start))
            attn_weights = (q @ k.T) / scale
            attn_weights = self._softmax(attn_weights)
            outputs.append(attn_weights @ v)

        return self._np.concatenate(outputs, axis=-1)

    def to_numpy(self, tensor: Any) -> Any:
        """Return *tensor* as a NumPy array (no copy if it already is one)."""
        return self._np.asarray(tensor)

    def from_numpy(self, array: Any) -> Any:
        """NumPy arrays are this backend's native format; pass through."""
        return self._np.asarray(array)

    def _softmax(self, x: Any) -> Any:
        """Row-wise softmax, shifted by the row max for numerical stability."""
        exp_x = self._np.exp(x - self._np.max(x, axis=-1, keepdims=True))
        return exp_x / (self._np.sum(exp_x, axis=-1, keepdims=True) + 1e-8)
|
|
|
|
|
|
# Backend registry — process-wide singleton, created lazily by get_backend().
_BACKEND_INSTANCE: TensorBackend | None = None


def get_backend(force: str | None = None) -> TensorBackend:
    """Return the best available tensor backend (cached singleton).

    Args:
        force: Force a specific backend ('tensorflow' or 'numpy').
            If None, auto-selects: TensorFlow > NumPy. Passing ``force``
            always rebuilds and replaces the cached singleton.

    Returns:
        TensorBackend instance.

    Raises:
        ValueError: If ``force`` is not None, 'tensorflow', or 'numpy'.
        ImportError: If ``force='tensorflow'`` but TensorFlow is unavailable.
    """
    global _BACKEND_INSTANCE

    # Previously any unrecognized value (e.g. 'jax') silently fell through
    # to the NumPy backend; fail loudly instead.
    if force not in (None, "tensorflow", "numpy"):
        raise ValueError(
            f"Unknown backend {force!r}; expected 'tensorflow', 'numpy', or None"
        )

    if _BACKEND_INSTANCE is not None and force is None:
        return _BACKEND_INSTANCE

    if force == "numpy":
        _BACKEND_INSTANCE = NumPyBackend()
        return _BACKEND_INSTANCE

    # force is 'tensorflow' or None at this point.
    try:
        from fusionagi.gpu.tensorflow_ops import TensorFlowBackend

        _BACKEND_INSTANCE = TensorFlowBackend()
        return _BACKEND_INSTANCE
    except ImportError:
        if force == "tensorflow":
            raise
        logger.info("TensorFlow not available, falling back to NumPy backend")

    _BACKEND_INSTANCE = NumPyBackend()
    return _BACKEND_INSTANCE
|
|
|
|
|
|
def reset_backend() -> None:
    """Drop the cached backend singleton.

    The next call to ``get_backend()`` will re-run backend auto-selection.
    Primarily useful in tests that exercise selection logic repeatedly.
    """
    global _BACKEND_INSTANCE
    _BACKEND_INSTANCE = None
|