feat: GPU/TensorCore integration — TensorFlow backend, GPU-accelerated reasoning, training, and memory
Some checks failed
Some checks failed
- New fusionagi/gpu/ module with TensorBackend protocol abstraction - TensorFlowBackend: GPU-accelerated ops with TensorCore mixed-precision - NumPyBackend: CPU fallback (always available, no extra deps) - Auto-selects best available backend at runtime - GPU-accelerated operations: - Cosine similarity matrix (batched, XLA-compiled) - Multi-head attention for consensus scoring - Batch hypothesis scoring on GPU - Semantic similarity search (pairwise, nearest-neighbor, deduplication) - New TensorFlowAdapter (fusionagi/adapters/): - LLMAdapter for local TF/Keras model inference - TensorCore mixed-precision support - GPU-accelerated embedding synthesis fallback - Reasoning pipeline integration: - gpu_scoring.py: drop-in GPU replacement for multi_path scoring - Super Big Brain: use_gpu config flag, GPU scoring when available - Memory integration: - gpu_search.py: GPU-accelerated semantic search for SemanticGraphMemory - Self-improvement integration: - gpu_training.py: gradient-based heuristic weight optimization - Reflective memory training loop with loss tracking - Dependencies: gpu extra (tensorflow>=2.16, numpy>=1.26) - 64 new tests (276 total), all passing - Architecture spec: docs/gpu_tensorcore_integration.md Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
This commit is contained in:
283
fusionagi/gpu/backend.py
Normal file
283
fusionagi/gpu/backend.py
Normal file
@@ -0,0 +1,283 @@
|
||||
"""TensorBackend protocol and backend registry for GPU-accelerated compute.
|
||||
|
||||
Abstracts TensorFlow, JAX, and pure-NumPy backends behind a single protocol.
|
||||
The best available backend is selected lazily on the first call to get_backend() and then cached.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from fusionagi._logger import logger
|
||||
|
||||
|
||||
class DeviceType(str, Enum):
    """Kinds of compute device a tensor backend can report.

    Members mix in ``str``, so each one compares equal to its lowercase
    string value (for example ``DeviceType.CPU == "cpu"``).
    """

    CPU = "cpu"
    GPU = "gpu"
    TPU = "tpu"
|
||||
|
||||
|
||||
class TensorBackend(ABC):
    """Common interface for the tensor engines used by FusionAGI's reasoning pipeline.

    Concrete backends must supply:
    - ``name`` / ``device``: which engine this is and where it computes
    - ``embed_texts``: text -> dense vectors
    - ``cosine_similarity_matrix``: batched pairwise similarity
    - ``batch_score``: scoring of many hypotheses against one reference
    - ``multi_head_attention``: attention used for consensus scoring
    - ``to_numpy`` / ``from_numpy``: conversion to and from NumPy arrays

    ``gpu_available``, ``enable_mixed_precision`` and ``device_summary`` ship
    with default implementations that subclasses may override.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier of the backend, such as 'tensorflow' or 'numpy'."""

    @property
    @abstractmethod
    def device(self) -> DeviceType:
        """Compute device this backend is currently using."""

    @abstractmethod
    def embed_texts(self, texts: list[str], model_name: str | None = None) -> Any:
        """Turn a batch of strings into dense vectors.

        Args:
            texts: Strings to embed.
            model_name: Optional identifier of the embedding model to use.

        Returns:
            2D tensor of shape (len(texts), embedding_dim).
        """

    @abstractmethod
    def cosine_similarity_matrix(self, embeddings_a: Any, embeddings_b: Any) -> Any:
        """Compute every pairwise cosine similarity between two sets of embeddings.

        Args:
            embeddings_a: Tensor of shape (M, D).
            embeddings_b: Tensor of shape (N, D).

        Returns:
            Tensor of shape (M, N) whose entries lie in [-1, 1].
        """

    @abstractmethod
    def batch_score(
        self,
        hypotheses: Any,
        reference: Any,
        weights: Any | None = None,
    ) -> Any:
        """Score each hypothesis embedding against a single reference embedding.

        Args:
            hypotheses: Tensor of shape (K, D), one row per hypothesis.
            reference: Tensor of shape (1, D) or (D,).
            weights: Optional tensor of shape (D,) giving per-dimension weights.

        Returns:
            1D tensor of shape (K,), one score per hypothesis.
        """

    @abstractmethod
    def multi_head_attention(
        self,
        queries: Any,
        keys: Any,
        values: Any,
        num_heads: int = 4,
    ) -> Any:
        """Apply multi-head attention, used for consensus scoring.

        Args:
            queries: Tensor of shape (seq_len_q, D).
            keys: Tensor of shape (seq_len_k, D).
            values: Tensor of shape (seq_len_k, D).
            num_heads: How many attention heads to use.

        Returns:
            Tensor of shape (seq_len_q, D) holding the attended output.
        """

    @abstractmethod
    def to_numpy(self, tensor: Any) -> Any:
        """Return ``tensor`` converted from the backend's type to a NumPy array."""

    @abstractmethod
    def from_numpy(self, array: Any) -> Any:
        """Return ``array`` converted from a NumPy array to the backend's tensor type."""

    def gpu_available(self) -> bool:
        """Report whether the backend runs on an accelerator.

        True for every device other than ``DeviceType.CPU``.
        """
        current_device = self.device
        return current_device != DeviceType.CPU

    def enable_mixed_precision(self) -> None:
        """Turn on FP16/BF16 mixed precision for TensorCore acceleration.

        The base implementation does nothing; backends that support mixed
        precision are expected to override it.
        """

    def device_summary(self) -> dict[str, Any]:
        """Describe the backend as a dict with 'backend' and 'device' entries."""
        return dict(backend=self.name, device=self.device.value)
|
||||
|
||||
|
||||
class NumPyBackend(TensorBackend):
    """Pure-NumPy fallback backend for CPU-only environments.

    Offers the same operations as the accelerated backends but computes
    everything on the CPU with NumPy.
    """

    def __init__(self) -> None:
        # Imported here so that merely importing this module does not require NumPy.
        import numpy as np

        self._np = np
        logger.info("NumPyBackend initialized (CPU fallback)")

    @property
    def name(self) -> str:
        """Backend identifier, always 'numpy'."""
        return "numpy"

    @property
    def device(self) -> DeviceType:
        """Compute device, always ``DeviceType.CPU``."""
        return DeviceType.CPU

    def embed_texts(self, texts: list[str], model_name: str | None = None) -> Any:
        """Build hash-based embeddings as a CPU fallback.

        Each whitespace-separated, lower-cased word scatters its character
        codes into buckets derived from a CRC32 of the word, and every row is
        then L2-normalised. The vectors are deterministic but not semantically
        meaningful.

        Args:
            texts: Strings to embed.
            model_name: Accepted for interface compatibility; ignored here.

        Returns:
            float32 array of shape (len(texts), 256). Rows for texts without
            any words stay all-zero.
        """
        import zlib

        np = self._np
        dim = 256
        embeddings = np.zeros((len(texts), dim), dtype=np.float32)
        for i, text in enumerate(texts):
            for j, word in enumerate(text.lower().split()):
                # Built-in hash() of a str is randomised per interpreter process,
                # which made embeddings differ between runs. CRC32 is stable.
                word_hash = zlib.crc32(word.encode("utf-8", "surrogatepass"))
                base = word_hash + j * 7
                for k, ch in enumerate(word):
                    embeddings[i, (base + k * 31) % dim] += ord(ch) / 128.0
            norm = np.linalg.norm(embeddings[i])
            if norm > 0:
                embeddings[i] /= norm
        return embeddings

    def cosine_similarity_matrix(self, embeddings_a: Any, embeddings_b: Any) -> Any:
        """Compute pairwise cosine similarity between the rows of two matrices.

        Args:
            embeddings_a: Array of shape (M, D).
            embeddings_b: Array of shape (N, D).

        Returns:
            Array of shape (M, N).
        """
        np = self._np
        # The small epsilon keeps all-zero rows from dividing by zero.
        a_norm = embeddings_a / (
            np.linalg.norm(embeddings_a, axis=1, keepdims=True) + 1e-8
        )
        b_norm = embeddings_b / (
            np.linalg.norm(embeddings_b, axis=1, keepdims=True) + 1e-8
        )
        return a_norm @ b_norm.T

    def batch_score(
        self,
        hypotheses: Any,
        reference: Any,
        weights: Any | None = None,
    ) -> Any:
        """Score hypotheses by cosine similarity to a reference embedding.

        When ``weights`` is given, both sides are multiplied element-wise by
        it before the similarity is computed.

        Args:
            hypotheses: Array of shape (K, D).
            reference: Array of shape (1, D) or (D,).
            weights: Optional array of shape (D,).

        Returns:
            Array of shape (K,), including when K == 1.
        """
        np = self._np
        ref = reference.reshape(1, -1) if reference.ndim == 1 else reference
        if weights is not None:
            hypotheses = hypotheses * weights
            ref = ref * weights
        h_norm = hypotheses / (
            np.linalg.norm(hypotheses, axis=1, keepdims=True) + 1e-8
        )
        r_norm = ref / (np.linalg.norm(ref, axis=1, keepdims=True) + 1e-8)
        scores = h_norm @ r_norm.T
        if scores.shape[1] == 1:
            # Drop only the reference axis. A bare squeeze() would also remove
            # the hypothesis axis when K == 1 and return a 0-d scalar.
            return scores[:, 0]
        # A multi-row reference is outside the documented contract; the
        # previous squeeze behaviour is kept for it.
        return scores.squeeze()

    def multi_head_attention(
        self,
        queries: Any,
        keys: Any,
        values: Any,
        num_heads: int = 4,
    ) -> Any:
        """Run scaled dot-product attention independently on column slices.

        The feature axis is cut into ``num_heads`` contiguous slices; there
        are no learned projections.

        Args:
            queries: Array of shape (seq_len_q, D).
            keys: Array of shape (seq_len_k, D).
            values: Array of shape (seq_len_k, D).
            num_heads: Number of heads (must be >= 1).

        Returns:
            Array of shape (seq_len_q, D). When ``num_heads`` exceeds D,
            ``queries`` is returned unchanged.

        Raises:
            ValueError: If ``num_heads`` is less than 1.
        """
        if num_heads < 1:
            raise ValueError(f"num_heads must be >= 1, got {num_heads}")

        np = self._np
        d_model = queries.shape[-1]
        d_head = d_model // num_heads
        if d_head == 0:
            # More heads than feature columns: nothing to split.
            return queries

        outputs = []
        for h in range(num_heads):
            start = h * d_head
            # The last head absorbs the D % num_heads leftover columns, so the
            # output keeps all D columns instead of silently truncating.
            end = d_model if h == num_heads - 1 else start + d_head
            q = queries[:, start:end]
            k = keys[:, start:end]
            v = values[:, start:end]
            scale = np.sqrt(np.float32(end - start))
            attn_weights = self._softmax((q @ k.T) / scale)
            outputs.append(attn_weights @ v)

        return np.concatenate(outputs, axis=-1)

    def to_numpy(self, tensor: Any) -> Any:
        """Return ``tensor`` as a NumPy array via ``numpy.asarray``."""
        return self._np.asarray(tensor)

    def from_numpy(self, array: Any) -> Any:
        """Return ``array`` as a NumPy array via ``numpy.asarray``."""
        return self._np.asarray(array)

    def _softmax(self, x: Any) -> Any:
        """Softmax over the last axis, shifted by the row maximum for stability."""
        np = self._np
        exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return exp_x / (np.sum(exp_x, axis=-1, keepdims=True) + 1e-8)
|
||||
|
||||
|
||||
# Backend registry: process-wide cached backend, filled in by get_backend().
_BACKEND_INSTANCE: TensorBackend | None = None


def get_backend(force: str | None = None) -> TensorBackend:
    """Return the tensor backend to use, creating and caching it when needed.

    Args:
        force: Name of a specific backend, 'tensorflow' or 'numpy'. With
            None the cached instance is returned if there is one;
            otherwise TensorFlow is tried first and NumPy is the fallback.
            Any non-None value builds a fresh backend and replaces the
            cached one. An unrecognised name logs a warning and yields
            the NumPy backend.

    Returns:
        TensorBackend instance, also stored as the cached backend.

    Raises:
        ImportError: If ``force`` is 'tensorflow' and the TensorFlow backend
            module cannot be imported.
    """
    global _BACKEND_INSTANCE

    if force is None and _BACKEND_INSTANCE is not None:
        return _BACKEND_INSTANCE

    if force is not None and force not in ("numpy", "tensorflow"):
        # This used to fall back to NumPy silently, hiding typos and
        # unsupported backend names from the caller.
        logger.warning(
            f"Unknown backend {force!r} requested; falling back to NumPy backend"
        )

    if force is None or force == "tensorflow":
        try:
            from fusionagi.gpu.tensorflow_ops import TensorFlowBackend

            _BACKEND_INSTANCE = TensorFlowBackend()
            return _BACKEND_INSTANCE
        except ImportError:
            if force == "tensorflow":
                # The caller asked for TensorFlow explicitly: surface the failure.
                raise
            logger.info("TensorFlow not available, falling back to NumPy backend")

    _BACKEND_INSTANCE = NumPyBackend()
    return _BACKEND_INSTANCE
|
||||
|
||||
|
||||
def reset_backend() -> None:
    """Clear the cached backend so the next get_backend() call selects one again.

    Intended for use in tests.
    """
    global _BACKEND_INSTANCE
    _BACKEND_INSTANCE = None
|
||||
Reference in New Issue
Block a user