Some checks failed
- New fusionagi/gpu/ module with TensorBackend protocol abstraction - TensorFlowBackend: GPU-accelerated ops with TensorCore mixed-precision - NumPyBackend: CPU fallback (always available, no extra deps) - Auto-selects best available backend at runtime - GPU-accelerated operations: - Cosine similarity matrix (batched, XLA-compiled) - Multi-head attention for consensus scoring - Batch hypothesis scoring on GPU - Semantic similarity search (pairwise, nearest-neighbor, deduplication) - New TensorFlowAdapter (fusionagi/adapters/): - LLMAdapter for local TF/Keras model inference - TensorCore mixed-precision support - GPU-accelerated embedding synthesis fallback - Reasoning pipeline integration: - gpu_scoring.py: drop-in GPU replacement for multi_path scoring - Super Big Brain: use_gpu config flag, GPU scoring when available - Memory integration: - gpu_search.py: GPU-accelerated semantic search for SemanticGraphMemory - Self-improvement integration: - gpu_training.py: gradient-based heuristic weight optimization - Reflective memory training loop with loss tracking - Dependencies: gpu extra (tensorflow>=2.16, numpy>=1.26) - 64 new tests (276 total), all passing - Architecture spec: docs/gpu_tensorcore_integration.md Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
136 lines
3.9 KiB
Python
136 lines
3.9 KiB
Python
"""GPU-accelerated hypothesis scoring for reasoning pipelines.
|
|
|
|
Provides batched scoring of hypotheses against atomic semantic units
|
|
using GPU-accelerated tensor operations. Replaces the CPU-bound
|
|
ThreadPoolExecutor-based scoring in multi_path.py.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from fusionagi._logger import logger
|
|
from fusionagi.gpu.backend import TensorBackend, get_backend
|
|
from fusionagi.reasoning.tot import ThoughtNode
|
|
from fusionagi.schemas.atomic import AtomicSemanticUnit
|
|
|
|
|
|
def gpu_score_hypotheses(
    hypotheses: list[str],
    units: list[AtomicSemanticUnit],
    backend: TensorBackend | None = None,
) -> list[tuple[ThoughtNode, float]]:
    """Score hypotheses against atomic units using GPU-accelerated similarity.

    Replaces the CPU-based generate_and_score_parallel with batched GPU
    operations: hypothesis and unit texts are each embedded in a single
    batch, then one cosine-similarity matrix drives all the scoring.

    Args:
        hypotheses: List of hypothesis text strings.
        units: List of atomic semantic units for reference.
        backend: TensorBackend to use. Defaults to the best available backend.

    Returns:
        List of (ThoughtNode, score) tuples sorted by score descending.
    """
    if not hypotheses:
        return []

    import numpy as np

    be = backend or get_backend()

    # Every node references (at most) the first 10 unit ids; this is
    # invariant across the loops below, so compute it once and copy per node.
    ref_ids = [u.unit_id for u in units[:10]]

    unit_texts = [u.content for u in units if u.content]
    if not unit_texts:
        # No reference content to score against: fall back to a neutral 0.5
        # score for every hypothesis. Checked *before* embedding so we don't
        # waste a (potentially expensive) GPU embedding pass on the
        # hypotheses when there is nothing to compare them to.
        return [
            (
                ThoughtNode(thought=h, trace=[h], unit_refs=list(ref_ids), score=0.5),
                0.5,
            )
            for h in hypotheses
        ]

    hyp_embeddings = be.embed_texts(hypotheses)
    unit_embeddings = be.embed_texts(unit_texts)

    # (len(hypotheses), len(unit_texts)) pairwise cosine similarities.
    sim_matrix = be.to_numpy(be.cosine_similarity_matrix(hyp_embeddings, unit_embeddings))

    # Coherence: how well a hypothesis agrees with the units on average.
    coherence_scores = np.mean(sim_matrix, axis=1)
    # Consistency: strength of the single best-matching unit.
    consistency_scores = np.max(sim_matrix, axis=1)

    # Equal-weighted blend, clamped to [0, 1].
    combined_scores = np.clip(
        0.5 * coherence_scores + 0.5 * consistency_scores, 0.0, 1.0
    )

    results: list[tuple[ThoughtNode, float]] = []
    for i, h in enumerate(hypotheses):
        score = float(combined_scores[i])
        node = ThoughtNode(
            thought=h,
            trace=[h],
            unit_refs=list(ref_ids),
            score=score,
            metadata={"gpu_scored": True, "coherence": float(coherence_scores[i])},
        )
        results.append((node, score))

    results.sort(key=lambda x: x[1], reverse=True)

    logger.debug(
        "GPU hypothesis scoring complete",
        extra={
            "hypotheses": len(hypotheses),
            "units": len(units),
            "best_score": results[0][1] if results else 0.0,
            "backend": be.name,
        },
    )
    return results
|
|
|
|
|
|
def gpu_score_claims_against_reference(
    claims: list[str],
    reference: str,
    weights: list[float] | None = None,
    backend: TensorBackend | None = None,
) -> list[float]:
    """Score a batch of claims against a single reference using GPU batch_score.

    Args:
        claims: List of claim texts.
        reference: Reference text to score against.
        weights: Optional per-dimension weights. If shorter than the embedding
            dimension, remaining dimensions default to 1.0; extra entries
            beyond the embedding dimension are ignored.
        backend: TensorBackend to use. Defaults to the best available backend.

    Returns:
        List of float scores, one per claim, in input order.
    """
    if not claims:
        return []

    import numpy as np

    be = backend or get_backend()

    claim_emb = be.embed_texts(claims)
    ref_emb = be.embed_texts([reference])

    # Squeeze the (1, dim) reference batch down to a single (dim,) vector;
    # converted once and reused for both the weight sizing and scoring below.
    ref_vec = be.to_numpy(ref_emb)[0]

    weight_tensor = None
    if weights is not None:
        dim = ref_vec.shape[-1]
        w = np.ones(dim, dtype=np.float32)
        # Vectorized copy of the provided weights into the leading
        # dimensions; unspecified dimensions keep weight 1.0.
        n = min(len(weights), dim)
        w[:n] = weights[:n]
        weight_tensor = be.from_numpy(w)

    scores = be.to_numpy(
        be.batch_score(claim_emb, be.from_numpy(ref_vec), weight_tensor)
    )

    # atleast_1d guards against a backend returning a 0-d scalar for a
    # single claim; tolist() already yields a plain Python list of floats.
    return np.atleast_1d(scores).tolist()
|