Some checks failed
- New fusionagi/gpu/ module with TensorBackend protocol abstraction - TensorFlowBackend: GPU-accelerated ops with TensorCore mixed-precision - NumPyBackend: CPU fallback (always available, no extra deps) - Auto-selects best available backend at runtime - GPU-accelerated operations: - Cosine similarity matrix (batched, XLA-compiled) - Multi-head attention for consensus scoring - Batch hypothesis scoring on GPU - Semantic similarity search (pairwise, nearest-neighbor, deduplication) - New TensorFlowAdapter (fusionagi/adapters/): - LLMAdapter for local TF/Keras model inference - TensorCore mixed-precision support - GPU-accelerated embedding synthesis fallback - Reasoning pipeline integration: - gpu_scoring.py: drop-in GPU replacement for multi_path scoring - Super Big Brain: use_gpu config flag, GPU scoring when available - Memory integration: - gpu_search.py: GPU-accelerated semantic search for SemanticGraphMemory - Self-improvement integration: - gpu_training.py: gradient-based heuristic weight optimization - Reflective memory training loop with loss tracking - Dependencies: gpu extra (tensorflow>=2.16, numpy>=1.26) - 64 new tests (276 total), all passing - Architecture spec: docs/gpu_tensorcore_integration.md Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
136 lines
3.9 KiB
Python
136 lines
3.9 KiB
Python
"""GPU-accelerated hypothesis scoring for reasoning pipelines.
|
|
|
|
Provides batched scoring of hypotheses against atomic semantic units
|
|
using GPU-accelerated tensor operations. Replaces the CPU-bound
|
|
ThreadPoolExecutor-based scoring in multi_path.py.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from fusionagi._logger import logger
|
|
from fusionagi.gpu.backend import TensorBackend, get_backend
|
|
from fusionagi.reasoning.tot import ThoughtNode
|
|
from fusionagi.schemas.atomic import AtomicSemanticUnit
|
|
|
|
|
|
def gpu_score_hypotheses(
    hypotheses: list[str],
    units: list[AtomicSemanticUnit],
    backend: TensorBackend | None = None,
) -> list[tuple[ThoughtNode, float]]:
    """Score hypotheses against atomic units using GPU-accelerated similarity.

    Replaces the CPU-based generate_and_score_parallel with batched GPU
    operations: hypothesis and unit texts are each embedded in a single
    batch, then one cosine-similarity matrix drives all the scoring.

    Args:
        hypotheses: List of hypothesis text strings.
        units: List of atomic semantic units for reference.
        backend: TensorBackend to use. Defaults to the best available backend.

    Returns:
        List of (ThoughtNode, score) tuples sorted by score descending.
    """
    if not hypotheses:
        return []

    import numpy as np

    be = backend or get_backend()

    # Every node references (at most) the first 10 unit ids; this is
    # invariant across the loops below, so compute it once and copy per node.
    ref_ids = [u.unit_id for u in units[:10]]

    unit_texts = [u.content for u in units if u.content]
    if not unit_texts:
        # No reference content to score against: fall back to a neutral 0.5
        # score for every hypothesis. Checked *before* embedding so we don't
        # waste a (potentially expensive) GPU embedding pass on the
        # hypotheses when there is nothing to compare them to.
        return [
            (
                ThoughtNode(thought=h, trace=[h], unit_refs=list(ref_ids), score=0.5),
                0.5,
            )
            for h in hypotheses
        ]

    hyp_embeddings = be.embed_texts(hypotheses)
    unit_embeddings = be.embed_texts(unit_texts)

    # (len(hypotheses), len(unit_texts)) pairwise cosine similarities.
    sim_matrix = be.to_numpy(be.cosine_similarity_matrix(hyp_embeddings, unit_embeddings))

    # Coherence: how well a hypothesis agrees with the units on average.
    coherence_scores = np.mean(sim_matrix, axis=1)
    # Consistency: strength of the single best-matching unit.
    consistency_scores = np.max(sim_matrix, axis=1)

    # Equal-weighted blend, clamped to [0, 1].
    combined_scores = np.clip(
        0.5 * coherence_scores + 0.5 * consistency_scores, 0.0, 1.0
    )

    results: list[tuple[ThoughtNode, float]] = []
    for i, h in enumerate(hypotheses):
        score = float(combined_scores[i])
        node = ThoughtNode(
            thought=h,
            trace=[h],
            unit_refs=list(ref_ids),
            score=score,
            metadata={"gpu_scored": True, "coherence": float(coherence_scores[i])},
        )
        results.append((node, score))

    results.sort(key=lambda x: x[1], reverse=True)

    logger.debug(
        "GPU hypothesis scoring complete",
        extra={
            "hypotheses": len(hypotheses),
            "units": len(units),
            "best_score": results[0][1] if results else 0.0,
            "backend": be.name,
        },
    )
    return results
|
|
|
|
|
|
def gpu_score_claims_against_reference(
    claims: list[str],
    reference: str,
    weights: list[float] | None = None,
    backend: TensorBackend | None = None,
) -> list[float]:
    """Score a batch of claims against a single reference using GPU batch_score.

    Args:
        claims: List of claim texts.
        reference: Reference text to score against.
        weights: Optional per-dimension weights. If shorter than the embedding
            dimension, remaining dimensions default to 1.0; extra entries
            beyond the embedding dimension are ignored.
        backend: TensorBackend to use. Defaults to the best available backend.

    Returns:
        List of float scores, one per claim, in input order.
    """
    if not claims:
        return []

    import numpy as np

    be = backend or get_backend()

    claim_emb = be.embed_texts(claims)
    ref_emb = be.embed_texts([reference])

    # Squeeze the (1, dim) reference batch down to a single (dim,) vector;
    # converted once and reused for both the weight sizing and scoring below.
    ref_vec = be.to_numpy(ref_emb)[0]

    weight_tensor = None
    if weights is not None:
        dim = ref_vec.shape[-1]
        w = np.ones(dim, dtype=np.float32)
        # Vectorized copy of the provided weights into the leading
        # dimensions; unspecified dimensions keep weight 1.0.
        n = min(len(weights), dim)
        w[:n] = weights[:n]
        weight_tensor = be.from_numpy(w)

    scores = be.to_numpy(
        be.batch_score(claim_emb, be.from_numpy(ref_vec), weight_tensor)
    )

    # atleast_1d guards against a backend returning a 0-d scalar for a
    # single claim; tolist() already yields a plain Python list of floats.
    return np.atleast_1d(scores).tolist()
|