feat: consequence engine, causal world model, metacognition, interpretability, claim verification

Choice → Consequence → Learning:
- ConsequenceEngine tracks every decision point with alternatives, risk/reward estimates, and actual outcomes
- Consequences feed into AdaptiveEthics for experience-based learning
- FusionAGILoop now wires ethics + consequences into task lifecycle

Causal World Model:
- CausalWorldModel learns state-transition patterns from execution history
- Predicts outcomes based on observed action→effect patterns
- Uncertainty estimates decrease as more evidence accumulates

Metacognition:
- assess_head_outputs() evaluates reasoning quality from head outputs
- Detects knowledge gaps, measures head agreement, identifies uncertainty
- Actively recommends whether to seek more information

Interpretability:
- ReasoningTracer captures full prompt→answer reasoning traces
- Each step records stage, component, input/output, timing
- explain() generates human-readable reasoning explanations

Claim Verification:
- ClaimVerifier cross-checks claims for evidence, consistency, grounding
- Flags high-confidence claims lacking evidence support
- Detects contradictions between claims from different heads

325 tests passing, 0 ruff errors, 0 mypy errors.

Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
2026-04-28 06:25:35 +00:00
parent 039440672e
commit 9a8affae9a
14 changed files with 1961 additions and 39 deletions
--- a/fusionagi/reasoning/__init__.py
+++ b/fusionagi/reasoning/__init__.py
@@ -10,11 +10,21 @@ from fusionagi.reasoning.gpu_scoring import (
    generate_and_score_gpu,
    score_claims_gpu,
 )
+from fusionagi.reasoning.interpretability import (
+    ReasoningTrace,
+    ReasoningTracer,
+    TraceStep,
+)
 from fusionagi.reasoning.meta_reasoning import (
    challenge_assumptions,
    detect_contradictions,
    revisit_node,
 )
+from fusionagi.reasoning.metacognition import (
+    KnowledgeGap,
+    MetacognitiveAssessment,
+    assess_head_outputs,
+)
 from fusionagi.reasoning.multi_path import generate_and_score_parallel
 from fusionagi.reasoning.native import (
    NativeReasoningProvider,
@@ -61,4 +71,10 @@ __all__ = [
    "generate_and_score_gpu",
    "score_claims_gpu",
    "deduplicate_claims_gpu",
+    "MetacognitiveAssessment",
+    "KnowledgeGap",
+    "assess_head_outputs",
+    "ReasoningTrace",
+    "ReasoningTracer",
+    "TraceStep",
 ]
--- a/fusionagi/reasoning/interpretability.py
+++ b/fusionagi/reasoning/interpretability.py
@@ -0,0 +1,247 @@
+"""Interpretability: full reasoning trace from prompt to final answer.
+
+Every step of the reasoning pipeline can be traced and explained:
+- Prompt decomposition decisions
+- Head selection and dispatch
+- Per-head claim generation with evidence chains
+- Consensus process (agreements, disputes)
+- Metacognitive assessment
+- Verification results
+- Final synthesis rationale
+
+The ReasoningTrace captures all of this in a structured, queryable format
+that can be serialized for debugging, auditing, or user explanation.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any
+
+
+def _utc_now() -> datetime:
+    """Give the present moment as a timezone-aware ``datetime`` in UTC."""
+    return datetime.now(tz=timezone.utc)
+
+
+@dataclass
+class TraceStep:
+    """One recorded step of a reasoning trace.
+
+    Every field has a default, so a step can be built from any subset of
+    keyword arguments.
+
+    Attributes:
+        step_id: Identifier of the step.
+        stage: Name of the pipeline stage, such as ``decomposition`` or
+            ``head_dispatch``.
+        component: Name of the component that ran the step.
+        input_summary: Short description of what went into the step.
+        output_summary: Short description of what came out of the step.
+        duration_ms: Elapsed time in milliseconds, or ``None`` when it was
+            not measured.
+        metadata: Free-form structured data attached to the step.
+        timestamp: Timezone-aware UTC time at which the step object was
+            created.
+    """
+
+    step_id: str = ""
+    stage: str = ""
+    component: str = ""
+    input_summary: str = ""
+    output_summary: str = ""
+    duration_ms: float | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+    timestamp: datetime = field(default_factory=_utc_now)
+
+
+@dataclass
+class ReasoningTrace:
+    """Full record of one prompt→response reasoning cycle.
+
+    Every field has a default, so a trace can start out nearly empty and be
+    filled in as the pipeline progresses.
+
+    Attributes:
+        trace_id: Identifier of the trace.
+        task_id: Identifier of the task the trace belongs to.
+        prompt: The user prompt that started the cycle.
+        steps: Trace steps in the order they were appended.
+        final_answer: Answer produced at the end of the cycle.
+        overall_confidence: Confidence score attached to the final answer.
+        metacognitive_summary: Summary of the metacognitive assessment.
+        verification_summary: Summary of the claim verification.
+        created_at: Timezone-aware UTC time at which the trace object was
+            created.
+    """
+
+    trace_id: str = ""
+    task_id: str = ""
+    prompt: str = ""
+    steps: list[TraceStep] = field(default_factory=list)
+    final_answer: str = ""
+    overall_confidence: float = 0.0
+    metacognitive_summary: dict[str, Any] = field(default_factory=dict)
+    verification_summary: dict[str, Any] = field(default_factory=dict)
+    created_at: datetime = field(default_factory=_utc_now)
+
+
+class ReasoningTracer:
+    """Records interpretable reasoning traces for the pipeline.
+
+    Traces are held in memory, keyed by ``trace_id``.  When the store is at
+    capacity, starting a new trace evicts the oldest one (FIFO).  Step IDs
+    come from one counter shared by every trace of this tracer, so they are
+    unique per tracer rather than restarting for each trace.
+
+    Args:
+        max_traces: Maximum traces to retain in memory (FIFO).
+    """
+
+    def __init__(self, max_traces: int = 1000) -> None:
+        self._traces: dict[str, ReasoningTrace] = {}
+        # Trace IDs in creation order; index 0 is the next eviction candidate.
+        self._trace_order: list[str] = []
+        self._max_traces = max_traces
+        self._step_counter = 0
+
+    def start_trace(self, trace_id: str, task_id: str, prompt: str) -> ReasoningTrace:
+        """Begin a new reasoning trace.
+
+        Starting a trace with an ID that is already stored replaces the
+        stored trace and moves the ID to the newest position.
+
+        Args:
+            trace_id: Unique ID for this trace.
+            task_id: Associated task ID.
+            prompt: The user's prompt.
+
+        Returns:
+            The newly created trace.
+        """
+        if self._traces.pop(trace_id, None) is not None:
+            # Drop the stale ordering entry.  Leaving it behind would list
+            # the ID twice, and evicting the stale entry later would delete
+            # the live trace.
+            self._trace_order = [tid for tid in self._trace_order if tid != trace_id]
+
+        if len(self._traces) >= self._max_traces and self._trace_order:
+            oldest = self._trace_order.pop(0)
+            self._traces.pop(oldest, None)
+
+        trace = ReasoningTrace(
+            trace_id=trace_id,
+            task_id=task_id,
+            prompt=prompt,
+        )
+        self._traces[trace_id] = trace
+        self._trace_order.append(trace_id)
+        return trace
+
+    def add_step(
+        self,
+        trace_id: str,
+        stage: str,
+        component: str,
+        input_summary: str = "",
+        output_summary: str = "",
+        duration_ms: float | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> TraceStep | None:
+        """Add a step to an existing trace.
+
+        Args:
+            trace_id: The trace to add the step to.
+            stage: Pipeline stage name.
+            component: Component that executed this step.
+            input_summary: Brief input description (stored truncated to
+                200 characters).
+            output_summary: Brief output description (stored truncated to
+                200 characters).
+            duration_ms: Execution time.
+            metadata: Additional data.
+
+        Returns:
+            The added step, or ``None`` if trace not found.
+        """
+        trace = self._traces.get(trace_id)
+        if trace is None:
+            return None
+
+        self._step_counter += 1
+        step = TraceStep(
+            step_id=f"step_{self._step_counter}",
+            stage=stage,
+            component=component,
+            input_summary=input_summary[:200],
+            output_summary=output_summary[:200],
+            duration_ms=duration_ms,
+            metadata=metadata or {},
+        )
+        trace.steps.append(step)
+        return step
+
+    def finalize_trace(
+        self,
+        trace_id: str,
+        final_answer: str,
+        confidence: float,
+        metacognitive_summary: dict[str, Any] | None = None,
+        verification_summary: dict[str, Any] | None = None,
+    ) -> ReasoningTrace | None:
+        """Finalize a trace with the final answer and assessments.
+
+        A summary argument that is ``None`` or empty leaves the summary
+        already stored on the trace untouched.
+
+        Args:
+            trace_id: The trace to finalize.
+            final_answer: The produced answer.
+            confidence: Overall confidence score.
+            metacognitive_summary: Metacognition assessment summary.
+            verification_summary: Claim verification summary.
+
+        Returns:
+            The finalized trace, or ``None`` if not found.
+        """
+        trace = self._traces.get(trace_id)
+        if trace is None:
+            return None
+
+        trace.final_answer = final_answer
+        trace.overall_confidence = confidence
+        if metacognitive_summary:
+            trace.metacognitive_summary = metacognitive_summary
+        if verification_summary:
+            trace.verification_summary = verification_summary
+        return trace
+
+    def get_trace(self, trace_id: str) -> ReasoningTrace | None:
+        """Retrieve a trace by ID, or ``None`` if it is not stored."""
+        return self._traces.get(trace_id)
+
+    def get_recent_traces(self, limit: int = 10) -> list[ReasoningTrace]:
+        """Retrieve the most recent traces, oldest first.
+
+        Args:
+            limit: Maximum number of traces to return.  Zero or a negative
+                value yields an empty list.
+
+        Returns:
+            Up to ``limit`` of the most recently started traces.
+        """
+        # Guard explicitly: ``order[-0:]`` is the whole list, not an empty one.
+        if limit <= 0:
+            return []
+        recent_ids = self._trace_order[-limit:]
+        return [self._traces[tid] for tid in recent_ids if tid in self._traces]
+
+    def explain(self, trace_id: str) -> str:
+        """Generate a human-readable explanation of a reasoning trace.
+
+        The prompt is shown truncated to 100 characters and the final
+        answer to 200 characters.
+
+        Args:
+            trace_id: The trace to explain.
+
+        Returns:
+            Formatted explanation string, or a "not found" message when the
+            trace is not stored.
+        """
+        trace = self._traces.get(trace_id)
+        if trace is None:
+            return f"Trace '{trace_id}' not found."
+
+        lines: list[str] = [
+            f"Reasoning Trace: {trace.trace_id}",
+            f"Task: {trace.task_id}",
+            f"Prompt: {trace.prompt[:100]}",
+            f"Steps: {len(trace.steps)}",
+            "",
+        ]
+
+        for i, step in enumerate(trace.steps, 1):
+            lines.append(f"  {i}. [{step.stage}] {step.component}")
+            if step.input_summary:
+                lines.append(f"     Input:  {step.input_summary}")
+            if step.output_summary:
+                lines.append(f"     Output: {step.output_summary}")
+            if step.duration_ms is not None:
+                lines.append(f"     Time:   {step.duration_ms:.1f}ms")
+
+        lines.append("")
+        lines.append(f"Final Answer: {trace.final_answer[:200]}")
+        lines.append(f"Confidence: {trace.overall_confidence:.2f}")
+
+        if trace.metacognitive_summary:
+            lines.append(f"Metacognition: {trace.metacognitive_summary}")
+        if trace.verification_summary:
+            lines.append(f"Verification: {trace.verification_summary}")
+
+        return "\n".join(lines)
+
+    @property
+    def total_traces(self) -> int:
+        """Number of traces stored."""
+        return len(self._traces)
--- a/fusionagi/reasoning/metacognition.py
+++ b/fusionagi/reasoning/metacognition.py
@@ -0,0 +1,262 @@
+"""Metacognition: self-awareness of knowledge boundaries and reasoning quality.
+
+The metacognition engine monitors the system's own reasoning processes
+and produces self-assessments:
+- Does the system have enough evidence to answer confidently?
+- Which knowledge gaps exist?
+- Where are the reasoning weak points?
+- Should the system seek more information before answering?
+
+This is distinct from meta_reasoning.py (which challenges assumptions
+and detects contradictions in content).  Metacognition operates on
+the *process* level — it reasons about the quality of reasoning itself.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+from fusionagi._logger import logger
+from fusionagi.schemas.head import HeadOutput
+
+
+@dataclass
+class KnowledgeGap:
+    """A single gap found in what the system knows.
+
+    ``domain`` and ``description`` are required; the other fields have
+    defaults.
+
+    Attributes:
+        domain: Knowledge domain the gap belongs to (for example ``legal``
+            or ``medical``).
+        description: Statement of what is missing.
+        severity: Effect on answer quality: ``low``, ``medium`` or ``high``.
+        resolvable: ``True`` when further tool calls could close the gap.
+    """
+
+    domain: str
+    description: str
+    severity: str = "medium"
+    resolvable: bool = True
+
+
+@dataclass
+class MetacognitiveAssessment:
+    """The system's own verdict on how well it reasoned about a task.
+
+    All fields are optional; scores default to the midpoint ``0.5`` and
+    list fields default to fresh empty lists.
+
+    Attributes:
+        overall_confidence: Confidence in the answer, from 0.0 to 1.0.
+        evidence_sufficiency: How adequate the evidence is, from 0.0 to 1.0.
+        knowledge_gaps: Gaps in knowledge that were identified.
+        reasoning_quality: Score for the quality of the reasoning chain.
+        should_seek_more: ``True`` when more information should be gathered.
+        head_agreement: Degree to which heads agree, from 0.0 to 1.0.
+        uncertainty_sources: Descriptions of where uncertainty originates.
+        recommendations: Suggested next actions.
+    """
+
+    overall_confidence: float = 0.5
+    evidence_sufficiency: float = 0.5
+    knowledge_gaps: list[KnowledgeGap] = field(default_factory=list)
+    reasoning_quality: float = 0.5
+    should_seek_more: bool = False
+    head_agreement: float = 0.5
+    uncertainty_sources: list[str] = field(default_factory=list)
+    recommendations: list[str] = field(default_factory=list)
+
+
+def assess_head_outputs(
+    outputs: list[HeadOutput],
+    user_prompt: str = "",
+) -> MetacognitiveAssessment:
+    """Assess reasoning quality from head outputs.
+
+    Analyzes the collection of head outputs for agreement patterns,
+    confidence distribution, evidence coverage, and knowledge gaps, then
+    logs a summary of the result.
+
+    Scoring:
+        * ``reasoning_quality`` = 0.4 · average claim confidence
+          + 0.3 · head agreement + 0.2 · evidence sufficiency
+          + 0.1 · gap term, where the gap term loses 0.2 per detected gap
+          and bottoms out at 0.
+        * ``overall_confidence`` = 0.5 · reasoning quality
+          + 0.3 · head agreement + 0.2 · evidence sufficiency, capped at 1.0.
+        * ``evidence_sufficiency`` reaches 1.0 at an average of three
+          evidence items per claim.
+
+    Args:
+        outputs: Outputs from Dvādaśa content heads.
+        user_prompt: Original user prompt for context.
+
+    Returns:
+        Metacognitive assessment of reasoning quality.  With no outputs the
+        assessment has zero confidence and ``should_seek_more`` set.
+    """
+    if not outputs:
+        return MetacognitiveAssessment(
+            overall_confidence=0.0,
+            evidence_sufficiency=0.0,
+            should_seek_more=True,
+            uncertainty_sources=["No head outputs available"],
+            recommendations=["Execute head pipeline before assessment"],
+        )
+
+    # One sample per claim; a head without claims counts as a single 0.0 so
+    # that silent heads pull the average down.
+    confidences: list[float] = []
+    for out in outputs:
+        if out.claims:
+            confidences.extend(c.confidence for c in out.claims)
+        else:
+            confidences.append(0.0)
+    avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
+
+    evidence_counts = [len(c.evidence) for out in outputs for c in out.claims]
+    avg_evidence = sum(evidence_counts) / max(len(evidence_counts), 1)
+    evidence_sufficiency = min(1.0, avg_evidence / 3.0)
+
+    head_agreement = _compute_head_agreement(outputs)
+
+    gaps = _detect_knowledge_gaps(outputs, user_prompt)
+
+    uncertainty_sources: list[str] = []
+    if avg_confidence < 0.5:
+        uncertainty_sources.append(f"Low average head confidence: {avg_confidence:.2f}")
+    if head_agreement < 0.4:
+        uncertainty_sources.append(f"Low head agreement: {head_agreement:.2f}")
+    if evidence_sufficiency < 0.3:
+        uncertainty_sources.append(f"Insufficient evidence: avg {avg_evidence:.1f} per claim")
+    if gaps:
+        uncertainty_sources.append(f"{len(gaps)} knowledge gap(s) detected")
+
+    conf_variance = _variance(confidences) if len(confidences) > 1 else 0.0
+    if conf_variance > 0.1:
+        uncertainty_sources.append(
+            f"High confidence variance across heads: {conf_variance:.3f}"
+        )
+
+    reasoning_quality = (
+        0.4 * avg_confidence
+        + 0.3 * head_agreement
+        + 0.2 * evidence_sufficiency
+        + 0.1 * (1.0 - min(1.0, len(gaps) * 0.2))
+    )
+
+    should_seek_more = (
+        reasoning_quality < 0.4
+        or evidence_sufficiency < 0.3
+        or any(g.severity == "high" and g.resolvable for g in gaps)
+    )
+
+    recommendations: list[str] = []
+    if should_seek_more:
+        recommendations.append("Seek additional evidence before finalizing answer")
+    if head_agreement < 0.4:
+        recommendations.append("Run second-pass with disputed heads for clarification")
+    for gap in gaps:
+        if gap.resolvable:
+            recommendations.append(f"Fill knowledge gap: {gap.description}")
+
+    overall = min(1.0, 0.5 * reasoning_quality + 0.3 * head_agreement + 0.2 * evidence_sufficiency)
+
+    assessment = MetacognitiveAssessment(
+        overall_confidence=overall,
+        evidence_sufficiency=evidence_sufficiency,
+        knowledge_gaps=gaps,
+        reasoning_quality=reasoning_quality,
+        should_seek_more=should_seek_more,
+        head_agreement=head_agreement,
+        uncertainty_sources=uncertainty_sources,
+        recommendations=recommendations,
+    )
+
+    logger.info(
+        "Metacognition: assessment complete",
+        extra={
+            "overall_confidence": overall,
+            "reasoning_quality": reasoning_quality,
+            "head_agreement": head_agreement,
+            "gaps": len(gaps),
+            "should_seek_more": should_seek_more,
+        },
+    )
+    return assessment
+
+
+def _compute_head_agreement(outputs: list[HeadOutput]) -> float:
+    """Score how closely the heads agree, using shared claim vocabulary.
+
+    Each head is reduced to the set of lower-cased, whitespace-separated
+    words found in its claim texts.  The score is the mean Jaccard
+    similarity (intersection size over union size) across every pair of
+    heads that both have at least one word.
+
+    Returns:
+        ``1.0`` when fewer than two outputs are given, ``0.0`` when no pair
+        could be compared, otherwise the mean pairwise similarity.
+    """
+    if len(outputs) < 2:
+        return 1.0
+
+    vocabularies: list[set[str]] = [
+        {token for claim in head.claims for token in claim.claim_text.lower().split()}
+        for head in outputs
+    ]
+
+    similarity_total: float = 0.0
+    pair_count = 0
+    for idx, left in enumerate(vocabularies):
+        for right in vocabularies[idx + 1:]:
+            # A head with no words cannot be compared; leave the pair out.
+            if not (left and right):
+                continue
+            combined = len(left | right)
+            if combined > 0:
+                similarity_total += len(left & right) / combined
+                pair_count += 1
+
+    return similarity_total / max(pair_count, 1)
+
+
+def _detect_knowledge_gaps(
+    outputs: list[HeadOutput],
+    user_prompt: str,
+) -> list[KnowledgeGap]:
+    """Collect knowledge gaps visible in the head outputs and the prompt.
+
+    Three passes run in order, each appending to the same list:
+
+    1. A head whose mean claim confidence is below 0.3 yields a resolvable
+       gap (``high`` severity below 0.15, otherwise ``medium``).  A head
+       with no claims is treated as confidence 0.0 here.
+    2. A head with no claims yields a resolvable ``medium`` gap.  Such a
+       head therefore appears twice in the result: once from pass 1 and
+       once from this pass.
+    3. For each of the domains ``legal``, ``medical``, ``financial`` and
+       ``scientific``, if any of its keywords occurs as a substring of the
+       lower-cased prompt and no head's ``head_id.value`` equals the domain
+       name, a non-resolvable ``medium`` gap is added.
+    """
+    found: list[KnowledgeGap] = []
+
+    # Pass 1: heads with very low mean confidence.
+    for head in outputs:
+        mean_conf = (
+            sum(claim.confidence for claim in head.claims) / len(head.claims)
+            if head.claims
+            else 0.0
+        )
+        if not (mean_conf < 0.3):
+            continue
+        found.append(KnowledgeGap(
+            domain=head.head_id.value,
+            description=f"Head '{head.head_id.value}' has very low confidence ({mean_conf:.2f})",
+            severity="high" if mean_conf < 0.15 else "medium",
+            resolvable=True,
+        ))
+
+    # Pass 2: heads that produced nothing at all.
+    for head in outputs:
+        if head.claims:
+            continue
+        found.append(KnowledgeGap(
+            domain=head.head_id.value,
+            description=f"Head '{head.head_id.value}' produced no claims",
+            severity="medium",
+            resolvable=True,
+        ))
+
+    # Pass 3: domains hinted at by the prompt but not covered by any head.
+    lowered_prompt = user_prompt.lower()
+    keywords_by_domain = {
+        "legal": ["law", "legal", "court", "statute", "regulation", "compliance"],
+        "medical": ["medical", "health", "disease", "treatment", "clinical", "patient"],
+        "financial": ["financial", "stock", "market", "investment", "trading", "portfolio"],
+        "scientific": ["experiment", "hypothesis", "data", "study", "research", "evidence"],
+    }
+    for domain, keywords in keywords_by_domain.items():
+        if not any(keyword in lowered_prompt for keyword in keywords):
+            continue
+        covered = {head.head_id.value for head in outputs}
+        if domain in covered:
+            continue
+        found.append(KnowledgeGap(
+            domain=domain,
+            description=f"Prompt references '{domain}' domain but no specialized head covers it",
+            severity="medium",
+            resolvable=False,
+        ))
+
+    return found
+
+
+def _variance(values: list[float]) -> float:
+    """Return the population variance of ``values`` (divides by ``n``).
+
+    Lists with fewer than two elements have a variance of ``0.0``.
+    """
+    count = len(values)
+    if count < 2:
+        return 0.0
+    centre = sum(values) / count
+    squared_deviations = [(value - centre) ** 2 for value in values]
+    return sum(squared_deviations) / count