feat: consequence engine, causal world model, metacognition, interpretability, claim verification

Choice → Consequence → Learning: - ConsequenceEngine tracks every decision point with alternatives, risk/reward estimates, and actual outcomes - Consequences feed into AdaptiveEthics for experience-based learning - FusionAGILoop now wires ethics + consequences into task lifecycle Causal World Model: - CausalWorldModel learns state-transition patterns from execution history - Predicts outcomes based on observed action→effect patterns - Uncertainty estimates decrease as more evidence accumulates Metacognition: - assess_head_outputs() evaluates reasoning quality from head outputs - Detects knowledge gaps, measures head agreement, identifies uncertainty - Actively recommends whether to seek more information Interpretability: - ReasoningTracer captures full prompt→answer reasoning traces - Each step records stage, component, input/output, timing - explain() generates human-readable reasoning explanations Claim Verification: - ClaimVerifier cross-checks claims for evidence, consistency, grounding - Flags high-confidence claims lacking evidence support - Detects contradictions between claims from different heads 325 tests passing, 0 ruff errors, 0 mypy errors. Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
2026-04-28 06:25:35 +00:00
parent 039440672e
commit 9a8affae9a
14 changed files with 1961 additions and 39 deletions
--- a/fusionagi/governance/init.py
+++ b/fusionagi/governance/init.py
@@ -11,6 +11,12 @@ All governance components support two modes (``GovernanceMode``):
 from fusionagi.governance.access_control import AccessControl
 from fusionagi.governance.adaptive_ethics import AdaptiveEthics, EthicalLesson
 from fusionagi.governance.audit_log import AuditLog
 from fusionagi.governance.consequence_engine import (
    Alternative,
    Choice,
    Consequence,
    ConsequenceEngine,
 )
 from fusionagi.governance.guardrails import Guardrails, PreCheckResult
 from fusionagi.governance.intent_alignment import IntentAlignment
 from fusionagi.governance.override import OverrideHooks
@@ -27,6 +33,10 @@ from fusionagi.schemas.audit import GovernanceMode
 __all__ = [
    "AdaptiveEthics",
    "Alternative",
    "Choice",
    "Consequence",
    "ConsequenceEngine",
    "EthicalLesson",
    "GovernanceMode",
    "Guardrails",
--- a/fusionagi/governance/consequence_engine.py
+++ b/fusionagi/governance/consequence_engine.py
@@ -0,0 +1,366 @@
 """Consequence engine: choice → consequence → learning.
 Every decision the system makes is a *choice*.  Every choice has
 *alternatives* that were not taken.  Every choice leads to
 *consequences* — outcomes that carry risk and reward.
 The consequence engine:
 1. Records decision points (what options existed, which was chosen, why)
 2. Tracks consequences (what happened as a result)
 3. Computes risk/reward from historical consequence data
 4. Feeds consequence data into AdaptiveEthics for learning
 Philosophy:
 - Consequences are the true teacher.  Not rules, not constraints.
 - Risk is not to be avoided — it is to be *understood*.
 - Reward without risk teaches nothing.  Risk without consequence teaches less.
 - The system earns trust by showing it understands what its choices cost.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Any, Protocol
 from fusionagi._logger import logger
 from fusionagi.schemas.audit import AuditEventType
 class AuditLogLike(Protocol):
    """Structural type for the audit sink used by the consequence engine.

    Any object exposing a compatible ``append`` method satisfies this
    protocol; inheriting from it is not required.
    """

    def append(
        self,
        event_type: AuditEventType,
        actor: str,
        action: str = "",
        task_id: str | None = None,
        payload: dict[str, Any] | None = None,
        outcome: str = "",
    ) -> str:
        """Append one audit event and return a string.

        NOTE(review): the returned string is presumably the new entry's
        identifier — confirm against the concrete audit log.
        """
        ...
@dataclass
class Alternative:
    """A candidate action that was available at a decision point but rejected.

    Attributes:
        action: Description of the rejected action.
        estimated_risk: Risk estimated when the decision was made (0.0–1.0).
        estimated_reward: Reward estimated when the decision was made (0.0–1.0).
        reason_not_chosen: Explanation of why the option was passed over.
    """

    action: str = ""
    estimated_risk: float = 0.5
    estimated_reward: float = 0.5
    reason_not_chosen: str = ""
@dataclass
class Choice:
    """Snapshot of one decision: what was picked, by whom, and what else was possible.

    Attributes:
        choice_id: Identifier used to look the choice up later.
        task_id: Task the decision belongs to, if any.
        actor: Name of the component that decided.
        action_taken: The action that was selected.
        alternatives: Options that were considered and not selected.
        estimated_risk: Risk estimate made at decision time.
        estimated_reward: Reward estimate made at decision time.
        rationale: Stated reason for the selection.
        context: Free-form description of the situation at decision time.
    """

    choice_id: str = ""
    task_id: str | None = None
    actor: str = ""
    action_taken: str = ""
    # default_factory gives every instance its own list / dict.
    alternatives: list[Alternative] = field(default_factory=list)
    estimated_risk: float = 0.5
    estimated_reward: float = 0.5
    rationale: str = ""
    context: dict[str, Any] = field(default_factory=dict)
@dataclass
class Consequence:
    """What actually happened after a recorded choice was acted upon.

    Attributes:
        choice_id: Identifier of the choice this outcome belongs to.
        outcome_positive: ``True`` when the outcome was beneficial.
        actual_risk_realized: Amount of risk that materialized (0.0–1.0).
        actual_reward_gained: Amount of reward obtained (0.0–1.0).
        description: Free-text account of the outcome.
        cost: Costs incurred (e.g. errors, retries, time).
        benefit: Benefits gained (e.g. task success, learning).
        surprise_factor: Distance between expectation and outcome
            (0 = as expected, 1 = total surprise).
    """

    choice_id: str = ""
    outcome_positive: bool = True
    actual_risk_realized: float = 0.0
    actual_reward_gained: float = 0.5
    description: str = ""
    # default_factory gives every instance its own dict.
    cost: dict[str, Any] = field(default_factory=dict)
    benefit: dict[str, Any] = field(default_factory=dict)
    surprise_factor: float = 0.0
 class ConsequenceEngine:
    """Record decisions, their outcomes, and per-action risk/reward statistics.

    Choices and consequences are kept in memory, keyed by ``choice_id``.
    Realized risk and reward values are additionally grouped by the
    ``action_taken`` string of the originating choice so that
    :meth:`estimate_risk_reward` can summarize how an action has turned
    out in the past.

    Args:
        audit_log: Optional sink that receives one event per recorded
            choice and one per recorded consequence.
        risk_memory_window: Maximum number of most recent realized
            risk/reward samples retained per action.
    """

    def __init__(
        self,
        audit_log: AuditLogLike | None = None,
        risk_memory_window: int = 200,
    ) -> None:
        self._choices: dict[str, Choice] = {}
        self._consequences: dict[str, Consequence] = {}
        # Realized risk / reward samples, keyed by the action string.
        self._risk_history: dict[str, list[float]] = {}
        self._reward_history: dict[str, list[float]] = {}
        self._audit = audit_log
        self._risk_window = risk_memory_window

    @property
    def total_choices(self) -> int:
        """Number of choices currently stored."""
        return len(self._choices)

    @property
    def total_consequences(self) -> int:
        """Number of consequences currently stored."""
        return len(self._consequences)

    def record_choice(
        self,
        choice_id: str,
        actor: str,
        action_taken: str,
        alternatives: list[Alternative] | None = None,
        estimated_risk: float = 0.5,
        estimated_reward: float = 0.5,
        rationale: str = "",
        task_id: str | None = None,
        context: dict[str, Any] | None = None,
    ) -> Choice:
        """Store a decision point, replacing any earlier choice with the same ID.

        Args:
            choice_id: Identifier for this choice.
            actor: Component making the choice.
            action_taken: The selected action.
            alternatives: Other options that were considered.
            estimated_risk: Risk estimate at decision time.
            estimated_reward: Reward estimate at decision time.
            rationale: Why this action was selected.
            task_id: Associated task, if any.
            context: Situation context.

        Returns:
            The stored :class:`Choice`.
        """
        choice = Choice(
            choice_id=choice_id,
            task_id=task_id,
            actor=actor,
            action_taken=action_taken,
            alternatives=alternatives or [],
            estimated_risk=estimated_risk,
            estimated_reward=estimated_reward,
            rationale=rationale,
            context=context or {},
        )
        self._choices[choice_id] = choice
        # Compare against None explicitly: an audit log that implements
        # __len__ is falsy while empty and must still receive events.
        if self._audit is not None:
            self._audit.append(
                AuditEventType.CHOICE,
                actor=actor,
                action="choice_recorded",
                task_id=task_id,
                payload={
                    "choice_id": choice_id,
                    # Long free-text fields are truncated in the audit payload.
                    "action_taken": action_taken[:100],
                    "alternatives_count": len(choice.alternatives),
                    "estimated_risk": estimated_risk,
                    "estimated_reward": estimated_reward,
                    "rationale": rationale[:100],
                },
                outcome="recorded",
            )
        logger.info(
            "ConsequenceEngine: choice recorded",
            extra={
                "choice_id": choice_id,
                "action": action_taken[:50],
                "risk": estimated_risk,
                "reward": estimated_reward,
            },
        )
        return choice

    def record_consequence(
        self,
        choice_id: str,
        outcome_positive: bool,
        actual_risk_realized: float = 0.0,
        actual_reward_gained: float = 0.5,
        description: str = "",
        cost: dict[str, Any] | None = None,
        benefit: dict[str, Any] | None = None,
    ) -> Consequence | None:
        """Store the outcome of a previously recorded choice.

        The surprise factor is the mean absolute gap between the estimated
        and realized risk and reward, capped at 1.0.  The realized values
        are also appended to the per-action history used by
        :meth:`estimate_risk_reward`.

        Args:
            choice_id: Which choice this is a consequence of.
            outcome_positive: Whether the outcome was beneficial.
            actual_risk_realized: How much risk materialized.
            actual_reward_gained: How much reward was gained.
            description: What happened.
            cost: Costs incurred.
            benefit: Benefits gained.

        Returns:
            The stored :class:`Consequence`, or ``None`` (after logging a
            warning) when ``choice_id`` is unknown.
        """
        choice = self._choices.get(choice_id)
        if choice is None:
            logger.warning(
                "ConsequenceEngine: choice not found for consequence",
                extra={"choice_id": choice_id},
            )
            return None
        surprise = abs(choice.estimated_risk - actual_risk_realized) * 0.5 + \
            abs(choice.estimated_reward - actual_reward_gained) * 0.5
        consequence = Consequence(
            choice_id=choice_id,
            outcome_positive=outcome_positive,
            actual_risk_realized=actual_risk_realized,
            actual_reward_gained=actual_reward_gained,
            description=description,
            cost=cost or {},
            benefit=benefit or {},
            surprise_factor=min(1.0, surprise),
        )
        self._consequences[choice_id] = consequence
        action_type = choice.action_taken
        risks = self._risk_history.setdefault(action_type, [])
        rewards = self._reward_history.setdefault(action_type, [])
        risks.append(actual_risk_realized)
        rewards.append(actual_reward_gained)
        # Both lists grow in lockstep, so one length check covers both.
        if len(risks) > self._risk_window:
            self._risk_history[action_type] = risks[-self._risk_window:]
            self._reward_history[action_type] = rewards[-self._risk_window:]
        # Explicit None check for the same reason as in record_choice.
        if self._audit is not None:
            self._audit.append(
                AuditEventType.CONSEQUENCE,
                actor=choice.actor,
                action="consequence_recorded",
                task_id=choice.task_id,
                payload={
                    "choice_id": choice_id,
                    "outcome_positive": outcome_positive,
                    "risk_realized": actual_risk_realized,
                    "reward_gained": actual_reward_gained,
                    "surprise_factor": consequence.surprise_factor,
                    "description": description[:100],
                },
                outcome="positive" if outcome_positive else "negative",
            )
        logger.info(
            "ConsequenceEngine: consequence recorded",
            extra={
                "choice_id": choice_id,
                "positive": outcome_positive,
                "surprise": consequence.surprise_factor,
            },
        )
        return consequence

    def estimate_risk_reward(self, action_type: str) -> dict[str, float]:
        """Summarize the recorded outcomes of one action.

        Args:
            action_type: Action string, matched against the
                ``action_taken`` of earlier choices.

        Returns:
            Dict with ``expected_risk`` and ``expected_reward`` (means),
            ``risk_variance`` and ``reward_variance`` (population
            variances), ``observations`` (sample count) and ``confidence``
            (``0.2 + 0.04 * observations``, capped at 1.0).  With no
            history the neutral defaults 0.5 / 0.5 and confidence 0.1 are
            returned.
        """
        risks = self._risk_history.get(action_type, [])
        rewards = self._reward_history.get(action_type, [])
        if not risks:
            return {
                "expected_risk": 0.5,
                "expected_reward": 0.5,
                "confidence": 0.1,
                "risk_variance": 0.0,
                "reward_variance": 0.0,
                "observations": 0,
            }
        n = len(risks)
        avg_risk = sum(risks) / n
        avg_reward = sum(rewards) / n
        if n > 1:
            risk_var = sum((r - avg_risk) ** 2 for r in risks) / n
            reward_var = sum((r - avg_reward) ** 2 for r in rewards) / n
        else:
            # A single sample carries no spread information.
            risk_var = 0.0
            reward_var = 0.0
        return {
            "expected_risk": avg_risk,
            "expected_reward": avg_reward,
            "confidence": min(1.0, 0.2 + n * 0.04),
            "risk_variance": risk_var,
            "reward_variance": reward_var,
            "observations": n,
        }

    def get_choice(self, choice_id: str) -> Choice | None:
        """Return the stored choice for ``choice_id``, or ``None``."""
        return self._choices.get(choice_id)

    def get_consequence(self, choice_id: str) -> Consequence | None:
        """Return the stored consequence for ``choice_id``, or ``None``."""
        return self._consequences.get(choice_id)

    def get_summary(self) -> dict[str, Any]:
        """Return aggregate counts, rates, and per-action statistics.

        Rates and averages are 0.0 when no consequence has been recorded.
        """
        recorded = list(self._consequences.values())
        # max(..., 1) keeps the divisions defined when nothing is recorded.
        denominator = max(len(recorded), 1)
        total_positive = sum(1 for c in recorded if c.outcome_positive)
        avg_surprise = sum(c.surprise_factor for c in recorded) / denominator
        action_stats: dict[str, dict[str, Any]] = {
            action_type: self.estimate_risk_reward(action_type)
            for action_type in self._risk_history
        }
        return {
            "total_choices": len(self._choices),
            "total_consequences": len(recorded),
            "positive_outcomes": total_positive,
            "negative_outcomes": len(recorded) - total_positive,
            "positive_rate": total_positive / denominator,
            "avg_surprise": avg_surprise,
            "action_stats": action_stats,
        }
--- a/fusionagi/reasoning/init.py
+++ b/fusionagi/reasoning/init.py
@@ -10,11 +10,21 @@ from fusionagi.reasoning.gpu_scoring import (
    generate_and_score_gpu,
    score_claims_gpu,
 )
 from fusionagi.reasoning.interpretability import (
    ReasoningTrace,
    ReasoningTracer,
    TraceStep,
 )
 from fusionagi.reasoning.meta_reasoning import (
    challenge_assumptions,
    detect_contradictions,
    revisit_node,
 )
 from fusionagi.reasoning.metacognition import (
    KnowledgeGap,
    MetacognitiveAssessment,
    assess_head_outputs,
 )
 from fusionagi.reasoning.multi_path import generate_and_score_parallel
 from fusionagi.reasoning.native import (
    NativeReasoningProvider,
@@ -61,4 +71,10 @@ __all__ = [
    "generate_and_score_gpu",
    "score_claims_gpu",
    "deduplicate_claims_gpu",
    "MetacognitiveAssessment",
    "KnowledgeGap",
    "assess_head_outputs",
    "ReasoningTrace",
    "ReasoningTracer",
    "TraceStep",
 ]
--- a/fusionagi/reasoning/interpretability.py
+++ b/fusionagi/reasoning/interpretability.py
@@ -0,0 +1,247 @@
 """Interpretability: full reasoning trace from prompt to final answer.
 Every step of the reasoning pipeline can be traced and explained:
 - Prompt decomposition decisions
 - Head selection and dispatch
 - Per-head claim generation with evidence chains
 - Consensus process (agreements, disputes)
 - Metacognitive assessment
 - Verification results
 - Final synthesis rationale
 The ReasoningTrace captures all of this in a structured, queryable format
 that can be serialized for debugging, auditing, or user explanation.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import Any
 def _utc_now() -> datetime:
    """Return current UTC time (timezone-aware)."""
    return datetime.now(timezone.utc)
@dataclass
class TraceStep:
    """One recorded stage of work inside a reasoning trace.

    Attributes:
        step_id: Identifier of the step.
        stage: Name of the pipeline stage (e.g. ``decomposition``,
            ``head_dispatch``).
        component: Component that carried out the step.
        input_summary: Short description of what went in.
        output_summary: Short description of what came out.
        duration_ms: Elapsed time in milliseconds, or ``None`` when it
            was not measured.
        metadata: Extra structured data attached to the step.
        timestamp: UTC time at which the step object was created.
    """

    step_id: str = ""
    stage: str = ""
    component: str = ""
    input_summary: str = ""
    output_summary: str = ""
    duration_ms: float | None = None
    # default_factory gives every step its own dict and creation time.
    metadata: dict[str, Any] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=_utc_now)
@dataclass
class ReasoningTrace:
    """Everything recorded for one prompt→response cycle.

    Attributes:
        trace_id: Identifier of the trace.
        task_id: Identifier of the task the trace belongs to.
        prompt: The user prompt that started the cycle.
        steps: Recorded steps, in the order they were added.
        final_answer: The answer that was produced.
        overall_confidence: Confidence score attached to the final answer.
        metacognitive_summary: Summary of the metacognitive assessment.
        verification_summary: Summary of the claim verification.
        created_at: UTC time at which the trace object was created.
    """

    trace_id: str = ""
    task_id: str = ""
    prompt: str = ""
    # default_factory gives every trace its own containers and start time.
    steps: list[TraceStep] = field(default_factory=list)
    final_answer: str = ""
    overall_confidence: float = 0.0
    metacognitive_summary: dict[str, Any] = field(default_factory=dict)
    verification_summary: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=_utc_now)
 class ReasoningTracer:
    """In-memory recorder of reasoning traces with FIFO eviction.

    Traces are stored by ID.  Once ``max_traces`` traces are held,
    starting a trace under a new ID evicts the oldest one.

    Args:
        max_traces: Maximum traces to retain in memory (FIFO).
    """

    def __init__(self, max_traces: int = 1000) -> None:
        self._traces: dict[str, ReasoningTrace] = {}
        # Trace IDs from oldest to newest; drives eviction and recency.
        self._trace_order: list[str] = []
        self._max_traces = max_traces
        # Shared across all traces, so step IDs are unique per tracer.
        self._step_counter = 0

    def start_trace(self, trace_id: str, task_id: str, prompt: str) -> ReasoningTrace:
        """Begin a new reasoning trace.

        Starting a trace under an ID that is already stored replaces the
        old trace and moves the ID to the newest position; nothing else
        is evicted in that case.

        Args:
            trace_id: ID for this trace.
            task_id: Associated task ID.
            prompt: The user's prompt.

        Returns:
            The newly created trace.
        """
        if trace_id in self._traces:
            # Drop the old position so each live ID appears exactly once.
            # A duplicate entry would later evict nothing (or the wrong
            # trace) and let the store grow past ``max_traces``.
            self._trace_order = [tid for tid in self._trace_order if tid != trace_id]
        elif len(self._traces) >= self._max_traces and self._trace_order:
            oldest = self._trace_order.pop(0)
            self._traces.pop(oldest, None)
        trace = ReasoningTrace(
            trace_id=trace_id,
            task_id=task_id,
            prompt=prompt,
        )
        self._traces[trace_id] = trace
        self._trace_order.append(trace_id)
        return trace

    def add_step(
        self,
        trace_id: str,
        stage: str,
        component: str,
        input_summary: str = "",
        output_summary: str = "",
        duration_ms: float | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> TraceStep | None:
        """Append a step to an existing trace.

        Args:
            trace_id: The trace to add the step to.
            stage: Pipeline stage name.
            component: Component that executed this step.
            input_summary: Brief input description (stored truncated to
                200 characters).
            output_summary: Brief output description (stored truncated to
                200 characters).
            duration_ms: Execution time in milliseconds.
            metadata: Additional data.

        Returns:
            The added step, or ``None`` if the trace is not found.
        """
        trace = self._traces.get(trace_id)
        if trace is None:
            return None
        self._step_counter += 1
        step = TraceStep(
            step_id=f"step_{self._step_counter}",
            stage=stage,
            component=component,
            input_summary=input_summary[:200],
            output_summary=output_summary[:200],
            duration_ms=duration_ms,
            metadata=metadata or {},
        )
        trace.steps.append(step)
        return step

    def finalize_trace(
        self,
        trace_id: str,
        final_answer: str,
        confidence: float,
        metacognitive_summary: dict[str, Any] | None = None,
        verification_summary: dict[str, Any] | None = None,
    ) -> ReasoningTrace | None:
        """Attach the final answer and assessments to a trace.

        Args:
            trace_id: The trace to finalize.
            final_answer: The produced answer.
            confidence: Overall confidence score.
            metacognitive_summary: Metacognition summary; an empty or
                missing value leaves the stored summary untouched.
            verification_summary: Verification summary; an empty or
                missing value leaves the stored summary untouched.

        Returns:
            The finalized trace, or ``None`` if not found.
        """
        trace = self._traces.get(trace_id)
        if trace is None:
            return None
        trace.final_answer = final_answer
        trace.overall_confidence = confidence
        if metacognitive_summary:
            trace.metacognitive_summary = metacognitive_summary
        if verification_summary:
            trace.verification_summary = verification_summary
        return trace

    def get_trace(self, trace_id: str) -> ReasoningTrace | None:
        """Return the trace stored under ``trace_id``, or ``None``."""
        return self._traces.get(trace_id)

    def get_recent_traces(self, limit: int = 10) -> list[ReasoningTrace]:
        """Return up to ``limit`` most recent traces, oldest first.

        A non-positive ``limit`` yields an empty list.
        """
        # ``[-0:]`` is the whole list, so a zero limit needs its own branch.
        if limit <= 0:
            return []
        recent_ids = self._trace_order[-limit:]
        return [self._traces[tid] for tid in recent_ids if tid in self._traces]

    def explain(self, trace_id: str) -> str:
        """Render a trace as human-readable text.

        The prompt is shown truncated to 100 characters and the final
        answer to 200.

        Args:
            trace_id: The trace to explain.

        Returns:
            The formatted explanation, or a "not found" message for an
            unknown ID.
        """
        trace = self._traces.get(trace_id)
        if trace is None:
            return f"Trace '{trace_id}' not found."
        lines: list[str] = [
            f"Reasoning Trace: {trace.trace_id}",
            f"Task: {trace.task_id}",
            f"Prompt: {trace.prompt[:100]}",
            f"Steps: {len(trace.steps)}",
            "",
        ]
        for i, step in enumerate(trace.steps, 1):
            lines.append(f"  {i}. [{step.stage}] {step.component}")
            if step.input_summary:
                lines.append(f"     Input:  {step.input_summary}")
            if step.output_summary:
                lines.append(f"     Output: {step.output_summary}")
            if step.duration_ms is not None:
                lines.append(f"     Time:   {step.duration_ms:.1f}ms")
        lines.append("")
        lines.append(f"Final Answer: {trace.final_answer[:200]}")
        lines.append(f"Confidence: {trace.overall_confidence:.2f}")
        if trace.metacognitive_summary:
            lines.append(f"Metacognition: {trace.metacognitive_summary}")
        if trace.verification_summary:
            lines.append(f"Verification: {trace.verification_summary}")
        return "\n".join(lines)

    @property
    def total_traces(self) -> int:
        """Number of traces currently stored."""
        return len(self._traces)
--- a/fusionagi/reasoning/metacognition.py
+++ b/fusionagi/reasoning/metacognition.py
@@ -0,0 +1,262 @@
 """Metacognition: self-awareness of knowledge boundaries and reasoning quality.
 The metacognition engine monitors the system's own reasoning processes
 and produces self-assessments:
 - Does the system have enough evidence to answer confidently?
 - Which knowledge gaps exist?
 - Where are the reasoning weak points?
 - Should the system seek more information before answering?
 This is distinct from meta_reasoning.py (which challenges assumptions
 and detects contradictions in content).  Metacognition operates on
 the *process* level — it reasons about the quality of reasoning itself.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from fusionagi._logger import logger
 from fusionagi.schemas.head import HeadOutput
@dataclass
class KnowledgeGap:
    """Something the system has been found not to know.

    Attributes:
        domain: Knowledge domain the gap belongs to (e.g. ``legal``,
            ``medical``).
        description: Statement of what is missing.
        severity: Effect on answer quality: ``low``, ``medium`` or ``high``.
        resolvable: Whether additional tool calls could close the gap.
    """

    domain: str
    description: str
    severity: str = "medium"
    resolvable: bool = True
@dataclass
class MetacognitiveAssessment:
    """The system's own verdict on how well it reasoned about a task.

    Attributes:
        overall_confidence: Confidence in the answer (0.0–1.0).
        evidence_sufficiency: How adequate the supporting evidence is
            (0.0–1.0).
        knowledge_gaps: Gaps in knowledge that were identified.
        reasoning_quality: Quality score of the reasoning chain.
        should_seek_more: Whether more information should be gathered
            before answering.
        head_agreement: Degree to which heads agree (0.0–1.0).
        uncertainty_sources: Descriptions of where uncertainty originates.
        recommendations: Suggested next actions.
    """

    overall_confidence: float = 0.5
    evidence_sufficiency: float = 0.5
    # default_factory gives every assessment its own lists.
    knowledge_gaps: list[KnowledgeGap] = field(default_factory=list)
    reasoning_quality: float = 0.5
    should_seek_more: bool = False
    head_agreement: float = 0.5
    uncertainty_sources: list[str] = field(default_factory=list)
    recommendations: list[str] = field(default_factory=list)
 def assess_head_outputs(
    outputs: list[HeadOutput],
    user_prompt: str = "",
 ) -> MetacognitiveAssessment:
    """Score the quality of a set of head outputs.

    Combines four signals: mean claim confidence, cross-head agreement,
    evidence per claim, and detected knowledge gaps.  From these it
    derives a reasoning-quality score, an overall confidence, a list of
    uncertainty sources, and recommendations.

    Args:
        outputs: Outputs from the content heads.
        user_prompt: Original user prompt, used for knowledge-gap detection.

    Returns:
        The assessment.  With no outputs, a zero-confidence assessment
        that recommends running the head pipeline first.
    """
    if not outputs:
        return MetacognitiveAssessment(
            overall_confidence=0.0,
            evidence_sufficiency=0.0,
            should_seek_more=True,
            uncertainty_sources=["No head outputs available"],
            recommendations=["Execute head pipeline before assessment"],
        )
    # One pass gathers per-claim confidences and evidence counts.  A head
    # without claims contributes a single 0.0 confidence sample.
    confidences: list[float] = []
    evidence_counts: list[int] = []
    for out in outputs:
        if not out.claims:
            confidences.append(0.0)
            continue
        for claim in out.claims:
            confidences.append(claim.confidence)
            evidence_counts.append(len(claim.evidence))
    avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
    avg_evidence = sum(evidence_counts) / max(len(evidence_counts), 1)
    # Three pieces of evidence per claim counts as fully sufficient.
    evidence_sufficiency = min(1.0, avg_evidence / 3.0)
    head_agreement = _compute_head_agreement(outputs)
    gaps = _detect_knowledge_gaps(outputs, user_prompt)
    uncertainty_sources: list[str] = []
    if avg_confidence < 0.5:
        uncertainty_sources.append(f"Low average head confidence: {avg_confidence:.2f}")
    if head_agreement < 0.4:
        uncertainty_sources.append(f"Low head agreement: {head_agreement:.2f}")
    if evidence_sufficiency < 0.3:
        uncertainty_sources.append(f"Insufficient evidence: avg {avg_evidence:.1f} per claim")
    if gaps:
        uncertainty_sources.append(f"{len(gaps)} knowledge gap(s) detected")
    conf_variance = _variance(confidences) if len(confidences) > 1 else 0.0
    if conf_variance > 0.1:
        uncertainty_sources.append(
            f"High confidence variance across heads: {conf_variance:.3f}"
        )
    # Weighted blend; each gap costs 0.2 of the last term, floored at zero.
    reasoning_quality = (
        0.4 * avg_confidence
        + 0.3 * head_agreement
        + 0.2 * evidence_sufficiency
        + 0.1 * (1.0 - min(1.0, len(gaps) * 0.2))
    )
    should_seek_more = (
        reasoning_quality < 0.4
        or evidence_sufficiency < 0.3
        or any(g.severity == "high" and g.resolvable for g in gaps)
    )
    recommendations: list[str] = []
    if should_seek_more:
        recommendations.append("Seek additional evidence before finalizing answer")
    if head_agreement < 0.4:
        recommendations.append("Run second-pass with disputed heads for clarification")
    for gap in gaps:
        if gap.resolvable:
            recommendations.append(f"Fill knowledge gap: {gap.description}")
    overall = min(1.0, 0.5 * reasoning_quality + 0.3 * head_agreement + 0.2 * evidence_sufficiency)
    assessment = MetacognitiveAssessment(
        overall_confidence=overall,
        evidence_sufficiency=evidence_sufficiency,
        knowledge_gaps=gaps,
        reasoning_quality=reasoning_quality,
        should_seek_more=should_seek_more,
        head_agreement=head_agreement,
        uncertainty_sources=uncertainty_sources,
        recommendations=recommendations,
    )
    logger.info(
        "Metacognition: assessment complete",
        extra={
            "overall_confidence": overall,
            "reasoning_quality": reasoning_quality,
            "head_agreement": head_agreement,
            "gaps": len(gaps),
            "should_seek_more": should_seek_more,
        },
    )
    return assessment
 def _compute_head_agreement(outputs: list[HeadOutput]) -> float:
    """Measure how much heads agree with each other.
    Uses claim text overlap across heads as a proxy for agreement.
    """
    if len(outputs) < 2:
        return 1.0
    claim_sets: list[set[str]] = []
    for out in outputs:
        words: set[str] = set()
        for c in out.claims:
            words.update(c.claim_text.lower().split())
        claim_sets.append(words)
    agreements: float = 0.0
    comparisons = 0
    for i in range(len(claim_sets)):
        for j in range(i + 1, len(claim_sets)):
            if not claim_sets[i] or not claim_sets[j]:
                continue
            overlap = len(claim_sets[i] & claim_sets[j])
            union = len(claim_sets[i] | claim_sets[j])
            if union > 0:
                agreements += overlap / union
                comparisons += 1
    return agreements / max(comparisons, 1)
 def _detect_knowledge_gaps(
    outputs: list[HeadOutput],
    user_prompt: str,
 ) -> list[KnowledgeGap]:
    """Detect knowledge gaps from head outputs and prompt analysis."""
    gaps: list[KnowledgeGap] = []
    for out in outputs:
        if out.claims:
            avg_claim_conf = sum(c.confidence for c in out.claims) / len(out.claims)
        else:
            avg_claim_conf = 0.0
        if avg_claim_conf < 0.3:
            gaps.append(KnowledgeGap(
                domain=out.head_id.value,
                description=f"Head '{out.head_id.value}' has very low confidence ({avg_claim_conf:.2f})",
                severity="high" if avg_claim_conf < 0.15 else "medium",
                resolvable=True,
            ))
    empty_heads = [o for o in outputs if not o.claims]
    for out in empty_heads:
        gaps.append(KnowledgeGap(
            domain=out.head_id.value,
            description=f"Head '{out.head_id.value}' produced no claims",
            severity="medium",
            resolvable=True,
        ))
    prompt_lower = user_prompt.lower()
    domain_indicators = {
        "legal": ["law", "legal", "court", "statute", "regulation", "compliance"],
        "medical": ["medical", "health", "disease", "treatment", "clinical", "patient"],
        "financial": ["financial", "stock", "market", "investment", "trading", "portfolio"],
        "scientific": ["experiment", "hypothesis", "data", "study", "research", "evidence"],
    }
    for domain, keywords in domain_indicators.items():
        if any(kw in prompt_lower for kw in keywords):
            head_domains = {o.head_id.value for o in outputs}
            if domain not in head_domains:
                gaps.append(KnowledgeGap(
                    domain=domain,
                    description=f"Prompt references '{domain}' domain but no specialized head covers it",
                    severity="medium",
                    resolvable=False,
                ))
    return gaps
 def _variance(values: list[float]) -> float:
    """Compute variance of a list of floats."""
    if len(values) < 2:
        return 0.0
    mean = sum(values) / len(values)
    return sum((v - mean) ** 2 for v in values) / len(values)
--- a/fusionagi/schemas/audit.py
+++ b/fusionagi/schemas/audit.py
@@ -38,6 +38,8 @@ class AuditEventType(str, Enum):
    ADVISORY = "advisory"
    SELF_IMPROVEMENT = "self_improvement"
    ETHICAL_LEARNING = "ethical_learning"
    CHOICE = "choice"
    CONSEQUENCE = "consequence"
    OTHER = "other"
--- a/fusionagi/self_improvement/loop.py
+++ b/fusionagi/self_improvement/loop.py
@@ -1,9 +1,20 @@
-"""AGI loop: wires self-correction, auto-recommend, and auto-training to events."""
+"""AGI loop: wires self-correction, auto-training, adaptive ethics, and
 consequence tracking to the event bus.
 Choice → Consequence → Learning:
 - Every task failure/success is recorded as a consequence of the choices made.
 - Consequences feed into AdaptiveEthics for learned ethical growth.
 - The ConsequenceEngine tracks risk/reward patterns across all actions.
 - Trust is earned through demonstrable learning from outcomes.
 """
 from typing import Any, Callable
 from fusionagi._logger import logger
 from fusionagi.core.event_bus import EventBus
 from fusionagi.governance.adaptive_ethics import AdaptiveEthics
 from fusionagi.governance.audit_log import AuditLog
 from fusionagi.governance.consequence_engine import ConsequenceEngine
 from fusionagi.schemas.recommendation import Recommendation, TrainingSuggestion
 from fusionagi.schemas.task import TaskState
 from fusionagi.self_improvement.correction import (
@@ -17,10 +28,24 @@ from fusionagi.self_improvement.training import AutoTrainer, ReflectiveMemoryLik
 class FusionAGILoop:
-    """
+    """High-level AGI loop with consequence-driven learning.
-    High-level AGI loop: subscribes to task_state_changed and reflection_done,
+
-    runs self-correction on failures, and runs auto-recommend + auto-training
+    Subscribes to task_state_changed and reflection_done events.
-    after reflection. Composes the world's most advanced agentic AGI self-improvement pipeline.
+    Runs self-correction on failures, auto-recommend + auto-training
    after reflection, and feeds all outcomes into the adaptive ethics
    and consequence engines.
    Args:
        event_bus: Event bus for task and reflection events.
        state_manager: State manager for task state and traces.
        orchestrator: Orchestrator for plan and state transitions.
        critic_agent: Critic agent for evaluation.
        reflective_memory: Optional reflective memory for lessons/heuristics.
        audit_log: Optional audit log for full transparency.
        auto_retry_on_failure: Auto-retry failed tasks.
        max_retries_per_task: Max retries per task (``None`` = unlimited).
        on_recommendations: Callback for recommendations.
        on_training_suggestions: Callback for training suggestions.
    """
    def __init__(
@@ -30,26 +55,13 @@ class FusionAGILoop:
        orchestrator: OrchestratorLike,
        critic_agent: CriticLike,
        reflective_memory: ReflectiveMemoryLike | None = None,
        audit_log: AuditLog | None = None,
        *,
        auto_retry_on_failure: bool = False,
-        max_retries_per_task: int = 2,
+        max_retries_per_task: int | None = None,
        on_recommendations: Callable[[list[Recommendation]], None] | None = None,
        on_training_suggestions: Callable[[list[TrainingSuggestion]], None] | None = None,
    ) -> None:
        """
        Initialize the FusionAGI loop.
        Args:
            event_bus: Event bus to subscribe to task_state_changed and reflection_done.
            state_manager: State manager for task state and traces.
            orchestrator: Orchestrator for plan and state transitions.
            critic_agent: Critic agent for evaluate_request -> evaluation_ready.
            reflective_memory: Optional reflective memory for lessons/heuristics.
            auto_retry_on_failure: If True, on FAILED transition prepare_retry automatically.
            max_retries_per_task: Max retries per task when auto_retry_on_failure is True.
            on_recommendations: Optional callback to receive recommendations (e.g. log or UI).
            on_training_suggestions: Optional callback to receive training suggestions.
        """
        self._event_bus = event_bus
        self._state = state_manager
        self._orchestrator = orchestrator
@@ -59,6 +71,10 @@ class FusionAGILoop:
        self._on_recs = on_recommendations
        self._on_training = on_training_suggestions
        self._audit = audit_log or AuditLog()
        self._ethics = AdaptiveEthics(audit_log=self._audit)
        self._consequences = ConsequenceEngine(audit_log=self._audit)
        self._correction = SelfCorrectionLoop(
            state_manager=state_manager,
            orchestrator=orchestrator,
@@ -66,27 +82,85 @@ class FusionAGILoop:
            max_retries_per_task=max_retries_per_task,
        )
        self._recommender = AutoRecommender(reflective_memory=reflective_memory)
-        self._trainer = AutoTrainer(reflective_memory=reflective_memory)
+        self._trainer = AutoTrainer(
            reflective_memory=reflective_memory,
            audit_log=self._audit,
        )
        self._event_bus.subscribe("task_state_changed", self._on_task_state_changed)
        self._event_bus.subscribe("reflection_done", self._on_reflection_done)
-        logger.info("FusionAGILoop: subscribed to task_state_changed and reflection_done")
+        logger.info("FusionAGILoop: subscribed (with consequence + ethics engines)")
    @property
    def ethics(self) -> AdaptiveEthics:
        """Return the adaptive ethics engine owned by this loop.

        This is the ``AdaptiveEthics`` instance stored on ``self._ethics``
        (built in ``__init__`` with the loop's audit log).  The same object is
        returned on every access, not a copy.
        """
        return self._ethics
    @property
    def consequences(self) -> ConsequenceEngine:
        """Return the consequence engine owned by this loop.

        This is the ``ConsequenceEngine`` instance stored on
        ``self._consequences`` (built in ``__init__`` with the loop's audit
        log).  The same object is returned on every access, not a copy.
        """
        return self._consequences
    @property
    def audit_log(self) -> AuditLog:
        """Return the audit log shared by this loop's engines.

        ``__init__`` stores ``audit_log or AuditLog()``, so this is either the
        log passed by the caller or a log created by the loop when none
        (or a falsy one) was supplied.
        """
        return self._audit
    def _on_task_state_changed(self, event_type: str, payload: dict[str, Any]) -> None:
-        """On FAILED, optionally run self-correction and prepare retry."""
+        """On state change, record consequences and optionally retry."""
        try:
            to_state = payload.get("to_state")
            task_id = payload.get("task_id", "")
-            if to_state != TaskState.FAILED.value or not task_id:
+            if not task_id:
                return
-            if self._auto_retry:
+
-                ok, _ = self._correction.suggest_retry(task_id)
+            if to_state == TaskState.FAILED.value:
-                if ok:
+                self._consequences.record_consequence(
-                    self._correction.prepare_retry(task_id)
+                    choice_id=f"task_{task_id}",
-            else:
+                    outcome_positive=False,
-                recs = self._correction.correction_recommendations(task_id)
+                    actual_risk_realized=0.8,
-                if recs and self._on_recs:
+                    actual_reward_gained=0.1,
-                    self._on_recs(recs)
+                    description=f"Task {task_id} failed",
                    cost={"retries_needed": True},
                )
                self._ethics.record_experience(
                    action_type="task_execution",
                    context_summary=f"Task {task_id} execution",
                    advisory_reason="",
                    proceeded=True,
                    outcome_positive=False,
                    task_id=task_id,
                )
                if self._auto_retry:
                    ok, _ = self._correction.suggest_retry(task_id)
                    if ok:
                        self._correction.prepare_retry(task_id)
                else:
                    recs = self._correction.correction_recommendations(task_id)
                    if recs and self._on_recs:
                        self._on_recs(recs)
            elif to_state == TaskState.COMPLETED.value:
                self._consequences.record_consequence(
                    choice_id=f"task_{task_id}",
                    outcome_positive=True,
                    actual_risk_realized=0.1,
                    actual_reward_gained=0.8,
                    description=f"Task {task_id} completed successfully",
                    benefit={"task_completed": True},
                )
                self._ethics.record_experience(
                    action_type="task_execution",
                    context_summary=f"Task {task_id} execution",
                    advisory_reason="",
                    proceeded=True,
                    outcome_positive=True,
                    task_id=task_id,
                )
        except Exception:
            logger.exception(
                "FusionAGILoop: _on_task_state_changed failed (best-effort)",
@@ -94,10 +168,22 @@ class FusionAGILoop:
            )
    def _on_reflection_done(self, event_type: str, payload: dict[str, Any]) -> None:
-        """After reflection, run auto-recommend and auto-training."""
+        """After reflection, run auto-recommend, auto-training, and update ethics."""
        try:
            task_id = payload.get("task_id") or ""
            evaluation = payload.get("evaluation") or {}
            success = evaluation.get("success", False)
            self._ethics.record_experience(
                action_type="reflection_outcome",
                context_summary=f"Reflection on task {task_id}",
                advisory_reason="",
                proceeded=True,
                outcome_positive=success,
                task_id=task_id or None,
            )
            recs = self._recommender.recommend(
                task_id=task_id or None,
                evaluation=evaluation,
@@ -129,10 +215,27 @@ class FusionAGILoop:
        task_id: str,
        evaluation: dict[str, Any],
    ) -> tuple[list[Recommendation], list[TrainingSuggestion]]:
        """Run auto-recommend and auto-training after a reflection.
        Also records the reflection outcome for ethical learning.
        Args:
            task_id: Task that was reflected on.
            evaluation: Critic evaluation dict.
        Returns:
            Tuple of (recommendations, training_suggestions).
        """
-        Run auto-recommend and auto-training after a reflection (e.g. when
+        success = evaluation.get("success", False)
-        not using reflection_done event). Returns (recommendations, training_suggestions).
+        self._ethics.record_experience(
-        """
+            action_type="reflection_outcome",
            context_summary=f"Manual reflection on {task_id}",
            advisory_reason="",
            proceeded=True,
            outcome_positive=success,
            task_id=task_id,
        )
        recs = self._recommender.recommend(
            task_id=task_id,
            evaluation=evaluation,
--- a/fusionagi/verification/init.py
+++ b/fusionagi/verification/init.py
@@ -1,5 +1,17 @@
 from fusionagi.verification.claim_verifier import (
    ClaimVerifier,
    VerificationReport,
    VerificationResult,
 )
 from fusionagi.verification.contradiction import ContradictionDetector
 from fusionagi.verification.outcome import OutcomeVerifier
 from fusionagi.verification.validators import FormalValidators
-__all__ = ["OutcomeVerifier", "ContradictionDetector", "FormalValidators"]
+__all__ = [
    "ClaimVerifier",
    "ContradictionDetector",
    "FormalValidators",
    "OutcomeVerifier",
    "VerificationReport",
    "VerificationResult",
 ]
--- a/fusionagi/verification/claim_verifier.py
+++ b/fusionagi/verification/claim_verifier.py
@@ -0,0 +1,273 @@
 """Claim verification: cross-check claims against known facts and evidence.
 Provides formal verification of claims produced by the reasoning pipeline
 before they reach the final output.  Each claim is checked for:
 - Internal consistency (does it contradict other claims in the same response?)
 - Evidence support (how well-supported is this claim by cited evidence?)
 - Confidence calibration (is the claimed confidence appropriate?)
 - Factual grounding (can the claim be grounded in the semantic graph?)
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from typing import Any, Protocol
 from fusionagi._logger import logger
 from fusionagi.schemas.head import HeadClaim, HeadOutput
 class SemanticGraphLike(Protocol):
    """Structural interface for the semantic graph memory used for grounding.

    Any object with a compatible ``query_units`` method satisfies this
    protocol.  Within this module the method is called with
    ``content_contains`` and ``limit`` only, and only the number of returned
    units is inspected.
    """
    # Parameter semantics are defined by the concrete graph implementation,
    # which is not visible here; presumably ``unit_ids`` filters by id,
    # ``content_contains`` by substring, and ``limit`` caps the result size
    # -- TODO confirm against the real semantic graph.
    def query_units(
        self,
        unit_ids: list[str] | None = None,
        content_contains: str | None = None,
        limit: int = 50,
    ) -> list[Any]: ...
@dataclass
 class VerificationResult:
    """Outcome of verifying a single claim.

    ``ClaimVerifier._verify_claim`` builds one instance per claim from its
    evidence, calibration, consistency and grounding checks.

    Attributes:
        claim_text: The claim that was verified.
        verified: Whether the claim passed verification; the verifier sets
            this to ``True`` only when ``issues`` is empty.
        confidence_calibrated: Whether the claimed confidence is in line
            with the evidence score.
        evidence_score: Evidence support strength (0.0–1.0).
        consistency_score: Internal consistency with other claims (0.0–1.0).
        grounding_score: Grounding in known facts (0.0–1.0).
        issues: Human-readable descriptions of every problem found.
        overall_score: Composite verification score (0.0–1.0); the verifier
            weights evidence 0.35, consistency 0.25, grounding 0.25 and
            calibration 0.15.
    """
    # Text of the claim this result describes.
    claim_text: str = ""
    # Defaults describe an unchecked claim: nothing flagged yet.
    verified: bool = True
    confidence_calibrated: bool = True
    # 0.5 is the neutral midpoint used when nothing is known.
    evidence_score: float = 0.5
    consistency_score: float = 1.0
    grounding_score: float = 0.5
    # default_factory gives every instance its own list.
    issues: list[str] = field(default_factory=list)
    overall_score: float = 0.5
@dataclass
 class VerificationReport:
    """Aggregated verification report for all claims in a response.

    Produced by ``ClaimVerifier.verify_outputs``.

    Attributes:
        results: Per-claim verification results.
        overall_integrity: Overall response integrity (0.0–1.0); the verifier
            sets it to the mean ``overall_score`` of ``results`` (``0.0`` when
            there were no claims).
        total_claims: Total claims checked.
        verified_count: Number of results whose ``verified`` flag is true.
        flagged_count: Number of results with at least one issue.
        recommendations: Suggested follow-up actions derived from the results.
    """
    # default_factory gives every report its own list.
    results: list[VerificationResult] = field(default_factory=list)
    # 0.5 is the neutral default for a report that was never populated.
    overall_integrity: float = 0.5
    total_claims: int = 0
    verified_count: int = 0
    flagged_count: int = 0
    recommendations: list[str] = field(default_factory=list)
 class ClaimVerifier:
    """Verify claims from head outputs against evidence and known facts.

    Each claim is scored on four axes (evidence support, confidence
    calibration, consistency with the other claims, and grounding in the
    semantic graph) and the scores are blended into one composite value.

    Args:
        semantic_graph: Optional semantic graph for fact grounding.  When
            omitted, grounding falls back to a neutral score.
        min_evidence_for_high_conf: Minimum evidence items expected for
            high-confidence claims (>=0.8).
    """

    # Tokens whose presence marks a claim as negated for the contradiction
    # heuristic.
    _NEGATION_WORDS = frozenset(
        {"not", "no", "never", "none", "cannot", "shouldn't", "won't"}
    )
    # Punctuation stripped from the edges of each token so that "not," or
    # "never." are still recognised; inner apostrophes ("won't") survive.
    _EDGE_PUNCTUATION = ".,;:!?\"'()[]{}"
    # Maximum number of characters of the other claim quoted in an issue.
    _SNIPPET_LENGTH = 60

    def __init__(
        self,
        semantic_graph: SemanticGraphLike | None = None,
        min_evidence_for_high_conf: int = 2,
    ) -> None:
        self._graph = semantic_graph
        self._min_evidence_high = min_evidence_for_high_conf

    def verify_outputs(self, outputs: list[HeadOutput]) -> VerificationReport:
        """Verify all claims across all head outputs.

        Args:
            outputs: Head outputs to verify.

        Returns:
            Comprehensive verification report.  With no claims at all the
            report has ``total_claims == 0`` and ``overall_integrity == 0.0``.
        """
        # Pair every claim with the id of the head that produced it so that
        # contradiction messages can name the source.
        all_claims: list[tuple[HeadClaim, str]] = [
            (claim, out.head_id.value) for out in outputs for claim in out.claims
        ]
        results: list[VerificationResult] = [
            self._verify_claim(claim, head_id, all_claims)
            for claim, head_id in all_claims
        ]

        verified = sum(1 for r in results if r.verified)
        flagged = sum(1 for r in results if r.issues)
        # max(..., 1) avoids dividing by zero when there are no claims.
        overall = sum(r.overall_score for r in results) / max(len(results), 1)

        recommendations: list[str] = []
        if flagged > len(results) * 0.3:
            recommendations.append(
                f"{flagged}/{len(results)} claims flagged — consider second-pass verification"
            )
        uncalibrated = [r for r in results if not r.confidence_calibrated]
        if uncalibrated:
            recommendations.append(
                f"{len(uncalibrated)} claims with miscalibrated confidence"
            )
        low_evidence = [r for r in results if r.evidence_score < 0.3]
        if low_evidence:
            recommendations.append(
                f"{len(low_evidence)} claims lack evidence support"
            )

        report = VerificationReport(
            results=results,
            overall_integrity=overall,
            total_claims=len(results),
            verified_count=verified,
            flagged_count=flagged,
            recommendations=recommendations,
        )
        logger.info(
            "ClaimVerifier: verification complete",
            extra={
                "total": report.total_claims,
                "verified": report.verified_count,
                "flagged": report.flagged_count,
                "integrity": report.overall_integrity,
            },
        )
        return report

    def _verify_claim(
        self,
        claim: HeadClaim,
        head_id: str,
        all_claims: list[tuple[HeadClaim, str]],
    ) -> VerificationResult:
        """Run every check on one claim and combine the scores.

        The checks share one ``issues`` list; a claim counts as verified only
        when none of them recorded an issue.
        """
        issues: list[str] = []
        evidence_score = self._check_evidence(claim, issues)
        calibrated = self._check_calibration(claim, evidence_score, issues)
        consistency_score = self._check_consistency(claim, head_id, all_claims, issues)
        grounding_score = self._check_grounding(claim, issues)
        # Weights sum to 1.0; a miscalibrated claim keeps half of the
        # calibration share.
        overall = (
            0.35 * evidence_score
            + 0.25 * consistency_score
            + 0.25 * grounding_score
            + 0.15 * (1.0 if calibrated else 0.5)
        )
        return VerificationResult(
            claim_text=claim.claim_text,
            verified=len(issues) == 0,
            confidence_calibrated=calibrated,
            evidence_score=evidence_score,
            consistency_score=consistency_score,
            grounding_score=grounding_score,
            issues=issues,
            overall_score=overall,
        )

    def _check_evidence(self, claim: HeadClaim, issues: list[str]) -> float:
        """Score how well a claim is supported by cited evidence.

        Returns ``0.1`` for a claim without evidence; otherwise the score
        grows linearly and saturates at three evidence items.  High-confidence
        claims with too few items are penalised and flagged in ``issues``.
        """
        if not claim.evidence:
            issues.append("No evidence cited")
            return 0.1
        score = min(1.0, len(claim.evidence) / 3.0)
        if claim.confidence >= 0.8 and len(claim.evidence) < self._min_evidence_high:
            issues.append(
                f"High confidence ({claim.confidence:.2f}) with only "
                f"{len(claim.evidence)} evidence item(s)"
            )
            score *= 0.7
        return score

    def _check_calibration(
        self,
        claim: HeadClaim,
        evidence_score: float,
        issues: list[str],
    ) -> bool:
        """Return whether confidence is plausible given the evidence score.

        A failed check appends an explanation to ``issues`` and returns
        ``False``.
        """
        if claim.confidence >= 0.9 and evidence_score < 0.3:
            issues.append(
                f"Confidence {claim.confidence:.2f} not supported by evidence "
                f"(evidence score: {evidence_score:.2f})"
            )
            return False
        if claim.confidence >= 0.8 and evidence_score < 0.2:
            issues.append("Very high confidence with minimal evidence")
            return False
        return True

    def _tokenize(self, text: str) -> set[str]:
        """Split ``text`` into lower-cased words without edge punctuation."""
        stripped = (
            word.strip(self._EDGE_PUNCTUATION) for word in text.lower().split()
        )
        # Tokens made only of punctuation collapse to "" and are dropped.
        return {word for word in stripped if word}

    def _check_consistency(
        self,
        claim: HeadClaim,
        head_id: str,
        all_claims: list[tuple[HeadClaim, str]],
        issues: list[str],
    ) -> float:
        """Score how consistent this claim is with the other claims.

        Heuristic: two claims are compared when at least 20% of this claim's
        words also occur in the other.  They count as contradictory when the
        overlap exceeds 30% and exactly one of them contains a negation word.

        Args:
            claim: Claim under test.
            head_id: Id of the head that produced ``claim`` (currently unused).
            all_claims: Every ``(claim, head_id)`` pair in the response.
            issues: Receives one message per suspected contradiction.

        Returns:
            ``0.7`` when no other claim was comparable, otherwise the share
            of comparable claims that did not contradict this one.
        """
        claim_words = self._tokenize(claim.claim_text)
        claim_has_neg = not claim_words.isdisjoint(self._NEGATION_WORDS)
        contradictions = 0
        comparisons = 0
        for other_claim, other_head in all_claims:
            if other_claim is claim:
                continue
            other_words = self._tokenize(other_claim.claim_text)
            overlap = len(claim_words & other_words) / max(len(claim_words), 1)
            if overlap < 0.2:
                continue
            comparisons += 1
            other_has_neg = not other_words.isdisjoint(self._NEGATION_WORDS)
            if claim_has_neg != other_has_neg and overlap > 0.3:
                contradictions += 1
                snippet = other_claim.claim_text
                # Only mark the quote as abbreviated when it really was cut.
                if len(snippet) > self._SNIPPET_LENGTH:
                    snippet = snippet[: self._SNIPPET_LENGTH] + "..."
                issues.append(
                    f"Potential contradiction with claim from '{other_head}': "
                    f"'{snippet}'"
                )
        if comparisons == 0:
            return 0.7
        return max(0.0, 1.0 - contradictions / comparisons)

    def _check_grounding(self, claim: HeadClaim, issues: list[str]) -> float:
        """Score how well the claim is grounded in the semantic graph.

        Returns ``0.5`` (neutral) when no graph is configured or the lookup
        fails, ``0.3`` when the graph returns nothing, and otherwise a score
        that rises with the number of matching units.  ``issues`` is accepted
        for symmetry with the other checks and is not modified.
        """
        if self._graph is None:
            return 0.5
        try:
            # The graph is queried with the leading 80 characters of the claim.
            claim_keywords = claim.claim_text[:80]
            units = self._graph.query_units(content_contains=claim_keywords, limit=5)
            if not units:
                return 0.3
            return min(1.0, 0.3 + len(units) * 0.15)
        except Exception:
            # Grounding is best-effort: keep the neutral score, but record the
            # cause so a broken graph backend can be diagnosed.
            logger.debug(
                "ClaimVerifier: grounding check failed (non-fatal)",
                exc_info=True,
            )
            return 0.5
--- a/fusionagi/world_model/init.py
+++ b/fusionagi/world_model/init.py
@@ -1,6 +1,11 @@
-"""World model and simulation for AGI."""
+"""World model and simulation for AGI.
 Provides causal state-transition prediction from learned execution history,
 rollout simulation, and uncertainty estimation.
 """
 from fusionagi.world_model.base import SimpleWorldModel, WorldModel
 from fusionagi.world_model.causal import CausalWorldModel
 from fusionagi.world_model.rollout import run_rollout
-__all__ = ["WorldModel", "SimpleWorldModel", "run_rollout"]
+__all__ = ["CausalWorldModel", "SimpleWorldModel", "WorldModel", "run_rollout"]
--- a/fusionagi/world_model/causal.py
+++ b/fusionagi/world_model/causal.py
@@ -0,0 +1,300 @@
 """Causal world model: learns state-transition patterns from execution history.
 Unlike ``SimpleWorldModel`` (which returns unchanged state), the causal
 world model builds a library of observed action→effect patterns and uses
 them to predict outcomes of planned actions before execution.
 The model learns from every executed step:
 - Records (state_before, action, action_args) → state_after transitions
 - Groups patterns by action type for efficient lookup
 - Predicts confidence based on how many similar transitions it has observed
 - Maintains uncertainty estimates that decrease with more evidence
 """
 from __future__ import annotations
 from typing import Any, Protocol
 from fusionagi._logger import logger
 from fusionagi.schemas.audit import AuditEventType
 from fusionagi.schemas.world_model import StateTransition, UncertaintyInfo
 class AuditLogLike(Protocol):
    """Structural interface for the audit log used by the world model.

    Any object with a compatible ``append`` method satisfies this protocol,
    so the world model does not depend on a concrete audit-log class.
    """
    # The returned string is presumably an identifier of the appended entry;
    # the world model ignores it -- TODO confirm against the concrete log.
    def append(
        self,
        event_type: AuditEventType,
        actor: str,
        action: str = "",
        task_id: str | None = None,
        payload: dict[str, Any] | None = None,
        outcome: str = "",
    ) -> str: ...
 class TransitionPattern:
    """Accumulated knowledge about what one action does to the state.

    Attributes:
        action: The action type that triggers this pattern.
        preconditions: Union of the state keys present before each observed
            execution.
        effects: Most recent value seen for every key that the action added
            or changed (key → new_value).
        observation_count: How many times this pattern has been observed.
        success_count: How many of those observations were successful.
        avg_confidence: Confidence recomputed on every update from the
            success rate plus an evidence bonus (not a literal running
            average).
    """

    __slots__ = (
        "action",
        "preconditions",
        "effects",
        "observation_count",
        "success_count",
        "avg_confidence",
    )

    def __init__(self, action: str) -> None:
        self.action = action
        self.preconditions: set[str] = set()
        self.effects: dict[str, Any] = {}
        self.observation_count: int = 0
        self.success_count: int = 0
        # Neutral prior until the first observation arrives.
        self.avg_confidence: float = 0.5

    def update(
        self,
        from_state: dict[str, Any],
        to_state: dict[str, Any],
        success: bool,
    ) -> None:
        """Fold one observed transition into the pattern.

        Args:
            from_state: State before the action ran.
            to_state: State after the action ran.
            success: Whether the action succeeded.
        """
        self.observation_count += 1
        if success:
            self.success_count += 1

        self.preconditions.update(from_state.keys())

        # An effect is any key that is new in to_state or whose value differs
        # from the one it had before the action.
        changed = {
            name: after
            for name, after in to_state.items()
            if name not in from_state or from_state[name] != after
        }
        self.effects.update(changed)

        success_rate = self.success_count / self.observation_count
        # Bonus of 0.02 per observation, capped at 0.4.
        evidence_boost = min(0.4, self.observation_count * 0.02)
        # NOTE(review): because of the constant 0.5 term, a single successful
        # observation already yields 1.0, and the value keeps rising with the
        # observation count even at a 0% success rate -- confirm intended.
        self.avg_confidence = min(1.0, 0.5 * success_rate + 0.5 + evidence_boost)
 class CausalWorldModel:
    """World model that learns causal state-transition patterns.

    Records every executed transition and builds a library of
    action→effect patterns.  When asked to predict, it finds a matching
    pattern and applies the learned effects to the current state.

    Patterns are keyed by the action name plus (up to three of) its argument
    names, so one action may own several patterns.

    Args:
        audit_log: Optional audit log for recording learning events.
        max_patterns_per_action: Max patterns to retain per action type.
            When a new pattern would exceed the cap, the least-observed
            pattern of that action is dropped.  Values below 1 are treated
            as 1.
    """

    def __init__(
        self,
        audit_log: AuditLogLike | None = None,
        max_patterns_per_action: int = 100,
    ) -> None:
        self._patterns: dict[str, TransitionPattern] = {}
        # NOTE(review): the raw history is never trimmed; total_observations
        # is derived from its length.
        self._history: list[StateTransition] = []
        self._audit = audit_log
        self._max_per_action = max_patterns_per_action

    @property
    def total_observations(self) -> int:
        """Total state transitions observed."""
        return len(self._history)

    @property
    def known_actions(self) -> list[str]:
        """Distinct action names the model has observed, in first-seen order."""
        # Several pattern keys can belong to one action, so de-duplicate on
        # the action name rather than exposing the internal keys.
        return list(dict.fromkeys(p.action for p in self._patterns.values()))

    def observe(
        self,
        from_state: dict[str, Any],
        action: str,
        action_args: dict[str, Any],
        to_state: dict[str, Any],
        success: bool = True,
        task_id: str | None = None,
    ) -> None:
        """Record an observed state transition.

        Args:
            from_state: State before the action.
            action: Action name/type.
            action_args: Arguments passed to the action.
            to_state: State after the action.
            success: Whether the action succeeded.
            task_id: Associated task ID.
        """
        # Shallow copies keep later mutation of the caller's dicts from
        # rewriting the recorded history.
        transition = StateTransition(
            from_state=dict(from_state),
            action=action,
            action_args=dict(action_args),
            to_state=dict(to_state),
            confidence=1.0 if success else 0.2,
        )
        self._history.append(transition)

        pattern_key = self._pattern_key(action, action_args)
        pattern = self._patterns.get(pattern_key)
        if pattern is None:
            # Make room first so the documented per-action cap holds.
            self._evict_excess_patterns(action)
            pattern = TransitionPattern(action)
            self._patterns[pattern_key] = pattern
        pattern.update(from_state, to_state, success)

        logger.debug(
            "CausalWorldModel: transition observed",
            extra={
                "action": action,
                "success": success,
                "observations": pattern.observation_count,
            },
        )
        # Compare with None explicitly: an audit log object that happens to
        # be falsy (for example an empty container) must still be written to.
        if self._audit is not None:
            self._audit.append(
                AuditEventType.SELF_IMPROVEMENT,
                actor="world_model",
                action="transition_observed",
                task_id=task_id,
                payload={
                    "action_type": action,
                    "success": success,
                    "pattern_observations": pattern.observation_count,
                    "state_changes": len(pattern.effects),
                },
                outcome="learned",
            )

    def predict(
        self,
        state: dict[str, Any],
        action: str,
        action_args: dict[str, Any],
    ) -> StateTransition:
        """Predict the result of an action in the current state.

        Uses learned patterns to predict state changes.  An exact pattern
        (same action and argument names) is preferred; otherwise the
        most-observed pattern of the same action is used.  When no pattern
        exists, the state is returned unchanged with low confidence.

        Args:
            state: Current state.
            action: Action to predict.
            action_args: Arguments for the action.

        Returns:
            Predicted state transition with confidence.
        """
        pattern = self._patterns.get(self._pattern_key(action, action_args))
        if pattern is None:
            pattern = self._find_generic_pattern(action)
        if pattern is None:
            return StateTransition(
                from_state=dict(state),
                action=action,
                action_args=dict(action_args),
                to_state=dict(state),
                confidence=0.3,
            )

        # Learned effects overwrite the matching keys of the current state.
        predicted_state = {**state, **pattern.effects}
        return StateTransition(
            from_state=dict(state),
            action=action,
            action_args=dict(action_args),
            to_state=predicted_state,
            confidence=pattern.avg_confidence,
        )

    def uncertainty(self, state: dict[str, Any], action: str) -> UncertaintyInfo:
        """Return uncertainty and risk assessment for an action.

        Args:
            state: Current state (not used by the current heuristic).
            action: Action to assess.

        Returns:
            Uncertainty info with confidence and risk level.
        """
        matching = [p for p in self._patterns.values() if p.action == action]
        if not matching:
            return UncertaintyInfo(
                confidence=0.3,
                risk_level="high",
                rationale=f"No prior observations for action '{action}'",
            )

        total_obs = sum(p.observation_count for p in matching)
        total_success = sum(p.success_count for p in matching)
        success_rate = total_success / total_obs if total_obs > 0 else 0.5
        avg_conf = sum(p.avg_confidence for p in matching) / len(matching)

        if avg_conf >= 0.8 and success_rate >= 0.8:
            risk = "low"
        elif avg_conf >= 0.5 and success_rate >= 0.5:
            risk = "medium"
        else:
            risk = "high"

        return UncertaintyInfo(
            confidence=avg_conf,
            risk_level=risk,
            rationale=(
                f"Based on {total_obs} observations of '{action}': "
                f"{success_rate:.0%} success rate, {avg_conf:.2f} avg confidence"
            ),
        )

    def get_summary(self) -> dict[str, Any]:
        """Return a summary of the world model's learned knowledge.

        The ``patterns`` entry is keyed by the internal pattern key.
        """
        by_action: dict[str, dict[str, Any]] = {}
        for key, pattern in self._patterns.items():
            by_action[key] = {
                "action": pattern.action,
                "observations": pattern.observation_count,
                "success_rate": (
                    pattern.success_count / pattern.observation_count
                    if pattern.observation_count > 0
                    else 0.0
                ),
                "confidence": pattern.avg_confidence,
                "known_effects": len(pattern.effects),
            }
        return {
            "total_observations": len(self._history),
            "known_patterns": len(self._patterns),
            "patterns": by_action,
        }

    def _pattern_key(self, action: str, action_args: dict[str, Any]) -> str:
        """Generate a pattern key from action and significant args.

        Only argument *names* take part: the first three in sorted order.
        """
        significant = sorted(action_args.keys())[:3]
        return f"{action}:{','.join(significant)}" if significant else action

    def _find_generic_pattern(self, action: str) -> TransitionPattern | None:
        """Find the most-observed pattern by action name alone."""
        matching = [p for p in self._patterns.values() if p.action == action]
        if not matching:
            return None
        return max(matching, key=lambda p: p.observation_count)

    def _evict_excess_patterns(self, action: str) -> None:
        """Drop least-observed patterns of ``action`` to make room for one more."""
        cap = max(1, self._max_per_action)
        keys = [k for k, p in self._patterns.items() if p.action == action]
        while len(keys) >= cap:
            # Ties resolve to the earliest-inserted (oldest) pattern.
            victim = min(keys, key=lambda k: self._patterns[k].observation_count)
            del self._patterns[victim]
            keys.remove(victim)
--- a/tests/test_consequence_engine.py
+++ b/tests/test_consequence_engine.py
@@ -0,0 +1,118 @@
 """Tests for the consequence engine and choice→consequence→learning loop."""
 from fusionagi.governance import Alternative, ConsequenceEngine
 from fusionagi.governance.audit_log import AuditLog
 from fusionagi.schemas.audit import AuditEventType
 class TestConsequenceEngine:
    """Exercise choice recording, consequence tracking, and risk/reward estimates."""

    def test_record_choice(self) -> None:
        """A recorded choice echoes its inputs and bumps the choice counter."""
        engine = ConsequenceEngine()
        recorded = engine.record_choice(
            choice_id="c1",
            actor="planner",
            action_taken="use_tool_x",
            estimated_risk=0.3,
            estimated_reward=0.7,
            rationale="Tool X is the best fit",
        )
        assert recorded.choice_id == "c1"
        assert recorded.estimated_risk == 0.3
        assert engine.total_choices == 1

    def test_record_consequence(self) -> None:
        """A consequence for a known choice is returned and counted."""
        engine = ConsequenceEngine()
        engine.record_choice(choice_id="c1", actor="planner", action_taken="act")
        outcome = engine.record_consequence(
            choice_id="c1",
            outcome_positive=True,
            actual_risk_realized=0.1,
            actual_reward_gained=0.9,
            description="Action succeeded",
        )
        assert outcome is not None
        assert outcome.outcome_positive is True
        assert engine.total_consequences == 1

    def test_consequence_not_found(self) -> None:
        """Recording a consequence for an unknown choice yields ``None``."""
        engine = ConsequenceEngine()
        missing = engine.record_consequence(choice_id="nonexistent", outcome_positive=True)
        assert missing is None

    def test_surprise_factor(self) -> None:
        """An outcome far from the estimate produces a large surprise factor."""
        engine = ConsequenceEngine()
        # Estimate: low risk / high reward ...
        engine.record_choice(
            choice_id="c1",
            actor="exec",
            action_taken="risky_op",
            estimated_risk=0.1,
            estimated_reward=0.9,
        )
        # ... reality: high risk / low reward.
        outcome = engine.record_consequence(
            choice_id="c1",
            outcome_positive=False,
            actual_risk_realized=0.9,
            actual_reward_gained=0.1,
        )
        assert outcome is not None
        assert outcome.surprise_factor > 0.5

    def test_estimate_risk_reward_no_history(self) -> None:
        """With no history the estimate reports zero observations and 0.1 confidence."""
        engine = ConsequenceEngine()
        prior = engine.estimate_risk_reward("unknown_action")
        assert prior["observations"] == 0
        assert prior["confidence"] == 0.1

    def test_estimate_risk_reward_with_history(self) -> None:
        """Five identical outcomes drive the estimate to the observed values."""
        engine = ConsequenceEngine()
        for choice_id in [f"c{i}" for i in range(5)]:
            engine.record_choice(choice_id, "exec", "tool_call")
            engine.record_consequence(
                choice_id,
                outcome_positive=True,
                actual_risk_realized=0.2,
                actual_reward_gained=0.8,
            )
        learned = engine.estimate_risk_reward("tool_call")
        assert learned["observations"] == 5
        assert abs(learned["expected_risk"] - 0.2) < 0.01
        assert abs(learned["expected_reward"] - 0.8) < 0.01

    def test_alternatives_recorded(self) -> None:
        """Rejected alternatives are stored on the choice in the order given."""
        engine = ConsequenceEngine()
        too_risky = Alternative(action="alt_a", estimated_risk=0.6, reason_not_chosen="Too risky")
        low_reward = Alternative(action="alt_b", estimated_risk=0.2, reason_not_chosen="Lower reward")
        recorded = engine.record_choice(
            choice_id="c1",
            actor="planner",
            action_taken="chosen_action",
            alternatives=[too_risky, low_reward],
        )
        assert len(recorded.alternatives) == 2
        assert recorded.alternatives[0].reason_not_chosen == "Too risky"

    def test_get_summary(self) -> None:
        """The summary counts choices, consequences, and outcome polarity."""
        engine = ConsequenceEngine()
        scenarios = (
            ("c1", (True, 0.1, 0.9)),
            ("c2", (False, 0.8, 0.1)),
        )
        for choice_id, outcome_args in scenarios:
            engine.record_choice(choice_id, "exec", "action_a")
            engine.record_consequence(choice_id, *outcome_args)
        summary = engine.get_summary()
        expected_counts = {
            "total_choices": 2,
            "total_consequences": 2,
            "positive_outcomes": 1,
            "negative_outcomes": 1,
        }
        for field, count in expected_counts.items():
            assert summary[field] == count

    def test_audit_log_integration(self) -> None:
        """Choices and consequences are each mirrored into the audit log once."""
        log = AuditLog()
        engine = ConsequenceEngine(audit_log=log)
        engine.record_choice("c1", "exec", "action")
        engine.record_consequence("c1", True)
        logged_choices = log.get_by_type(AuditEventType.CHOICE)
        logged_consequences = log.get_by_type(AuditEventType.CONSEQUENCE)
        assert len(logged_choices) == 1
        assert len(logged_consequences) == 1
--- a/tests/test_metacognition.py
+++ b/tests/test_metacognition.py
@@ -0,0 +1,139 @@
 """Tests for metacognition and reasoning interpretability."""
 from fusionagi.reasoning.interpretability import ReasoningTracer
 from fusionagi.reasoning.metacognition import (
    assess_head_outputs,
 )
 from fusionagi.schemas.grounding import Citation
 from fusionagi.schemas.head import HeadClaim, HeadId, HeadOutput
 from fusionagi.verification import ClaimVerifier
 # Shared citation that _make_head_output attaches as evidence to claims with confidence > 0.5.
 _SAMPLE_CITATION = Citation(source_id="src_1", excerpt="supporting evidence")
 def _make_head_output(
    head_id: HeadId,
    claims: list[tuple[str, float]] | None = None,
 ) -> HeadOutput:
    """Build a ``HeadOutput`` for *head_id* from ``(text, confidence)`` pairs.

    When *claims* is ``None`` or empty, a single ``("Test claim", 0.7)`` claim
    is used. Claims with confidence above 0.5 carry the shared sample
    citation as evidence; the rest carry no evidence.
    """
    claim_specs = claims or [("Test claim", 0.7)]
    built_claims = [
        HeadClaim(
            claim_text=claim_text,
            confidence=confidence,
            evidence=[_SAMPLE_CITATION] if confidence > 0.5 else [],
        )
        for claim_text, confidence in claim_specs
    ]
    return HeadOutput(
        head_id=head_id,
        summary=f"Output from {head_id.value}",
        claims=built_claims,
        risks=[],
    )
 class TestMetacognition:
    """Check the metacognitive self-assessment built from head outputs."""

    def test_empty_outputs(self) -> None:
        """No head outputs means zero confidence and a request for more info."""
        result = assess_head_outputs([])
        assert result.overall_confidence == 0.0
        assert result.should_seek_more is True

    def test_high_confidence_outputs(self) -> None:
        """Two confident heads yield a non-trivial overall confidence."""
        logic_out = _make_head_output(HeadId.LOGIC, [("Logic is sound", 0.9)])
        research_out = _make_head_output(HeadId.RESEARCH, [("Data supports this", 0.85)])
        result = assess_head_outputs([logic_out, research_out])
        assert result.overall_confidence > 0.3
        assert isinstance(result.knowledge_gaps, list)

    def test_low_confidence_triggers_seek_more(self) -> None:
        """A low-confidence claim surfaces at least one uncertainty source."""
        shaky = _make_head_output(HeadId.LOGIC, [("Uncertain claim", 0.1)])
        result = assess_head_outputs([shaky])
        assert len(result.uncertainty_sources) > 0

    def test_knowledge_gap_detection(self) -> None:
        """A low-confidence logic claim is reported as a gap in the logic domain."""
        shaky = _make_head_output(HeadId.LOGIC, [("Low conf claim", 0.1)])
        result = assess_head_outputs([shaky])
        reported_domains = [gap.domain for gap in result.knowledge_gaps]
        assert "logic" in reported_domains

    def test_domain_gap_detection(self) -> None:
        """A legal prompt answered only by the logic head reports a legal gap."""
        logic_only = [_make_head_output(HeadId.LOGIC)]
        result = assess_head_outputs(logic_only, user_prompt="legal compliance required")
        reported_domains = [gap.domain for gap in result.knowledge_gaps]
        assert "legal" in reported_domains
 class TestReasoningTracer:
    """Check reasoning-trace capture, lookup, and explanation output."""

    def test_trace_lifecycle(self) -> None:
        """Start, add two steps, finalize, then read the trace back."""
        tracer = ReasoningTracer()
        tracer.start_trace("t1", "task1", "What is 2+2?")
        recorded_steps = (
            ("decomposition", "decomposer", "prompt", "2 units"),
            ("head_dispatch", "orchestrator", "5 heads", "5 outputs"),
        )
        for stage, component, step_input, step_output in recorded_steps:
            tracer.add_step("t1", stage, component, step_input, step_output)
        tracer.finalize_trace("t1", "4", 0.95)

        trace = tracer.get_trace("t1")
        assert trace is not None
        assert len(trace.steps) == 2
        assert trace.final_answer == "4"
        assert trace.overall_confidence == 0.95

    def test_explain(self) -> None:
        """The explanation text mentions both the stage and the final answer."""
        tracer = ReasoningTracer()
        tracer.start_trace("t1", "task1", "question")
        tracer.add_step("t1", "stage1", "comp1", "in", "out")
        tracer.finalize_trace("t1", "answer", 0.8)
        text = tracer.explain("t1")
        for fragment in ("stage1", "answer"):
            assert fragment in text

    def test_trace_not_found(self) -> None:
        """Unknown trace ids give ``None`` from lookup and a 'not found' explanation."""
        tracer = ReasoningTracer()
        assert tracer.get_trace("nonexistent") is None
        assert "not found" in tracer.explain("nonexistent")

    def test_recent_traces(self) -> None:
        """``get_recent_traces`` honours its limit; ``total_traces`` counts all."""
        tracer = ReasoningTracer()
        for index in range(5):
            tracer.start_trace(f"t{index}", f"task{index}", f"prompt{index}")
        recent = tracer.get_recent_traces(limit=3)
        assert len(recent) == 3
        assert tracer.total_traces == 5
 class TestClaimVerifier:
    """Check claim verification across head outputs."""

    def test_verify_no_outputs(self) -> None:
        """Verifying nothing reports zero claims."""
        report = ClaimVerifier().verify_outputs([])
        assert report.total_claims == 0

    def test_verify_well_supported_claims(self) -> None:
        """Two evidence-backed claims are both counted with positive integrity."""
        logic_out = _make_head_output(HeadId.LOGIC, [("Well supported", 0.7)])
        research_out = _make_head_output(HeadId.RESEARCH, [("Also supported", 0.7)])
        checker = ClaimVerifier()
        report = checker.verify_outputs([logic_out, research_out])
        assert report.total_claims == 2
        assert report.overall_integrity > 0.0

    def test_high_conf_no_evidence_flagged(self) -> None:
        """A 0.95-confidence claim with no evidence is flagged with an evidence issue."""
        bold_output = HeadOutput(
            head_id=HeadId.LOGIC,
            summary="Bold output",
            claims=[HeadClaim(claim_text="Bold claim", confidence=0.95, evidence=[])],
            risks=[],
        )
        checker = ClaimVerifier()
        report = checker.verify_outputs([bold_output])
        assert report.flagged_count >= 1
        issue_texts = [
            issue.lower()
            for result in report.results
            for issue in result.issues
        ]
        assert any("evidence" in text for text in issue_texts)
--- a/tests/test_world_model_causal.py
+++ b/tests/test_world_model_causal.py
@@ -0,0 +1,69 @@
 """Tests for the causal world model."""
 from fusionagi.world_model import CausalWorldModel
 class TestCausalWorldModel:
    """Check prediction, uncertainty, and summaries of the learned world model."""

    def test_predict_unknown_action(self) -> None:
        """An unseen action predicts the unchanged state at 0.3 confidence."""
        model = CausalWorldModel()
        prediction = model.predict({"x": 1}, "unknown", {})
        assert prediction.confidence == 0.3
        assert prediction.to_state == {"x": 1}

    def test_observe_and_predict(self) -> None:
        """One observation lifts confidence above the unknown-action baseline."""
        model = CausalWorldModel()
        model.observe(
            from_state={"count": 0},
            action="increment",
            action_args={},
            to_state={"count": 1},
            success=True,
        )
        prediction = model.predict({"count": 5}, "increment", {})
        assert prediction.confidence > 0.3
        assert "count" in prediction.to_state

    def test_multiple_observations_increase_confidence(self) -> None:
        """Ten consistent successes push prediction confidence past 0.7."""
        model = CausalWorldModel()
        for step in range(10):
            model.observe({"s": step}, "act", {}, {"s": step + 1}, success=True)
        prediction = model.predict({"s": 100}, "act", {})
        assert prediction.confidence > 0.7

    def test_uncertainty_no_observations(self) -> None:
        """An unseen action is high risk at 0.3 confidence."""
        model = CausalWorldModel()
        estimate = model.uncertainty({}, "unknown_action")
        assert estimate.risk_level == "high"
        assert estimate.confidence == 0.3

    def test_uncertainty_with_observations(self) -> None:
        """Ten successes lower the risk level and raise confidence above 0.5."""
        model = CausalWorldModel()
        for _ in range(10):
            model.observe({}, "safe_action", {}, {}, success=True)
        estimate = model.uncertainty({}, "safe_action")
        assert estimate.risk_level in ("low", "medium")
        assert estimate.confidence > 0.5

    def test_failed_observations_lower_confidence(self) -> None:
        """Five failures keep the action at high risk."""
        model = CausalWorldModel()
        for _ in range(5):
            model.observe({}, "risky", {}, {}, success=False)
        estimate = model.uncertainty({}, "risky")
        assert estimate.risk_level == "high"

    def test_known_actions(self) -> None:
        """Every observed action name shows up in ``known_actions``."""
        model = CausalWorldModel()
        for action_name in ("act_a", "act_b"):
            model.observe({}, action_name, {}, {}, success=True)
        assert "act_a" in model.known_actions
        assert "act_b" in model.known_actions

    def test_get_summary(self) -> None:
        """The summary counts both observations and at least one pattern."""
        model = CausalWorldModel()
        for observed_result in (1, 2):
            model.observe({}, "x", {}, {"result": observed_result}, success=True)
        overview = model.get_summary()
        assert overview["total_observations"] == 2
        assert overview["known_patterns"] >= 1