feat: consequence engine, causal world model, metacognition, interpretability, claim verification

Choice → Consequence → Learning: - ConsequenceEngine tracks every decision point with alternatives, risk/reward estimates, and actual outcomes - Consequences feed into AdaptiveEthics for experience-based learning - FusionAGILoop now wires ethics + consequences into task lifecycle Causal World Model: - CausalWorldModel learns state-transition patterns from execution history - Predicts outcomes based on observed action→effect patterns - Uncertainty estimates decrease as more evidence accumulates Metacognition: - assess_head_outputs() evaluates reasoning quality from head outputs - Detects knowledge gaps, measures head agreement, identifies uncertainty - Actively recommends whether to seek more information Interpretability: - ReasoningTracer captures full prompt→answer reasoning traces - Each step records stage, component, input/output, timing - explain() generates human-readable reasoning explanations Claim Verification: - ClaimVerifier cross-checks claims for evidence, consistency, grounding - Flags high-confidence claims lacking evidence support - Detects contradictions between claims from different heads 325 tests passing, 0 ruff errors, 0 mypy errors. Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
2026-04-28 06:25:35 +00:00
parent 039440672e
commit 9a8affae9a
14 changed files with 1961 additions and 39 deletions
--- a/tests/test_consequence_engine.py
+++ b/tests/test_consequence_engine.py
@@ -0,0 +1,118 @@
+"""Tests for the consequence engine and choice→consequence→learning loop."""
+
+from fusionagi.governance import Alternative, ConsequenceEngine
+from fusionagi.governance.audit_log import AuditLog
+from fusionagi.schemas.audit import AuditEventType
+
+
+class TestConsequenceEngine:
+    """Test consequence tracking and risk/reward estimation."""
+
+    def test_record_choice(self) -> None:
+        ce = ConsequenceEngine()
+        choice = ce.record_choice(
+            choice_id="c1",
+            actor="planner",
+            action_taken="use_tool_x",
+            estimated_risk=0.3,
+            estimated_reward=0.7,
+            rationale="Tool X is the best fit",
+        )
+        assert choice.choice_id == "c1"
+        assert choice.estimated_risk == 0.3
+        assert ce.total_choices == 1
+
+    def test_record_consequence(self) -> None:
+        ce = ConsequenceEngine()
+        ce.record_choice(choice_id="c1", actor="planner", action_taken="act")
+        consequence = ce.record_consequence(
+            choice_id="c1",
+            outcome_positive=True,
+            actual_risk_realized=0.1,
+            actual_reward_gained=0.9,
+            description="Action succeeded",
+        )
+        assert consequence is not None
+        assert consequence.outcome_positive is True
+        assert ce.total_consequences == 1
+
+    def test_consequence_not_found(self) -> None:
+        ce = ConsequenceEngine()
+        result = ce.record_consequence(choice_id="nonexistent", outcome_positive=True)
+        assert result is None
+
+    def test_surprise_factor(self) -> None:
+        ce = ConsequenceEngine()
+        ce.record_choice(
+            choice_id="c1",
+            actor="exec",
+            action_taken="risky_op",
+            estimated_risk=0.1,
+            estimated_reward=0.9,
+        )
+        consequence = ce.record_consequence(
+            choice_id="c1",
+            outcome_positive=False,
+            actual_risk_realized=0.9,
+            actual_reward_gained=0.1,
+        )
+        assert consequence is not None
+        assert consequence.surprise_factor > 0.5
+
+    def test_estimate_risk_reward_no_history(self) -> None:
+        ce = ConsequenceEngine()
+        estimate = ce.estimate_risk_reward("unknown_action")
+        assert estimate["observations"] == 0
+        assert estimate["confidence"] == 0.1
+
+    def test_estimate_risk_reward_with_history(self) -> None:
+        ce = ConsequenceEngine()
+        for i in range(5):
+            ce.record_choice(f"c{i}", "exec", "tool_call")
+            ce.record_consequence(
+                f"c{i}",
+                outcome_positive=True,
+                actual_risk_realized=0.2,
+                actual_reward_gained=0.8,
+            )
+        estimate = ce.estimate_risk_reward("tool_call")
+        assert estimate["observations"] == 5
+        assert abs(estimate["expected_risk"] - 0.2) < 0.01
+        assert abs(estimate["expected_reward"] - 0.8) < 0.01
+
+    def test_alternatives_recorded(self) -> None:
+        ce = ConsequenceEngine()
+        alts = [
+            Alternative(action="alt_a", estimated_risk=0.6, reason_not_chosen="Too risky"),
+            Alternative(action="alt_b", estimated_risk=0.2, reason_not_chosen="Lower reward"),
+        ]
+        choice = ce.record_choice(
+            choice_id="c1",
+            actor="planner",
+            action_taken="chosen_action",
+            alternatives=alts,
+        )
+        assert len(choice.alternatives) == 2
+        assert choice.alternatives[0].reason_not_chosen == "Too risky"
+
+    def test_get_summary(self) -> None:
+        ce = ConsequenceEngine()
+        ce.record_choice("c1", "exec", "action_a")
+        ce.record_consequence("c1", True, 0.1, 0.9)
+        ce.record_choice("c2", "exec", "action_a")
+        ce.record_consequence("c2", False, 0.8, 0.1)
+        summary = ce.get_summary()
+        assert summary["total_choices"] == 2
+        assert summary["total_consequences"] == 2
+        assert summary["positive_outcomes"] == 1
+        assert summary["negative_outcomes"] == 1
+
+    def test_audit_log_integration(self) -> None:
+        audit = AuditLog()
+        ce = ConsequenceEngine(audit_log=audit)
+        ce.record_choice("c1", "exec", "action")
+        ce.record_consequence("c1", True)
+        choices = audit.get_by_type(AuditEventType.CHOICE)
+        consequences = audit.get_by_type(AuditEventType.CONSEQUENCE)
+        assert len(choices) == 1
+        assert len(consequences) == 1
--- a/tests/test_metacognition.py
+++ b/tests/test_metacognition.py
@@ -0,0 +1,139 @@
+"""Tests for metacognition and reasoning interpretability."""
+
+from fusionagi.reasoning.interpretability import ReasoningTracer
+from fusionagi.reasoning.metacognition import (
+    assess_head_outputs,
+)
+from fusionagi.schemas.grounding import Citation
+from fusionagi.schemas.head import HeadClaim, HeadId, HeadOutput
+from fusionagi.verification import ClaimVerifier
+
+_SAMPLE_CITATION = Citation(source_id="src_1", excerpt="supporting evidence")
+
+
+def _make_head_output(
+    head_id: HeadId,
+    claims: list[tuple[str, float]] | None = None,
+) -> HeadOutput:
+    """Helper to create a head output with claims."""
+    head_claims = []
+    for text, conf in (claims or [("Test claim", 0.7)]):
+        head_claims.append(HeadClaim(
+            claim_text=text,
+            confidence=conf,
+            evidence=[_SAMPLE_CITATION] if conf > 0.5 else [],
+        ))
+    return HeadOutput(
+        head_id=head_id,
+        summary=f"Output from {head_id.value}",
+        claims=head_claims,
+        risks=[],
+    )
+
+
+class TestMetacognition:
+    """Test metacognitive self-assessment."""
+
+    def test_empty_outputs(self) -> None:
+        assessment = assess_head_outputs([])
+        assert assessment.overall_confidence == 0.0
+        assert assessment.should_seek_more is True
+
+    def test_high_confidence_outputs(self) -> None:
+        outputs = [
+            _make_head_output(HeadId.LOGIC, [("Logic is sound", 0.9)]),
+            _make_head_output(HeadId.RESEARCH, [("Data supports this", 0.85)]),
+        ]
+        assessment = assess_head_outputs(outputs)
+        assert assessment.overall_confidence > 0.3
+        assert isinstance(assessment.knowledge_gaps, list)
+
+    def test_low_confidence_triggers_seek_more(self) -> None:
+        outputs = [
+            _make_head_output(HeadId.LOGIC, [("Uncertain claim", 0.1)]),
+        ]
+        assessment = assess_head_outputs(outputs)
+        assert len(assessment.uncertainty_sources) > 0
+
+    def test_knowledge_gap_detection(self) -> None:
+        outputs = [
+            _make_head_output(HeadId.LOGIC, [("Low conf claim", 0.1)]),
+        ]
+        assessment = assess_head_outputs(outputs)
+        gap_domains = [g.domain for g in assessment.knowledge_gaps]
+        assert "logic" in gap_domains
+
+    def test_domain_gap_detection(self) -> None:
+        outputs = [_make_head_output(HeadId.LOGIC)]
+        assessment = assess_head_outputs(outputs, user_prompt="legal compliance required")
+        gap_domains = [g.domain for g in assessment.knowledge_gaps]
+        assert "legal" in gap_domains
+
+
+class TestReasoningTracer:
+    """Test interpretability tracing."""
+
+    def test_trace_lifecycle(self) -> None:
+        tracer = ReasoningTracer()
+        tracer.start_trace("t1", "task1", "What is 2+2?")
+        tracer.add_step("t1", "decomposition", "decomposer", "prompt", "2 units")
+        tracer.add_step("t1", "head_dispatch", "orchestrator", "5 heads", "5 outputs")
+        tracer.finalize_trace("t1", "4", 0.95)
+        result = tracer.get_trace("t1")
+        assert result is not None
+        assert len(result.steps) == 2
+        assert result.final_answer == "4"
+        assert result.overall_confidence == 0.95
+
+    def test_explain(self) -> None:
+        tracer = ReasoningTracer()
+        tracer.start_trace("t1", "task1", "question")
+        tracer.add_step("t1", "stage1", "comp1", "in", "out")
+        tracer.finalize_trace("t1", "answer", 0.8)
+        explanation = tracer.explain("t1")
+        assert "stage1" in explanation
+        assert "answer" in explanation
+
+    def test_trace_not_found(self) -> None:
+        tracer = ReasoningTracer()
+        assert tracer.get_trace("nonexistent") is None
+        assert "not found" in tracer.explain("nonexistent")
+
+    def test_recent_traces(self) -> None:
+        tracer = ReasoningTracer()
+        for i in range(5):
+            tracer.start_trace(f"t{i}", f"task{i}", f"prompt{i}")
+        assert len(tracer.get_recent_traces(limit=3)) == 3
+        assert tracer.total_traces == 5
+
+
+class TestClaimVerifier:
+    """Test formal claim verification."""
+
+    def test_verify_no_outputs(self) -> None:
+        verifier = ClaimVerifier()
+        report = verifier.verify_outputs([])
+        assert report.total_claims == 0
+
+    def test_verify_well_supported_claims(self) -> None:
+        outputs = [
+            _make_head_output(HeadId.LOGIC, [("Well supported", 0.7)]),
+            _make_head_output(HeadId.RESEARCH, [("Also supported", 0.7)]),
+        ]
+        verifier = ClaimVerifier()
+        report = verifier.verify_outputs(outputs)
+        assert report.total_claims == 2
+        assert report.overall_integrity > 0.0
+
+    def test_high_conf_no_evidence_flagged(self) -> None:
+        claim = HeadClaim(claim_text="Bold claim", confidence=0.95, evidence=[])
+        output = HeadOutput(
+            head_id=HeadId.LOGIC,
+            summary="Bold output",
+            claims=[claim],
+            risks=[],
+        )
+        verifier = ClaimVerifier()
+        report = verifier.verify_outputs([output])
+        assert report.flagged_count >= 1
+        assert any("evidence" in issue.lower() for r in report.results for issue in r.issues)
--- a/tests/test_world_model_causal.py
+++ b/tests/test_world_model_causal.py
@@ -0,0 +1,69 @@
+"""Tests for the causal world model."""
+
+from fusionagi.world_model import CausalWorldModel
+
+
+class TestCausalWorldModel:
+    """Test learned causal state-transition prediction."""
+
+    def test_predict_unknown_action(self) -> None:
+        wm = CausalWorldModel()
+        result = wm.predict({"x": 1}, "unknown", {})
+        assert result.confidence == 0.3
+        assert result.to_state == {"x": 1}
+
+    def test_observe_and_predict(self) -> None:
+        wm = CausalWorldModel()
+        wm.observe(
+            from_state={"count": 0},
+            action="increment",
+            action_args={},
+            to_state={"count": 1},
+            success=True,
+        )
+        result = wm.predict({"count": 5}, "increment", {})
+        assert result.confidence > 0.3
+        assert "count" in result.to_state
+
+    def test_multiple_observations_increase_confidence(self) -> None:
+        wm = CausalWorldModel()
+        for i in range(10):
+            wm.observe({"s": i}, "act", {}, {"s": i + 1}, success=True)
+        result = wm.predict({"s": 100}, "act", {})
+        assert result.confidence > 0.7
+
+    def test_uncertainty_no_observations(self) -> None:
+        wm = CausalWorldModel()
+        info = wm.uncertainty({}, "unknown_action")
+        assert info.risk_level == "high"
+        assert info.confidence == 0.3
+
+    def test_uncertainty_with_observations(self) -> None:
+        wm = CausalWorldModel()
+        for i in range(10):
+            wm.observe({}, "safe_action", {}, {}, success=True)
+        info = wm.uncertainty({}, "safe_action")
+        assert info.risk_level in ("low", "medium")
+        assert info.confidence > 0.5
+
+    def test_failed_observations_lower_confidence(self) -> None:
+        wm = CausalWorldModel()
+        for i in range(5):
+            wm.observe({}, "risky", {}, {}, success=False)
+        info = wm.uncertainty({}, "risky")
+        assert info.risk_level == "high"
+
+    def test_known_actions(self) -> None:
+        wm = CausalWorldModel()
+        wm.observe({}, "act_a", {}, {}, success=True)
+        wm.observe({}, "act_b", {}, {}, success=True)
+        assert "act_a" in wm.known_actions
+        assert "act_b" in wm.known_actions
+
+    def test_get_summary(self) -> None:
+        wm = CausalWorldModel()
+        wm.observe({}, "x", {}, {"result": 1}, success=True)
+        wm.observe({}, "x", {}, {"result": 2}, success=True)
+        summary = wm.get_summary()
+        assert summary["total_observations"] == 2
+        assert summary["known_patterns"] >= 1