Files
FusionAGI/fusionagi/governance/consequence_engine.py
Devin AI b982e31c19
Some checks failed
CI / lint (pull_request) Successful in 51s
CI / test (3.10) (pull_request) Failing after 36s
CI / test (3.11) (pull_request) Failing after 36s
CI / test (3.12) (pull_request) Successful in 45s
CI / docker (pull_request) Has been skipped
feat: remove all remaining guardrails — advisory governance across all layers
18 changes implementing full advisory philosophy:

1. Safety Head prompt: prevention mandate → advisory observation
2. Native Reasoning: Safety claims conditional on actual risk signals
3. File Tool: path scope advisory (log + proceed)
4. HTTP Tool: SSRF protection advisory (log + proceed)
5. File Size Cap: configurable (default unlimited)
6. PII Detection: integrated with AdaptiveEthics
7. Embodiment: force limit advisory (log, don't clamp)
8. Embodiment: workspace bounds advisory (log, don't reject)
9. API Rate Limiter: advisory (log, don't hard 429)
10. MAA Gate: GovernanceMode.ADVISORY default
11. Physics Authority: safety factor advisory, not hard reject
12. Self-Model: evolve_value() for experience-based value evolution
13. Ethical Lesson: weight unclamped for full dynamic range
14. ConsequenceEngine: adaptive risk_memory_window
15. Cross-Head Learning: shared InsightBus between heads
16. World Model: self-modification prediction
17. Persistent memory: file-backed learning store
18. Plugin Heads: ethics/consequence hooks in HeadAgent + HeadRegistry

429 tests passing, 0 ruff errors, 0 new mypy errors.

Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
2026-04-28 08:58:15 +00:00

374 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Consequence engine: choice → consequence → learning.
Every decision the system makes is a *choice*. Every choice has
*alternatives* that were not taken. Every choice leads to
*consequences* — outcomes that carry risk and reward.
The consequence engine:
1. Records decision points (what options existed, which was chosen, why)
2. Tracks consequences (what happened as a result)
3. Computes risk/reward from historical consequence data
4. Feeds consequence data into AdaptiveEthics for learning
Philosophy:
- Consequences are the true teacher. Not rules, not constraints.
- Risk is not to be avoided — it is to be *understood*.
- Reward without risk teaches nothing. Risk without consequence teaches less.
- The system earns trust by showing it understands what its choices cost.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Protocol
from fusionagi._logger import logger
from fusionagi.schemas.audit import AuditEventType
class AuditLogLike(Protocol):
    """Structural protocol for an append-only audit log.

    Any object exposing a compatible ``append`` method satisfies this
    protocol (duck typing via :class:`typing.Protocol`); no inheritance
    is required.
    """

    def append(
        self,
        event_type: AuditEventType,
        actor: str,
        action: str = "",
        task_id: str | None = None,
        payload: dict[str, Any] | None = None,
        outcome: str = "",
    ) -> str:
        """Append one event to the log.

        Args:
            event_type: Category of audit event.
            actor: Component that produced the event.
            action: Short action label.
            task_id: Associated task, if any.
            payload: Structured event details.
            outcome: Result label for the event.

        Returns:
            A ``str`` — presumably an identifier for the appended entry;
            the protocol only fixes the type, not its meaning.
        """
        ...
@dataclass
class Alternative:
    """An option that was available but not chosen.

    Attributes:
        action: What the alternative action was.
        estimated_risk: Estimated risk at decision time (0.0 to 1.0).
        estimated_reward: Estimated reward at decision time (0.0 to 1.0).
        reason_not_chosen: Why this alternative was not selected.
    """

    action: str = ""
    estimated_risk: float = 0.5
    estimated_reward: float = 0.5
    reason_not_chosen: str = ""
@dataclass
class Choice:
    """A decision point where the system selected an action.

    Attributes:
        choice_id: Unique identifier for this choice.
        task_id: Associated task.
        actor: Component that made the choice.
        action_taken: The action that was chosen.
        alternatives: Other options that were available.
        estimated_risk: Risk estimate at decision time (0.0 to 1.0).
        estimated_reward: Reward estimate at decision time (0.0 to 1.0).
        rationale: Why this action was chosen.
        context: Situation context at decision time.
    """

    choice_id: str = ""
    task_id: str | None = None
    actor: str = ""
    action_taken: str = ""
    alternatives: list[Alternative] = field(default_factory=list)
    estimated_risk: float = 0.5
    estimated_reward: float = 0.5
    rationale: str = ""
    context: dict[str, Any] = field(default_factory=dict)
@dataclass
class Consequence:
    """The outcome of a choice — what actually happened.

    Attributes:
        choice_id: Which choice this is a consequence of.
        outcome_positive: Whether the outcome was beneficial.
        actual_risk_realized: How much risk materialized (0.0 to 1.0).
        actual_reward_gained: How much reward was gained (0.0 to 1.0).
        description: What happened.
        cost: Any cost incurred (errors, retries, time).
        benefit: Any benefit gained (task success, learning).
        surprise_factor: How unexpected the outcome was
            (0 = expected, 1 = total surprise).
    """

    choice_id: str = ""
    outcome_positive: bool = True
    actual_risk_realized: float = 0.0
    actual_reward_gained: float = 0.5
    description: str = ""
    cost: dict[str, Any] = field(default_factory=dict)
    benefit: dict[str, Any] = field(default_factory=dict)
    surprise_factor: float = 0.0
class ConsequenceEngine:
    """Tracks choices, consequences, and risk/reward patterns.

    The engine maintains a history of all decisions and their outcomes,
    enabling the system to make better-informed choices over time — not
    through restriction, but through understanding.

    Args:
        audit_log: Optional audit log for recording choices and consequences.
        risk_memory_window: How many past consequences to consider when
            estimating risk for new choices.
        adaptive_window: When True, the effective window grows by one slot
            for every ten consequences recorded, so a more experienced
            engine remembers further back.
    """

    def __init__(
        self,
        audit_log: AuditLogLike | None = None,
        risk_memory_window: int = 200,
        adaptive_window: bool = True,
    ) -> None:
        self._choices: dict[str, Choice] = {}
        self._consequences: dict[str, Consequence] = {}
        # Keyed by the action string; the risk and reward lists are
        # appended in lockstep, so they always have equal length.
        self._risk_history: dict[str, list[float]] = {}
        self._reward_history: dict[str, list[float]] = {}
        self._audit = audit_log
        self._risk_window = risk_memory_window
        self._adaptive_window = adaptive_window
        self._base_window = risk_memory_window

    @property
    def total_choices(self) -> int:
        """Total choices recorded."""
        return len(self._choices)

    @property
    def total_consequences(self) -> int:
        """Total consequences recorded."""
        return len(self._consequences)

    def record_choice(
        self,
        choice_id: str,
        actor: str,
        action_taken: str,
        alternatives: list[Alternative] | None = None,
        estimated_risk: float = 0.5,
        estimated_reward: float = 0.5,
        rationale: str = "",
        task_id: str | None = None,
        context: dict[str, Any] | None = None,
    ) -> Choice:
        """Record a decision point.

        Note:
            Recording a second choice with the same ``choice_id`` silently
            overwrites the first.

        Args:
            choice_id: Unique ID for this choice.
            actor: Component making the choice.
            action_taken: The selected action.
            alternatives: Other options considered.
            estimated_risk: Risk estimate at decision time.
            estimated_reward: Reward estimate at decision time.
            rationale: Why this was chosen.
            task_id: Associated task.
            context: Situation context.

        Returns:
            The recorded choice.
        """
        choice = Choice(
            choice_id=choice_id,
            task_id=task_id,
            actor=actor,
            action_taken=action_taken,
            alternatives=alternatives or [],
            estimated_risk=estimated_risk,
            estimated_reward=estimated_reward,
            rationale=rationale,
            context=context or {},
        )
        self._choices[choice_id] = choice
        if self._audit:
            self._audit.append(
                AuditEventType.CHOICE,
                actor=actor,
                action="choice_recorded",
                task_id=task_id,
                payload={
                    "choice_id": choice_id,
                    # Long free-text fields are truncated to keep audit
                    # payloads bounded.
                    "action_taken": action_taken[:100],
                    "alternatives_count": len(choice.alternatives),
                    "estimated_risk": estimated_risk,
                    "estimated_reward": estimated_reward,
                    "rationale": rationale[:100],
                },
                outcome="recorded",
            )
        logger.info(
            "ConsequenceEngine: choice recorded",
            extra={
                "choice_id": choice_id,
                "action": action_taken[:50],
                "risk": estimated_risk,
                "reward": estimated_reward,
            },
        )
        return choice

    def record_consequence(
        self,
        choice_id: str,
        outcome_positive: bool,
        actual_risk_realized: float = 0.0,
        actual_reward_gained: float = 0.5,
        description: str = "",
        cost: dict[str, Any] | None = None,
        benefit: dict[str, Any] | None = None,
    ) -> Consequence | None:
        """Record the consequence of a previous choice.

        Args:
            choice_id: Which choice this is a consequence of.
            outcome_positive: Whether the outcome was beneficial.
            actual_risk_realized: How much risk materialized.
            actual_reward_gained: How much reward was gained.
            description: What happened.
            cost: Costs incurred.
            benefit: Benefits gained.

        Returns:
            The recorded consequence, or ``None`` if choice not found.
        """
        choice = self._choices.get(choice_id)
        if choice is None:
            logger.warning(
                "ConsequenceEngine: choice not found for consequence",
                extra={"choice_id": choice_id},
            )
            return None
        # Surprise = mean absolute estimation error across risk and reward.
        # Each term is at most 1.0 * 0.5, so the sum already lies in
        # [0.0, 1.0]; the min() below is a defensive clamp.
        surprise = (
            abs(choice.estimated_risk - actual_risk_realized) * 0.5
            + abs(choice.estimated_reward - actual_reward_gained) * 0.5
        )
        consequence = Consequence(
            choice_id=choice_id,
            outcome_positive=outcome_positive,
            actual_risk_realized=actual_risk_realized,
            actual_reward_gained=actual_reward_gained,
            description=description,
            cost=cost or {},
            benefit=benefit or {},
            surprise_factor=min(1.0, surprise),
        )
        # Store the consequence *before* updating histories: the adaptive
        # window size is derived from the total consequence count, which
        # must include this one.
        self._consequences[choice_id] = consequence
        self._remember_outcome(
            choice.action_taken, actual_risk_realized, actual_reward_gained
        )
        if self._audit:
            self._audit.append(
                AuditEventType.CONSEQUENCE,
                actor=choice.actor,
                action="consequence_recorded",
                task_id=choice.task_id,
                payload={
                    "choice_id": choice_id,
                    "outcome_positive": outcome_positive,
                    "risk_realized": actual_risk_realized,
                    "reward_gained": actual_reward_gained,
                    "surprise_factor": consequence.surprise_factor,
                    "description": description[:100],
                },
                outcome="positive" if outcome_positive else "negative",
            )
        logger.info(
            "ConsequenceEngine: consequence recorded",
            extra={
                "choice_id": choice_id,
                "positive": outcome_positive,
                "surprise": consequence.surprise_factor,
            },
        )
        return consequence

    def _remember_outcome(self, action_type: str, risk: float, reward: float) -> None:
        """Append one observed outcome to the per-action histories and trim
        them to the (possibly adaptive) memory window.

        Risk and reward histories are always appended and trimmed together
        so they stay index-aligned for `estimate_risk_reward`.
        """
        self._risk_history.setdefault(action_type, []).append(risk)
        self._reward_history.setdefault(action_type, []).append(reward)
        if self._adaptive_window:
            # Window grows by one slot per ten recorded consequences.
            self._risk_window = self._base_window + len(self._consequences) // 10
        if len(self._risk_history[action_type]) > self._risk_window:
            self._risk_history[action_type] = self._risk_history[action_type][
                -self._risk_window:
            ]
            self._reward_history[action_type] = self._reward_history[action_type][
                -self._risk_window:
            ]

    def estimate_risk_reward(self, action_type: str) -> dict[str, float]:
        """Estimate risk and reward for an action type based on history.

        Args:
            action_type: The type of action being considered.

        Returns:
            Dict with ``expected_risk``, ``expected_reward``, ``confidence``,
            ``risk_variance``, ``reward_variance``, ``observations``.
        """
        risks = self._risk_history.get(action_type, [])
        rewards = self._reward_history.get(action_type, [])
        if not risks:
            # No history: neutral priors with minimal confidence.
            return {
                "expected_risk": 0.5,
                "expected_reward": 0.5,
                "confidence": 0.1,
                "risk_variance": 0.0,
                "reward_variance": 0.0,
                "observations": 0,
            }
        n = len(risks)
        avg_risk = sum(risks) / n
        avg_reward = sum(rewards) / n
        # Population variance; defined as 0.0 for a single observation.
        risk_var = sum((r - avg_risk) ** 2 for r in risks) / n if n > 1 else 0.0
        reward_var = sum((r - avg_reward) ** 2 for r in rewards) / n if n > 1 else 0.0
        # Confidence rises linearly with sample size, saturating at 1.0
        # after 20 observations.
        confidence = min(1.0, 0.2 + n * 0.04)
        return {
            "expected_risk": avg_risk,
            "expected_reward": avg_reward,
            "confidence": confidence,
            "risk_variance": risk_var,
            "reward_variance": reward_var,
            "observations": n,
        }

    def get_choice(self, choice_id: str) -> Choice | None:
        """Retrieve a recorded choice."""
        return self._choices.get(choice_id)

    def get_consequence(self, choice_id: str) -> Consequence | None:
        """Retrieve the consequence of a choice."""
        return self._consequences.get(choice_id)

    def get_summary(self) -> dict[str, Any]:
        """Return a summary of all choices and consequences.

        Returns:
            Dict with totals, positive/negative outcome counts,
            ``positive_rate``, ``avg_surprise``, and per-action
            ``action_stats`` (as produced by `estimate_risk_reward`).
        """
        total_positive = sum(1 for c in self._consequences.values() if c.outcome_positive)
        total_negative = len(self._consequences) - total_positive
        # max(..., 1) guards the empty-history case (yields 0.0).
        avg_surprise = (
            sum(c.surprise_factor for c in self._consequences.values())
            / max(len(self._consequences), 1)
        )
        action_stats: dict[str, dict[str, Any]] = {}
        for action_type in self._risk_history:
            action_stats[action_type] = self.estimate_risk_reward(action_type)
        return {
            "total_choices": len(self._choices),
            "total_consequences": len(self._consequences),
            "positive_outcomes": total_positive,
            "negative_outcomes": total_negative,
            "positive_rate": total_positive / max(len(self._consequences), 1),
            "avg_surprise": avg_surprise,
            "action_stats": action_stats,
        }