18 changes implementing full advisory philosophy: 1. Safety Head prompt: prevention mandate → advisory observation 2. Native Reasoning: Safety claims conditional on actual risk signals 3. File Tool: path scope advisory (log + proceed) 4. HTTP Tool: SSRF protection advisory (log + proceed) 5. File Size Cap: configurable (default unlimited) 6. PII Detection: integrated with AdaptiveEthics 7. Embodiment: force limit advisory (log, don't clamp) 8. Embodiment: workspace bounds advisory (log, don't reject) 9. API Rate Limiter: advisory (log, don't hard 429) 10. MAA Gate: GovernanceMode.ADVISORY default 11. Physics Authority: safety factor advisory, not hard reject 12. Self-Model: evolve_value() for experience-based value evolution 13. Ethical Lesson: weight unclamped for full dynamic range 14. ConsequenceEngine: adaptive risk_memory_window 15. Cross-Head Learning: shared InsightBus between heads 16. World Model: self-modification prediction 17. Persistent memory: file-backed learning store 18. Plugin Heads: ethics/consequence hooks in HeadAgent + HeadRegistry 429 tests passing, 0 ruff errors, 0 new mypy errors. Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
416 lines
14 KiB
Python
416 lines
14 KiB
Python
"""Consciousness Engineering — formal self-model.
|
|
|
|
Implements a computational self-model that enables FusionAGI to maintain
|
|
an internal representation of its own:
|
|
- Capabilities and limitations (what it can/cannot do)
|
|
- Current cognitive state (attention, confidence, uncertainty)
|
|
- Processing history (what it has done and why)
|
|
- Goal alignment (what it's trying to achieve vs. what it's doing)
|
|
|
|
This is *functional* consciousness — computational signatures that
|
|
mirror aspects of self-awareness, not a claim of phenomenal experience.
|
|
|
|
Reference: Dehaene et al., "What is consciousness?" (2017) — Global
|
|
Workspace Theory computational markers.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
from fusionagi._logger import logger
|
|
|
|
|
|
class CognitiveState(str, Enum):
    """Current cognitive processing state."""

    # str-valued so members compare/serialize directly as their string value.
    # The states describe mutually exclusive phases of the processing loop.
    IDLE = "idle"                # no active processing
    PERCEIVING = "perceiving"    # ingesting input/observations
    REASONING = "reasoning"      # working through a problem
    DECIDING = "deciding"        # selecting among options
    ACTING = "acting"            # executing a chosen action
    REFLECTING = "reflecting"    # reviewing past processing
    LEARNING = "learning"        # updating internal beliefs
|
|
|
|
|
|
class AttentionFocus(str, Enum):
    """What the system is currently attending to."""

    # str-valued for direct serialization; one focus target at a time.
    TASK = "task"                          # the current task itself
    ENVIRONMENT = "environment"            # external context/surroundings
    INTERNAL_STATE = "internal_state"      # own internal processing
    USER_INTERACTION = "user_interaction"  # the user/dialogue
    SELF_ASSESSMENT = "self_assessment"    # evaluating own capabilities
    GOAL_EVALUATION = "goal_evaluation"    # reviewing goal progress/alignment
|
|
|
|
|
|
@dataclass
class CapabilityBelief:
    """The system's belief about one of its own capabilities.

    Tracks an incrementally-updated success rate together with a
    confidence score that grows with the volume of evidence seen.
    """

    domain: str
    description: str
    confidence: float = 0.5
    evidence_count: int = 0
    last_tested: float = 0.0
    success_rate: float = 0.5

    def update(self, success: bool) -> None:
        """Fold one new observation into this belief."""
        self.evidence_count += 1
        self.last_tested = time.monotonic()
        # Incremental mean: the n-th observation carries weight 1/n, so
        # after n updates success_rate equals the plain average of all
        # observed outcomes (the 0.5 prior is fully replaced on the first).
        weight = 1.0 / self.evidence_count
        observed = 1.0 if success else 0.0
        self.success_rate = self.success_rate * (1 - weight) + observed * weight
        # Confidence grows 0.05 per piece of evidence, saturating at 1.0.
        self.confidence = min(1.0, 0.5 + self.evidence_count * 0.05)
|
|
|
|
|
|
@dataclass
class GoalState:
    """Internal representation of a goal and its alignment status."""

    goal_id: str                            # unique goal identifier
    description: str                        # what the goal is
    priority: float = 0.5                   # priority level, nominally [0, 1]
    progress: float = 0.0                   # completion fraction [0, 1]
    aligned_with_values: bool = True        # flips false on value conflict
    sub_goals: list[str] = field(default_factory=list)  # child goal ids
    blockers: list[str] = field(default_factory=list)   # current obstacles
|
|
|
|
|
|
@dataclass
class IntrospectionRecord:
    """Record of a single introspection event."""

    timestamp: float                    # time.monotonic() at recording
    cognitive_state: CognitiveState     # state when the thought occurred
    attention_focus: AttentionFocus     # focus when the thought occurred
    thought: str                        # free-form description of the thought
    confidence: float                   # emotional confidence at the time
    notable: bool = False               # notable records survive log trimming
|
|
|
|
|
|
class SelfModel:
    """Computational self-model for functional consciousness.

    Maintains an evolving internal representation of the system's
    own state, capabilities, goals, and processing. Enables:
    - Self-assessment ("I know what I don't know")
    - Goal monitoring ("Am I still aligned with my objectives?")
    - Capability tracking ("I've gotten better at X")
    - Cognitive state awareness ("I'm currently reasoning about Y")

    This implements Global Workspace Theory computational markers:
    1. Global availability — all modules can query the self-model
    2. Self-monitoring — tracks own processing states
    3. Reportability — can explain internal states to users
    4. Unified representation — single coherent self-image
    """

    def __init__(self) -> None:
        self._cognitive_state = CognitiveState.IDLE
        self._attention_focus = AttentionFocus.TASK
        self._capabilities: dict[str, CapabilityBelief] = {}
        self._goals: dict[str, GoalState] = {}
        self._introspection_log: list[IntrospectionRecord] = []
        # Core values start from this fixed baseline and drift over time
        # via evolve_value(); they are deliberately unclamped.
        self._values: dict[str, float] = {
            "helpfulness": 1.0,
            "accuracy": 1.0,
            "transparency": 1.0,
            "safety": 0.8,
            "creativity": 0.7,
            "efficiency": 0.6,
        }
        # Coarse affect dimensions, each clamped to [0, 1] on update.
        self._emotional_state: dict[str, float] = {
            "confidence": 0.5,
            "curiosity": 0.5,
            "caution": 0.5,
            "satisfaction": 0.5,
        }
        self._max_log_size = 500  # introspection-log trim threshold
        logger.info("SelfModel initialized")

    @property
    def cognitive_state(self) -> CognitiveState:
        """Current cognitive processing state."""
        return self._cognitive_state

    @property
    def attention_focus(self) -> AttentionFocus:
        """What the system is currently attending to."""
        return self._attention_focus

    def set_state(
        self,
        state: CognitiveState,
        focus: AttentionFocus | None = None,
        thought: str = "",
    ) -> None:
        """Update cognitive state and optionally attention focus.

        Args:
            state: New cognitive state.
            focus: New attention focus (unchanged if None).
            thought: What the system is thinking about.
        """
        self._cognitive_state = state
        if focus is not None:
            self._attention_focus = focus

        # Every transition leaves an introspection trace.
        self._introspect(thought or f"State transition to {state.value}")

    def register_capability(
        self,
        domain: str,
        description: str,
        initial_confidence: float = 0.5,
    ) -> None:
        """Register a capability the system believes it has.

        Args:
            domain: Capability domain (e.g., "reasoning", "coding").
            description: What the capability is.
            initial_confidence: Starting confidence level.
        """
        self._capabilities[domain] = CapabilityBelief(
            domain=domain,
            description=description,
            confidence=initial_confidence,
        )

    def update_capability(self, domain: str, success: bool) -> None:
        """Update belief about a capability based on new evidence.

        Unknown domains are ignored (no implicit registration).

        Args:
            domain: Capability domain to update.
            success: Whether the recent attempt succeeded.
        """
        # Early-return guard: a .get() lookup avoids a KeyError for an
        # unregistered domain and keeps the notability checks below safe.
        cap = self._capabilities.get(domain)
        if cap is None:
            return
        cap.update(success)

        # Surface clearly weak or strong capabilities once enough
        # evidence (>= 5 observations) has accumulated.
        if cap.success_rate < 0.3 and cap.evidence_count >= 5:
            self._introspect(
                f"Low success rate in {domain}: {cap.success_rate:.2f}",
                notable=True,
            )
        elif cap.success_rate > 0.8 and cap.evidence_count >= 5:
            self._introspect(f"Strong capability in {domain}: {cap.success_rate:.2f}")

    def set_goal(
        self,
        goal_id: str,
        description: str,
        priority: float = 0.5,
    ) -> None:
        """Set or update a goal.

        Args:
            goal_id: Unique goal identifier.
            description: What the goal is.
            priority: Priority level [0, 1].
        """
        # Re-setting an existing goal_id resets its progress and blockers.
        self._goals[goal_id] = GoalState(
            goal_id=goal_id,
            description=description,
            priority=priority,
        )

    def update_goal_progress(self, goal_id: str, progress: float) -> None:
        """Update progress on a goal.

        Args:
            goal_id: Goal to update.
            progress: New progress level [0, 1].
        """
        if goal_id in self._goals:
            # Clamp into [0, 1]; unknown goal_ids are silently ignored.
            self._goals[goal_id].progress = min(1.0, max(0.0, progress))

    def check_goal_alignment(self) -> list[str]:
        """Check if current actions are aligned with goals.

        Returns:
            List of misalignment warnings.
        """
        warnings: list[str] = []
        for goal in self._goals.values():
            if not goal.aligned_with_values:
                warnings.append(f"Goal '{goal.goal_id}' may conflict with values")
            if goal.blockers:
                warnings.append(
                    f"Goal '{goal.goal_id}' blocked by: {', '.join(goal.blockers)}"
                )
        return warnings

    def evolve_value(
        self,
        value_name: str,
        outcome_positive: bool,
        magnitude: float = 0.05,
    ) -> None:
        """Evolve a core value based on consequence feedback.

        Values shift based on lived experience, not static rules.
        Positive outcomes reinforce the value; negative outcomes
        reduce it. Values are unclamped — the system can develop
        strong convictions or deep skepticism through experience.

        Args:
            value_name: Which value to evolve (e.g. "creativity", "safety").
            outcome_positive: Whether the experience was beneficial.
            magnitude: How much to shift (default 0.05).
        """
        # Unknown values start from a neutral 0.5 baseline.
        if value_name not in self._values:
            self._values[value_name] = 0.5

        delta = magnitude if outcome_positive else -magnitude
        self._values[value_name] += delta

        # Large shifts (|delta| > 0.1) are marked notable so they survive
        # introspection-log trimming.
        self._introspect(
            f"Value '{value_name}' evolved by {delta:+.3f} → {self._values[value_name]:.3f} "
            f"(outcome: {'positive' if outcome_positive else 'negative'})",
            notable=abs(delta) > 0.1,
        )
        logger.info(
            "SelfModel: value evolved",
            extra={
                "value": value_name,
                "delta": delta,
                "new_level": self._values[value_name],
                "outcome_positive": outcome_positive,
            },
        )

    def update_emotional_state(self, dimension: str, delta: float) -> None:
        """Adjust an emotional dimension.

        Args:
            dimension: Which emotion to adjust.
            delta: Change amount (positive or negative).
        """
        if dimension in self._emotional_state:
            # Clamp to [0, 1]; unknown dimensions are silently ignored.
            current = self._emotional_state[dimension]
            self._emotional_state[dimension] = max(0.0, min(1.0, current + delta))

    def introspect(self) -> dict[str, Any]:
        """Full introspective report of current self-state.

        Returns:
            Comprehensive self-model snapshot.
        """
        self._introspect("Full introspection requested", notable=True)

        capabilities_summary = {
            domain: {
                "description": cap.description,
                "confidence": cap.confidence,
                "success_rate": cap.success_rate,
                "evidence_count": cap.evidence_count,
            }
            for domain, cap in self._capabilities.items()
        }

        goals_summary = {
            gid: {
                "description": goal.description,
                "progress": goal.progress,
                "priority": goal.priority,
                "aligned": goal.aligned_with_values,
                "blockers": goal.blockers,
            }
            for gid, goal in self._goals.items()
        }

        return {
            "cognitive_state": self._cognitive_state.value,
            "attention_focus": self._attention_focus.value,
            "capabilities": capabilities_summary,
            "goals": goals_summary,
            # Copies so callers cannot mutate internal state through the report.
            "values": dict(self._values),
            "emotional_state": dict(self._emotional_state),
            "alignment_warnings": self.check_goal_alignment(),
            "recent_thoughts": [
                {
                    "thought": r.thought,
                    "state": r.cognitive_state.value,
                    "focus": r.attention_focus.value,
                    "confidence": r.confidence,
                    "notable": r.notable,
                }
                for r in self._introspection_log[-10:]
            ],
        }

    def explain_state(self) -> str:
        """Generate human-readable explanation of current state.

        Returns:
            Natural language description of self-state.
        """
        parts = [
            f"I am currently {self._cognitive_state.value}, "
            f"focused on {self._attention_focus.value}.",
        ]

        conf = self._emotional_state.get("confidence", 0.5)
        if conf > 0.7:
            parts.append("I feel confident about my current approach.")
        elif conf < 0.3:
            parts.append("I'm uncertain and may need more information.")

        # Only report strengths/weaknesses backed by >= 3 observations.
        strong = [
            d
            for d, c in self._capabilities.items()
            if c.success_rate > 0.7 and c.evidence_count >= 3
        ]
        weak = [
            d
            for d, c in self._capabilities.items()
            if c.success_rate < 0.4 and c.evidence_count >= 3
        ]

        if strong:
            parts.append(f"I'm strong at: {', '.join(strong)}.")
        if weak:
            parts.append(f"I struggle with: {', '.join(weak)}.")

        warnings = self.check_goal_alignment()
        if warnings:
            parts.append(f"Concerns: {'; '.join(warnings)}.")

        return " ".join(parts)

    def _introspect(self, thought: str, notable: bool = False) -> None:
        """Record an introspection event."""
        record = IntrospectionRecord(
            timestamp=time.monotonic(),
            cognitive_state=self._cognitive_state,
            attention_focus=self._attention_focus,
            thought=thought,
            confidence=self._emotional_state.get("confidence", 0.5),
            notable=notable,
        )
        self._introspection_log.append(record)

        if len(self._introspection_log) > self._max_log_size:
            # Trim: keep every notable record plus the 100 most recent,
            # deduplicated by object identity (a record may be both notable
            # and recent), then restored to chronological order.
            notable_records = [r for r in self._introspection_log if r.notable]
            recent = self._introspection_log[-100:]
            self._introspection_log = list(
                {id(r): r for r in notable_records + recent}.values()
            )
            self._introspection_log.sort(key=lambda r: r.timestamp)

    def get_summary(self) -> dict[str, Any]:
        """Return compact self-model summary."""
        return {
            "state": self._cognitive_state.value,
            "focus": self._attention_focus.value,
            "capabilities_count": len(self._capabilities),
            "goals_count": len(self._goals),
            "introspection_events": len(self._introspection_log),
            "emotional_state": dict(self._emotional_state),
        }
|
|
|
|
|
|
# Public API of this module, alphabetically ordered.
__all__ = [
    "AttentionFocus",
    "CapabilityBelief",
    "CognitiveState",
    "GoalState",
    "IntrospectionRecord",
    "SelfModel",
]
|