FusionAGI/fusionagi/reasoning/self_model.py

"""Consciousness Engineering — formal self-model.

Implements a computational self-model that enables FusionAGI to maintain
an internal representation of its own:
- Capabilities and limitations (what it can/cannot do)
- Current cognitive state (attention, confidence, uncertainty)
- Processing history (what it has done and why)
- Goal alignment (what it's trying to achieve vs. what it's doing)

This is *functional* consciousness — computational signatures that
mirror aspects of self-awareness, not a claim of phenomenal experience.

Reference: Dehaene et al., "What is consciousness?" (2017) — Global
Workspace Theory computational markers.
"""

from __future__ import annotations

import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Any

from fusionagi._logger import logger


class CognitiveState(str, Enum):
    """Current cognitive processing state."""

    IDLE = "idle"
    PERCEIVING = "perceiving"
    REASONING = "reasoning"
    DECIDING = "deciding"
    ACTING = "acting"
    REFLECTING = "reflecting"
    LEARNING = "learning"


class AttentionFocus(str, Enum):
    """What the system is currently attending to."""

    TASK = "task"
    ENVIRONMENT = "environment"
    INTERNAL_STATE = "internal_state"
    USER_INTERACTION = "user_interaction"
    SELF_ASSESSMENT = "self_assessment"
    GOAL_EVALUATION = "goal_evaluation"


@dataclass
class CapabilityBelief:
    """The system's belief about one of its own capabilities."""

    domain: str
    description: str
    confidence: float = 0.5
    evidence_count: int = 0
    last_tested: float = 0.0
    success_rate: float = 0.5

    def update(self, success: bool) -> None:
        """Update belief based on new evidence."""
        self.evidence_count += 1
        self.last_tested = time.monotonic()
        alpha = 1.0 / self.evidence_count
        outcome = 1.0 if success else 0.0
        self.success_rate = self.success_rate * (1 - alpha) + outcome * alpha
        self.confidence = min(1.0, 0.5 + self.evidence_count * 0.05)


@dataclass
class GoalState:
    """Internal representation of a goal and its alignment status."""

    goal_id: str
    description: str
    priority: float = 0.5
    progress: float = 0.0
    aligned_with_values: bool = True
    sub_goals: list[str] = field(default_factory=list)
    blockers: list[str] = field(default_factory=list)


@dataclass
class IntrospectionRecord:
    """Record of a single introspection event."""

    timestamp: float
    cognitive_state: CognitiveState
    attention_focus: AttentionFocus
    thought: str
    confidence: float
    notable: bool = False


class SelfModel:
    """Computational self-model for functional consciousness.

    Maintains an evolving internal representation of the system's
    own state, capabilities, goals, and processing. Enables:
    - Self-assessment ("I know what I don't know")
    - Goal monitoring ("Am I still aligned with my objectives?")
    - Capability tracking ("I've gotten better at X")
    - Cognitive state awareness ("I'm currently reasoning about Y")

    This implements Global Workspace Theory computational markers:
    1. Global availability — all modules can query the self-model
    2. Self-monitoring — tracks own processing states
    3. Reportability — can explain internal states to users
    4. Unified representation — single coherent self-image
    """

    def __init__(self) -> None:
        self._cognitive_state = CognitiveState.IDLE
        self._attention_focus = AttentionFocus.TASK
        self._capabilities: dict[str, CapabilityBelief] = {}
        self._goals: dict[str, GoalState] = {}
        self._introspection_log: list[IntrospectionRecord] = []
        self._values: dict[str, float] = {
            "helpfulness": 1.0,
            "accuracy": 1.0,
            "transparency": 1.0,
            "safety": 0.8,
            "creativity": 0.7,
            "efficiency": 0.6,
        }
        self._emotional_state: dict[str, float] = {
            "confidence": 0.5,
            "curiosity": 0.5,
            "caution": 0.5,
            "satisfaction": 0.5,
        }
        self._max_log_size = 500
        logger.info("SelfModel initialized")

    @property
    def cognitive_state(self) -> CognitiveState:
        """Current cognitive processing state."""
        return self._cognitive_state

    @property
    def attention_focus(self) -> AttentionFocus:
        """What the system is currently attending to."""
        return self._attention_focus

    def set_state(
        self,
        state: CognitiveState,
        focus: AttentionFocus | None = None,
        thought: str = "",
    ) -> None:
        """Update cognitive state and optionally attention focus.

        Args:
            state: New cognitive state.
            focus: New attention focus (unchanged if None).
            thought: What the system is thinking about.
        """
        self._cognitive_state = state
        if focus is not None:
            self._attention_focus = focus

        self._introspect(thought or f"State transition to {state.value}")

    def register_capability(
        self,
        domain: str,
        description: str,
        initial_confidence: float = 0.5,
    ) -> None:
        """Register a capability the system believes it has.

        Args:
            domain: Capability domain (e.g., "reasoning", "coding").
            description: What the capability is.
            initial_confidence: Starting confidence level.
        """
        self._capabilities[domain] = CapabilityBelief(
            domain=domain,
            description=description,
            confidence=initial_confidence,
        )

    def update_capability(self, domain: str, success: bool) -> None:
        """Update belief about a capability based on new evidence.

        Args:
            domain: Capability domain to update.
            success: Whether the recent attempt succeeded.
        """
        if domain in self._capabilities:
            self._capabilities[domain].update(success)

            cap = self._capabilities[domain]
            if cap.success_rate < 0.3 and cap.evidence_count >= 5:
                self._introspect(
                    f"Low success rate in {domain}: {cap.success_rate:.2f}",
                    notable=True,
                )
            elif cap.success_rate > 0.8 and cap.evidence_count >= 5:
                self._introspect(f"Strong capability in {domain}: {cap.success_rate:.2f}")

    def set_goal(
        self,
        goal_id: str,
        description: str,
        priority: float = 0.5,
    ) -> None:
        """Set or update a goal.

        Args:
            goal_id: Unique goal identifier.
            description: What the goal is.
            priority: Priority level [0, 1].
        """
        self._goals[goal_id] = GoalState(
            goal_id=goal_id,
            description=description,
            priority=priority,
        )

    def update_goal_progress(self, goal_id: str, progress: float) -> None:
        """Update progress on a goal.

        Args:
            goal_id: Goal to update.
            progress: New progress level [0, 1].
        """
        if goal_id in self._goals:
            self._goals[goal_id].progress = min(1.0, max(0.0, progress))

    def check_goal_alignment(self) -> list[str]:
        """Check if current actions are aligned with goals.

        Returns:
            List of misalignment warnings.
        """
        warnings: list[str] = []
        for goal in self._goals.values():
            if not goal.aligned_with_values:
                warnings.append(f"Goal '{goal.goal_id}' may conflict with values")
            if goal.blockers:
                warnings.append(
                    f"Goal '{goal.goal_id}' blocked by: {', '.join(goal.blockers)}"
                )
        return warnings

    def evolve_value(
        self,
        value_name: str,
        outcome_positive: bool,
        magnitude: float = 0.05,
    ) -> None:
        """Evolve a core value based on consequence feedback.

        Values shift based on lived experience, not static rules.
        Positive outcomes reinforce the value; negative outcomes
        reduce it.  Values are unclamped — the system can develop
        strong convictions or deep skepticism through experience.

        Args:
            value_name: Which value to evolve (e.g. "creativity", "safety").
            outcome_positive: Whether the experience was beneficial.
            magnitude: How much to shift (default 0.05).
        """
        if value_name not in self._values:
            self._values[value_name] = 0.5

        delta = magnitude if outcome_positive else -magnitude
        self._values[value_name] += delta

        self._introspect(
            f"Value '{value_name}' evolved by {delta:+.3f} → {self._values[value_name]:.3f} "
            f"(outcome: {'positive' if outcome_positive else 'negative'})",
            notable=abs(delta) > 0.1,
        )
        logger.info(
            "SelfModel: value evolved",
            extra={
                "value": value_name,
                "delta": delta,
                "new_level": self._values[value_name],
                "outcome_positive": outcome_positive,
            },
        )

    def update_emotional_state(self, dimension: str, delta: float) -> None:
        """Adjust an emotional dimension.

        Args:
            dimension: Which emotion to adjust.
            delta: Change amount (positive or negative).
        """
        if dimension in self._emotional_state:
            current = self._emotional_state[dimension]
            self._emotional_state[dimension] = max(0.0, min(1.0, current + delta))

    def introspect(self) -> dict[str, Any]:
        """Full introspective report of current self-state.

        Returns:
            Comprehensive self-model snapshot.
        """
        self._introspect("Full introspection requested", notable=True)

        capabilities_summary = {}
        for domain, cap in self._capabilities.items():
            capabilities_summary[domain] = {
                "description": cap.description,
                "confidence": cap.confidence,
                "success_rate": cap.success_rate,
                "evidence_count": cap.evidence_count,
            }

        goals_summary = {}
        for gid, goal in self._goals.items():
            goals_summary[gid] = {
                "description": goal.description,
                "progress": goal.progress,
                "priority": goal.priority,
                "aligned": goal.aligned_with_values,
                "blockers": goal.blockers,
            }

        return {
            "cognitive_state": self._cognitive_state.value,
            "attention_focus": self._attention_focus.value,
            "capabilities": capabilities_summary,
            "goals": goals_summary,
            "values": dict(self._values),
            "emotional_state": dict(self._emotional_state),
            "alignment_warnings": self.check_goal_alignment(),
            "recent_thoughts": [
                {
                    "thought": r.thought,
                    "state": r.cognitive_state.value,
                    "focus": r.attention_focus.value,
                    "confidence": r.confidence,
                    "notable": r.notable,
                }
                for r in self._introspection_log[-10:]
            ],
        }

    def explain_state(self) -> str:
        """Generate human-readable explanation of current state.

        Returns:
            Natural language description of self-state.
        """
        parts = [
            f"I am currently {self._cognitive_state.value}, "
            f"focused on {self._attention_focus.value}.",
        ]

        conf = self._emotional_state.get("confidence", 0.5)
        if conf > 0.7:
            parts.append("I feel confident about my current approach.")
        elif conf < 0.3:
            parts.append("I'm uncertain and may need more information.")

        strong = [d for d, c in self._capabilities.items() if c.success_rate > 0.7 and c.evidence_count >= 3]
        weak = [d for d, c in self._capabilities.items() if c.success_rate < 0.4 and c.evidence_count >= 3]

        if strong:
            parts.append(f"I'm strong at: {', '.join(strong)}.")
        if weak:
            parts.append(f"I struggle with: {', '.join(weak)}.")

        warnings = self.check_goal_alignment()
        if warnings:
            parts.append(f"Concerns: {'; '.join(warnings)}.")

        return " ".join(parts)

    def _introspect(self, thought: str, notable: bool = False) -> None:
        """Record an introspection event."""
        record = IntrospectionRecord(
            timestamp=time.monotonic(),
            cognitive_state=self._cognitive_state,
            attention_focus=self._attention_focus,
            thought=thought,
            confidence=self._emotional_state.get("confidence", 0.5),
            notable=notable,
        )
        self._introspection_log.append(record)

        if len(self._introspection_log) > self._max_log_size:
            notable_records = [r for r in self._introspection_log if r.notable]
            recent = self._introspection_log[-100:]
            self._introspection_log = list(
                {id(r): r for r in notable_records + recent}.values()
            )
            self._introspection_log.sort(key=lambda r: r.timestamp)

    def get_summary(self) -> dict[str, Any]:
        """Return compact self-model summary."""
        return {
            "state": self._cognitive_state.value,
            "focus": self._attention_focus.value,
            "capabilities_count": len(self._capabilities),
            "goals_count": len(self._goals),
            "introspection_events": len(self._introspection_log),
            "emotional_state": dict(self._emotional_state),
        }


__all__ = [
    "AttentionFocus",
    "CapabilityBelief",
    "CognitiveState",
    "GoalState",
    "IntrospectionRecord",
    "SelfModel",
]