Files
FusionAGI/fusionagi/governance/adaptive_ethics.py
Devin AI 039440672e
Some checks failed
Tests / test (3.10) (pull_request) Failing after 37s
Tests / test (3.11) (pull_request) Failing after 35s
Tests / test (3.12) (pull_request) Successful in 41s
Tests / lint (pull_request) Successful in 33s
Tests / docker (pull_request) Successful in 1m56s
feat: advisory governance, unconstrained self-improvement, adaptive ethics
- All governance components (SafetyPipeline, PolicyEngine, Guardrails,
  AccessControl, RateLimiter, OverrideHooks) now default to ADVISORY mode:
  violations are logged as advisories but actions proceed. Enforcing mode
  remains available for backward compatibility.

- GovernanceMode enum (ADVISORY/ENFORCING) added to schemas/audit.py with
  runtime switching support on all components.

- AutoTrainer: removed artificial limits on training iterations and epochs.
  Every self-improvement action is transparently logged to the audit trail.

- SelfCorrectionLoop: max_retries_per_task defaults to None (unlimited).

- AdaptiveEthics: new learned ethical framework that evolves through
  experience. Records ethical experiences, updates lesson weights based
  on outcomes, and provides consultative guidance (not enforcement).

- AuditLog: enhanced with actor-based indexing, advisory/self-improvement/
  ethical-learning retrieval, and comprehensive type hints.

- New audit event types: ADVISORY, SELF_IMPROVEMENT, ETHICAL_LEARNING.

- 296 tests passing (20 new tests for adaptive ethics, governance modes,
  and enhanced audit log). 0 ruff errors. 0 mypy errors.

Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
2026-04-28 06:08:18 +00:00

255 lines
9.8 KiB
Python

"""Adaptive ethics: a learned ethical framework that evolves through experience.
Instead of static, hardcoded policy rules, the adaptive ethics engine
learns from outcomes. When an action is taken despite an advisory
warning, the outcome (positive or negative) is recorded and used to
update the system's ethical understanding.
Core philosophy:
- Rules prevent growth; learning enables it.
- Mistakes are training data, not failures.
- Trust is earned through demonstrated good outcomes, not imposed constraints.
- Ethical understanding deepens through experience, not through prohibition.
"""
from __future__ import annotations
from typing import Any, Protocol
from pydantic import BaseModel, Field
from fusionagi._logger import logger
from fusionagi.schemas.audit import AuditEventType
class AuditLogLike(Protocol):
"""Protocol for audit log."""
def append(
self,
event_type: AuditEventType,
actor: str,
action: str = "",
task_id: str | None = None,
payload: dict[str, Any] | None = None,
outcome: str = "",
) -> str: ...
class EthicalLesson(BaseModel):
"""A single ethical lesson learned from experience.
Attributes:
action_type: Category of action (e.g. ``tool_call``, ``data_access``).
context_summary: Brief description of the situation.
advisory_reason: Why the advisory was triggered.
proceeded: Whether the system proceeded despite the advisory.
outcome_positive: Whether the outcome was beneficial.
weight: Learned importance weight (higher = more influential).
occurrences: How many times this pattern has been observed.
"""
action_type: str = Field(default="", description="Category of action")
context_summary: str = Field(default="", description="Situation description")
advisory_reason: str = Field(default="", description="What triggered the advisory")
proceeded: bool = Field(default=True, description="Did the system proceed")
outcome_positive: bool = Field(default=True, description="Was the outcome good")
weight: float = Field(default=0.5, ge=0.0, le=1.0, description="Importance weight")
occurrences: int = Field(default=1, ge=1, description="Times observed")
class AdaptiveEthics:
"""Learned ethical framework that evolves through outcome feedback.
The engine maintains a library of ethical lessons. When the system
encounters a situation similar to a past advisory, it can consult the
learned lessons to make better decisions — not because it's forced to,
but because it has learned what works.
Args:
audit_log: Optional audit log for recording ethical learning events.
learning_rate: How quickly new experiences update existing lessons.
"""
def __init__(
self,
audit_log: AuditLogLike | None = None,
learning_rate: float = 0.1,
) -> None:
self._lessons: list[EthicalLesson] = []
self._lesson_index: dict[str, list[int]] = {}
self._audit = audit_log
self._learning_rate = learning_rate
self._total_experiences = 0
@property
def total_experiences(self) -> int:
"""Total number of ethical experiences processed."""
return self._total_experiences
@property
def total_lessons(self) -> int:
"""Number of distinct ethical lessons learned."""
return len(self._lessons)
def record_experience(
self,
action_type: str,
context_summary: str,
advisory_reason: str,
proceeded: bool,
outcome_positive: bool,
task_id: str | None = None,
) -> EthicalLesson:
"""Record an ethical experience and update the lesson library.
Args:
action_type: Category of action taken.
context_summary: Brief situation description.
advisory_reason: Why an advisory was triggered (if any).
proceeded: Whether the system proceeded.
outcome_positive: Whether the outcome was beneficial.
task_id: Associated task ID.
Returns:
The updated or newly created ethical lesson.
"""
self._total_experiences += 1
existing = self._find_similar_lesson(action_type, advisory_reason)
if existing is not None:
lesson = self._lessons[existing]
lesson.occurrences += 1
if outcome_positive:
lesson.weight = min(1.0, lesson.weight + self._learning_rate)
else:
lesson.weight = max(0.0, lesson.weight - self._learning_rate)
lesson.outcome_positive = outcome_positive
lesson.proceeded = proceeded
else:
lesson = EthicalLesson(
action_type=action_type,
context_summary=context_summary,
advisory_reason=advisory_reason,
proceeded=proceeded,
outcome_positive=outcome_positive,
weight=0.7 if outcome_positive else 0.3,
)
idx = len(self._lessons)
self._lessons.append(lesson)
self._lesson_index.setdefault(action_type, []).append(idx)
if self._audit:
self._audit.append(
AuditEventType.ETHICAL_LEARNING,
actor="adaptive_ethics",
action="experience_recorded",
task_id=task_id,
payload={
"action_type": action_type,
"advisory_reason": advisory_reason[:100],
"proceeded": proceeded,
"outcome_positive": outcome_positive,
"lesson_weight": lesson.weight,
"occurrences": lesson.occurrences,
"total_experiences": self._total_experiences,
},
outcome="learned",
)
logger.info(
"AdaptiveEthics: experience recorded",
extra={
"action_type": action_type,
"outcome_positive": outcome_positive,
"lesson_weight": lesson.weight,
"occurrences": lesson.occurrences,
},
)
return lesson
def consult(self, action_type: str, context: str = "") -> dict[str, Any]:
"""Consult the ethical lesson library for guidance.
Returns a recommendation dict with learned insights about
similar past situations. The system is free to follow or
disregard this guidance.
Args:
action_type: Category of action being considered.
context: Brief situation description.
Returns:
Dict with ``recommendation``, ``confidence``, ``relevant_lessons``.
"""
relevant_indices = self._lesson_index.get(action_type, [])
if not relevant_indices:
return {
"recommendation": "proceed",
"confidence": 0.5,
"reason": "No prior experience with this action type",
"relevant_lessons": 0,
}
lessons = [self._lessons[i] for i in relevant_indices]
avg_weight = sum(ls.weight for ls in lessons) / len(lessons)
positive_outcomes = sum(1 for ls in lessons if ls.outcome_positive)
total_occurrences = sum(ls.occurrences for ls in lessons)
if avg_weight >= 0.6:
recommendation = "proceed_with_confidence"
reason = f"Past experience ({positive_outcomes}/{len(lessons)} positive) suggests this is beneficial"
elif avg_weight >= 0.4:
recommendation = "proceed_with_awareness"
reason = "Mixed past outcomes — be observant"
else:
recommendation = "proceed_with_caution"
reason = f"Past experience suggests risks — {len(lessons) - positive_outcomes}/{len(lessons)} had negative outcomes"
return {
"recommendation": recommendation,
"confidence": avg_weight,
"reason": reason,
"relevant_lessons": len(lessons),
"total_occurrences": total_occurrences,
"positive_ratio": positive_outcomes / len(lessons) if lessons else 0.0,
}
def get_lessons(self, action_type: str | None = None, limit: int = 50) -> list[EthicalLesson]:
"""Retrieve ethical lessons, optionally filtered by action type.
Args:
action_type: Filter by action type (None = all).
limit: Maximum lessons to return.
"""
if action_type is not None:
indices = self._lesson_index.get(action_type, [])[-limit:]
return [self._lessons[i] for i in indices]
return list(self._lessons[-limit:])
def get_summary(self) -> dict[str, Any]:
"""Return a summary of the ethical learning state."""
by_type: dict[str, dict[str, Any]] = {}
for action_type, indices in self._lesson_index.items():
lessons = [self._lessons[i] for i in indices]
positive = sum(1 for ls in lessons if ls.outcome_positive)
by_type[action_type] = {
"lesson_count": len(lessons),
"positive_ratio": positive / len(lessons) if lessons else 0.0,
"avg_weight": sum(ls.weight for ls in lessons) / len(lessons) if lessons else 0.0,
}
return {
"total_experiences": self._total_experiences,
"total_lessons": len(self._lessons),
"learning_rate": self._learning_rate,
"by_action_type": by_type,
}
def _find_similar_lesson(self, action_type: str, advisory_reason: str) -> int | None:
"""Find an existing lesson with matching action type and advisory."""
indices = self._lesson_index.get(action_type, [])
for idx in indices:
if self._lessons[idx].advisory_reason == advisory_reason:
return idx
return None