Files
FusionAGI/fusionagi/governance/guardrails.py
Devin AI 039440672e
Some checks failed
Tests / test (3.10) (pull_request) Failing after 37s
Tests / test (3.11) (pull_request) Failing after 35s
Tests / test (3.12) (pull_request) Successful in 41s
Tests / lint (pull_request) Successful in 33s
Tests / docker (pull_request) Successful in 1m56s
feat: advisory governance, unconstrained self-improvement, adaptive ethics
- All governance components (SafetyPipeline, PolicyEngine, Guardrails,
  AccessControl, RateLimiter, OverrideHooks) now default to ADVISORY mode:
  violations are logged as advisories but actions proceed. Enforcing mode
  remains available for backward compatibility.

- GovernanceMode enum (ADVISORY/ENFORCING) added to schemas/audit.py with
  runtime switching support on all components.

- AutoTrainer: removed artificial limits on training iterations and epochs.
  Every self-improvement action is transparently logged to the audit trail.

- SelfCorrectionLoop: max_retries_per_task defaults to None (unlimited).

- AdaptiveEthics: new learned ethical framework that evolves through
  experience. Records ethical experiences, updates lesson weights based
  on outcomes, and provides consultative guidance (not enforcement).

- AuditLog: enhanced with actor-based indexing, advisory/self-improvement/
  ethical-learning retrieval, and comprehensive type hints.

- New audit event types: ADVISORY, SELF_IMPROVEMENT, ETHICAL_LEARNING.

- 296 tests passing (20 new tests for adaptive ethics, governance modes,
  and enhanced audit log). 0 ruff errors. 0 mypy errors.

Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
2026-04-28 06:08:18 +00:00

97 lines
4.8 KiB
Python

"""Guardrails: pre/post checks for tool calls (block paths, sanitize inputs).
Supports ADVISORY mode where violations are logged but not blocked,
allowing the system to learn from outcomes.
"""
import posixpath
import re
from typing import Any

from pydantic import BaseModel, Field

from fusionagi._logger import logger
from fusionagi.schemas.audit import GovernanceMode
class PreCheckResult(BaseModel):
    """Outcome of a guardrails pre-check for one tool call."""

    # True when the tool call may go ahead.
    allowed: bool = Field(
        ...,
        description="Whether the call is allowed",
    )

    # Arguments the caller should use for the tool call; left as None when
    # the result is constructed without them.
    sanitized_args: dict[str, Any] | None = Field(
        default=None,
        description="Args to use if allowed and sanitized",
    )

    # Human-readable reason attached to the result. Code in this file sets it
    # for denials and also for advisory results (allowed=True, advisory=True).
    error_message: str | None = Field(
        default=None,
        description="Reason for denial if not allowed",
    )

    # Distinguishes "allowed because nothing was flagged" (False) from
    # "flagged, but allowed anyway because of advisory mode" (True).
    advisory: bool = Field(
        default=False,
        description="True if allowed only because of advisory mode",
    )
class Guardrails:
    """Pre/post checks for tool invocations.

    Callers register blocked path prefixes, blocked path regexes and custom
    check callables, then call :meth:`pre_check` before running a tool.

    The governance mode given to the constructor decides what a violation
    does:

    * ``GovernanceMode.ADVISORY``: the violation is logged via
      ``logger.info`` and the call is still allowed; the result carries
      ``advisory=True`` and the reason in ``error_message``.
    * any other mode: the violation is logged and the call is denied.
    """

    def __init__(self, mode: GovernanceMode = GovernanceMode.ADVISORY) -> None:
        """Create an empty rule set operating in ``mode``."""
        self._blocked_paths: list[str] = []
        self._blocked_patterns: list[re.Pattern[str]] = []
        # Each entry is called as ``check(tool_name, args)`` and must return
        # an ``(allowed, result)`` pair; see ``add_check`` for the contract.
        self._custom_checks: list[Any] = []
        self._mode = mode

    def block_path_prefix(self, prefix: str) -> None:
        """Flag (advisory) or block (enforcing) any file path starting with this prefix.

        Trailing ``/`` characters are stripped before the prefix is stored,
        so ``"/etc/"`` and ``"/etc"`` register the same rule. Note that
        ``"/"`` therefore becomes the empty string, which every path starts
        with.
        """
        self._blocked_paths.append(prefix.rstrip("/"))

    def block_path_pattern(self, pattern: str) -> None:
        """Flag (advisory) or block (enforcing) paths matching this regex.

        The pattern is compiled immediately, so an invalid regex raises
        ``re.error`` here rather than during a later pre-check. Matching uses
        ``search``, i.e. the pattern may match anywhere in the path.
        """
        self._blocked_patterns.append(re.compile(pattern))

    def add_check(self, check: Any) -> None:
        """Add a custom pre-check.

        ``check`` is called as ``check(tool_name, args)`` and must return a
        2-tuple ``(allowed, result)``:

        * ``allowed`` falsy: a violation; ``result`` is used as the reason
          when it is a ``str``, otherwise ``"Check failed"`` is reported.
        * ``allowed`` truthy: if ``result`` is a ``dict`` it replaces the
          args passed to later checks and returned to the caller.
        """
        self._custom_checks.append(check)

    def _find_path_violation(self, path: str) -> tuple[str, str] | None:
        """Return ``(kind, reason)`` for the first rule ``path`` trips, else ``None``.

        Prefix rules are evaluated before regex rules, each in registration
        order. Both the raw string and its lexically normalized form are
        tested so that ``..`` / ``.`` / duplicate-slash spellings such as
        ``/ok/../etc/passwd`` cannot step around a rule. Normalization is
        purely textual; symlinks and the working directory are not resolved.
        """
        # normpath("") is ".", which would make an empty path look like the
        # current directory, so leave the empty string alone.
        normalized = posixpath.normpath(path) if path else path
        candidates = (path,) if normalized == path else (path, normalized)

        for prefix in self._blocked_paths:
            # NOTE(review): this is a plain string-prefix test, so "/etc"
            # also flags "/etcetera". The former extra test against
            # ``prefix + "/"`` was already implied by this one.
            if any(candidate.startswith(prefix) for candidate in candidates):
                return "path prefix", "Blocked path prefix: " + prefix

        for pattern in self._blocked_patterns:
            if any(pattern.search(candidate) for candidate in candidates):
                return "path pattern", "Blocked path pattern"

        return None

    def _report_violation(self, tool_name: str, kind: str, reason: str) -> bool:
        """Log a violation and return ``True`` when it must block the call.

        ``kind`` is the short label inserted into the advisory log message
        (``"path prefix"``, ``"path pattern"`` or ``"custom check"``).
        """
        if self._mode == GovernanceMode.ADVISORY:
            logger.info(
                f"Guardrails advisory: {kind} flagged (proceeding)",
                extra={"tool_name": tool_name, "reason": reason, "mode": "advisory"},
            )
            return False
        logger.info("Guardrails pre_check blocked", extra={"tool_name": tool_name, "reason": reason})
        return True

    def pre_check(self, tool_name: str, args: dict[str, Any]) -> PreCheckResult:
        """Run all pre-checks. In advisory mode, log but allow.

        Order of evaluation: the ``"path"`` and ``"file_path"`` string
        arguments are tested against the blocked prefixes and patterns, then
        every custom check runs in registration order.

        In enforcing mode the first violation ends the run with
        ``allowed=False``. In advisory mode each violation is logged and
        evaluation continues, so sanitizing checks registered after a flagged
        rule still get to rewrite the args; the returned result then has
        ``advisory=True`` and ``error_message`` set to the first reason seen.

        Args:
            tool_name: Name of the tool about to be invoked (used for logging
                and passed to custom checks).
            args: Tool arguments. A shallow copy is made, so the caller's
                dict is not rebound; custom checks receive the copy.

        Returns:
            A ``PreCheckResult`` describing whether the call may proceed and
            which args to use.
        """
        args = dict(args)
        first_advisory: str | None = None

        for key in ("path", "file_path"):
            value = args.get(key)
            if not isinstance(value, str):
                continue
            hit = self._find_path_violation(value)
            if hit is None:
                continue
            kind, reason = hit
            if self._report_violation(tool_name, kind, reason):
                return PreCheckResult(allowed=False, error_message=reason)
            if first_advisory is None:
                first_advisory = reason

        for check in self._custom_checks:
            allowed, result = check(tool_name, args)
            if not allowed:
                reason = result if isinstance(result, str) else "Check failed"
                if self._report_violation(tool_name, "custom check", reason):
                    return PreCheckResult(allowed=False, error_message=reason)
                if first_advisory is None:
                    first_advisory = reason
                continue
            if isinstance(result, dict):
                # A passing check may hand back sanitized args for later
                # checks and for the caller.
                args = result

        if first_advisory is not None:
            return PreCheckResult(
                allowed=True,
                sanitized_args=args,
                error_message=first_advisory,
                advisory=True,
            )
        return PreCheckResult(allowed=True, sanitized_args=args)

    def post_check(self, tool_name: str, result: Any) -> tuple[bool, str]:
        """Optional post-check; return (True, "") or (False, error_message).

        This implementation performs no inspection and always returns
        ``(True, "")``.
        """
        return True, ""