"""ASI Scoring Rubric — C/A/L/N/R self-assessment evaluation harness. Implements the 5-dimension capability scoring framework: - Cognitive Capability (C) — raw intelligence across domains - Agency / Autonomy (A) — ability to execute multi-step goals - Learning & Adaptation (L) — ability to improve over time - Creativity / Novelty (N) — original insight generation - Reliability / Robustness (R) — consistency, safety, correctness Tier mapping: 0-40 Narrow AI 40-60 Advanced AI 60-75 Agentic AI 75-90 AGI-like 90+ ASI (theoretical) """ from __future__ import annotations from dataclasses import dataclass, field from enum import Enum from typing import Any from fusionagi._logger import logger class CapabilityTier(str, Enum): """Classification tier based on composite score.""" NARROW_AI = "Narrow AI" ADVANCED_AI = "Advanced AI" AGENTIC_AI = "Agentic AI" AGI_LIKE = "AGI-like" ASI = "ASI" @dataclass class DimensionScore: """Score for a single evaluation dimension.""" name: str abbreviation: str weight: float score: float = 0.0 sub_scores: dict[str, float] = field(default_factory=dict) evidence: list[str] = field(default_factory=list) @property def weighted_score(self) -> float: """Return weight * score.""" return self.weight * self.score @dataclass class RubricConfig: """Configuration for rubric weights (must sum to 1.0).""" cognitive_weight: float = 0.30 agency_weight: float = 0.20 learning_weight: float = 0.15 creativity_weight: float = 0.15 reliability_weight: float = 0.20 def validate(self) -> bool: """Check weights sum to 1.0 (within tolerance).""" total = ( self.cognitive_weight + self.agency_weight + self.learning_weight + self.creativity_weight + self.reliability_weight ) return abs(total - 1.0) < 0.01 @dataclass class RubricResult: """Complete evaluation result.""" dimensions: dict[str, DimensionScore] composite_score: float tier: CapabilityTier config: RubricConfig metadata: dict[str, Any] = field(default_factory=dict) def radar_chart_data(self) -> dict[str, float]: """Return data suitable for radar chart visualization.""" return {d.abbreviation: d.score for d in self.dimensions.values()} def summary(self) -> str: """Human-readable summary.""" lines = [f"Composite Score: {self.composite_score:.1f} — {self.tier.value}"] for dim in self.dimensions.values(): lines.append(f" {dim.abbreviation} ({dim.name}): {dim.score:.1f}") return "\n".join(lines) def _classify_tier(score: float) -> CapabilityTier: """Map composite score to tier.""" if score >= 90: return CapabilityTier.ASI if score >= 75: return CapabilityTier.AGI_LIKE if score >= 60: return CapabilityTier.AGENTIC_AI if score >= 40: return CapabilityTier.ADVANCED_AI return CapabilityTier.NARROW_AI class ASIRubric: """Self-assessment evaluation harness for FusionAGI. Can evaluate the system's own capabilities by running test batteries, analyzing historical performance, and computing dimension scores. """ def __init__(self, config: RubricConfig | None = None) -> None: self._config = config or RubricConfig() if not self._config.validate(): raise ValueError("Rubric weights must sum to 1.0") self._history: list[RubricResult] = [] def evaluate( self, cognitive_scores: dict[str, float] | None = None, agency_scores: dict[str, float] | None = None, learning_scores: dict[str, float] | None = None, creativity_scores: dict[str, float] | None = None, reliability_scores: dict[str, float] | None = None, metadata: dict[str, Any] | None = None, ) -> RubricResult: """Run a full evaluation. Each dimension accepts a dict of sub-metric names to scores (0-100). The dimension score is the weighted average of its sub-metrics. Args: cognitive_scores: Sub-metrics for Cognitive Capability. agency_scores: Sub-metrics for Agency / Autonomy. learning_scores: Sub-metrics for Learning & Adaptation. creativity_scores: Sub-metrics for Creativity / Novelty. reliability_scores: Sub-metrics for Reliability / Robustness. metadata: Additional context. Returns: Complete evaluation result. """ cfg = self._config dimensions: dict[str, DimensionScore] = {} dimensions["cognitive"] = self._score_dimension( "Cognitive Capability", "C", cfg.cognitive_weight, cognitive_scores or {}, { "general_knowledge": 0.25, "scientific_reasoning": 0.25, "hard_reasoning": 0.25, "math_frontier": 0.25, }, ) dimensions["agency"] = self._score_dimension( "Agency / Autonomy", "A", cfg.agency_weight, agency_scores or {}, { "task_completion": 0.30, "planning_depth": 0.25, "tool_use": 0.25, "self_correction": 0.20, }, ) dimensions["learning"] = self._score_dimension( "Learning & Adaptation", "L", cfg.learning_weight, learning_scores or {}, { "few_shot_gain": 0.40, "memory_retention": 0.30, "iterative_improvement": 0.30, }, ) dimensions["creativity"] = self._score_dimension( "Creativity / Novelty", "N", cfg.creativity_weight, creativity_scores or {}, { "originality": 0.40, "cross_domain_synthesis": 0.30, "research_capability": 0.30, }, ) dimensions["reliability"] = self._score_dimension( "Reliability / Robustness", "R", cfg.reliability_weight, reliability_scores or {}, { "consistency": 0.25, "adversarial_resistance": 0.25, "calibration": 0.25, "hallucination_rate": 0.25, }, ) composite = sum(d.weighted_score for d in dimensions.values()) tier = _classify_tier(composite) result = RubricResult( dimensions=dimensions, composite_score=composite, tier=tier, config=cfg, metadata=metadata or {}, ) self._history.append(result) logger.info( "ASI rubric evaluation complete", extra={"composite": composite, "tier": tier.value}, ) return result def evaluate_from_self_model(self, self_model_snapshot: dict[str, Any]) -> RubricResult: """Evaluate using data from the SelfModel introspection. Args: self_model_snapshot: Output from SelfModel.introspect(). Returns: Evaluation result. """ capabilities = self_model_snapshot.get("capabilities", {}) emotional = self_model_snapshot.get("emotional_state", {}) cognitive_scores = {} agency_scores = {} learning_scores = {} creativity_scores = {} reliability_scores = {} for domain, cap_info in capabilities.items(): rate = cap_info.get("success_rate", 0.5) * 100 if domain in ("reasoning", "logic", "math"): cognitive_scores[domain] = rate elif domain in ("planning", "execution", "tool_use"): agency_scores[domain] = rate elif domain in ("adaptation", "learning", "memory"): learning_scores[domain] = rate elif domain in ("creativity", "synthesis", "novelty"): creativity_scores[domain] = rate elif domain in ("consistency", "safety", "accuracy"): reliability_scores[domain] = rate confidence = emotional.get("confidence", 0.5) * 100 reliability_scores.setdefault("calibration", confidence) return self.evaluate( cognitive_scores=cognitive_scores, agency_scores=agency_scores, learning_scores=learning_scores, creativity_scores=creativity_scores, reliability_scores=reliability_scores, metadata={"source": "self_model"}, ) def trend(self) -> list[dict[str, Any]]: """Return historical evaluation trend. Returns: List of past composite scores and tiers. """ return [ { "composite": r.composite_score, "tier": r.tier.value, "radar": r.radar_chart_data(), } for r in self._history ] def _score_dimension( self, name: str, abbreviation: str, weight: float, scores: dict[str, float], sub_weights: dict[str, float], ) -> DimensionScore: """Compute a dimension score from sub-metrics. Args: name: Dimension name. abbreviation: Short code. weight: Dimension weight in composite. scores: Provided sub-metric scores. sub_weights: Default sub-metric weights. Returns: Computed DimensionScore. """ if not scores: return DimensionScore( name=name, abbreviation=abbreviation, weight=weight, score=0.0, sub_scores={}, evidence=["No data provided"], ) total_w = 0.0 total_score = 0.0 for sub_name, sub_weight in sub_weights.items(): if sub_name in scores: total_score += sub_weight * scores[sub_name] total_w += sub_weight if total_w > 0: for sub_name in scores: if sub_name not in sub_weights: equal_w = (1.0 - total_w) / max(1, len(scores) - len(sub_weights)) total_score += equal_w * scores[sub_name] total_w += equal_w dimension_score = total_score / total_w if total_w > 0 else 0.0 dimension_score = max(0.0, min(100.0, dimension_score)) return DimensionScore( name=name, abbreviation=abbreviation, weight=weight, score=dimension_score, sub_scores=dict(scores), evidence=[f"{k}: {v:.1f}" for k, v in scores.items()], ) __all__ = [ "ASIRubric", "CapabilityTier", "DimensionScore", "RubricConfig", "RubricResult", ]