Items completed: 1. Merged PR #2 (starlette/httpx deps) 2. Fixed async race condition in multimodal_ui.py 3. Wired TTSAdapter (ElevenLabs, Azure) in API routes 4. Moved super_big_brain.py from core/ to reasoning/ (backward compat shim) 5. Added API authentication middleware (Bearer token via FUSIONAGI_API_KEY) 6. Added async adapter interface (acomplete/acomplete_structured) 7. Migrated FastAPI on_event to lifespan (fixes 20 deprecation warnings) 8. Liquid Neural Networks (continuous-time adaptive weights) 9. Quantum-AI Hybrid compute backend (simulator + optimization) 10. Embodied Intelligence / Robotics bridge (actuator + sensor protocols) 11. Consciousness Engineering (formal self-model with introspection) 12. ASI Scoring Rubric (C/A/L/N/R self-assessment harness) 13. GPU integration tests for TensorFlow backend 14. Multi-stage production Dockerfile 15. Gitea CI/CD pipeline (lint, test matrix, Docker build) 16. API rate limiting middleware (per-IP sliding window) 17. OpenAPI docs cleanup (auth + rate limiting descriptions) 18. Benchmarking suite (decomposition, multi-path, recomposition, e2e) 19. Plugin system (head registry for custom heads) 427 tests passing, 0 ruff errors, 0 mypy errors. Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
107 lines
3.7 KiB
Python
"""Tests for ASI Scoring Rubric evaluation harness."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from fusionagi.evaluation.asi_rubric import (
|
|
ASIRubric,
|
|
CapabilityTier,
|
|
RubricConfig,
|
|
)
|
|
|
|
|
|
class TestRubricConfig:
    """Unit tests for RubricConfig weight validation."""

    def test_default_weights_valid(self) -> None:
        """The out-of-the-box weight configuration must pass validation."""
        default_config = RubricConfig()
        assert default_config.validate()

    def test_invalid_weights(self) -> None:
        """Weights that do not sum to 1.0 must fail validation."""
        bad_config = RubricConfig(cognitive_weight=0.5, agency_weight=0.5, learning_weight=0.5)
        assert not bad_config.validate()
|
|
|
|
|
|
class TestASIRubric:
    """Behavioral tests for the ASIRubric evaluation harness."""

    def test_evaluate_empty(self) -> None:
        """Evaluating with no scores yields a zero composite and the lowest tier."""
        report = ASIRubric().evaluate()
        assert report.composite_score == 0.0
        assert report.tier == CapabilityTier.NARROW_AI

    def test_evaluate_full_scores(self) -> None:
        """A five-dimension evaluation produces a composite strictly between 0 and 100."""
        dimension_scores = {
            "cognitive_scores": {"general_knowledge": 80, "scientific_reasoning": 75},
            "agency_scores": {"task_completion": 70, "planning_depth": 65},
            "learning_scores": {"few_shot_gain": 60},
            "creativity_scores": {"originality": 55},
            "reliability_scores": {"consistency": 85, "calibration": 80},
        }
        report = ASIRubric().evaluate(**dimension_scores)
        assert 0 < report.composite_score < 100
        assert report.tier in CapabilityTier

    def test_tier_mapping(self) -> None:
        """Low scores map to NARROW_AI; uniformly high scores map to AGI_LIKE or ASI."""
        harness = ASIRubric()

        # Low scores -> Narrow AI
        low_report = harness.evaluate(cognitive_scores={"general_knowledge": 20})
        assert low_report.tier == CapabilityTier.NARROW_AI

        # High scores -> AGI-like or above
        high_report = harness.evaluate(
            cognitive_scores={"general_knowledge": 90, "scientific_reasoning": 85},
            agency_scores={"task_completion": 85, "planning_depth": 80},
            learning_scores={"few_shot_gain": 80, "memory_retention": 75},
            creativity_scores={"originality": 80, "cross_domain_synthesis": 75},
            reliability_scores={"consistency": 85, "calibration": 82},
        )
        assert high_report.tier in (CapabilityTier.AGI_LIKE, CapabilityTier.ASI)

    def test_radar_chart_data(self) -> None:
        """Radar-chart data exposes the C (cognitive) and A (agency) axes."""
        report = ASIRubric().evaluate(
            cognitive_scores={"general_knowledge": 70},
            agency_scores={"task_completion": 60},
        )
        axes = report.radar_chart_data()
        assert "C" in axes
        assert "A" in axes

    def test_summary(self) -> None:
        """The human-readable summary mentions the composite score."""
        report = ASIRubric().evaluate(cognitive_scores={"general_knowledge": 50})
        assert "Composite Score" in report.summary()

    def test_trend_tracking(self) -> None:
        """Each evaluate() call appends one entry to the rubric's trend history."""
        harness = ASIRubric()
        harness.evaluate(cognitive_scores={"general_knowledge": 50})
        harness.evaluate(cognitive_scores={"general_knowledge": 60})
        assert len(harness.trend()) == 2

    def test_evaluate_from_self_model(self) -> None:
        """A self-model snapshot evaluates to a non-negative composite score."""
        snapshot = {
            "capabilities": {
                "reasoning": {"success_rate": 0.8, "evidence_count": 10},
                "planning": {"success_rate": 0.7, "evidence_count": 5},
            },
            "emotional_state": {"confidence": 0.75},
        }
        report = ASIRubric().evaluate_from_self_model(snapshot)
        assert report.composite_score >= 0

    def test_invalid_config_raises(self) -> None:
        """Building a rubric whose weights don't sum to 1.0 raises ValueError."""
        bad_weights = dict(
            cognitive_weight=0.9,
            agency_weight=0.9,
            learning_weight=0.9,
            creativity_weight=0.9,
            reliability_weight=0.9,
        )
        with pytest.raises(ValueError, match="sum to 1.0"):
            ASIRubric(config=RubricConfig(**bad_weights))
|