Items completed: 1. Merged PR #2 (starlette/httpx deps) 2. Fixed async race condition in multimodal_ui.py 3. Wired TTSAdapter (ElevenLabs, Azure) in API routes 4. Moved super_big_brain.py from core/ to reasoning/ (backward compat shim) 5. Added API authentication middleware (Bearer token via FUSIONAGI_API_KEY) 6. Added async adapter interface (acomplete/acomplete_structured) 7. Migrated FastAPI on_event to lifespan (fixes 20 deprecation warnings) 8. Liquid Neural Networks (continuous-time adaptive weights) 9. Quantum-AI Hybrid compute backend (simulator + optimization) 10. Embodied Intelligence / Robotics bridge (actuator + sensor protocols) 11. Consciousness Engineering (formal self-model with introspection) 12. ASI Scoring Rubric (C/A/L/N/R self-assessment harness) 13. GPU integration tests for TensorFlow backend 14. Multi-stage production Dockerfile 15. Gitea CI/CD pipeline (lint, test matrix, Docker build) 16. API rate limiting middleware (per-IP sliding window) 17. OpenAPI docs cleanup (auth + rate limiting descriptions) 18. Benchmarking suite (decomposition, multi-path, recomposition, e2e) 19. Plugin system (head registry for custom heads) 427 tests passing, 0 ruff errors, 0 mypy errors. Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
232 lines
7.2 KiB
Python
232 lines
7.2 KiB
Python
"""Benchmarking suite — performance baselines for reasoning pipeline latency.
|
|
|
|
Provides repeatable micro-benchmarks for:
|
|
- Decomposition latency
|
|
- Multi-path scoring throughput
|
|
- Consensus engine latency
|
|
- Memory search latency
|
|
- End-to-end Super Big Brain pipeline
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Callable
|
|
|
|
from fusionagi._logger import logger
|
|
|
|
|
|
@dataclass
|
|
class BenchmarkResult:
|
|
"""Result of a single benchmark run."""
|
|
|
|
name: str
|
|
iterations: int
|
|
total_seconds: float
|
|
mean_ms: float
|
|
min_ms: float
|
|
max_ms: float
|
|
std_ms: float
|
|
metadata: dict[str, Any] = field(default_factory=dict)
|
|
|
|
def summary(self) -> str:
|
|
"""Human-readable summary."""
|
|
return (
|
|
f"{self.name}: mean={self.mean_ms:.2f}ms "
|
|
f"min={self.min_ms:.2f}ms max={self.max_ms:.2f}ms "
|
|
f"std={self.std_ms:.2f}ms ({self.iterations} iters)"
|
|
)
|
|
|
|
|
|
def _compute_stats(times: list[float]) -> tuple[float, float, float, float]:
|
|
"""Compute mean, min, max, std from a list of times in seconds."""
|
|
n = len(times)
|
|
if n == 0:
|
|
return 0.0, 0.0, 0.0, 0.0
|
|
times_ms = [t * 1000 for t in times]
|
|
mean = sum(times_ms) / n
|
|
mn = min(times_ms)
|
|
mx = max(times_ms)
|
|
variance = sum((t - mean) ** 2 for t in times_ms) / n
|
|
std = variance ** 0.5
|
|
return mean, mn, mx, std
|
|
|
|
|
|
def run_benchmark(
|
|
name: str,
|
|
fn: Callable[[], Any],
|
|
iterations: int = 100,
|
|
warmup: int = 5,
|
|
metadata: dict[str, Any] | None = None,
|
|
) -> BenchmarkResult:
|
|
"""Run a micro-benchmark.
|
|
|
|
Args:
|
|
name: Benchmark name.
|
|
fn: Function to benchmark (called with no args).
|
|
iterations: Number of timed iterations.
|
|
warmup: Number of warmup iterations (not timed).
|
|
metadata: Additional context.
|
|
|
|
Returns:
|
|
Benchmark result with timing statistics.
|
|
"""
|
|
for _ in range(warmup):
|
|
fn()
|
|
|
|
times: list[float] = []
|
|
total_start = time.perf_counter()
|
|
for _ in range(iterations):
|
|
start = time.perf_counter()
|
|
fn()
|
|
elapsed = time.perf_counter() - start
|
|
times.append(elapsed)
|
|
total_elapsed = time.perf_counter() - total_start
|
|
|
|
mean, mn, mx, std = _compute_stats(times)
|
|
result = BenchmarkResult(
|
|
name=name,
|
|
iterations=iterations,
|
|
total_seconds=total_elapsed,
|
|
mean_ms=mean,
|
|
min_ms=mn,
|
|
max_ms=mx,
|
|
std_ms=std,
|
|
metadata=metadata or {},
|
|
)
|
|
|
|
logger.info("Benchmark complete", extra={"name": name, "mean_ms": mean})
|
|
return result
|
|
|
|
|
|
class BenchmarkSuite:
|
|
"""Collection of benchmarks for the FusionAGI pipeline."""
|
|
|
|
def __init__(self) -> None:
|
|
self._results: list[BenchmarkResult] = []
|
|
|
|
def add_result(self, result: BenchmarkResult) -> None:
|
|
"""Add a benchmark result."""
|
|
self._results.append(result)
|
|
|
|
def run_decomposition_benchmark(self, iterations: int = 50) -> BenchmarkResult:
|
|
"""Benchmark the decomposition pipeline."""
|
|
from fusionagi.reasoning.decomposition import decompose_recursive
|
|
|
|
prompt = (
|
|
"Explain the implications of quantum computing on modern cryptography, "
|
|
"including RSA, elliptic curve, and lattice-based schemes."
|
|
)
|
|
result = run_benchmark(
|
|
"decomposition",
|
|
lambda: decompose_recursive(prompt, max_depth=2),
|
|
iterations=iterations,
|
|
)
|
|
self._results.append(result)
|
|
return result
|
|
|
|
def run_multi_path_benchmark(self, iterations: int = 50) -> BenchmarkResult:
|
|
"""Benchmark multi-path hypothesis scoring."""
|
|
from fusionagi.reasoning.decomposition import decompose_recursive
|
|
from fusionagi.reasoning.multi_path import generate_and_score_parallel
|
|
|
|
prompt = "Evaluate the risk-reward tradeoff of early AGI deployment."
|
|
decomp = decompose_recursive(prompt, max_depth=2)
|
|
hypotheses = [u.content for u in decomp.units[:3] if u.content]
|
|
if not hypotheses:
|
|
hypotheses = ["test hypothesis"]
|
|
|
|
result = run_benchmark(
|
|
"multi_path_scoring",
|
|
lambda: generate_and_score_parallel(hypotheses, decomp.units),
|
|
iterations=iterations,
|
|
)
|
|
self._results.append(result)
|
|
return result
|
|
|
|
def run_recomposition_benchmark(self, iterations: int = 50) -> BenchmarkResult:
|
|
"""Benchmark the recomposition step."""
|
|
from fusionagi.reasoning.decomposition import decompose_recursive
|
|
from fusionagi.reasoning.recomposition import recompose
|
|
from fusionagi.reasoning.tot import ThoughtNode
|
|
|
|
prompt = "What are the key challenges in aligning superintelligent AI?"
|
|
decomp = decompose_recursive(prompt, max_depth=2)
|
|
node = ThoughtNode(
|
|
thought="Alignment requires both technical and governance solutions.",
|
|
unit_refs=[u.unit_id for u in decomp.units[:5]],
|
|
)
|
|
|
|
result = run_benchmark(
|
|
"recomposition",
|
|
lambda: recompose([node], decomp.units),
|
|
iterations=iterations,
|
|
)
|
|
self._results.append(result)
|
|
return result
|
|
|
|
def run_end_to_end_benchmark(self, iterations: int = 20) -> BenchmarkResult:
|
|
"""Benchmark the full Super Big Brain pipeline."""
|
|
from fusionagi.core.super_big_brain import SuperBigBrainConfig, run_super_big_brain
|
|
from fusionagi.memory import SemanticGraphMemory
|
|
|
|
graph = SemanticGraphMemory()
|
|
config = SuperBigBrainConfig(max_decomposition_depth=2, parallel_hypotheses=2)
|
|
prompt = "What is the most promising path from AGI to ASI?"
|
|
|
|
result = run_benchmark(
|
|
"end_to_end_super_big_brain",
|
|
lambda: run_super_big_brain(prompt, graph, config),
|
|
iterations=iterations,
|
|
warmup=2,
|
|
)
|
|
self._results.append(result)
|
|
return result
|
|
|
|
def run_all(self, iterations: int = 30) -> list[BenchmarkResult]:
|
|
"""Run all benchmarks.
|
|
|
|
Args:
|
|
iterations: Number of iterations per benchmark.
|
|
|
|
Returns:
|
|
List of all benchmark results.
|
|
"""
|
|
self._results.clear()
|
|
self.run_decomposition_benchmark(iterations)
|
|
self.run_multi_path_benchmark(iterations)
|
|
self.run_recomposition_benchmark(iterations)
|
|
self.run_end_to_end_benchmark(max(iterations // 3, 5))
|
|
return list(self._results)
|
|
|
|
def summary(self) -> str:
|
|
"""Generate summary report."""
|
|
if not self._results:
|
|
return "No benchmarks run."
|
|
lines = ["FusionAGI Benchmark Results", "=" * 40]
|
|
for r in self._results:
|
|
lines.append(r.summary())
|
|
return "\n".join(lines)
|
|
|
|
def to_dict(self) -> list[dict[str, Any]]:
|
|
"""Export results as list of dicts."""
|
|
return [
|
|
{
|
|
"name": r.name,
|
|
"mean_ms": r.mean_ms,
|
|
"min_ms": r.min_ms,
|
|
"max_ms": r.max_ms,
|
|
"std_ms": r.std_ms,
|
|
"iterations": r.iterations,
|
|
}
|
|
for r in self._results
|
|
]
|
|
|
|
|
|
__all__ = [
|
|
"BenchmarkResult",
|
|
"BenchmarkSuite",
|
|
"run_benchmark",
|
|
]
|