Frontend (items 1-10):
- WebSocket streaming integration with useWebSocket hook
- Admin Dashboard UI (status, voices, agents, governance tabs)
- Voice playback UI (TTS/STT integration)
- Settings/Preferences page (conversation style, sliders)
- Responsive/mobile layout (breakpoints at 480px, 768px)
- Dark/light theme with CSS variables and localStorage
- Error handling & loading states (retry, empty state, disabled input)
- Authentication UI (login page, Bearer token, logout)
- Head visualization improvements (active/speaking states, animations)
- Consequence/Ethics dashboard (lessons, consequences, insights tabs)
Backend stubs (items 11-21):
- Tool connectors: DocsConnector (text/md/PDF), DBConnector (SQLite/Postgres), CodeRunnerConnector (Python/JS/Bash/Ruby sandboxed)
- STT adapter: WhisperSTTAdapter, AzureSTTAdapter
- Multi-modal interface adapters: Visual, Haptic, Gesture, Biometric
- SSE streaming endpoint (/v1/sessions/{id}/stream/sse)
- Multi-tenant support (X-Tenant-ID header, tenant CRUD)
- Plugin marketplace/registry (register, install, list)
- Backup/restore endpoints
- Versioned API negotiation (Accept-Version header, deprecation)
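The SSE streaming item above comes down to framing JSON payloads in the `event:`/`data:` wire format defined by the Server-Sent Events spec. A minimal stdlib-only sketch of that framing; the helper names (`format_sse`, `stream_tokens`) and the payload shape are illustrative, not taken from the codebase:

```python
from __future__ import annotations

import json


def format_sse(data: dict, event: str | None = None) -> str:
    """Frame a JSON payload as one Server-Sent Events message."""
    lines = []
    if event is not None:
        lines.append(f"event: {event}")
    lines.append(f"data: {json.dumps(data)}")
    # The SSE protocol terminates each message with a blank line.
    return "\n".join(lines) + "\n\n"


def stream_tokens(tokens: list[str]):
    """Yield SSE-framed chunks, one per token, then a final end event."""
    for tok in tokens:
        yield format_sse({"token": tok}, event="delta")
    yield format_sse({"done": True}, event="end")
```

In FastAPI, a generator like this would typically back a `StreamingResponse(..., media_type="text/event-stream")` on the `/v1/sessions/{id}/stream/sse` route.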
Infrastructure (items 22-26):
- docker-compose.yml (API + Postgres + Redis + frontend)
- .env.example with all configurable vars
- gunicorn.conf.py production ASGI config
- Prometheus metrics collector and /metrics endpoint
- Structured JSON logging configuration
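The structured JSON logging item can be sketched with a stdlib-only formatter that emits one JSON object per log record. The field names here (`ts`, `level`, `logger`, `message`) are assumptions, not the project's actual schema:

```python
import datetime
import json
import logging


class JsonFormatter(logging.Formatter):
    """Render each log record as a single JSON object per line."""

    def format(self, record: logging.LogRecord) -> str:
        payload = {
            # Field names are illustrative; adjust to the project's schema.
            "ts": datetime.datetime.now(datetime.timezone.utc).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        if record.exc_info:
            payload["exc_info"] = self.formatException(record.exc_info)
        return json.dumps(payload)


def configure_logging(level: int = logging.INFO) -> None:
    """Install the JSON formatter on the root logger."""
    handler = logging.StreamHandler()
    handler.setFormatter(JsonFormatter())
    logging.basicConfig(level=level, handlers=[handler], force=True)
```

Calling `configure_logging()` once at startup makes every logger in the process emit machine-parseable lines, which log aggregators can ingest without custom parsing.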
Documentation (items 27-29):
- Architecture docs with module layout and subsystem descriptions
- Quickstart guide with setup, API tour, and test instructions
Tests (items 30-32):
- Integration tests: 25 end-to-end API tests
- Frontend tests: 10 Vitest tests for hooks (useTheme, useAuth)
- Load/performance tests: latency and throughput benchmarks
- Connector tests: 16 tests for Docs, DB, CodeRunner
- Multi-modal adapter tests: 9 tests
- Metrics collector tests: 5 tests
- STT adapter tests: 2 tests
511 Python tests passing, 10 frontend tests passing, 0 ruff errors.
Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
86 lines
2.8 KiB
Python
"""Load/performance tests for FusionAGI API.

These tests measure response times and throughput.

Run with: pytest tests/test_load.py -v
"""

from __future__ import annotations

import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import pytest

starlette = pytest.importorskip("starlette")
fastapi = pytest.importorskip("fastapi")

from starlette.testclient import TestClient  # noqa: E402

from fusionagi.api.app import create_app  # noqa: E402


def _client() -> TestClient:
    app = create_app(cors_origins=["*"])
    return TestClient(app)


class TestLatency:
    """Test response latency for key endpoints."""

    def test_status_latency(self) -> None:
        c = _client()
        start = time.monotonic()
        for _ in range(10):
            resp = c.get("/v1/admin/status")
            assert resp.status_code == 200
        elapsed = time.monotonic() - start
        avg_ms = (elapsed / 10) * 1000
        assert avg_ms < 500, f"Average status latency too high: {avg_ms:.1f}ms"

    def test_session_create_latency(self) -> None:
        c = _client()
        start = time.monotonic()
        for _ in range(5):
            resp = c.post("/v1/sessions", json={"user_id": "load-test"})
            assert resp.status_code == 200
        elapsed = time.monotonic() - start
        avg_ms = (elapsed / 5) * 1000
        assert avg_ms < 2000, f"Average session create latency too high: {avg_ms:.1f}ms"


class TestThroughput:
    """Test request throughput under concurrent load."""

    def test_concurrent_status_requests(self) -> None:
        c = _client()
        n_requests = 50

        def hit_status() -> int:
            resp = c.get("/v1/admin/status")
            return resp.status_code

        start = time.monotonic()
        with ThreadPoolExecutor(max_workers=10) as pool:
            futures = [pool.submit(hit_status) for _ in range(n_requests)]
            results = [f.result() for f in as_completed(futures)]
        elapsed = time.monotonic() - start

        success = sum(1 for r in results if r == 200)
        rps = n_requests / elapsed if elapsed > 0 else 0

        assert success == n_requests, f"Only {success}/{n_requests} succeeded"
        assert rps > 5, f"Throughput too low: {rps:.1f} req/s"

    def test_concurrent_session_creates(self) -> None:
        c = _client()
        n_requests = 20

        def create_session() -> int:
            resp = c.post("/v1/sessions", json={"user_id": "load-test"})
            return resp.status_code

        with ThreadPoolExecutor(max_workers=5) as pool:
            futures = [pool.submit(create_session) for _ in range(n_requests)]
            results = [f.result() for f in as_completed(futures)]

        success = sum(1 for r in results if r == 200)
        assert success == n_requests