- Integrate GPU scoring inline into reasoning/multi_path.py (auto-uses GPU when available)
- Integrate GPU deduplication into multi_agent/consensus_engine.py
- Add semantic_search() method to memory/semantic_graph.py with GPU acceleration
- Integrate GPU training into the self_improvement/training.py AutoTrainer
- Fix all 758 ruff lint issues (whitespace, import sorting, unused imports, ambiguous variables, undefined names)
- Fix all 40 mypy type errors across the codebase (no-any-return, union-attr, arg-type, etc.)
- Fix deprecated ruff config keys (select/ignore -> [tool.ruff.lint])
- Add .dockerignore to exclude .venv/, tests/, and docs/ from Docker builds
- Add type hints and docstrings to verification/outcome.py
- Fix E402 import ordering in witness_agent.py
- Fix F821 undefined names in vector_pgvector.py and native.py
- Fix E741 ambiguous variable names in reflective.py and recommender.py

All 276 tests pass. 0 ruff errors. 0 mypy errors.

Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
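For context on the ruff codes cited above, the snippet below is purely illustrative (it is not taken from the repository and uses made-up names); it only shows the general shape of an E402/F821/E741-style fix:

```python
# Illustrative sketch of the lint-fix categories named in the commit message.

# E402 (module-level import not at top of file): imports are moved above
# any executable module-level statements.
import os

# F821 (undefined name): the name is defined (or imported) before it is used.
DEFAULT_LIMIT = 10

# E741 (ambiguous variable name): single-letter names like `l`, `I`, `O`
# are renamed to something readable.
def total_length(items: list[str]) -> int:
    length = 0  # previously an ambiguous `l = 0`
    for item in items:
        length += len(item)
    return length


print(total_length(["a", "bb"]), DEFAULT_LIMIT, os.sep)
```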
96 lines · 3.7 KiB · Python
"""Critic / Evaluator agent: evaluates task outcome, error analysis, suggested improvements."""
|
|
|
|
import json
|
|
from typing import Any
|
|
|
|
from fusionagi._logger import logger
|
|
from fusionagi.adapters.base import LLMAdapter
|
|
from fusionagi.agents.base_agent import BaseAgent
|
|
from fusionagi.schemas.messages import AgentMessage, AgentMessageEnvelope
|
|
|
|
|
|
class CriticAgent(BaseAgent):
|
|
"""Evaluates task outcome and execution trace; emits evaluation_ready."""
|
|
|
|
def __init__(
|
|
self,
|
|
identity: str = "critic",
|
|
adapter: LLMAdapter | None = None,
|
|
) -> None:
|
|
super().__init__(
|
|
identity=identity,
|
|
role="Critic",
|
|
objective="Evaluate outcomes and suggest improvements",
|
|
memory_access=True,
|
|
tool_permissions=[],
|
|
)
|
|
self._adapter = adapter
|
|
|
|
def handle_message(self, envelope: AgentMessageEnvelope) -> AgentMessageEnvelope | None:
|
|
"""On evaluate_request, return evaluation_ready with score, analysis, suggestions."""
|
|
if envelope.message.intent != "evaluate_request":
|
|
return None
|
|
logger.info(
|
|
"Critic handle_message",
|
|
extra={"recipient": self.identity, "intent": envelope.message.intent},
|
|
)
|
|
payload = envelope.message.payload
|
|
task_id = envelope.task_id
|
|
outcome = payload.get("outcome", "unknown")
|
|
trace = payload.get("trace", [])
|
|
plan = payload.get("plan")
|
|
if self._adapter:
|
|
evaluation = self._evaluate_with_llm(outcome, trace, plan)
|
|
else:
|
|
evaluation = {
|
|
"success": outcome == "completed",
|
|
"score": 1.0 if outcome == "completed" else 0.0,
|
|
"error_analysis": [],
|
|
"suggestions": ["Enable LLM for detailed evaluation"],
|
|
}
|
|
logger.info(
|
|
"Critic response",
|
|
extra={"recipient": self.identity, "response_intent": "evaluation_ready"},
|
|
)
|
|
return AgentMessageEnvelope(
|
|
message=AgentMessage(
|
|
sender=self.identity,
|
|
recipient=envelope.message.sender,
|
|
intent="evaluation_ready",
|
|
payload={"evaluation": evaluation},
|
|
),
|
|
task_id=task_id,
|
|
correlation_id=envelope.correlation_id,
|
|
)
|
|
|
|
def _evaluate_with_llm(
|
|
self,
|
|
outcome: str,
|
|
trace: list[dict[str, Any]],
|
|
plan: dict[str, Any] | None,
|
|
) -> dict[str, Any]:
|
|
"""Use adapter to produce evaluation (score, error_analysis, suggestions)."""
|
|
context = f"Outcome: {outcome}\nTrace (last 5): {json.dumps(trace[-5:], default=str)}\n"
|
|
if plan:
|
|
context += f"Plan: {json.dumps(plan.get('steps', [])[:5], default=str)}"
|
|
messages = [
|
|
{"role": "system", "content": "You evaluate task execution. Output JSON: {\"success\": bool, \"score\": 0-1, \"error_analysis\": [], \"suggestions\": []}. Output only JSON."},
|
|
{"role": "user", "content": context},
|
|
]
|
|
try:
|
|
raw = self._adapter.complete(messages) # type: ignore[union-attr]
|
|
for start in ("```json", "```"):
|
|
if raw.strip().startswith(start):
|
|
raw = raw.strip()[len(start):].strip()
|
|
if raw.endswith("```"):
|
|
raw = raw[:-3].strip()
|
|
return json.loads(raw) # type: ignore[no-any-return]
|
|
except Exception:
|
|
logger.exception("Critic evaluation parse failed, using fallback")
|
|
return {
|
|
"success": outcome == "completed",
|
|
"score": 0.5,
|
|
"error_analysis": ["Evaluation parse failed"],
|
|
"suggestions": [],
|
|
}
|
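A minimal usage sketch of the agent above, assuming the module lives at fusionagi.agents.critic, that AgentMessage and AgentMessageEnvelope accept exactly the fields used in this file, and that string task/correlation IDs are valid; this is hypothetical caller code, not part of the repository:

```python
from fusionagi.agents.critic import CriticAgent  # assumed module path
from fusionagi.schemas.messages import AgentMessage, AgentMessageEnvelope

# No adapter supplied, so handle_message takes the heuristic fallback path.
critic = CriticAgent()

request = AgentMessageEnvelope(
    message=AgentMessage(
        sender="orchestrator",
        recipient="critic",
        intent="evaluate_request",
        payload={"outcome": "completed", "trace": [{"step": "done"}], "plan": None},
    ),
    task_id="task-123",
    correlation_id="corr-456",
)

response = critic.handle_message(request)
if response is not None:
    evaluation = response.message.payload["evaluation"]
    # In the fallback path a "completed" outcome yields success=True, score=1.0.
    print(response.message.intent, evaluation["success"], evaluation["score"])
```

Passing an LLMAdapter instance instead routes the request through _evaluate_with_llm, which asks the model for the same JSON shape and falls back to a neutral score of 0.5 if the reply cannot be parsed.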