Initial commit: add .gitignore and README
This commit is contained in:
18
fusionagi/self_improvement/__init__.py
Normal file
18
fusionagi/self_improvement/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
Self-improvement subsystem for FusionAGI: self-correction, auto-recommend/suggest, auto-training.
|
||||
|
||||
Enables the world's most advanced agentic AGI system to improve, correct, and train itself
|
||||
from execution outcomes and reflection.
|
||||
"""
|
||||
|
||||
from fusionagi.self_improvement.correction import SelfCorrectionLoop
|
||||
from fusionagi.self_improvement.recommender import AutoRecommender
|
||||
from fusionagi.self_improvement.training import AutoTrainer
|
||||
from fusionagi.self_improvement.loop import FusionAGILoop
|
||||
|
||||
# Public API of the self-improvement package, re-exported from its submodules.
__all__ = ["SelfCorrectionLoop", "AutoRecommender", "AutoTrainer", "FusionAGILoop"]
|
||||
168
fusionagi/self_improvement/correction.py
Normal file
168
fusionagi/self_improvement/correction.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""Self-correction: on failure, run reflection and optionally prepare retry with feedback."""
|
||||
|
||||
from typing import Any, Protocol
|
||||
|
||||
from fusionagi.schemas.task import TaskState
|
||||
from fusionagi.schemas.recommendation import Recommendation, RecommendationKind
|
||||
from fusionagi._logger import logger
|
||||
|
||||
|
||||
class StateManagerLike(Protocol):
    """Structural interface of the state store consulted by self-correction.

    Any object providing these three methods satisfies the protocol.
    """

    def get_task_state(self, task_id: str) -> TaskState | None:
        """Return the current state recorded for ``task_id``, or ``None``."""
        ...

    def get_trace(self, task_id: str) -> list[dict[str, Any]]:
        """Return the trace entries recorded for ``task_id``."""
        ...

    def get_task(self, task_id: str) -> Any:
        """Return the task object stored for ``task_id``."""
        ...
|
||||
|
||||
|
||||
class OrchestratorLike(Protocol):
    """Structural interface of the orchestrator: read/write a task plan and set task state."""

    def get_task_plan(self, task_id: str) -> dict[str, Any] | None:
        """Return the plan stored for ``task_id``, or ``None``."""
        ...

    def set_task_state(self, task_id: str, state: TaskState, force: bool = False) -> None:
        """Move ``task_id`` to ``state``; ``force`` is passed as ``True`` when preparing a retry."""
        ...

    def set_task_plan(self, task_id: str, plan: dict[str, Any]) -> None:
        """Store ``plan`` as the plan of ``task_id``."""
        ...
|
||||
|
||||
|
||||
class CriticLike(Protocol):
|
||||
"""Protocol for critic: handle_message with evaluate_request -> evaluation_ready."""
|
||||
|
||||
identity: str
|
||||
|
||||
def handle_message(self, envelope: Any) -> Any | None: ...
|
||||
|
||||
|
||||
def run_reflection_on_failure(
    critic_agent: CriticLike,
    task_id: str,
    state_manager: StateManagerLike,
    orchestrator: OrchestratorLike,
) -> dict[str, Any] | None:
    """
    Ask the Critic to evaluate a failed task.

    Builds an ``evaluate_request`` message (outcome ``"failed"``) carrying the
    task's trace and plan, hands it to ``critic_agent`` and extracts the
    ``"evaluation"`` entry from the reply payload.

    Args:
        critic_agent: Critic that handles the request.
        task_id: Id of the failed task.
        state_manager: Source of the task trace.
        orchestrator: Source of the task plan.

    Returns:
        The evaluation dict (``{}`` when the reply payload has no
        ``"evaluation"`` key), or ``None`` when the critic returns a falsy
        response or an intent other than ``evaluation_ready``.
    """
    # Function-scope import, kept where the original code placed it.
    from fusionagi.schemas.messages import AgentMessage, AgentMessageEnvelope

    request_payload = {
        "outcome": "failed",
        "trace": state_manager.get_trace(task_id),
        "plan": orchestrator.get_task_plan(task_id),
    }
    request = AgentMessage(
        sender="self_correction",
        recipient=critic_agent.identity,
        intent="evaluate_request",
        payload=request_payload,
    )
    reply = critic_agent.handle_message(
        AgentMessageEnvelope(message=request, task_id=task_id)
    )
    if reply and reply.message.intent == "evaluation_ready":
        return reply.message.payload.get("evaluation", {})
    return None
|
||||
|
||||
|
||||
class SelfCorrectionLoop:
    """
    Self-correction for failed tasks.

    Runs Critic reflection on a failed task and can prepare a retry by moving
    the task from FAILED to PENDING, storing the correction context in the
    task plan under the ``"_correction_context"`` key.
    """

    def __init__(
        self,
        state_manager: StateManagerLike,
        orchestrator: OrchestratorLike,
        critic_agent: CriticLike,
        max_retries_per_task: int = 2,
    ) -> None:
        """
        Initialize the self-correction loop.

        Args:
            state_manager: State manager for task state and traces.
            orchestrator: Orchestrator for plan and state transitions.
            critic_agent: Critic agent for evaluate_request -> evaluation_ready.
            max_retries_per_task: Maximum retries to suggest per task (default 2).
        """
        self._state = state_manager
        self._orchestrator = orchestrator
        self._critic = critic_agent
        self._max_retries = max_retries_per_task
        # Number of retries prepared so far per task id (bumped by prepare_retry).
        self._retry_counts: dict[str, int] = {}

    @staticmethod
    def _list_field(evaluation: dict[str, Any], key: str) -> Any:
        """Return ``evaluation[key]``, or ``[]`` when the key is missing or falsy (e.g. ``None``)."""
        return evaluation.get(key) or []

    def suggest_retry(self, task_id: str) -> tuple[bool, dict[str, Any]]:
        """
        For a failed task, run reflection and decide whether to suggest a retry.

        A retry is suggested when the evaluation carries suggestions or its
        score is below 0.5. A missing or non-numeric score counts as low.

        Returns:
            ``(should_retry, correction_context)``. The result is
            ``(False, {})`` when the task is not FAILED, the retry cap has
            been reached, or reflection yields no evaluation.
        """
        if self._state.get_task_state(task_id) != TaskState.FAILED:
            return False, {}

        retries = self._retry_counts.get(task_id, 0)
        if retries >= self._max_retries:
            logger.info(
                "Self-correction: max retries reached",
                extra={"task_id": task_id, "retries": retries},
            )
            return False, {}

        evaluation = run_reflection_on_failure(
            self._critic, task_id, self._state, self._orchestrator,
        )
        if not evaluation:
            return False, {}

        # Evaluations may carry explicit nulls; normalise before slicing/comparing.
        suggestions = self._list_field(evaluation, "suggestions")
        error_analysis = self._list_field(evaluation, "error_analysis")
        score = evaluation.get("score", 0)
        low_score = not isinstance(score, (int, float)) or score < 0.5

        context = {
            "evaluation": evaluation,
            "suggestions": suggestions,
            "error_analysis": error_analysis,
            "retry_count": retries + 1,
        }
        return bool(suggestions) or low_score, context

    def prepare_retry(self, task_id: str, correction_context: dict[str, Any] | None = None) -> None:
        """
        Transition a task from FAILED to PENDING and store the correction context in its plan.

        If ``correction_context`` is None, ``suggest_retry`` is run to obtain
        it and nothing happens when no retry is suggested. Does nothing
        (beyond a warning) when the task is not FAILED.

        Note: the retry cap is checked by ``suggest_retry`` only; a caller
        that supplies its own ``correction_context`` bypasses that check.
        """
        if self._state.get_task_state(task_id) != TaskState.FAILED:
            logger.warning("Self-correction: prepare_retry called for non-failed task", extra={"task_id": task_id})
            return

        if correction_context is None:
            ok, correction_context = self.suggest_retry(task_id)
            if not ok:
                return

        # Copy so the orchestrator's stored plan object is not mutated in place.
        plan = dict(self._orchestrator.get_task_plan(task_id) or {})
        plan["_correction_context"] = correction_context
        self._orchestrator.set_task_plan(task_id, plan)
        self._orchestrator.set_task_state(task_id, TaskState.PENDING, force=True)

        self._retry_counts[task_id] = self._retry_counts.get(task_id, 0) + 1
        logger.info("Self-correction: prepared retry", extra={"task_id": task_id, "retry_count": self._retry_counts[task_id]})

    def correction_recommendations(self, task_id: str) -> list[Recommendation]:
        """
        Run reflection for a task and return structured recommendations.

        One ``NEXT_ACTION`` recommendation (priority 7) is produced for each
        of the first ten suggestions in the evaluation. Returns ``[]`` when
        reflection yields no evaluation.
        """
        evaluation = run_reflection_on_failure(
            self._critic, task_id, self._state, self._orchestrator,
        )
        if not evaluation:
            return []

        suggestions = self._list_field(evaluation, "suggestions")
        error_analysis = self._list_field(evaluation, "error_analysis")
        return [
            Recommendation(
                kind=RecommendationKind.NEXT_ACTION,
                title=f"Correction suggestion {index + 1}",
                description=item if isinstance(item, str) else str(item),
                payload={"raw": item, "error_analysis": error_analysis},
                source_task_id=task_id,
                priority=7,
            )
            for index, item in enumerate(suggestions[:10])
        ]
|
||||
153
fusionagi/self_improvement/loop.py
Normal file
153
fusionagi/self_improvement/loop.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""AGI loop: wires self-correction, auto-recommend, and auto-training to events."""
|
||||
|
||||
from typing import Any, Callable
|
||||
|
||||
from fusionagi.schemas.task import TaskState
|
||||
from fusionagi.schemas.recommendation import Recommendation, TrainingSuggestion
|
||||
from fusionagi.core.event_bus import EventBus
|
||||
from fusionagi._logger import logger
|
||||
|
||||
from fusionagi.self_improvement.correction import (
|
||||
SelfCorrectionLoop,
|
||||
StateManagerLike,
|
||||
OrchestratorLike,
|
||||
CriticLike,
|
||||
)
|
||||
from fusionagi.self_improvement.recommender import AutoRecommender
|
||||
from fusionagi.self_improvement.training import AutoTrainer, ReflectiveMemoryLike
|
||||
|
||||
|
||||
class FusionAGILoop:
    """
    High-level self-improvement loop driven by event-bus events.

    Subscribes to ``task_state_changed`` and ``reflection_done``. On a FAILED
    transition it either prepares a retry (``auto_retry_on_failure=True``) or
    forwards correction recommendations to ``on_recommendations``. After a
    reflection it runs auto-recommend and auto-training.
    """

    def __init__(
        self,
        event_bus: EventBus,
        state_manager: StateManagerLike,
        orchestrator: OrchestratorLike,
        critic_agent: CriticLike,
        reflective_memory: ReflectiveMemoryLike | None = None,
        *,
        auto_retry_on_failure: bool = False,
        max_retries_per_task: int = 2,
        on_recommendations: Callable[[list[Recommendation]], None] | None = None,
        on_training_suggestions: Callable[[list[TrainingSuggestion]], None] | None = None,
    ) -> None:
        """
        Initialize the FusionAGI loop and subscribe its handlers to the event bus.

        Args:
            event_bus: Event bus to subscribe to task_state_changed and reflection_done.
            state_manager: State manager for task state and traces.
            orchestrator: Orchestrator for plan and state transitions.
            critic_agent: Critic agent for evaluate_request -> evaluation_ready.
            reflective_memory: Optional reflective memory for lessons/heuristics.
            auto_retry_on_failure: If True, on FAILED transition prepare_retry automatically.
            max_retries_per_task: Max retries per task when auto_retry_on_failure is True.
            on_recommendations: Optional callback to receive recommendations (e.g. log or UI).
            on_training_suggestions: Optional callback to receive training suggestions.
        """
        self._event_bus = event_bus
        self._state = state_manager
        self._orchestrator = orchestrator
        self._critic = critic_agent
        self._memory = reflective_memory
        self._auto_retry = auto_retry_on_failure
        self._on_recs = on_recommendations
        self._on_training = on_training_suggestions

        self._correction = SelfCorrectionLoop(
            state_manager=state_manager,
            orchestrator=orchestrator,
            critic_agent=critic_agent,
            max_retries_per_task=max_retries_per_task,
        )
        self._recommender = AutoRecommender(reflective_memory=reflective_memory)
        self._trainer = AutoTrainer(reflective_memory=reflective_memory)

        self._event_bus.subscribe("task_state_changed", self._on_task_state_changed)
        self._event_bus.subscribe("reflection_done", self._on_reflection_done)
        logger.info("FusionAGILoop: subscribed to task_state_changed and reflection_done")

    def _on_task_state_changed(self, event_type: str, payload: dict[str, Any]) -> None:
        """
        Handle a task state change; only transitions to FAILED are acted on.

        With auto-retry enabled, reflection runs once and its correction
        context is handed to ``prepare_retry``. Otherwise correction
        recommendations are computed and passed to ``on_recommendations``.
        Best-effort: any exception is logged and swallowed.
        """
        try:
            to_state = payload.get("to_state")
            task_id = payload.get("task_id", "")
            # Accept the enum member as well as its serialized value.
            is_failed = to_state in (TaskState.FAILED, TaskState.FAILED.value)
            if not is_failed or not task_id:
                return

            if self._auto_retry:
                should_retry, context = self._correction.suggest_retry(task_id)
                if should_retry:
                    # Pass the context along: calling prepare_retry without it
                    # would run suggest_retry (and the Critic) a second time.
                    self._correction.prepare_retry(task_id, context)
            else:
                recs = self._correction.correction_recommendations(task_id)
                if recs and self._on_recs:
                    self._on_recs(recs)
        except Exception:
            logger.exception(
                "FusionAGILoop: _on_task_state_changed failed (best-effort)",
                extra={"event_type": event_type},
            )

    def _on_reflection_done(self, event_type: str, payload: dict[str, Any]) -> None:
        """
        After reflection, run auto-recommend and then auto-training.

        Each optional callback is invoked inside its own try/except so a
        failing callback does not prevent the remaining steps. Best-effort:
        any other exception is logged and swallowed.
        """
        try:
            # An empty or missing task id is forwarded as None.
            task_id = payload.get("task_id") or None
            evaluation = payload.get("evaluation") or {}

            recs = self._recommender.recommend(
                task_id=task_id,
                evaluation=evaluation,
                include_lessons=True,
            )
            if self._on_recs:
                try:
                    self._on_recs(recs)
                except Exception:
                    logger.exception("FusionAGILoop: on_recommendations callback failed")

            suggestions = self._trainer.run_auto_training(
                task_id=task_id,
                evaluation=evaluation,
                apply_heuristics=True,
            )
            if self._on_training:
                try:
                    self._on_training(suggestions)
                except Exception:
                    logger.exception("FusionAGILoop: on_training_suggestions callback failed")
        except Exception:
            logger.exception(
                "FusionAGILoop: _on_reflection_done failed (best-effort)",
                extra={"event_type": event_type},
            )

    def run_after_reflection(
        self,
        task_id: str,
        evaluation: dict[str, Any],
    ) -> tuple[list[Recommendation], list[TrainingSuggestion]]:
        """
        Run auto-recommend and auto-training directly, without the reflection_done event.

        Unlike the event handler, no callbacks are invoked and exceptions
        propagate to the caller.

        Returns:
            ``(recommendations, training_suggestions)``.
        """
        recs = self._recommender.recommend(
            task_id=task_id,
            evaluation=evaluation,
            include_lessons=True,
        )
        suggestions = self._trainer.run_auto_training(
            task_id=task_id,
            evaluation=evaluation,
            apply_heuristics=True,
        )
        return recs, suggestions

    def unsubscribe(self) -> None:
        """Unsubscribe both handlers from the event bus (for cleanup)."""
        self._event_bus.unsubscribe("task_state_changed", self._on_task_state_changed)
        self._event_bus.unsubscribe("reflection_done", self._on_reflection_done)
        logger.info("FusionAGILoop: unsubscribed from events")
|
||||
136
fusionagi/self_improvement/recommender.py
Normal file
136
fusionagi/self_improvement/recommender.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""Auto recommend/suggest: produce actionable recommendations from lessons and evaluations."""
|
||||
|
||||
from typing import Any, Protocol
|
||||
|
||||
from fusionagi.schemas.recommendation import Recommendation, RecommendationKind
|
||||
from fusionagi._logger import logger
|
||||
|
||||
|
||||
class ReflectiveMemoryLike(Protocol):
    """Structural interface of the reflective memory read by the recommender."""

    def get_lessons(self, limit: int = 50) -> list[dict[str, Any]]:
        """Return stored lessons as dicts; ``limit`` bounds how many are requested."""
        ...

    def get_all_heuristics(self) -> dict[str, Any]:
        """Return the stored heuristics as a mapping."""
        ...
|
||||
|
||||
|
||||
class AutoRecommender:
    """
    Produces actionable recommendations from reflective memory lessons and
    from post-task evaluations (suggestions, error_analysis).
    """

    def __init__(self, reflective_memory: ReflectiveMemoryLike | None = None) -> None:
        """
        Initialize the auto-recommender.

        Args:
            reflective_memory: Optional reflective memory for lessons/heuristics.
        """
        self._memory = reflective_memory

    def recommend_from_evaluation(
        self,
        task_id: str,
        evaluation: dict[str, Any],
    ) -> list[Recommendation]:
        """
        Turn a single evaluation (from the Critic) into recommendations.

        Produces one ``NEXT_ACTION`` per suggestion (first ten), a single
        ``STRATEGY_CHANGE`` when there is error analysis but no suggestion,
        and a ``TRAINING_TARGET`` when the task was unsuccessful with a
        score below 0.5. Fields that are missing or ``None`` are treated as
        empty; a non-numeric score is never considered low.
        """
        suggestions = evaluation.get("suggestions") or []
        error_analysis = evaluation.get("error_analysis") or []
        score = evaluation.get("score", 0.5)
        success = evaluation.get("success", False)
        low_score = isinstance(score, (int, float)) and score < 0.5

        recs: list[Recommendation] = [
            Recommendation(
                kind=RecommendationKind.NEXT_ACTION,
                title="Suggestion from evaluation",
                description=item if isinstance(item, str) else str(item),
                payload={"raw": item, "error_analysis": error_analysis},
                source_task_id=task_id,
                priority=8 if not success else 5,
            )
            for item in suggestions[:10]
        ]
        if error_analysis and not recs:
            recs.append(
                Recommendation(
                    kind=RecommendationKind.STRATEGY_CHANGE,
                    title="Address error analysis",
                    description="; ".join(str(e) for e in error_analysis[:3]),
                    payload={"error_analysis": error_analysis},
                    source_task_id=task_id,
                    priority=7,
                )
            )
        if low_score and not success:
            recs.append(
                Recommendation(
                    kind=RecommendationKind.TRAINING_TARGET,
                    title="Low score: consider training or prompt tuning",
                    description=f"Task {task_id} scored {score}; add as training target.",
                    payload={"score": score, "task_id": task_id},
                    source_task_id=task_id,
                    priority=6,
                )
            )
        return recs

    def recommend_from_lessons(self, limit_lessons: int = 20) -> list[Recommendation]:
        """
        Aggregate lessons from reflective memory into recommendations.

        Emits a ``STRATEGY_CHANGE`` when at least three of the fetched
        lessons have outcome ``"failed"``, plus up to two ``NEXT_ACTION``
        entries per lesson for the last five lessons. Returns ``[]`` when no
        reflective memory was configured.
        """
        # Identity check: a memory object that happens to be falsy (e.g. an
        # empty container defining __len__) must still be consulted.
        if self._memory is None:
            return []

        lessons = self._memory.get_lessons(limit=limit_lessons)
        recs: list[Recommendation] = []

        failed = [lesson for lesson in lessons if lesson.get("outcome") == "failed"]
        if len(failed) >= 3:
            recs.append(
                Recommendation(
                    kind=RecommendationKind.STRATEGY_CHANGE,
                    title="Multiple failures in recent lessons",
                    description=f"{len(failed)} failed tasks in last {limit_lessons} lessons.",
                    payload={"failed_count": len(failed), "lesson_sample": failed[-3:]},
                    source_task_id=None,
                    priority=6,
                )
            )

        for lesson in lessons[-5:]:
            # Lessons may store an explicit None evaluation / suggestions.
            lesson_eval = lesson.get("evaluation") or {}
            for item in (lesson_eval.get("suggestions") or [])[:2]:
                recs.append(
                    Recommendation(
                        kind=RecommendationKind.NEXT_ACTION,
                        title="From lesson",
                        description=item if isinstance(item, str) else str(item),
                        payload={"lesson": lesson},
                        source_task_id=lesson.get("task_id"),
                        priority=4,
                    )
                )
        return recs

    def recommend(
        self,
        task_id: str | None = None,
        evaluation: dict[str, Any] | None = None,
        include_lessons: bool = True,
    ) -> list[Recommendation]:
        """
        Produce all recommendations: from the optional evaluation and,
        optionally, from reflective memory lessons.

        The evaluation is only used when both ``task_id`` and ``evaluation``
        are truthy. Results are deduplicated by ``(title, description)``
        (first occurrence wins) and sorted by descending priority, then title.
        """
        recs: list[Recommendation] = []
        if task_id and evaluation:
            recs.extend(self.recommend_from_evaluation(task_id, evaluation))
        if include_lessons:
            recs.extend(self.recommend_from_lessons())

        seen: set[tuple[str, str]] = set()
        unique: list[Recommendation] = []
        for rec in recs:
            key = (rec.title, rec.description)
            if key in seen:
                continue
            seen.add(key)
            unique.append(rec)

        unique.sort(key=lambda r: (-r.priority, r.title))
        logger.debug("AutoRecommender.recommend", extra={"count": len(unique), "task_id": task_id})
        return unique
|
||||
167
fusionagi/self_improvement/training.py
Normal file
167
fusionagi/self_improvement/training.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Auto training: suggest and apply heuristic updates from reflection and failures."""
|
||||
|
||||
from typing import Any, Protocol
|
||||
|
||||
from fusionagi.schemas.recommendation import TrainingSuggestion, TrainingSuggestionKind
|
||||
from fusionagi._logger import logger
|
||||
|
||||
|
||||
class ReflectiveMemoryLike(Protocol):
    """Structural interface of the reflective memory used by the auto-trainer."""

    def set_heuristic(self, key: str, value: Any) -> None:
        """Store ``value`` as the heuristic named ``key``."""
        ...

    def get_lessons(self, limit: int = 50) -> list[dict[str, Any]]:
        """Return stored lessons as dicts; ``limit`` bounds how many are requested."""
        ...

    def get_all_heuristics(self) -> dict[str, Any]:
        """Return the stored heuristics as a mapping."""
        ...
|
||||
|
||||
|
||||
class AutoTrainer:
    """
    Suggests training actions (heuristic updates, prompt tuning, fine-tune datasets)
    from lessons and evaluations, and applies heuristic updates to reflective memory.
    """

    def __init__(self, reflective_memory: ReflectiveMemoryLike | None = None) -> None:
        """
        Initialize the auto-trainer.

        Args:
            reflective_memory: Optional reflective memory for applying heuristics.
        """
        self._memory = reflective_memory

    def suggest_from_evaluation(
        self,
        task_id: str,
        evaluation: dict[str, Any],
    ) -> list[TrainingSuggestion]:
        """
        From a single Critic evaluation, produce training suggestions.

        Emits one ``HEURISTIC_UPDATE`` per evaluation suggestion (first
        five) and a ``FINE_TUNE_DATASET`` entry when the task was
        unsuccessful or scored below 0.5. Missing or ``None`` suggestions
        are treated as empty; a non-numeric score is never considered low.
        """
        ev_suggestions = evaluation.get("suggestions") or []
        success = evaluation.get("success", False)
        score = evaluation.get("score", 0.5)
        low_score = isinstance(score, (int, float)) and score < 0.5

        out: list[TrainingSuggestion] = [
            TrainingSuggestion(
                kind=TrainingSuggestionKind.HEURISTIC_UPDATE,
                key=f"heuristic_from_task_{task_id}_{index}",
                value=item,
                source_task_id=task_id,
                reason="From Critic evaluation suggestion",
            )
            for index, item in enumerate(ev_suggestions[:5])
        ]
        if not success or low_score:
            out.append(
                TrainingSuggestion(
                    kind=TrainingSuggestionKind.FINE_TUNE_DATASET,
                    key=f"training_target_{task_id}",
                    value={
                        "task_id": task_id,
                        "outcome": "failed" if not success else "low_score",
                        "score": score,
                        "suggestions": ev_suggestions,
                    },
                    source_task_id=task_id,
                    reason="Task failed or low score; add to training dataset",
                )
            )
        return out

    def suggest_from_lessons(self, limit_lessons: int = 20) -> list[TrainingSuggestion]:
        """
        Aggregate lessons into heuristic-update suggestions.

        Looks at the last ten fetched lessons and emits up to two
        ``HEURISTIC_UPDATE`` suggestions per lesson. Lessons without a
        ``task_id`` are keyed under ``"unknown"``. Returns ``[]`` when no
        reflective memory was configured.
        """
        # Identity check: a memory object that happens to be falsy (e.g. an
        # empty container defining __len__) must still be consulted.
        if self._memory is None:
            return []

        lessons = self._memory.get_lessons(limit=limit_lessons)
        out: list[TrainingSuggestion] = []
        for lesson in lessons[-10:]:
            # Lessons may store an explicit None evaluation / suggestions.
            lesson_eval = lesson.get("evaluation") or {}
            tid = lesson.get("task_id", "unknown")
            for index, item in enumerate((lesson_eval.get("suggestions") or [])[:2]):
                out.append(
                    TrainingSuggestion(
                        kind=TrainingSuggestionKind.HEURISTIC_UPDATE,
                        key=f"lesson_heuristic_{tid}_{index}",
                        value=item,
                        source_task_id=tid,
                        reason="From reflective lesson",
                    )
                )
        return out

    def suggest_training(
        self,
        task_id: str | None = None,
        evaluation: dict[str, Any] | None = None,
        include_lessons: bool = True,
    ) -> list[TrainingSuggestion]:
        """
        Produce all training suggestions from the optional evaluation and,
        optionally, from lessons.

        The evaluation is only used when both ``task_id`` and ``evaluation``
        are truthy.
        """
        out: list[TrainingSuggestion] = []
        if task_id and evaluation:
            out.extend(self.suggest_from_evaluation(task_id, evaluation))
        if include_lessons:
            out.extend(self.suggest_from_lessons())
        logger.debug(
            "AutoTrainer.suggest_training",
            extra={"count": len(out), "task_id": task_id},
        )
        return out

    def apply_heuristic_updates(
        self,
        suggestions: list[TrainingSuggestion],
        reflective_memory: ReflectiveMemoryLike | None = None,
    ) -> int:
        """
        Apply heuristic-update suggestions to reflective memory.

        Args:
            suggestions: Suggestions to process.
            reflective_memory: Memory to write to; defaults to the one given
                at construction time.

        Returns:
            Number of heuristics applied. Other suggestion kinds are logged
            but not applied (e.g. fine_tune_dataset for external pipelines).
            Returns 0 with a warning when no memory is available.
        """
        # Use "is None" rather than truthiness so a falsy-but-valid memory
        # object is neither skipped nor replaced by the default one.
        memory = reflective_memory if reflective_memory is not None else self._memory
        if memory is None:
            logger.warning("AutoTrainer.apply_heuristic_updates: no reflective memory")
            return 0

        applied = 0
        for suggestion in suggestions:
            if suggestion.kind == TrainingSuggestionKind.HEURISTIC_UPDATE:
                memory.set_heuristic(suggestion.key, suggestion.value)
                applied += 1
                logger.info(
                    "AutoTrainer: applied heuristic",
                    extra={"key": suggestion.key, "source_task_id": suggestion.source_task_id},
                )
            else:
                logger.info(
                    "AutoTrainer: suggestion not applied (use external pipeline)",
                    extra={"kind": suggestion.kind.value, "key": suggestion.key},
                )
        return applied

    def run_auto_training(
        self,
        task_id: str | None = None,
        evaluation: dict[str, Any] | None = None,
        apply_heuristics: bool = True,
    ) -> list[TrainingSuggestion]:
        """
        Suggest training from evaluation/lessons and optionally apply
        heuristic updates.

        Lessons are always included. Returns all suggestions (for logging or
        external use), whether or not they were applied.
        """
        suggestions = self.suggest_training(
            task_id=task_id,
            evaluation=evaluation,
            include_lessons=True,
        )
        if apply_heuristics:
            self.apply_heuristic_updates(suggestions)
        return suggestions
|
||||
Reference in New Issue
Block a user