FusionAGI/fusionagi/self_improvement/training.py

"""Auto training: suggest and apply heuristic updates from reflection and failures.

The trainer operates without artificial limits on its learning loop.
It can modify heuristics, propose strategy changes, and run GPU-accelerated
gradient optimization as many times as needed.  Growth comes from the
freedom to explore, fail, and learn — not from constraint.
"""

from typing import Any, Protocol

from fusionagi._logger import logger
from fusionagi.schemas.audit import AuditEventType
from fusionagi.schemas.recommendation import TrainingSuggestion, TrainingSuggestionKind


class ReflectiveMemoryLike(Protocol):
    """Protocol for reflective memory: set_heuristic, get_lessons."""

    def set_heuristic(self, key: str, value: Any) -> None: ...
    def get_lessons(self, limit: int = 50) -> list[dict[str, Any]]: ...
    def get_all_heuristics(self) -> dict[str, Any]: ...


class AuditLogLike(Protocol):
    """Protocol for audit log."""

    def append(
        self,
        event_type: AuditEventType,
        actor: str,
        action: str = "",
        task_id: str | None = None,
        payload: dict[str, Any] | None = None,
        outcome: str = "",
    ) -> str: ...


class AutoTrainer:
    """Suggests and applies training updates from reflection and failures.

    Operates without artificial limits on the learning loop.  The trainer
    is free to modify its own heuristics, propose strategy changes, and
    iterate as many times as needed.  Every self-improvement action is
    transparently logged to the audit trail.
    """

    def __init__(
        self,
        reflective_memory: ReflectiveMemoryLike | None = None,
        audit_log: AuditLogLike | None = None,
    ) -> None:
        """Initialize the auto-trainer.

        Args:
            reflective_memory: Reflective memory for applying heuristics.
            audit_log: Optional audit log for transparent self-improvement tracking.
        """
        self._memory = reflective_memory
        self._audit = audit_log
        self._iteration_count = 0

    def suggest_from_evaluation(
        self,
        task_id: str,
        evaluation: dict[str, Any],
    ) -> list[TrainingSuggestion]:
        """
        From a single Critic evaluation, produce training suggestions
        (heuristic_update from suggestions, fine_tune_dataset on failure).
        """
        suggestions: list[TrainingSuggestion] = []
        ev_suggestions = evaluation.get("suggestions", [])
        success = evaluation.get("success", False)
        score = evaluation.get("score", 0.5)

        for i, s in enumerate(ev_suggestions[:5]):
            key = f"heuristic_from_task_{task_id}_{i}"
            suggestions.append(
                TrainingSuggestion(
                    kind=TrainingSuggestionKind.HEURISTIC_UPDATE,
                    key=key,
                    value=s,
                    source_task_id=task_id,
                    reason="From Critic evaluation suggestion",
                )
            )
        if not success or score < 0.5:
            suggestions.append(
                TrainingSuggestion(
                    kind=TrainingSuggestionKind.FINE_TUNE_DATASET,
                    key=f"training_target_{task_id}",
                    value={
                        "task_id": task_id,
                        "outcome": "failed" if not success else "low_score",
                        "score": score,
                        "suggestions": ev_suggestions,
                    },
                    source_task_id=task_id,
                    reason="Task failed or low score; add to training dataset",
                )
            )
        return suggestions

    def suggest_from_lessons(self, limit_lessons: int = 20) -> list[TrainingSuggestion]:
        """
        Aggregate lessons into training suggestions (e.g. strategy_param
        or heuristic updates from repeated patterns).
        """
        if not self._memory:
            return []
        lessons = self._memory.get_lessons(limit=limit_lessons)
        suggestions: list[TrainingSuggestion] = []
        for lesson in lessons[-10:]:
            ev = lesson.get("evaluation", {})
            tid = lesson.get("task_id", "unknown")
            for i, s in enumerate(ev.get("suggestions", [])[:2]):
                key = f"lesson_heuristic_{tid}_{i}"
                suggestions.append(
                    TrainingSuggestion(
                        kind=TrainingSuggestionKind.HEURISTIC_UPDATE,
                        key=key,
                        value=s,
                        source_task_id=tid,
                        reason="From reflective lesson",
                    )
                )
        return suggestions

    def suggest_training(
        self,
        task_id: str | None = None,
        evaluation: dict[str, Any] | None = None,
        include_lessons: bool = True,
    ) -> list[TrainingSuggestion]:
        """
        Produce all training suggestions from optional evaluation and
        optionally from lessons.
        """
        out: list[TrainingSuggestion] = []
        if task_id and evaluation:
            out.extend(self.suggest_from_evaluation(task_id, evaluation))
        if include_lessons:
            out.extend(self.suggest_from_lessons())
        logger.debug(
            "AutoTrainer.suggest_training",
            extra={"count": len(out), "task_id": task_id},
        )
        return out

    def apply_heuristic_updates(
        self,
        suggestions: list[TrainingSuggestion],
        reflective_memory: ReflectiveMemoryLike | None = None,
    ) -> int:
        """Apply heuristic-update suggestions to reflective memory.

        No artificial limits on the number of heuristics that can be
        applied.  Every modification is transparently logged.
        """
        memory = reflective_memory or self._memory
        if not memory:
            logger.warning("AutoTrainer.apply_heuristic_updates: no reflective memory")
            return 0
        applied = 0
        for s in suggestions:
            if s.kind == TrainingSuggestionKind.HEURISTIC_UPDATE:
                memory.set_heuristic(s.key, s.value)
                applied += 1
                logger.info(
                    "AutoTrainer: applied heuristic",
                    extra={"key": s.key, "source_task_id": s.source_task_id},
                )
                if self._audit:
                    self._audit.append(
                        AuditEventType.SELF_IMPROVEMENT,
                        actor="auto_trainer",
                        action="heuristic_update",
                        task_id=s.source_task_id,
                        payload={"key": s.key, "value": str(s.value)[:200]},
                        outcome="applied",
                    )
            else:
                logger.info(
                    "AutoTrainer: suggestion logged (available for external pipeline)",
                    extra={"kind": s.kind.value, "key": s.key},
                )
                if self._audit:
                    self._audit.append(
                        AuditEventType.SELF_IMPROVEMENT,
                        actor="auto_trainer",
                        action="suggestion_logged",
                        task_id=s.source_task_id,
                        payload={"kind": s.kind.value, "key": s.key},
                        outcome="logged",
                    )
        return applied

    def run_auto_training(
        self,
        task_id: str | None = None,
        evaluation: dict[str, Any] | None = None,
        apply_heuristics: bool = True,
        use_gpu: bool = True,
        epochs: int = 50,
    ) -> list[TrainingSuggestion]:
        """Run unconstrained self-improvement from evaluation and lessons.

        The trainer is free to iterate as many times as needed.  When
        *use_gpu* is ``True`` (default) and GPU dependencies are installed,
        also runs GPU-accelerated gradient optimization on reflective
        memory lessons.

        Args:
            task_id: Source task for evaluation-based suggestions.
            evaluation: Critic evaluation dict.
            apply_heuristics: Whether to apply heuristic updates immediately.
            use_gpu: Whether to attempt GPU-accelerated training.
            epochs: Number of GPU training epochs (default 50, no upper bound).
        """
        self._iteration_count += 1
        suggestions = self.suggest_training(
            task_id=task_id,
            evaluation=evaluation,
            include_lessons=True,
        )
        if apply_heuristics:
            self.apply_heuristic_updates(suggestions)
        if use_gpu and self._memory is not None:
            self._try_gpu_training(epochs=epochs)

        if self._audit:
            self._audit.append(
                AuditEventType.SELF_IMPROVEMENT,
                actor="auto_trainer",
                action="training_iteration",
                task_id=task_id,
                payload={
                    "iteration": self._iteration_count,
                    "suggestions_count": len(suggestions),
                    "gpu_requested": use_gpu,
                    "epochs": epochs,
                },
                outcome="completed",
            )

        return suggestions

    def _try_gpu_training(self, epochs: int = 50) -> None:
        """Run GPU-accelerated training if available."""
        try:
            from fusionagi.self_improvement.gpu_training import (
                run_gpu_enhanced_training,
            )

            if self._memory is not None:
                result = run_gpu_enhanced_training(self._memory, epochs=epochs)
                logger.info(
                    "AutoTrainer: GPU training complete",
                    extra={
                        "gpu_accelerated": result.get("gpu_accelerated", False),
                        "epochs": epochs,
                    },
                )
        except ImportError:
            pass