"""GPU-accelerated training support for self-improvement pipeline. Provides tensor-based training utilities: - Heuristic weight optimization via gradient descent - Embedding fine-tuning from execution traces - Training data preparation from reflective memory """ from __future__ import annotations from dataclasses import dataclass, field from typing import Any, Protocol from fusionagi._logger import logger from fusionagi.gpu.backend import TensorBackend, get_backend class ReflectiveMemoryLike(Protocol): """Protocol for reflective memory access.""" def get_lessons(self, limit: int = 50) -> list[dict[str, Any]]: ... def get_all_heuristics(self) -> dict[str, Any]: ... def set_heuristic(self, key: str, value: Any) -> None: ... @dataclass class TrainingConfig: """Configuration for GPU-accelerated training.""" learning_rate: float = 0.01 epochs: int = 10 batch_size: int = 32 embedding_dim: int = 256 weight_decay: float = 0.001 @dataclass class TrainingResult: """Result of a GPU training run.""" initial_loss: float = 0.0 final_loss: float = 0.0 epochs_run: int = 0 weights_updated: int = 0 metadata: dict[str, Any] = field(default_factory=dict) def prepare_training_pairs( lessons: list[dict[str, Any]], backend: TensorBackend | None = None, ) -> tuple[Any, Any]: """Prepare input/target embedding pairs from reflective memory lessons. Each lesson with evaluation produces a (task_goal, outcome_quality) pair. These can be used to train heuristic weights or embeddings. Args: lessons: List of lesson dicts from reflective memory. backend: TensorBackend to use. Returns: Tuple of (input_embeddings, target_scores) tensors. """ be = backend or get_backend() import numpy as np inputs: list[str] = [] targets: list[float] = [] for lesson in lessons: task_id = lesson.get("task_id", "") outcome = lesson.get("outcome", "unknown") evaluation = lesson.get("evaluation", {}) score = evaluation.get("score", 0.5) input_text = f"task:{task_id} outcome:{outcome}" inputs.append(input_text) targets.append(float(score)) if not inputs: dim = 256 return be.from_numpy(np.zeros((0, dim), dtype=np.float32)), be.from_numpy( np.zeros(0, dtype=np.float32) ) input_emb = be.embed_texts(inputs) target_arr = np.array(targets, dtype=np.float32) return input_emb, be.from_numpy(target_arr) def optimize_heuristic_weights( input_embeddings: Any, target_scores: Any, config: TrainingConfig | None = None, backend: TensorBackend | None = None, ) -> TrainingResult: """Optimize heuristic scoring weights using gradient descent on GPU. Learns a weight vector that maps input embeddings to target scores via a simple linear model: score = sigmoid(embeddings @ weights). Args: input_embeddings: Tensor of shape (N, D) — training inputs. target_scores: Tensor of shape (N,) — target scores in [0, 1]. config: Training configuration. backend: TensorBackend to use. Returns: TrainingResult with loss history and weight count. """ be = backend or get_backend() cfg = config or TrainingConfig() import numpy as np inputs = be.to_numpy(input_embeddings) targets = be.to_numpy(target_scores) if len(inputs) == 0: return TrainingResult(metadata={"reason": "no training data"}) dim = inputs.shape[1] weights = np.random.randn(dim).astype(np.float32) * 0.01 bias = np.float32(0.0) def sigmoid(x: Any) -> Any: return 1.0 / (1.0 + np.exp(-np.clip(x, -500, 500))) initial_logits = inputs @ weights + bias initial_preds = sigmoid(initial_logits) initial_loss = float(np.mean((initial_preds - targets) ** 2)) lr = cfg.learning_rate final_loss = initial_loss for epoch in range(cfg.epochs): indices = np.random.permutation(len(inputs)) epoch_loss = 0.0 n_batches = 0 for start in range(0, len(inputs), cfg.batch_size): batch_idx = indices[start : start + cfg.batch_size] x_batch = inputs[batch_idx] y_batch = targets[batch_idx] logits = x_batch @ weights + bias preds = sigmoid(logits) error = preds - y_batch batch_loss = float(np.mean(error**2)) epoch_loss += batch_loss n_batches += 1 grad_w = (x_batch.T @ error) / len(x_batch) + cfg.weight_decay * weights grad_b = float(np.mean(error)) weights -= lr * grad_w bias -= lr * grad_b final_loss = epoch_loss / max(n_batches, 1) logger.info( "Heuristic weight optimization complete", extra={ "initial_loss": initial_loss, "final_loss": final_loss, "epochs": cfg.epochs, "dim": dim, }, ) return TrainingResult( initial_loss=initial_loss, final_loss=final_loss, epochs_run=cfg.epochs, weights_updated=dim, metadata={ "weight_norm": float(np.linalg.norm(weights)), "bias": float(bias), "backend": be.name, }, ) def run_gpu_training( reflective_memory: ReflectiveMemoryLike, config: TrainingConfig | None = None, backend: TensorBackend | None = None, ) -> TrainingResult: """End-to-end GPU training from reflective memory. Loads lessons, prepares pairs, and runs optimization. Args: reflective_memory: Source of training data. config: Training configuration. backend: TensorBackend to use. Returns: TrainingResult. """ be = backend or get_backend() lessons = reflective_memory.get_lessons(limit=500) if not lessons: return TrainingResult(metadata={"reason": "no lessons available"}) inputs, targets = prepare_training_pairs(lessons, backend=be) return optimize_heuristic_weights(inputs, targets, config=config, backend=be)