"""Metacognition: self-awareness of knowledge boundaries and reasoning quality. The metacognition engine monitors the system's own reasoning processes and produces self-assessments: - Does the system have enough evidence to answer confidently? - Which knowledge gaps exist? - Where are the reasoning weak points? - Should the system seek more information before answering? This is distinct from meta_reasoning.py (which challenges assumptions and detects contradictions in content). Metacognition operates on the *process* level — it reasons about the quality of reasoning itself. """ from __future__ import annotations from dataclasses import dataclass, field from fusionagi._logger import logger from fusionagi.schemas.head import HeadOutput @dataclass class KnowledgeGap: """An identified gap in the system's knowledge. Attributes: domain: Knowledge domain (e.g. ``legal``, ``medical``). description: What the system doesn't know. severity: Impact on answer quality (``low``, ``medium``, ``high``). resolvable: Whether additional tool calls could fill this gap. """ domain: str description: str severity: str = "medium" resolvable: bool = True @dataclass class MetacognitiveAssessment: """Self-assessment of reasoning quality for a given task. Attributes: overall_confidence: System's confidence in its answer (0.0–1.0). evidence_sufficiency: Whether evidence is sufficient (0.0–1.0). knowledge_gaps: Identified gaps in knowledge. reasoning_quality: Assessment of the reasoning chain quality. should_seek_more: Whether the system should seek more info. head_agreement: Fraction of heads that agree (0.0–1.0). uncertainty_sources: Where uncertainty comes from. recommendations: What the system should do next. """ overall_confidence: float = 0.5 evidence_sufficiency: float = 0.5 knowledge_gaps: list[KnowledgeGap] = field(default_factory=list) reasoning_quality: float = 0.5 should_seek_more: bool = False head_agreement: float = 0.5 uncertainty_sources: list[str] = field(default_factory=list) recommendations: list[str] = field(default_factory=list) def assess_head_outputs( outputs: list[HeadOutput], user_prompt: str = "", ) -> MetacognitiveAssessment: """Assess reasoning quality from head outputs. Analyzes the collection of head outputs for agreement patterns, confidence distribution, evidence coverage, and knowledge gaps. Args: outputs: Outputs from Dvādaśa content heads. user_prompt: Original user prompt for context. Returns: Metacognitive assessment of reasoning quality. """ if not outputs: return MetacognitiveAssessment( overall_confidence=0.0, evidence_sufficiency=0.0, should_seek_more=True, uncertainty_sources=["No head outputs available"], recommendations=["Execute head pipeline before assessment"], ) confidences: list[float] = [] for out in outputs: if out.claims: confidences.extend(c.confidence for c in out.claims) else: confidences.append(0.0) avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0 all_claims: list[str] = [] for out in outputs: all_claims.extend(c.claim_text for c in out.claims) evidence_counts = [] for out in outputs: for c in out.claims: evidence_counts.append(len(c.evidence)) avg_evidence = sum(evidence_counts) / max(len(evidence_counts), 1) evidence_sufficiency = min(1.0, avg_evidence / 3.0) head_agreement = _compute_head_agreement(outputs) gaps = _detect_knowledge_gaps(outputs, user_prompt) uncertainty_sources: list[str] = [] if avg_confidence < 0.5: uncertainty_sources.append(f"Low average head confidence: {avg_confidence:.2f}") if head_agreement < 0.4: uncertainty_sources.append(f"Low head agreement: {head_agreement:.2f}") if evidence_sufficiency < 0.3: uncertainty_sources.append(f"Insufficient evidence: avg {avg_evidence:.1f} per claim") if gaps: uncertainty_sources.append(f"{len(gaps)} knowledge gap(s) detected") conf_variance = _variance(confidences) if len(confidences) > 1 else 0.0 if conf_variance > 0.1: uncertainty_sources.append( f"High confidence variance across heads: {conf_variance:.3f}" ) reasoning_quality = ( 0.4 * avg_confidence + 0.3 * head_agreement + 0.2 * evidence_sufficiency + 0.1 * (1.0 - min(1.0, len(gaps) * 0.2)) ) should_seek_more = ( reasoning_quality < 0.4 or evidence_sufficiency < 0.3 or any(g.severity == "high" and g.resolvable for g in gaps) ) recommendations: list[str] = [] if should_seek_more: recommendations.append("Seek additional evidence before finalizing answer") if head_agreement < 0.4: recommendations.append("Run second-pass with disputed heads for clarification") for gap in gaps: if gap.resolvable: recommendations.append(f"Fill knowledge gap: {gap.description}") overall = min(1.0, 0.5 * reasoning_quality + 0.3 * head_agreement + 0.2 * evidence_sufficiency) assessment = MetacognitiveAssessment( overall_confidence=overall, evidence_sufficiency=evidence_sufficiency, knowledge_gaps=gaps, reasoning_quality=reasoning_quality, should_seek_more=should_seek_more, head_agreement=head_agreement, uncertainty_sources=uncertainty_sources, recommendations=recommendations, ) logger.info( "Metacognition: assessment complete", extra={ "overall_confidence": overall, "reasoning_quality": reasoning_quality, "head_agreement": head_agreement, "gaps": len(gaps), "should_seek_more": should_seek_more, }, ) return assessment def _compute_head_agreement(outputs: list[HeadOutput]) -> float: """Measure how much heads agree with each other. Uses claim text overlap across heads as a proxy for agreement. """ if len(outputs) < 2: return 1.0 claim_sets: list[set[str]] = [] for out in outputs: words: set[str] = set() for c in out.claims: words.update(c.claim_text.lower().split()) claim_sets.append(words) agreements: float = 0.0 comparisons = 0 for i in range(len(claim_sets)): for j in range(i + 1, len(claim_sets)): if not claim_sets[i] or not claim_sets[j]: continue overlap = len(claim_sets[i] & claim_sets[j]) union = len(claim_sets[i] | claim_sets[j]) if union > 0: agreements += overlap / union comparisons += 1 return agreements / max(comparisons, 1) def _detect_knowledge_gaps( outputs: list[HeadOutput], user_prompt: str, ) -> list[KnowledgeGap]: """Detect knowledge gaps from head outputs and prompt analysis.""" gaps: list[KnowledgeGap] = [] for out in outputs: if out.claims: avg_claim_conf = sum(c.confidence for c in out.claims) / len(out.claims) else: avg_claim_conf = 0.0 if avg_claim_conf < 0.3: gaps.append(KnowledgeGap( domain=out.head_id.value, description=f"Head '{out.head_id.value}' has very low confidence ({avg_claim_conf:.2f})", severity="high" if avg_claim_conf < 0.15 else "medium", resolvable=True, )) empty_heads = [o for o in outputs if not o.claims] for out in empty_heads: gaps.append(KnowledgeGap( domain=out.head_id.value, description=f"Head '{out.head_id.value}' produced no claims", severity="medium", resolvable=True, )) prompt_lower = user_prompt.lower() domain_indicators = { "legal": ["law", "legal", "court", "statute", "regulation", "compliance"], "medical": ["medical", "health", "disease", "treatment", "clinical", "patient"], "financial": ["financial", "stock", "market", "investment", "trading", "portfolio"], "scientific": ["experiment", "hypothesis", "data", "study", "research", "evidence"], } for domain, keywords in domain_indicators.items(): if any(kw in prompt_lower for kw in keywords): head_domains = {o.head_id.value for o in outputs} if domain not in head_domains: gaps.append(KnowledgeGap( domain=domain, description=f"Prompt references '{domain}' domain but no specialized head covers it", severity="medium", resolvable=False, )) return gaps def _variance(values: list[float]) -> float: """Compute variance of a list of floats.""" if len(values) < 2: return 0.0 mean = sum(values) / len(values) return sum((v - mean) ** 2 for v in values) / len(values)