/**
 * Unified Exception Manager — architecture note §5.9, §12.
 *
 * Consolidates the four pre-existing, overlapping error services
 * (errorHandler, errorRecovery, deadLetterQueue, gracefulDegradation) under
 * a single classification taxonomy and a deterministic routing decision:
 *
 *   classify(err) -> SettlementException { exceptionClass, code, details?, cause? }
 *   route(err)    -> 'retry' | 'dead_letter' | 'abort_transaction' | 'escalate'
 *
 * The old services remain and are re-exposed here; exceptions thrown
 * inside the ExecutionCoordinator route through this manager instead of
 * ad-hoc `throw new Error(string)` calls.
 */
import { logger } from "../logging/logger";
import { addToDLQ } from "./deadLetterQueue";
import { errorRecovery } from "./errorRecovery";

/**
 * Exception classes: the four §12 workflow buckets (timing, data, control,
 * business) plus `system` for transport / infrastructure failures.
 */
export type ExceptionClass = "timing" | "data" | "control" | "business" | "system";

/**
 * Fine-grained exception codes, grouped by class. Source: arch note §12.
 */
export type ExceptionCode =
  // §12.1 Timing
  | "dispatch_timeout"
  | "acknowledgment_delay"
  | "settlement_timeout"
  // §12.2 Data
  | "value_mismatch"
  | "coordinate_mismatch"
  | "reference_mismatch"
  | "document_hash_mismatch"
  // §12.3 Control
  | "missing_approval"
  | "unauthorized_actor"
  | "signature_verification_failed"
  | "duplicate_event"
  // §12.4 Business
  | "manual_stop"
  | "policy_rule_violation"
  | "unresolved_validation_conflict"
  // System (transport / infra)
  | "network_error"
  | "database_error"
  | "external_service_error"
  | "unknown";

/** Action the exception manager selects for a classified exception. */
export type RoutingDecision = "retry" | "dead_letter" | "abort_transaction" | "escalate";

/**
 * Base exception type used throughout the settlement pipeline.
 *
 * Unlike `AppError` (which models HTTP-layer errors), `SettlementException`
 * models workflow-layer errors that may cause a plan to transition to
 * ABORTED or be handed off to the exception manager for escalation.
*/
export class SettlementException extends Error {
  /**
   * @param exceptionClass Top-level bucket; `route()` switches on it.
   * @param code Fine-grained code within the class.
   * @param message Human-readable description, forwarded to `Error`.
   * @param details Optional structured context carried with the exception.
   * @param cause Underlying error this exception wraps, if any.
   */
  constructor(
    public readonly exceptionClass: ExceptionClass,
    public readonly code: ExceptionCode,
    message: string,
    public readonly details?: Record<string, unknown>,
    public readonly cause?: Error,
  ) {
    super(message);
    this.name = "SettlementException";
  }
}

// Convenience factories — keep call sites terse and self-documenting.

/** Factories for §12.1 timing exceptions. */
export const Timing = {
  dispatch(details?: Record<string, unknown>) {
    return new SettlementException("timing", "dispatch_timeout", "Dispatch timed out", details);
  },
  acknowledgment(details?: Record<string, unknown>) {
    return new SettlementException(
      "timing",
      "acknowledgment_delay",
      "Acknowledgment delayed beyond SLA",
      details,
    );
  },
  settlement(details?: Record<string, unknown>) {
    return new SettlementException("timing", "settlement_timeout", "Settlement timed out", details);
  },
};

/** Factories for §12.2 data exceptions. */
export const Data = {
  valueMismatch(details?: Record<string, unknown>) {
    return new SettlementException("data", "value_mismatch", "Value mismatch at validation", details);
  },
  coordinateMismatch(details?: Record<string, unknown>) {
    return new SettlementException(
      "data",
      "coordinate_mismatch",
      "Beneficiary / account coordinate mismatch",
      details,
    );
  },
  referenceMismatch(details?: Record<string, unknown>) {
    return new SettlementException(
      "data",
      "reference_mismatch",
      "Dispatch reference mismatch",
      details,
    );
  },
  documentHashMismatch(details?: Record<string, unknown>) {
    return new SettlementException(
      "data",
      "document_hash_mismatch",
      "Instrument document hash mismatch",
      details,
    );
  },
};

/** Factories for §12.3 control exceptions. */
export const Control = {
  missingApproval(details?: Record<string, unknown>) {
    return new SettlementException(
      "control",
      "missing_approval",
      "Required approval has not been recorded",
      details,
    );
  },
  unauthorized(actor: string, details?: Record<string, unknown>) {
    // `details` is spread after `actor`, so an `actor` key supplied in
    // `details` takes precedence in the stored details object.
    return new SettlementException(
      "control",
      "unauthorized_actor",
      `Actor '${actor}' is not authorized for this transition`,
      { actor, ...details },
    );
  },
  signature(details?: Record<string, unknown>) {
    return new SettlementException(
      "control",
      "signature_verification_failed",
      "Signature verification failed",
      details,
    );
  },
  duplicate(eventId: string) {
    return new SettlementException("control", "duplicate_event", "Duplicate event detected", {
      eventId,
    });
  },
};

/** Factories for §12.4 business exceptions. */
export const Business = {
  manualStop(reason: string) {
    return new SettlementException("business", "manual_stop", reason);
  },
  policyViolation(details: Record<string, unknown>) {
    return new SettlementException(
      "business",
      "policy_rule_violation",
      "Policy rule violation",
      details,
    );
  },
  unresolvedConflict(details: Record<string, unknown>) {
    return new SettlementException(
      "business",
      "unresolved_validation_conflict",
      "Unresolved validation conflict",
      details,
    );
  },
};

/** Lower-case message fragments that mark a transport-level failure. */
const NETWORK_ERROR_MARKERS = [
  "timeout",
  "etimedout",
  "econnreset",
  "econnrefused",
  "network",
  "fetch failed",
] as const;

/**
 * Matches "pg" only when it is not embedded in a longer word, so messages
 * such as "upgrade" or "photo.jpg" are not mistaken for database errors
 * while "pg: ...", "pg_hba.conf" and "pg-pool" still match.
 */
const PG_DRIVER_PATTERN = /(^|[^a-z])pg([^a-z]|$)/;

/**
 * Normalise an arbitrary thrown value into an `Error`. Non-Error objects
 * that carry a string `message` keep that message instead of collapsing to
 * "[object Object]".
 */
function toError(err: unknown): Error {
  if (err instanceof Error) return err;
  if (typeof err === "object" && err !== null) {
    const message = (err as { message?: unknown }).message;
    if (typeof message === "string") return new Error(message);
  }
  return new Error(String(err));
}

/**
 * Classify an arbitrary thrown value into a SettlementException.
 *
 * An existing SettlementException is returned unchanged. Anything else is
 * tagged as a `system` exception by inspecting its lower-cased message:
 * network markers are checked first (so "database connection timeout" is a
 * `network_error`), then database markers, otherwise the code is `unknown`.
 * The normalised source error is preserved as `cause`.
 *
 * @param err Any thrown value.
 * @returns The SettlementException that `route()` should decide on.
 */
export function classify(err: unknown): SettlementException {
  if (err instanceof SettlementException) return err;

  const source = toError(err);
  const msg = source.message.toLowerCase();

  if (NETWORK_ERROR_MARKERS.some((marker) => msg.includes(marker))) {
    return new SettlementException("system", "network_error", source.message, undefined, source);
  }
  if (msg.includes("database") || msg.includes("postgres") || PG_DRIVER_PATTERN.test(msg)) {
    return new SettlementException("system", "database_error", source.message, undefined, source);
  }
  return new SettlementException("system", "unknown", source.message, undefined, source);
}

/**
 * Decide what to do with an exception. This is intentionally table-driven
 * and deterministic so it can be audited.
*
 *   timing                       → retry
 *   system / network_error       → retry
 *   system / any other code      → dead_letter
 *   data                         → abort_transaction (no retry; data mismatches must not auto-heal)
 *   control / duplicate_event    → dead_letter
 *   control / any other code     → escalate (requires human review)
 *   business / manual_stop       → abort_transaction
 *   business / any other code    → escalate
 *   unrecognized class           → dead_letter
 *
 * @param err Classified exception.
 * @returns The action the exception manager should take.
 */
export function route(err: SettlementException): RoutingDecision {
  switch (err.exceptionClass) {
    case "timing":
      return "retry";
    case "system":
      // Only transport failures are worth retrying; other system errors are parked.
      return err.code === "network_error" ? "retry" : "dead_letter";
    case "data":
      return "abort_transaction";
    case "control":
      // A duplicate event needs no human review, so it is parked instead of escalated.
      return err.code === "duplicate_event" ? "dead_letter" : "escalate";
    case "business":
      return err.code === "manual_stop" ? "abort_transaction" : "escalate";
    default:
      // Defensive fallback for values that bypass the type system at runtime.
      return "dead_letter";
  }
}

/** Options accepted by `handle()`. */
export interface HandleOptions {
  /** Queue name for dead-letter routing. */
  queue?: string;
  /** Opaque context payload to preserve in DLQ / logs. */
  context?: Record<string, unknown>;
  /**
   * When set, `retry` decisions hand this function to
   * `errorRecovery.recover` (described upstream as exponential backoff; the
   * policy itself lives in errorRecovery).
   */
  retryable?: () => Promise<unknown>;
}

/** Outcome reported by `handle()`. */
export interface HandleResult {
  /** Routing decision; `dead_letter` when a retry's recovery attempt failed. */
  decision: RoutingDecision;
  /** The classified exception that was routed. */
  exception: SettlementException;
  /** True only when a retry was attempted and recovery succeeded. */
  recovered?: boolean;
  /** Value returned by the successful recovery, if any. */
  recoveryResult?: unknown;
}

/**
 * Central dispatch. Given any error, classify → route → act. Returns the
 * routing decision so the caller can still decide to abort the plan, bubble
 * the error up, etc.
 *
 * Side effects: one warning log entry per call, and DLQ insertion for
 * `dead_letter` and `escalate` decisions as well as for `retry` decisions
 * whose recovery attempt fails (those are reported as `dead_letter`).
 * Callers remain in control of the COMMITTED/ABORTED state
 * transition itself.
*/
export async function handle(
  err: unknown,
  opts: HandleOptions = {},
): Promise<HandleResult> {
  const exception = classify(err);
  const decision = route(exception);

  logger.warn(
    {
      exceptionClass: exception.exceptionClass,
      code: exception.code,
      decision,
      details: exception.details,
      context: opts.context,
    },
    `ExceptionManager: ${exception.exceptionClass}/${exception.code} -> ${decision}`,
  );

  // Single definition of the DLQ write so the retry-failure path and the
  // direct dead_letter / escalate path cannot drift apart.
  const sendToDlq = () =>
    addToDLQ(opts.queue ?? "exceptions", opts.context ?? {}, exception.message);

  // A `retry` decision without a `retryable` callback is only reported back;
  // the caller is then responsible for retrying.
  if (decision === "retry" && opts.retryable) {
    try {
      const recoveryResult = await errorRecovery.recover(exception, { fn: opts.retryable });
      return { decision, exception, recovered: true, recoveryResult };
    } catch (retryErr) {
      // If retries exhausted, fall through to dead-letter: the original
      // exception is parked and the downgraded decision is reported.
      logger.warn({ retryErr }, "Retry exhausted, routing to DLQ");
      await sendToDlq();
      return { decision: "dead_letter", exception, recovered: false };
    }
  }

  if (decision === "dead_letter" || decision === "escalate") {
    await sendToDlq();
  }

  return { decision, exception, recovered: false };
}