Files
CurrenciCombo/orchestrator/src/services/exceptionManager.ts
Devin 908c386dff PR B: VALIDATING phase + unified ExceptionManager (arch steps 3, 7)
- services/exceptionManager.ts: single taxonomy (timing/data/control/
  business/system) with §12 codes, deterministic route() table, and
  handle() dispatch to retry/DLQ/escalate
- services/execution.ts: refactor executePlan to drive the full 12-state
  machine (DRAFT -> INITIATED -> ... -> VALIDATING -> COMMITTED -> CLOSED)
  via stateMachine.transition(), with a new validatePhase() that
  reconciles DLT tx hash + bank message id + per-step amounts before
  COMMIT; SoD-gated edges use distinct synthetic actors by default
- api/plans.ts + index.ts: GET /api/plans/:planId/state returning
  current transaction_state + full audit trail of transitions
- tests/unit/exceptionManager.test.ts: 14 tests for classification +
  routing matrix

59 tests pass. tsc clean.
2026-04-22 16:29:21 +00:00

297 lines
9.0 KiB
TypeScript

/**
* Unified Exception Manager — architecture note §5.9, §12.
*
* Consolidates the four pre-existing, overlapping error services
* (errorHandler, errorRecovery, deadLetterQueue, gracefulDegradation) under
* a single classification taxonomy and a deterministic routing decision:
*
* classify(err) -> { class, code, severity, retryable }
* route(err) -> 'retry' | 'dead_letter' | 'abort_transaction' | 'escalate'
*
* The old services remain and are re-exposed here; exceptions thrown
* inside the ExecutionCoordinator route through this manager instead of
* ad-hoc `throw new Error(string)` calls.
*/
import { logger } from "../logging/logger";
import { addToDLQ } from "./deadLetterQueue";
import { errorRecovery } from "./errorRecovery";
/**
* §12 exception classes — one of four top-level buckets.
*/
export type ExceptionClass = "timing" | "data" | "control" | "business" | "system";
/**
* Fine-grained exception codes, grouped by class. Source: arch note §12.
*/
export type ExceptionCode =
// §12.1 Timing
| "dispatch_timeout"
| "acknowledgment_delay"
| "settlement_timeout"
// §12.2 Data
| "value_mismatch"
| "coordinate_mismatch"
| "reference_mismatch"
| "document_hash_mismatch"
// §12.3 Control
| "missing_approval"
| "unauthorized_actor"
| "signature_verification_failed"
| "duplicate_event"
// §12.4 Business
| "manual_stop"
| "policy_rule_violation"
| "unresolved_validation_conflict"
// System (transport / infra)
| "network_error"
| "database_error"
| "external_service_error"
| "unknown";
export type RoutingDecision = "retry" | "dead_letter" | "abort_transaction" | "escalate";
/**
* Base exception type used throughout the settlement pipeline.
*
* Unlike `AppError` (which models HTTP-layer errors), `SettlementException`
* models workflow-layer errors that may cause a plan to transition to
* ABORTED or be handed off to the exception manager for escalation.
*/
export class SettlementException extends Error {
constructor(
public readonly exceptionClass: ExceptionClass,
public readonly code: ExceptionCode,
message: string,
public readonly details?: Record<string, unknown>,
public readonly cause?: Error,
) {
super(message);
this.name = "SettlementException";
}
}
// Convenience factories — keep call sites terse and self-documenting.
export const Timing = {
dispatch(details?: Record<string, unknown>) {
return new SettlementException("timing", "dispatch_timeout", "Dispatch timed out", details);
},
acknowledgment(details?: Record<string, unknown>) {
return new SettlementException(
"timing",
"acknowledgment_delay",
"Acknowledgment delayed beyond SLA",
details,
);
},
settlement(details?: Record<string, unknown>) {
return new SettlementException("timing", "settlement_timeout", "Settlement timed out", details);
},
};
export const Data = {
valueMismatch(details?: Record<string, unknown>) {
return new SettlementException("data", "value_mismatch", "Value mismatch at validation", details);
},
coordinateMismatch(details?: Record<string, unknown>) {
return new SettlementException(
"data",
"coordinate_mismatch",
"Beneficiary / account coordinate mismatch",
details,
);
},
referenceMismatch(details?: Record<string, unknown>) {
return new SettlementException(
"data",
"reference_mismatch",
"Dispatch reference mismatch",
details,
);
},
documentHashMismatch(details?: Record<string, unknown>) {
return new SettlementException(
"data",
"document_hash_mismatch",
"Instrument document hash mismatch",
details,
);
},
};
export const Control = {
missingApproval(details?: Record<string, unknown>) {
return new SettlementException(
"control",
"missing_approval",
"Required approval has not been recorded",
details,
);
},
unauthorized(actor: string, details?: Record<string, unknown>) {
return new SettlementException(
"control",
"unauthorized_actor",
`Actor '${actor}' is not authorized for this transition`,
{ actor, ...details },
);
},
signature(details?: Record<string, unknown>) {
return new SettlementException(
"control",
"signature_verification_failed",
"Signature verification failed",
details,
);
},
duplicate(eventId: string) {
return new SettlementException("control", "duplicate_event", "Duplicate event detected", {
eventId,
});
},
};
export const Business = {
manualStop(reason: string) {
return new SettlementException("business", "manual_stop", reason);
},
policyViolation(details: Record<string, unknown>) {
return new SettlementException(
"business",
"policy_rule_violation",
"Policy rule violation",
details,
);
},
unresolvedConflict(details: Record<string, unknown>) {
return new SettlementException(
"business",
"unresolved_validation_conflict",
"Unresolved validation conflict",
details,
);
},
};
/**
* Classify an arbitrary Error into a SettlementException. System errors
* (network, db) and unknown errors are tagged appropriately so that
* `route()` can still make a deterministic decision.
*/
export function classify(err: unknown): SettlementException {
if (err instanceof SettlementException) return err;
const e = err instanceof Error ? err : new Error(String(err));
const msg = e.message.toLowerCase();
if (
msg.includes("timeout") ||
msg.includes("etimedout") ||
msg.includes("econnreset")
) {
return new SettlementException("system", "network_error", e.message, undefined, e);
}
if (
msg.includes("econnrefused") ||
msg.includes("network") ||
msg.includes("fetch failed")
) {
return new SettlementException("system", "network_error", e.message, undefined, e);
}
if (msg.includes("database") || msg.includes("postgres") || msg.includes("pg")) {
return new SettlementException("system", "database_error", e.message, undefined, e);
}
return new SettlementException("system", "unknown", e.message, undefined, e);
}
/**
* Decide what to do with an exception. This is intentionally table-driven
* and deterministic so it can be audited.
*
* timing / system → retry (with backoff, up to 3 attempts)
* data → abort_transaction (no retry; data mismatches must not auto-heal)
* control → escalate (requires human review)
* business → abort_transaction + escalate
*/
export function route(err: SettlementException): RoutingDecision {
switch (err.exceptionClass) {
case "timing":
return "retry";
case "system":
return err.code === "network_error" ? "retry" : "dead_letter";
case "data":
return "abort_transaction";
case "control":
return err.code === "duplicate_event" ? "dead_letter" : "escalate";
case "business":
return err.code === "manual_stop" ? "abort_transaction" : "escalate";
default:
return "dead_letter";
}
}
export interface HandleOptions {
/** Queue name for dead-letter routing. */
queue?: string;
/** Opaque context payload to preserve in DLQ / logs. */
context?: Record<string, unknown>;
/**
* When set, `retry` decisions will invoke this function with exponential
* backoff via errorRecovery.
*/
retryable?: () => Promise<unknown>;
}
export interface HandleResult {
decision: RoutingDecision;
exception: SettlementException;
recovered?: boolean;
recoveryResult?: unknown;
}
/**
* Central dispatch. Given any error, classify → route → act. Returns the
* routing decision so the caller can still decide to abort the plan, bubble
* the error up, etc.
*
* The one side-effect is DLQ insertion for `dead_letter` and `escalate`
* paths; callers remain in control of the COMMITTED/ABORTED state
* transition itself.
*/
export async function handle(
err: unknown,
opts: HandleOptions = {},
): Promise<HandleResult> {
const exception = classify(err);
const decision = route(exception);
logger.warn(
{
exceptionClass: exception.exceptionClass,
code: exception.code,
decision,
details: exception.details,
context: opts.context,
},
`ExceptionManager: ${exception.exceptionClass}/${exception.code} -> ${decision}`,
);
if (decision === "retry" && opts.retryable) {
try {
const recoveryResult = await errorRecovery.recover(exception, { fn: opts.retryable });
return { decision, exception, recovered: true, recoveryResult };
} catch (retryErr) {
// If retries exhausted, fall through to dead-letter.
logger.warn({ retryErr }, "Retry exhausted, routing to DLQ");
await addToDLQ(opts.queue ?? "exceptions", opts.context ?? {}, exception.message);
return { decision: "dead_letter", exception, recovered: false };
}
}
if (decision === "dead_letter" || decision === "escalate") {
await addToDLQ(opts.queue ?? "exceptions", opts.context ?? {}, exception.message);
}
return { decision, exception, recovered: false };
}