- services/exceptionManager.ts: single taxonomy (timing/data/control/business/system) with §12 codes, deterministic route() table, and handle() dispatch to retry/DLQ/escalate
- services/execution.ts: refactor executePlan to drive the full 12-state machine (DRAFT -> INITIATED -> ... -> VALIDATING -> COMMITTED -> CLOSED) via stateMachine.transition(), with a new validatePhase() that reconciles DLT tx hash + bank message id + per-step amounts before COMMIT; SoD-gated edges use distinct synthetic actors by default
- api/plans.ts + index.ts: GET /api/plans/:planId/state returning current transaction_state + full audit trail of transitions
- tests/unit/exceptionManager.test.ts: 14 tests for classification + routing matrix

59 tests pass. tsc clean.
297 lines
9.0 KiB
TypeScript
297 lines
9.0 KiB
TypeScript
/**
|
|
* Unified Exception Manager — architecture note §5.9, §12.
|
|
*
|
|
* Consolidates the four pre-existing, overlapping error services
|
|
* (errorHandler, errorRecovery, deadLetterQueue, gracefulDegradation) under
|
|
* a single classification taxonomy and a deterministic routing decision:
|
|
*
|
|
 *   classify(err) -> SettlementException (exceptionClass, code, message, details?, cause?)
|
|
* route(err) -> 'retry' | 'dead_letter' | 'abort_transaction' | 'escalate'
|
|
*
|
|
* The old services remain and are re-exposed here; exceptions thrown
|
|
* inside the ExecutionCoordinator route through this manager instead of
|
|
* ad-hoc `throw new Error(string)` calls.
|
|
*/
|
|
|
|
import { logger } from "../logging/logger";
|
|
import { addToDLQ } from "./deadLetterQueue";
|
|
import { errorRecovery } from "./errorRecovery";
|
|
|
|
/**
 * Top-level exception classes used for routing.
 *
 * There are five buckets: the four §12 classes (timing, data, control,
 * business) plus "system" for transport / infrastructure failures.
 */
export type ExceptionClass = "timing" | "data" | "control" | "business" | "system";
|
|
|
|
/**
 * Fine-grained exception codes. Each group below belongs to exactly one
 * exception class; the section numbers refer to arch note §12.
 */
export type ExceptionCode =
  // Timing (§12.1)
  | "dispatch_timeout" | "acknowledgment_delay" | "settlement_timeout"
  // Data (§12.2)
  | "value_mismatch" | "coordinate_mismatch" | "reference_mismatch" | "document_hash_mismatch"
  // Control (§12.3)
  | "missing_approval" | "unauthorized_actor" | "signature_verification_failed" | "duplicate_event"
  // Business (§12.4)
  | "manual_stop" | "policy_rule_violation" | "unresolved_validation_conflict"
  // System (transport / infra)
  | "network_error" | "database_error" | "external_service_error" | "unknown";
|
|
|
|
/** Action selected by `route()` for a classified exception. */
export type RoutingDecision =
  | "retry"
  | "dead_letter"
  | "abort_transaction"
  | "escalate";
|
|
|
|
/**
 * Base exception type used throughout the settlement pipeline.
 *
 * Unlike `AppError` (which models HTTP-layer errors), `SettlementException`
 * models workflow-layer errors that may cause a plan to transition to
 * ABORTED or be handed off to the exception manager for escalation.
 */
export class SettlementException extends Error {
  /**
   * @param exceptionClass Top-level bucket that `route()` switches on.
   * @param code Fine-grained code within that bucket.
   * @param message Human-readable description; becomes `Error.message`.
   * @param details Optional structured context attached to the exception.
   * @param cause Optional underlying error this exception was derived from.
   */
  constructor(
    public readonly exceptionClass: ExceptionClass,
    public readonly code: ExceptionCode,
    message: string,
    public readonly details?: Record<string, unknown>,
    public readonly cause?: Error,
  ) {
    super(message);
    // When compiled for ES5, `super(...)` on a built-in returns a plain Error
    // and drops the subclass prototype. Restoring it keeps
    // `instanceof SettlementException` working on every compile target.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "SettlementException";
  }
}
|
|
|
|
// Convenience factories — keep call sites terse and self-documenting.

/** Builds a "timing"-class exception with the given code and message. */
const timingFault = (
  code: ExceptionCode,
  message: string,
  details?: Record<string, unknown>,
): SettlementException => new SettlementException("timing", code, message, details);

/** Factories for the timing exception codes. */
export const Timing = {
  /** A dispatch did not complete in time. */
  dispatch: (details?: Record<string, unknown>) =>
    timingFault("dispatch_timeout", "Dispatch timed out", details),

  /** An acknowledgment arrived later than the SLA allows. */
  acknowledgment: (details?: Record<string, unknown>) =>
    timingFault("acknowledgment_delay", "Acknowledgment delayed beyond SLA", details),

  /** Settlement did not complete in time. */
  settlement: (details?: Record<string, unknown>) =>
    timingFault("settlement_timeout", "Settlement timed out", details),
};
|
|
|
|
/** Builds a "data"-class exception with the given code and message. */
const dataFault = (
  code: ExceptionCode,
  message: string,
  details?: Record<string, unknown>,
): SettlementException => new SettlementException("data", code, message, details);

/** Factories for the data-mismatch exception codes. */
export const Data = {
  /** A value did not match at validation. */
  valueMismatch: (details?: Record<string, unknown>) =>
    dataFault("value_mismatch", "Value mismatch at validation", details),

  /** Beneficiary / account coordinates did not match. */
  coordinateMismatch: (details?: Record<string, unknown>) =>
    dataFault("coordinate_mismatch", "Beneficiary / account coordinate mismatch", details),

  /** The dispatch reference did not match. */
  referenceMismatch: (details?: Record<string, unknown>) =>
    dataFault("reference_mismatch", "Dispatch reference mismatch", details),

  /** The instrument document hash did not match. */
  documentHashMismatch: (details?: Record<string, unknown>) =>
    dataFault("document_hash_mismatch", "Instrument document hash mismatch", details),
};
|
|
|
|
/**
 * Factories for the control exception codes (approvals, authorization,
 * signatures, duplicate events).
 *
 * Every factory accepts an optional `details` record that is attached to the
 * resulting exception. Where a factory also takes an explicit identifier
 * (`actor`, `eventId`), that argument always wins over a same-named key in
 * `details`, so the structured details can never contradict the call.
 */
export const Control = {
  /** A required approval has not been recorded. */
  missingApproval(details?: Record<string, unknown>) {
    return new SettlementException(
      "control",
      "missing_approval",
      "Required approval has not been recorded",
      details,
    );
  },

  /**
   * The given actor is not permitted to perform the transition.
   *
   * @param actor Identifier of the offending actor; echoed in the message.
   * @param details Optional extra context.
   */
  unauthorized(actor: string, details?: Record<string, unknown>) {
    return new SettlementException(
      "control",
      "unauthorized_actor",
      `Actor '${actor}' is not authorized for this transition`,
      // Spread caller details first: a stray `details.actor` must not
      // overwrite the actor that the message above refers to.
      { ...details, actor },
    );
  },

  /** A signature failed verification. */
  signature(details?: Record<string, unknown>) {
    return new SettlementException(
      "control",
      "signature_verification_failed",
      "Signature verification failed",
      details,
    );
  },

  /**
   * An event with this id was already seen.
   *
   * @param eventId Identifier of the duplicated event.
   * @param details Optional extra context (same shape as the other factories).
   */
  duplicate(eventId: string, details?: Record<string, unknown>) {
    return new SettlementException("control", "duplicate_event", "Duplicate event detected", {
      ...details,
      eventId,
    });
  },
};
|
|
|
|
/** Factories for the business exception codes. */
export const Business = {
  /**
   * A manual stop was requested.
   *
   * @param reason Free-text reason; used verbatim as the exception message.
   * @param details Optional structured context, matching the other factories
   *   so that a manual stop can carry more than a message.
   */
  manualStop(reason: string, details?: Record<string, unknown>) {
    return new SettlementException("business", "manual_stop", reason, details);
  },

  /**
   * A policy rule was violated.
   *
   * @param details Structured description of the violation (required).
   */
  policyViolation(details: Record<string, unknown>) {
    return new SettlementException(
      "business",
      "policy_rule_violation",
      "Policy rule violation",
      details,
    );
  },

  /**
   * Validation produced a conflict that was not resolved.
   *
   * @param details Structured description of the conflict (required).
   */
  unresolvedConflict(details: Record<string, unknown>) {
    return new SettlementException(
      "business",
      "unresolved_validation_conflict",
      "Unresolved validation conflict",
      details,
    );
  },
};
|
|
|
|
/**
 * Classify an arbitrary thrown value into a SettlementException.
 *
 * A value that already is a SettlementException is returned unchanged.
 * Anything else is wrapped as a "system"-class exception whose code is
 * inferred from the (lower-cased) error message, so that `route()` can still
 * make a deterministic decision:
 *
 *   - timeout / connection / network markers -> "network_error"
 *   - database markers                       -> "database_error"
 *   - everything else                        -> "unknown"
 *
 * @param err The thrown value; non-Error values are stringified.
 * @returns The original SettlementException, or a new one carrying the
 *   original message and the source error as `cause`.
 */
export function classify(err: unknown): SettlementException {
  if (err instanceof SettlementException) return err;

  const e = err instanceof Error ? err : new Error(String(err));
  const msg = e.message.toLowerCase();

  // Substrings that mark a transport-level failure.
  const networkMarkers = [
    "timeout",
    "etimedout",
    "econnreset",
    "econnrefused",
    "network",
    "fetch failed",
  ];
  // "pg" must appear as a standalone token (e.g. "pg", "pg-pool", "pg_hba");
  // a bare substring test would also hit unrelated words such as "upgrade"
  // or "jpg" and mislabel them as database errors.
  const databasePattern = /database|postgres|(^|[^a-z0-9])pg([^a-z0-9]|$)/;

  let code: ExceptionCode = "unknown";
  if (networkMarkers.some((marker) => msg.includes(marker))) {
    code = "network_error";
  } else if (databasePattern.test(msg)) {
    code = "database_error";
  }

  return new SettlementException("system", code, e.message, undefined, e);
}
|
|
|
|
/**
 * Decide what to do with a classified exception. The mapping is a fixed,
 * side-effect-free switch so that it can be audited:
 *
 *   timing                     -> retry
 *   system   / network_error   -> retry
 *   system   / any other code  -> dead_letter
 *   data                       -> abort_transaction (no retry; data
 *                                 mismatches must not auto-heal)
 *   control  / duplicate_event -> dead_letter
 *   control  / any other code  -> escalate (requires human review)
 *   business / manual_stop     -> abort_transaction
 *   business / any other code  -> escalate
 *   unrecognised class         -> dead_letter
 *
 * This function only names the decision. It performs no retry itself, so
 * backoff and attempt limits are not decided here.
 *
 * @param err A classified exception (see `classify()`).
 * @returns The routing decision for that exception.
 */
export function route(err: SettlementException): RoutingDecision {
  switch (err.exceptionClass) {
    case "timing":
      return "retry";
    case "system":
      // Only transport failures are worth retrying; other system errors are parked.
      return err.code === "network_error" ? "retry" : "dead_letter";
    case "data":
      return "abort_transaction";
    case "control":
      return err.code === "duplicate_event" ? "dead_letter" : "escalate";
    case "business":
      return err.code === "manual_stop" ? "abort_transaction" : "escalate";
    default:
      // Defensive fallback for a class value outside the declared union.
      return "dead_letter";
  }
}
|
|
|
|
/** Optional knobs accepted by `handle()`. */
export interface HandleOptions {
  /**
   * Name of the dead-letter queue to write to. `handle()` falls back to
   * "exceptions" when this is not set.
   */
  queue?: string;

  /**
   * Opaque caller context. It is included in the log entry and stored as the
   * DLQ payload (an empty object is stored when omitted).
   */
  context?: Record<string, unknown>;

  /**
   * Operation to re-run when the routing decision is `retry`. It is handed to
   * `errorRecovery.recover`, which owns the retry/backoff policy. Without it,
   * a `retry` decision is only reported back to the caller.
   */
  retryable?: () => Promise<unknown>;
}
|
|
|
|
/** Outcome reported by `handle()`. */
export interface HandleResult {
  /**
   * The decision that was acted on. This is `route()`'s answer, except that a
   * failed retry is reported as "dead_letter".
   */
  decision: RoutingDecision;

  /** The classified form of the error that was handled. */
  exception: SettlementException;

  /** True only when a retry was attempted and succeeded. */
  recovered?: boolean;

  /** Value produced by the successful recovery, when `recovered` is true. */
  recoveryResult?: unknown;
}
|
|
|
|
/**
 * Central dispatch: classify the error, pick a routing decision, act on it,
 * and report what happened.
 *
 * Actions taken here:
 *   - every call logs one warning describing the class, code and decision;
 *   - `retry` with `opts.retryable` set runs the operation through
 *     `errorRecovery.recover`; if that rejects, the exception is written to
 *     the DLQ and the result is reported as "dead_letter";
 *   - `dead_letter` and `escalate` write the exception to the DLQ;
 *   - `abort_transaction`, and `retry` without `opts.retryable`, are only
 *     reported.
 *
 * The plan's COMMITTED/ABORTED state transition is never performed here; the
 * caller stays in control of it.
 *
 * @param err Any thrown value.
 * @param opts Queue name, context payload and optional retry operation.
 * @returns The decision acted on, the classified exception, and whether a
 *   retry recovered.
 */
export async function handle(
  err: unknown,
  opts: HandleOptions = {},
): Promise<HandleResult> {
  const exception = classify(err);
  const decision = route(exception);

  // Single place that knows how an exception is parked in the DLQ.
  const sendToDlq = () =>
    addToDLQ(opts.queue ?? "exceptions", opts.context ?? {}, exception.message);

  logger.warn(
    {
      exceptionClass: exception.exceptionClass,
      code: exception.code,
      decision,
      details: exception.details,
      context: opts.context,
    },
    `ExceptionManager: ${exception.exceptionClass}/${exception.code} -> ${decision}`,
  );

  const retryFn = opts.retryable;
  if (decision === "retry" && retryFn) {
    let recoveryResult: unknown;
    try {
      recoveryResult = await errorRecovery.recover(exception, { fn: retryFn });
    } catch (retryErr) {
      // Retries exhausted: park the original exception instead.
      logger.warn({ retryErr }, "Retry exhausted, routing to DLQ");
      await sendToDlq();
      return { decision: "dead_letter", exception, recovered: false };
    }
    return { decision, exception, recovered: true, recoveryResult };
  }

  const needsDlq = decision === "dead_letter" || decision === "escalate";
  if (needsDlq) {
    await sendToDlq();
  }

  return { decision, exception, recovered: false };
}
|