Add full monorepo: virtual-banker, backend, frontend, docs, scripts, deployment
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
239
docs/specs/database/timeseries-schema.md
Normal file
239
docs/specs/database/timeseries-schema.md
Normal file
@@ -0,0 +1,239 @@
|
||||
# Time-Series Database Schema Specification
|
||||
|
||||
## Overview
|
||||
|
||||
This document specifies the time-series database schema using ClickHouse or TimescaleDB for storing mempool data, metrics, and analytics time-series data.
|
||||
|
||||
## Technology Choice
|
||||
|
||||
**Option 1: TimescaleDB** (PostgreSQL extension)
|
||||
- Pros: PostgreSQL compatibility, SQL interface, easier integration
|
||||
- Cons: Less optimized for very high throughput
|
||||
|
||||
**Option 2: ClickHouse**
|
||||
- Pros: Very high performance, columnar storage, excellent compression
|
||||
- Cons: Different SQL dialect, separate infrastructure
|
||||
|
||||
**Recommendation**: Start with TimescaleDB for easier integration, migrate to ClickHouse if needed for scale.
|
||||
|
||||
## TimescaleDB Schema
|
||||
|
||||
### Mempool Transactions Table
|
||||
|
||||
**Table**: `mempool_transactions`
|
||||
|
||||
```sql
|
||||
CREATE TABLE mempool_transactions (
|
||||
time TIMESTAMPTZ NOT NULL,
|
||||
chain_id INTEGER NOT NULL,
|
||||
hash VARCHAR(66) NOT NULL,
|
||||
from_address VARCHAR(42) NOT NULL,
|
||||
to_address VARCHAR(42),
|
||||
value NUMERIC(78, 0),
|
||||
gas_price BIGINT,
|
||||
max_fee_per_gas BIGINT,
|
||||
max_priority_fee_per_gas BIGINT,
|
||||
gas_limit BIGINT,
|
||||
nonce BIGINT,
|
||||
input_data_length INTEGER,
|
||||
first_seen TIMESTAMPTZ NOT NULL,
|
||||
status VARCHAR(20) DEFAULT 'pending', -- 'pending', 'confirmed', 'dropped'
|
||||
confirmed_block_number BIGINT,
|
||||
confirmed_at TIMESTAMPTZ,
|
||||
PRIMARY KEY (time, chain_id, hash)
|
||||
);
|
||||
|
||||
SELECT create_hypertable('mempool_transactions', 'time');
|
||||
|
||||
CREATE INDEX idx_mempool_chain_hash ON mempool_transactions(chain_id, hash);
|
||||
CREATE INDEX idx_mempool_chain_from ON mempool_transactions(chain_id, from_address);
|
||||
CREATE INDEX idx_mempool_chain_status ON mempool_transactions(chain_id, status, time);
|
||||
```
|
||||
|
||||
**Retention Policy**: 7 days for detailed data, aggregates for longer periods
|
||||
|
||||
### Network Metrics Table
|
||||
|
||||
**Table**: `network_metrics`
|
||||
|
||||
```sql
|
||||
CREATE TABLE network_metrics (
|
||||
time TIMESTAMPTZ NOT NULL,
|
||||
chain_id INTEGER NOT NULL,
|
||||
block_number BIGINT,
|
||||
tps DOUBLE PRECISION, -- Transactions per second
|
||||
gps DOUBLE PRECISION, -- Gas per second
|
||||
avg_gas_price BIGINT,
|
||||
pending_transactions INTEGER,
|
||||
block_time_seconds DOUBLE PRECISION,
|
||||
PRIMARY KEY (time, chain_id)
|
||||
);
|
||||
|
||||
SELECT create_hypertable('network_metrics', 'time');
|
||||
|
||||
CREATE INDEX idx_network_metrics_chain_time ON network_metrics(chain_id, time DESC);
|
||||
```
|
||||
|
||||
**Aggregation**: Pre-aggregate to 1-minute, 5-minute, 1-hour intervals
|
||||
|
||||
### Gas Price History Table
|
||||
|
||||
**Table**: `gas_price_history`
|
||||
|
||||
```sql
|
||||
CREATE TABLE gas_price_history (
|
||||
time TIMESTAMPTZ NOT NULL,
|
||||
chain_id INTEGER NOT NULL,
|
||||
block_number BIGINT,
|
||||
min_gas_price BIGINT,
|
||||
max_gas_price BIGINT,
|
||||
avg_gas_price BIGINT,
|
||||
p25_gas_price BIGINT, -- 25th percentile
|
||||
p50_gas_price BIGINT, -- 50th percentile (median)
|
||||
p75_gas_price BIGINT, -- 75th percentile
|
||||
p95_gas_price BIGINT, -- 95th percentile
|
||||
p99_gas_price BIGINT, -- 99th percentile
|
||||
PRIMARY KEY (time, chain_id)
|
||||
);
|
||||
|
||||
SELECT create_hypertable('gas_price_history', 'time');
|
||||
```
|
||||
|
||||
### Address Activity Metrics Table
|
||||
|
||||
**Table**: `address_activity_metrics`
|
||||
|
||||
```sql
|
||||
CREATE TABLE address_activity_metrics (
|
||||
time TIMESTAMPTZ NOT NULL,
|
||||
chain_id INTEGER NOT NULL,
|
||||
address VARCHAR(42) NOT NULL,
|
||||
transaction_count INTEGER,
|
||||
received_count INTEGER,
|
||||
sent_count INTEGER,
|
||||
total_received NUMERIC(78, 0),
|
||||
total_sent NUMERIC(78, 0),
|
||||
PRIMARY KEY (time, chain_id, address)
|
||||
);
|
||||
|
||||
SELECT create_hypertable('address_activity_metrics', 'time',
|
||||
chunk_time_interval => INTERVAL '1 day');
|
||||
|
||||
CREATE INDEX idx_address_activity_chain_address ON address_activity_metrics(chain_id, address, time DESC);
|
||||
```
|
||||
|
||||
**Aggregation**: Pre-aggregate to hourly/daily for addresses
|
||||
|
||||
## ClickHouse Schema (Alternative)
|
||||
|
||||
### Mempool Transactions Table
|
||||
|
||||
```sql
|
||||
CREATE TABLE mempool_transactions (
|
||||
time DateTime('UTC') NOT NULL,
|
||||
chain_id UInt32 NOT NULL,
|
||||
hash String NOT NULL,
|
||||
from_address String NOT NULL,
|
||||
to_address Nullable(String),
|
||||
value Decimal128(0),
|
||||
gas_price UInt64,
|
||||
max_fee_per_gas Nullable(UInt64),
|
||||
max_priority_fee_per_gas Nullable(UInt64),
|
||||
gas_limit UInt64,
|
||||
nonce UInt64,
|
||||
input_data_length UInt32,
|
||||
first_seen DateTime('UTC') NOT NULL,
|
||||
status String DEFAULT 'pending',
|
||||
confirmed_block_number Nullable(UInt64),
|
||||
confirmed_at Nullable(DateTime('UTC'))
|
||||
) ENGINE = MergeTree()
|
||||
PARTITION BY toYYYYMM(time)
|
||||
ORDER BY (chain_id, time, hash)
|
||||
TTL time + INTERVAL 7 DAY; -- Auto-delete after 7 days
|
||||
```
|
||||
|
||||
## Data Retention and Aggregation
|
||||
|
||||
### Retention Policies
|
||||
|
||||
**Raw Data**:
|
||||
- Mempool transactions: 7 days
|
||||
- Network metrics: 30 days
|
||||
- Gas price history: 90 days
|
||||
- Address activity: 30 days
|
||||
|
||||
**Aggregated Data**:
|
||||
- 1-minute aggregates: 90 days
|
||||
- 5-minute aggregates: 1 year
|
||||
- 1-hour aggregates: 5 years
|
||||
- Daily aggregates: Indefinite
|
||||
|
||||
### Continuous Aggregates (TimescaleDB)
|
||||
|
||||
```sql
|
||||
-- 1-minute network metrics aggregate
|
||||
CREATE MATERIALIZED VIEW network_metrics_1m
|
||||
WITH (timescaledb.continuous) AS
|
||||
SELECT
|
||||
time_bucket('1 minute', time) AS bucket,
|
||||
chain_id,
|
||||
AVG(tps) AS avg_tps,
|
||||
AVG(gps) AS avg_gps,
|
||||
AVG(avg_gas_price) AS avg_gas_price,
|
||||
AVG(pending_transactions) AS avg_pending_tx
|
||||
FROM network_metrics
|
||||
GROUP BY bucket, chain_id;
|
||||
|
||||
-- Add refresh policy
|
||||
SELECT add_continuous_aggregate_policy('network_metrics_1m',
|
||||
start_offset => INTERVAL '1 hour',
|
||||
end_offset => INTERVAL '1 minute',
|
||||
schedule_interval => INTERVAL '1 minute');
|
||||
```
|
||||
|
||||
## Query Patterns
|
||||
|
||||
### Recent Mempool Transactions
|
||||
|
||||
```sql
|
||||
SELECT * FROM mempool_transactions
|
||||
WHERE chain_id = 138
|
||||
AND time > NOW() - INTERVAL '1 hour'
|
||||
AND status = 'pending'
|
||||
ORDER BY time DESC
|
||||
LIMIT 100;
|
||||
```
|
||||
|
||||
### Gas Price Statistics
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
time_bucket('5 minutes', time) AS bucket,
|
||||
AVG(avg_gas_price) AS avg_gas_price,
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY avg_gas_price) AS median_gas_price
|
||||
FROM gas_price_history
|
||||
WHERE chain_id = 138
|
||||
AND time > NOW() - INTERVAL '24 hours'
|
||||
GROUP BY bucket
|
||||
ORDER BY bucket DESC;
|
||||
```
|
||||
|
||||
### Network Throughput
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
time_bucket('1 minute', time) AS bucket,
|
||||
AVG(tps) AS avg_tps,
|
||||
MAX(tps) AS max_tps
|
||||
FROM network_metrics
|
||||
WHERE chain_id = 138
|
||||
AND time > NOW() - INTERVAL '1 hour'
|
||||
GROUP BY bucket
|
||||
ORDER BY bucket DESC;
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- Mempool Service: See `../mempool/mempool-service.md`
|
||||
- Observability: See `../observability/metrics-monitoring.md`
|
||||
|
||||
Reference in New Issue
Block a user