Files
loc_az_hci/docs/operations/runbooks/proxmox-operations.md
defiQUG c39465c2bd
Some checks failed
Test / test (push) Has been cancelled
Initial commit: loc_az_hci (smom-dbis-138 excluded via .gitignore)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-08 09:04:46 -08:00

2.5 KiB

Proxmox Operations Runbook

Common Operations

Cluster Management

Check Cluster Status

# View cluster status
pvecm status

# List all nodes
pvecm nodes

# View cluster configuration
cat /etc/pve/corosync.conf

Add Node to Cluster

# On new node
pvecm add <existing-node-ip>

Remove Node from Cluster

# On a remaining cluster node, after the node being removed has been powered off
pvecm delnode <node-name>

VM Management

Create VM from Template

# Via CLI
qm clone <template-vmid> <new-vmid> --name <vm-name>
qm set <new-vmid> --net0 virtio,bridge=vmbr0
qm set <new-vmid> --ipconfig0 ip=<ip-address>/24,gw=<gateway>
qm start <new-vmid>

Migrate VM

# Live migration
qm migrate <vmid> <target-node> --online

# Stop and migrate
qm shutdown <vmid>
qm migrate <vmid> <target-node>

Enable HA for VM

# Via web UI: Datacenter → HA → Add
# Or via CLI
ha-manager add vm:<vmid> --state started

Storage Management

List Storage

pvesm status

Add NFS Storage

pvesm add nfs <storage-name> \
  --server <nfs-server> \
  --export <nfs-export-path> \
  --content images,iso,vztmpl,backup

Check Storage Usage

pvesm list <storage-name>
df -h

Backup Operations

Create Backup

# Via web UI: Backup → Create
# Or via CLI
vzdump <vmid> --storage <storage-name> --compress zstd

Restore from Backup

# Via web UI: Backup → Restore
# Or via CLI
qmrestore <backup-file> <vmid> --storage <storage-name>

Network Management

List Networks

cat /etc/network/interfaces
ip addr show

Add Bridge

# Edit /etc/network/interfaces
# Add bridge configuration
# Apply changes
ifup vmbr1

Troubleshooting

Check Node Status

# System status
pvecm status
systemctl status pve-cluster
systemctl status corosync
systemctl status pvedaemon

View Logs

# Cluster logs
journalctl -u pve-cluster
journalctl -u corosync

# VM configuration and currently active tasks
qm config <vmid>
cat /var/log/pve/tasks/active

Fix Cluster Issues

# Restart cluster services
systemctl restart pve-cluster
systemctl restart corosync

# Regenerate and redistribute node certificates (if needed; this does not rejoin a node)
pvecm updatecerts --force

Maintenance

Update Proxmox

apt update
apt dist-upgrade
pveam update

Reboot Node

# Ensure VMs are migrated or stopped
# Reboot
reboot

Maintenance Mode

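# Note: `pvecm expected` overrides the quorum vote count; it is not an HA maintenance mode.
# Lower expected votes so a single remaining node stays quorate while its peer is down
pvecm expected 1

# Restore expected votes once all nodes are back (2 for a two-node cluster)
pvecm expected 2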