Files
loc_az_hci/scripts/fix/fix-vm100-guest-agent-restart.sh
defiQUG c39465c2bd
Some checks failed
Test / test (push) Has been cancelled
Initial commit: loc_az_hci (smom-dbis-138 excluded via .gitignore)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-08 09:04:46 -08:00

107 lines
3.4 KiB
Bash
Executable File

#!/bin/bash
# Fix VM 100 Guest Agent Restart Issues
# This script adds a restart delay to prevent restart loops
#
# Environment overrides:
#   SSH_KEY       private key passed to ssh -i  (default: $HOME/.ssh/id_ed25519_proxmox)
#   PROXMOX_HOST  address of the Proxmox host   (default: 192.168.1.206)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Source helper functions (get_vm_ip_or_warn is expected to come from here)
source "$PROJECT_ROOT/scripts/lib/proxmox_vm_helpers.sh"

VM_ID=100
VM_USER="ubuntu"
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519_proxmox}"
PROXMOX_HOST="${PROXMOX_HOST:-192.168.1.206}"

echo "=== Fixing VM 100 Guest Agent Restart Issues ==="
echo ""

# Get VM IP
echo "Getting VM 100 IP address..."
# Under `set -e` a failing command substitution in a plain assignment aborts
# the script immediately, which would skip the error message below. Treat a
# non-zero helper status the same as an empty result so the failure is reported.
ip=$(get_vm_ip_or_warn "$VM_ID" "$PROXMOX_HOST" "$SSH_KEY") || ip=""
if [[ -z "$ip" ]]; then
  echo "ERROR: Could not get IP for VM $VM_ID" >&2
  exit 1
fi
echo "VM 100 IP: $ip"
echo ""
# SSH into Proxmox host, then into VM 100
#
# On the VM this installs a systemd drop-in for qemu-guest-agent
# (RestartSec=5, TimeoutStartSec=30), reloads systemd, restarts the service and
# exits non-zero if the service is not active 3 seconds after the restart.
#
# Both hops run the piped script under an explicit `bash -s`: the VM script
# relies on the bash-only `set -o pipefail`, and giving ssh a command avoids the
# "Pseudo-terminal will not be allocated" warning that appears when ssh is
# started without a command and stdin is not a terminal.
#
# Quoting of the nested here-documents:
#   * EOF is unquoted, so THIS shell expands $SSH_KEY, ${VM_USER} and ${ip}
#     before anything is sent to the Proxmox host.
#     NOTE(review): the second hop therefore uses the local key path on the
#     Proxmox host; confirm the key exists at that path there.
#   * VMEOF and OVERRIDE are quoted, which only stops the remote shells from
#     expanding them. Because they sit inside the unquoted EOF body, the local
#     shell would still expand any dollar sign or backtick placed in them, so
#     the VM script must stay free of those characters (or escape them).
echo "Connecting to VM 100 via Proxmox host..."
ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no "root@${PROXMOX_HOST}" 'bash -s' <<EOF
ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no "${VM_USER}@${ip}" 'bash -s' <<'VMEOF'
set -euo pipefail

echo "=== Current Guest Agent Status ==="
systemctl status qemu-guest-agent --no-pager | head -10 || true
echo ""

echo "=== Creating systemd override directory ==="
sudo mkdir -p /etc/systemd/system/qemu-guest-agent.service.d/

echo "=== Creating override configuration ==="
sudo tee /etc/systemd/system/qemu-guest-agent.service.d/override.conf > /dev/null <<'OVERRIDE'
[Service]
# Add 5 second delay before restart to prevent restart loops
RestartSec=5
# Increase timeout for service start
TimeoutStartSec=30
OVERRIDE

echo "=== Reloading systemd daemon ==="
sudo systemctl daemon-reload

echo "=== Verifying override configuration ==="
systemctl cat qemu-guest-agent.service | grep -A 5 "override.conf" || true
echo ""

echo "=== Restarting guest agent service ==="
sudo systemctl restart qemu-guest-agent

echo "=== Waiting for service to stabilize ==="
sleep 3

echo "=== Checking service status ==="
systemctl status qemu-guest-agent --no-pager | head -15 || true
echo ""

echo "=== Verifying service is running ==="
if systemctl is-active --quiet qemu-guest-agent; then
  echo "✅ Guest agent service is active"
else
  echo "❌ Guest agent service is not active"
  exit 1
fi
echo ""

echo "=== Checking restart configuration ==="
systemctl show qemu-guest-agent | grep -E "RestartSec|Restart=" || true
echo ""

echo "✅ Guest agent restart fix completed successfully"
VMEOF
EOF
echo ""
echo "=== Testing guest agent from Proxmox host ==="
# Advisory check: ask the Proxmox host to run `hostname` inside the VM through
# the guest agent. The agent is probed once and its output captured, so a
# transient failure can only produce the warning below and never a non-zero
# exit status that would abort this script under `set -e`.
#
# The here-document delimiter is unquoted: $VM_ID is expanded locally, while
# the backslash-escaped dollar signs are passed through and expanded by the
# shell on the Proxmox host.
ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no "root@${PROXMOX_HOST}" 'bash -s' <<EOF
echo "Testing guest agent connection..."
if probe_output=\$(qm guest exec $VM_ID -- hostname 2>/dev/null); then
  echo "✅ Guest agent is responding"
  printf '%s\n' "\$probe_output"
else
  echo "⚠️ Guest agent test failed (may need a moment to stabilize)"
fi
EOF
# Closing summary: report what was changed and print a ready-to-copy command
# for checking the service later. Each argument below becomes one output line.
printf '%s\n' \
  "" \
  "=== Fix Complete ===" \
  "The guest agent service now has a 5-second restart delay." \
  "This should prevent restart loops and connection timeouts." \
  "" \
  "Monitor the service with:" \
  " ssh root@${PROXMOX_HOST} 'qm guest exec $VM_ID -- systemctl status qemu-guest-agent'"