Files
loc_az_hci/scripts/vm-management/configure/complete-all-vm-tasks.sh
defiQUG c39465c2bd
Some checks failed
Test / test (push) Has been cancelled
Initial commit: loc_az_hci (smom-dbis-138 excluded via .gitignore)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-08 09:04:46 -08:00

481 lines
12 KiB
Bash
Executable File

#!/bin/bash
# Complete All VM Tasks via SSH
# This script connects to each VM and completes all pending tasks from the TODO list

# Pull in the invoking user's shell configuration when it exists. The guard
# avoids a "No such file or directory" error on accounts without a ~/.bashrc.
# This happens before `set -e` so a failing command inside the user's
# ~/.bashrc cannot abort the script.
if [[ -r ~/.bashrc ]]; then
  # shellcheck source=/dev/null
  source ~/.bashrc
fi

# Abort on any unhandled command failure from here on.
set -e

# ANSI color escape sequences; they are expanded by the log_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # reset / "no color"
# Print an informational message prefixed with a green [INFO] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded)
log_info() {
  local msg=$1
  printf '%b\n' "${GREEN}[INFO]${NC} ${msg}"
}
# Print an error message prefixed with a red [ERROR] tag (to stdout).
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded)
log_error() {
  local msg=$1
  printf '%b\n' "${RED}[ERROR]${NC} ${msg}"
}
# Print a progress message prefixed with a blue [STEP] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded)
log_step() {
  local msg=$1
  printf '%b\n' "${BLUE}[STEP]${NC} ${msg}"
}
# Print a warning message prefixed with a yellow [WARN] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded)
log_warn() {
  local msg=$1
  printf '%b\n' "${YELLOW}[WARN]${NC} ${msg}"
}
# Print a three-line cyan banner: a rule, the title, and a second rule.
# Globals:   CYAN, NC (read)
# Arguments: $1 - banner title (backslash escapes are expanded)
log_header() {
  local title=$1
  local rule="${CYAN}========================================${NC}"
  printf '%b\n' "$rule"
  printf '%b\n' "${CYAN}${title}${NC}"
  printf '%b\n' "$rule"
}
# Import settings from a .env file in the current directory, when one exists.
# The pipeline drops comment lines and blank lines, strips everything from the
# first '#' on each remaining line, trims surrounding whitespace, and keeps
# only lines containing '='. `set -a` exports every variable assigned while
# the filtered text is sourced.
if [[ -f .env ]]; then
  set -a
  source <(grep -v '^#' .env | grep -v '^$' | sed 's/#.*$//' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | grep '=')
  set +a
fi

# Fall back to defaults when the SSH settings are unset or empty.
: "${SSH_USER:=ubuntu}"
: "${SSH_KEY:=$HOME/.ssh/id_rsa}"

# A literal leading '~' (e.g. from .env) is not expanded by the shell, so
# replace it with $HOME by hand.
case "$SSH_KEY" in
  "~"*) SSH_KEY="${HOME}${SSH_KEY#\~}" ;;
esac
# VM inventory keyed by VM ID. Each value is "name:ip:install-script".
declare -A VMS=()
VMS[100]="cloudflare-tunnel:192.168.1.60:setup-cloudflare-tunnel.sh"
VMS[101]="k3s-master:192.168.1.188:setup-k3s.sh"
VMS[102]="git-server:192.168.1.121:setup-git-server.sh"
VMS[103]="observability:192.168.1.82:setup-observability.sh"
# Probe a host with one ICMP echo request (3 second wait), discarding output.
# Arguments: $1 - IP address or hostname to ping
# Returns:   0 if ping succeeds, 1 for any ping failure
check_vm_reachable() {
  local target=$1
  ping -c 1 -W 3 "$target" > /dev/null 2>&1 || return 1
  return 0
}
# Try a trivial remote command to confirm that key-based SSH login works.
# Globals:   SSH_KEY (read)
# Arguments: $1 - VM IP address
#            $2 - remote login user
# Returns:   0 if the ssh command succeeds, 1 otherwise
check_ssh() {
  local host=$1
  local login=$2
  ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -i "$SSH_KEY" \
    "${login}@${host}" "echo 'SSH OK'" > /dev/null 2>&1 || return 1
  return 0
}
# Poll a VM with check_vm_reachable until it answers or the retry budget
# (30 probes, 2 seconds apart) is used up. One dot is printed per failed probe.
# Arguments: $1 - VM IP address
#            $2 - VM display name (log output only)
# Returns:   0 as soon as the VM is reachable, 1 after the last failed probe
wait_for_vm() {
  local ip=$1
  local name=$2
  local -r retries=30
  local tries

  log_info "Waiting for $name ($ip) to be reachable..."
  for (( tries = 0; tries < retries; tries++ )); do
    if check_vm_reachable "$ip"; then
      log_info "✓ VM is reachable"
      return 0
    fi
    echo -n "."
    sleep 2
  done

  # Terminate the line of progress dots before reporting the failure.
  echo ""
  log_error "VM at $ip is not reachable"
  return 1
}
# Poll a VM with check_ssh until login works or the retry budget
# (60 probes, 5 seconds apart) is used up. One dot is printed per failed probe.
# Globals:   SSH_USER (read)
# Arguments: $1 - VM IP address
#            $2 - VM display name (log output only)
# Returns:   0 as soon as SSH answers, 1 after the last failed probe
wait_for_ssh() {
  local ip=$1
  local name=$2
  local -r retries=60
  local tries

  log_info "Waiting for SSH on $name ($ip)..."
  for (( tries = 0; tries < retries; tries++ )); do
    if check_ssh "$ip" "$SSH_USER"; then
      log_info "✓ SSH is ready"
      return 0
    fi
    echo -n "."
    sleep 5
  done

  # Terminate the line of progress dots before reporting the failure.
  echo ""
  log_error "SSH not available on $ip"
  return 1
}
# Install, enable and start the QEMU guest agent on a VM over SSH.
# Globals:   SSH_KEY, SSH_USER (read)
# Arguments: $1 - VM IP address
#            $2 - VM display name (log output only)
# Returns:   0 if the ssh session exits 0, 1 otherwise
install_guest_agent() {
  local ip=$1
  local name=$2

  log_step "Installing QEMU Guest Agent on $name..."

  # ssh is tested directly by `if`: with `set -e` active, a bare failing ssh
  # would terminate the whole script before a separate `$?` check could run,
  # so the error branch below would never be reached.
  # NOTE(review): the remote commands end with `... | head -3`, so the remote
  # shell presumably exits 0 even when apt-get fails - confirm whether an
  # install failure should be surfaced here.
  if ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "${SSH_USER}@${ip}" <<'EOF'
sudo apt-get update -qq
sudo apt-get install -y qemu-guest-agent
sudo systemctl enable qemu-guest-agent
sudo systemctl start qemu-guest-agent
sudo systemctl status qemu-guest-agent --no-pager | head -3
EOF
  then
    log_info "✓ Guest agent installed and started"
    return 0
  fi

  log_error "✗ Failed to install guest agent"
  return 1
}
# Copy a service install script to a VM and run it there with sudo.
# The script is read from scripts/<name> relative to the current directory
# and uploaded to /tmp/install-service.sh on the VM.
# Globals:   SSH_KEY, SSH_USER (read)
# Arguments: $1 - VM IP address
#            $2 - VM display name (log output only)
#            $3 - script file name under scripts/
# Returns:   0 on success, 1 if the copy or the remote run fails
apply_install_script() {
  local ip=$1
  local name=$2
  local script=$3

  log_step "Applying install script: $script on $name..."

  # Copy script to VM
  if ! scp -o StrictHostKeyChecking=no -i "$SSH_KEY" "scripts/${script}" "${SSH_USER}@${ip}:/tmp/install-service.sh"; then
    log_error "Failed to copy script"
    return 1
  fi

  # Run script. ssh is tested directly by `if`: with `set -e` active, a bare
  # failing ssh would terminate the whole script before a separate `$?` check
  # could run, so the failure would never be logged.
  if ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "${SSH_USER}@${ip}" <<'EOF'
sudo chmod +x /tmp/install-service.sh
sudo /tmp/install-service.sh
EOF
  then
    log_info "✓ Install script completed"
    return 0
  fi

  log_error "✗ Install script failed"
  return 1
}
# Check over SSH whether the service(s) belonging to a VM role are active.
# Globals:   SSH_KEY, SSH_USER (read)
# Arguments: $1 - VM IP address
#            $2 - VM display name (log output only)
#            $3 - role to check: cloudflared | k3s | gitea | observability
# Returns:   0 if the role's services are active, 1 if not or if the role
#            name is not recognized
verify_service() {
  local ip=$1
  local name=$2
  local service_name=$3

  log_step "Verifying $service_name on $name..."

  case "$service_name" in
    cloudflared)
      if ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "${SSH_USER}@${ip}" "sudo systemctl is-active --quiet cloudflared"; then
        log_info "✓ Cloudflare Tunnel is running"
        return 0
      else
        log_warn "⚠ Cloudflare Tunnel may not be running (manual config may be needed)"
        return 1
      fi
      ;;
    k3s)
      # The unit must be active and kubectl must be able to list nodes.
      if ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "${SSH_USER}@${ip}" "sudo systemctl is-active --quiet k3s && kubectl get nodes" > /dev/null 2>&1; then
        log_info "✓ K3s is running"
        # Second call repeats the listing, this time showing it to the user.
        ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "${SSH_USER}@${ip}" "kubectl get nodes"
        return 0
      else
        log_warn "⚠ K3s may not be fully ready"
        return 1
      fi
      ;;
    gitea)
      if ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "${SSH_USER}@${ip}" "sudo systemctl is-active --quiet gitea"; then
        log_info "✓ Gitea is running"
        log_info " Access at: http://${ip}:3000"
        return 0
      else
        log_warn "⚠ Gitea may not be running"
        return 1
      fi
      ;;
    observability)
      # Both units are always probed; success requires both to be active.
      local prom_running=false
      local grafana_running=false
      if ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "${SSH_USER}@${ip}" "sudo systemctl is-active --quiet prometheus"; then
        prom_running=true
      fi
      if ssh -o StrictHostKeyChecking=no -i "$SSH_KEY" "${SSH_USER}@${ip}" "sudo systemctl is-active --quiet grafana-server"; then
        grafana_running=true
      fi
      if [ "$prom_running" = true ] && [ "$grafana_running" = true ]; then
        log_info "✓ Prometheus and Grafana are running"
        log_info " Prometheus: http://${ip}:9090"
        log_info " Grafana: http://${ip}:3000"
        return 0
      else
        log_warn "⚠ Some services may not be running"
        return 1
      fi
      ;;
    *)
      # Without this arm an unrecognized role fell through the case and the
      # function reported success without checking anything.
      log_warn "⚠ Unknown service '$service_name'; nothing verified"
      return 1
      ;;
  esac
}
# Process VM 100: Cloudflare Tunnel
# Waits for the VM and SSH, installs the guest agent, applies the install
# script, checks the service, then prints the manual configuration steps.
# Returns: 0 if the guest agent and install script steps succeeded,
#          1 if the VM was skipped or one of those steps failed
setup_cloudflare_tunnel() {
  local vmid=100
  local name="cloudflare-tunnel"
  local ip="192.168.1.60"
  local script="setup-cloudflare-tunnel.sh"
  local status=0

  log_header "VM $vmid: $name"

  # Wait for VM
  if ! wait_for_vm "$ip" "$name"; then
    log_error "Skipping $name"
    return 1
  fi
  if ! wait_for_ssh "$ip" "$name"; then
    log_error "Skipping $name"
    return 1
  fi

  # Each step is guarded with `||` so that a failure is recorded instead of
  # letting `set -e` terminate the whole script mid-VM.
  # Install guest agent
  install_guest_agent "$ip" "$name" || status=1
  # Apply install script
  apply_install_script "$ip" "$name" "$script" || status=1

  # Verify. The tunnel still needs the manual steps listed below, so a failed
  # check is expected on a fresh VM and is treated as advisory only.
  verify_service "$ip" "$name" "cloudflared" || true

  log_warn "Note: Cloudflare Tunnel requires manual configuration:"
  log_info " 1. Run: cloudflared tunnel login"
  log_info " 2. Create tunnel: cloudflared tunnel create azure-stack-hci"
  log_info " 3. Update /etc/cloudflared/config.yml"
  log_info " 4. Configure DNS records in Cloudflare"
  echo ""
  return "$status"
}
# Process VM 101: K3s
# Waits for the VM and SSH, installs the guest agent, applies the install
# script and verifies the service.
# Returns: 0 if every step succeeded, 1 if the VM was skipped or any step
#          (guest agent, install script, verification) failed
setup_k3s() {
  local vmid=101
  local name="k3s-master"
  local ip="192.168.1.188"
  local script="setup-k3s.sh"
  local status=0

  log_header "VM $vmid: $name"

  # Wait for VM
  if ! wait_for_vm "$ip" "$name"; then
    log_error "Skipping $name"
    return 1
  fi
  if ! wait_for_ssh "$ip" "$name"; then
    log_error "Skipping $name"
    return 1
  fi

  # Each step is guarded with `||` so that a failure is recorded instead of
  # letting `set -e` terminate the whole script mid-VM.
  # Install guest agent
  install_guest_agent "$ip" "$name" || status=1
  # Apply install script
  apply_install_script "$ip" "$name" "$script" || status=1

  # Verify; only announce readiness when the check actually passed.
  if verify_service "$ip" "$name" "k3s"; then
    log_info "K3s cluster is ready!"
    log_info " Kubeconfig: /etc/rancher/k3s/k3s.yaml"
  else
    status=1
  fi
  echo ""
  return "$status"
}
# Process VM 102: Git Server
# Waits for the VM and SSH, installs the guest agent, applies the install
# script and verifies the service.
# Returns: 0 if every step succeeded, 1 if the VM was skipped or any step
#          (guest agent, install script, verification) failed
setup_git_server() {
  local vmid=102
  local name="git-server"
  local ip="192.168.1.121"
  local script="setup-git-server.sh"
  local status=0

  log_header "VM $vmid: $name"

  # Wait for VM
  if ! wait_for_vm "$ip" "$name"; then
    log_error "Skipping $name"
    return 1
  fi
  if ! wait_for_ssh "$ip" "$name"; then
    log_error "Skipping $name"
    return 1
  fi

  # Each step is guarded with `||` so that a failure is recorded instead of
  # letting `set -e` terminate the whole script mid-VM.
  # Install guest agent
  install_guest_agent "$ip" "$name" || status=1
  # Apply install script
  apply_install_script "$ip" "$name" "$script" || status=1

  # Verify; only announce readiness when the check actually passed.
  if verify_service "$ip" "$name" "gitea"; then
    log_info "Gitea is ready!"
    log_info " Access at: http://${ip}:3000"
    log_warn " Complete initial setup via web UI"
  else
    status=1
  fi
  echo ""
  return "$status"
}
# Process VM 103: Observability
# Waits for the VM and SSH, installs the guest agent, applies the install
# script and verifies the services.
# Returns: 0 if every step succeeded, 1 if the VM was skipped or any step
#          (guest agent, install script, verification) failed
setup_observability() {
  local vmid=103
  local name="observability"
  local ip="192.168.1.82"
  local script="setup-observability.sh"
  local status=0

  log_header "VM $vmid: $name"

  # Wait for VM
  if ! wait_for_vm "$ip" "$name"; then
    log_error "Skipping $name"
    return 1
  fi
  if ! wait_for_ssh "$ip" "$name"; then
    log_error "Skipping $name"
    return 1
  fi

  # Each step is guarded with `||` so that a failure is recorded instead of
  # letting `set -e` terminate the whole script mid-VM.
  # Install guest agent
  install_guest_agent "$ip" "$name" || status=1
  # Apply install script
  apply_install_script "$ip" "$name" "$script" || status=1

  # Verify; only announce readiness when the check actually passed.
  if verify_service "$ip" "$name" "observability"; then
    log_info "Observability stack is ready!"
    log_info " Prometheus: http://${ip}:9090"
    log_info " Grafana: http://${ip}:3000 (admin/admin)"
    log_warn " Change Grafana password on first login"
  else
    status=1
  fi
  echo ""
  return "$status"
}
# Enable the guest agent option for VMs 100-103 through the Proxmox HTTP API.
# Best-effort: missing credentials or a failed login only produce a warning.
# Globals:   PVE_ROOT_PASS (read); PVE_USERNAME (read, defaulted to root@pam);
#            PROXMOX_URL, PROXMOX_NODE (written)
# Returns:   always 0
enable_guest_agent_proxmox() {
  local response ticket csrf vmid

  log_header "Enabling Guest Agent in Proxmox"

  if [ -z "${PVE_ROOT_PASS:-}" ]; then
    log_warn "PVE_ROOT_PASS not set, skipping Proxmox configuration"
    return 0
  fi

  PVE_USERNAME="${PVE_USERNAME:-root@pam}"
  PROXMOX_URL="https://192.168.1.206:8006"
  PROXMOX_NODE="pve"

  # Get authentication ticket. --data-urlencode keeps credentials containing
  # '&', '+', '%' or '=' intact in the form body. A curl failure leaves the
  # response empty and is handled by the ticket check below.
  response=$(curl -k -s \
    --data-urlencode "username=$PVE_USERNAME" \
    --data-urlencode "password=$PVE_ROOT_PASS" \
    "$PROXMOX_URL/api2/json/access/ticket") || response=""

  # Extract the two tokens from the JSON reply with plain text tools.
  ticket=$(printf '%s\n' "$response" | grep -o '"ticket":"[^"]*' | cut -d'"' -f4) || true
  csrf=$(printf '%s\n' "$response" | grep -o '"CSRFPreventionToken":"[^"]*' | cut -d'"' -f4) || true

  if [ -z "$ticket" ] || [ -z "$csrf" ]; then
    log_warn "Failed to authenticate with Proxmox, skipping"
    return 0
  fi

  # Enable agent for each VM
  for vmid in 100 101 102 103; do
    log_info "Enabling guest agent in Proxmox for VM $vmid..."
    # -f makes curl exit non-zero on an HTTP error status; without it a
    # rejected request would still be reported as success. curl is tested
    # directly by `if` so that a failure cannot trip `set -e`.
    if curl -k -s -f -X PUT \
      -H "Cookie: PVEAuthCookie=$ticket" \
      -H "CSRFPreventionToken: $csrf" \
      -d "agent=1" \
      "$PROXMOX_URL/api2/json/nodes/$PROXMOX_NODE/qemu/$vmid/config" > /dev/null 2>&1; then
      log_info "✓ Agent enabled for VM $vmid"
    else
      log_warn "⚠ Failed to enable agent for VM $vmid"
    fi
  done
  echo ""
}
# Run the setup routine for every VM, then the Proxmox guest-agent step, and
# print a summary.
# Globals:   SSH_KEY, SSH_USER (read)
# Returns:   0 when every setup routine returned success, 1 otherwise.
#            Exits 1 immediately when the SSH key file does not exist.
main() {
  local -a failed=()
  local vm

  log_header "Complete All VM Tasks"
  echo ""

  if [ ! -f "$SSH_KEY" ]; then
    log_error "SSH key not found: $SSH_KEY"
    log_info "Set SSH_KEY environment variable or create key pair"
    exit 1
  fi
  log_info "Using SSH key: $SSH_KEY"
  log_info "SSH user: $SSH_USER"
  echo ""

  # Process each VM. The `||` keeps `set -e` from ending the run when one VM
  # is skipped or fails, so the remaining VMs are still processed.
  setup_cloudflare_tunnel || failed+=("cloudflare-tunnel (VM 100)")
  setup_k3s || failed+=("k3s-master (VM 101)")
  setup_git_server || failed+=("git-server (VM 102)")
  setup_observability || failed+=("observability (VM 103)")

  # Enable guest agent in Proxmox
  enable_guest_agent_proxmox || log_warn "Proxmox guest agent configuration failed"

  if [ "${#failed[@]}" -eq 0 ]; then
    log_header "All Tasks Complete!"
    echo ""
    log_info "Summary:"
    echo " ✓ Guest agent installed on all VMs"
    echo " ✓ Cloudflare Tunnel setup (manual config needed)"
    echo " ✓ K3s installed and verified"
    echo " ✓ Gitea installed (initial setup needed)"
    echo " ✓ Observability stack installed"
  else
    # Do not claim success for VMs whose setup routine reported a failure.
    log_header "Tasks Finished With Errors"
    echo ""
    log_error "VMs skipped or with failed steps:"
    for vm in "${failed[@]}"; do
      echo " ✗ $vm"
    done
  fi
  echo ""
  log_warn "Manual steps remaining:"
  echo " 1. Configure Cloudflare Tunnel (VM 100)"
  echo " 2. Complete Gitea initial setup (VM 102)"
  echo " 3. Change Grafana password (VM 103)"
  echo " 4. Configure K3s namespaces and services (VM 101)"
  echo ""

  if [ "${#failed[@]}" -gt 0 ]; then
    return 1
  fi
  return 0
}
# Entry point: run main with the script's command-line arguments.
main "$@"