Files
Sankofa/crossplane-provider-proxmox/pkg/proxmox/client.go
defiQUG 9daf1fd378 Apply Composer changes: comprehensive API updates, migrations, middleware, and infrastructure improvements
- Add comprehensive database migrations (001-024) for schema evolution
- Enhance API schema with expanded type definitions and resolvers
- Add new middleware: audit logging, rate limiting, MFA enforcement, security, tenant auth
- Implement new services: AI optimization, billing, blockchain, compliance, marketplace
- Add adapter layer for cloud integrations (Cloudflare, Kubernetes, Proxmox, storage)
- Update Crossplane provider with enhanced VM management capabilities
- Add comprehensive test suite for API endpoints and services
- Update frontend components with improved GraphQL subscriptions and real-time updates
- Enhance security configurations and headers (CSP, CORS, etc.)
- Update documentation and configuration files
- Add new CI/CD workflows and validation scripts
- Implement design system improvements and UI enhancements
2025-12-12 18:01:35 -08:00

1345 lines
38 KiB
Go

package proxmox
import (
"context"
"fmt"
"strconv"
"strings"
"time"
"github.com/pkg/errors"
)
// Client represents a Proxmox API client. It bundles the HTTP client used for
// requests with the connection settings it was constructed from.
type Client struct {
// httpClient performs every API request made through this Client.
httpClient *HTTPClient
// endpoint is the API endpoint passed to the constructor.
endpoint string
// username and password are set by NewClient; empty for token clients.
username string
password string
// token is set by NewClientWithToken; empty for password clients.
token string
// insecure records the insecureSkipTLS flag passed to the constructor.
insecure bool
}
// NewClient builds a Proxmox API client for endpoint using username/password
// credentials.
//
// When both username and password are non-empty they are handed to the HTTP
// client and an authentication round-trip is performed with a 10 second
// timeout; an authentication failure is returned as an error. When either is
// empty the client is returned without credentials and without authenticating.
func NewClient(endpoint, username, password string, insecureSkipTLS bool) (*Client, error) {
	transport := NewHTTPClient(endpoint, insecureSkipTLS)
	c := &Client{
		httpClient: transport,
		endpoint:   endpoint,
		username:   username,
		password:   password,
		insecure:   insecureSkipTLS,
	}

	// Nothing to authenticate with: hand back the bare client.
	if username == "" || password == "" {
		return c, nil
	}

	transport.SetCredentials(username, password)

	authCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	if err := transport.Authenticate(authCtx); err != nil {
		return nil, errors.Wrap(err, "failed to authenticate")
	}
	return c, nil
}
// NewClientWithToken builds a Proxmox API client that authenticates with an
// API token. The token is installed on the HTTP client; no request is made
// here, and the returned error is always nil.
func NewClientWithToken(endpoint, token string, insecureSkipTLS bool) (*Client, error) {
	transport := NewHTTPClient(endpoint, insecureSkipTLS)
	transport.SetToken(token)

	c := new(Client)
	c.httpClient = transport
	c.endpoint = endpoint
	c.token = token
	c.insecure = insecureSkipTLS
	return c, nil
}
// RetryConfig defines retry behavior for Retry.
type RetryConfig struct {
// MaxRetries is the number of retries allowed after the first attempt
// (Retry makes at most MaxRetries+1 calls).
MaxRetries int
// BaseDelay is the wait before the first retry; Retry doubles it for each
// further retry.
BaseDelay time.Duration
// MaxDelay caps the backoff delay computed from BaseDelay.
MaxDelay time.Duration
}
// DefaultRetryConfig returns the retry settings used by the Client's public
// operations: 3 retries, starting at a 1 second delay and capped at 30 seconds.
func DefaultRetryConfig() RetryConfig {
	var cfg RetryConfig
	cfg.MaxRetries = 3
	cfg.BaseDelay = time.Second
	cfg.MaxDelay = 30 * time.Second
	return cfg
}
// RetryableError indicates an error that should be retried.
type RetryableError struct {
	// Err is the underlying failure.
	Err error
	// RetryAfter, when positive, is an extra wait Retry performs before the
	// next attempt.
	RetryAfter time.Duration
}

// Error returns the message of the wrapped error. It is safe to call on a nil
// receiver or when Err is nil, in which case a generic message is returned
// instead of panicking.
func (e *RetryableError) Error() string {
	if e == nil || e.Err == nil {
		return "retryable error"
	}
	return e.Err.Error()
}

// Unwrap exposes the wrapped error so errors.Is and errors.As can see through
// the retry marker.
func (e *RetryableError) Unwrap() error {
	if e == nil {
		return nil
	}
	return e.Err
}
// IsRetryable reports whether err is a *RetryableError. A nil error is never
// retryable. Only the error value itself is inspected; wrapped errors are not
// unwrapped.
func IsRetryable(err error) bool {
	switch err.(type) {
	case *RetryableError:
		return true
	default:
		return false
	}
}
// Retry executes fn with retry logic.
//
// fn is always called at least once. It is called again only when it returns
// a *RetryableError, up to config.MaxRetries additional times (a negative
// MaxRetries is treated as 0). Before retry k (1-based) Retry waits
// config.BaseDelay * 2^(k-1), capped at config.MaxDelay; a positive
// RetryAfter on the error adds a further wait after the failed attempt.
//
// Returns nil on success, the error itself when it is not retryable,
// ctx.Err() when the context ends during a wait, or the last error wrapped
// with "failed after N retries" once the retries are exhausted.
func Retry(ctx context.Context, fn func() error, config RetryConfig) error {
	// A negative MaxRetries would skip the loop entirely: fn would never run
	// and Wrapf(nil, ...) below would report success. Always try once.
	maxRetries := config.MaxRetries
	if maxRetries < 0 {
		maxRetries = 0
	}

	// wait blocks for d or until ctx is done, releasing the timer either way.
	wait := func(d time.Duration) error {
		timer := time.NewTimer(d)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	}

	var lastErr error
	for attempt := 0; attempt <= maxRetries; attempt++ {
		if attempt > 0 {
			// Exponential backoff. Start from the cap and only go lower when
			// the doubled value is computed without overflowing int64.
			delay := config.MaxDelay
			if shift := uint(attempt - 1); shift < 63 {
				if d := config.BaseDelay << shift; d>>shift == config.BaseDelay && d < config.MaxDelay {
					delay = d
				}
			}
			if err := wait(delay); err != nil {
				return err
			}
		}

		err := fn()
		if err == nil {
			return nil
		}
		lastErr = err

		retryErr, ok := err.(*RetryableError)
		if !ok {
			return err
		}

		// Honor an explicit RetryAfter hint, but not after the final attempt.
		if attempt < maxRetries && retryErr != nil && retryErr.RetryAfter > 0 {
			if err := wait(retryErr.RetryAfter); err != nil {
				return err
			}
		}
	}
	return errors.Wrapf(lastErr, "failed after %d retries", maxRetries)
}
// CreateVM creates a virtual machine from spec, retrying with the default
// retry configuration when the failure looks like a network or temporary
// error. It returns the VM produced by the last attempt together with the
// final error from Retry.
func (c *Client) CreateVM(ctx context.Context, spec VMSpec) (*VM, error) {
	var created *VM
	attempt := func() error {
		var err error
		created, err = c.createVM(ctx, spec)
		if err != nil && (isNetworkError(err) || isTemporaryError(err)) {
			// Mark transient failures so Retry tries again.
			return &RetryableError{Err: err}
		}
		return err
	}
	err := Retry(ctx, attempt, DefaultRetryConfig())
	return created, err
}
// findImageInStorage looks for a volume whose volid contains imageName.
//
// It lists all storages, keeps those whose content types include "images",
// "iso" or "vztmpl", and lists each one's content on the given node. Storages
// whose content cannot be listed are skipped. The first matching volid is
// returned; if none matches an error is returned.
func (c *Client) findImageInStorage(ctx context.Context, node, imageName string) (string, error) {
	pools, err := c.ListStorages(ctx)
	if err != nil {
		return "", errors.Wrap(err, "failed to list storages")
	}

	for _, pool := range pools {
		// Only storages that can hold images are worth listing.
		eligible := false
		for _, kind := range pool.Content {
			switch kind {
			case "images", "iso", "vztmpl":
				eligible = true
			}
			if eligible {
				break
			}
		}
		if !eligible {
			continue
		}

		var entries []struct {
			Volid  string `json:"volid"`
			Format string `json:"format"`
		}
		listPath := fmt.Sprintf("/nodes/%s/storage/%s/content", node, pool.Name)
		if c.httpClient.Get(ctx, listPath, &entries) != nil {
			continue // unreadable storage: move on to the next one
		}

		for i := range entries {
			if volid := entries[i].Volid; strings.Contains(volid, imageName) {
				return volid, nil
			}
		}
	}
	return "", fmt.Errorf("image '%s' not found in any storage", imageName)
}
// createVM performs the actual VM creation (one attempt; CreateVM adds retries).
//
// Flow, as implemented below:
//  1. Fetch the next free VMID from /cluster/nextid.
//  2. Decide how the disk is provided, based on spec.Image:
//     - numeric: treated as a template VMID; the template is cloned, the
//     clone's config is updated, and the function returns early.
//     - contains ":": used as a volid as-is; otherwise the name is looked up
//     with findImageInStorage (a lookup failure is an error).
//     - volid ending in .img/.qcow2: the VM is created with a blank qcow2
//     disk and the image is imported afterwards; any other volid is
//     attached directly as scsi0.
//     - empty: a blank raw disk is created.
//  3. POST the VM config to /nodes/{node}/qemu.
//  4. If an import is needed: stop the VM if running, POST to the importdisk
//     path, wait for the task, then move the imported disk to scsi0.
//  5. If spec.UserData is set, POST it to the cloudinit path (best effort).
//  6. Return the VM as reported by getVMByID.
//
// NOTE(review): several waits below use time.Sleep and therefore do not
// observe ctx cancellation, and several errors are deliberately or
// accidentally discarded; these spots are marked inline.
func (c *Client) createVM(ctx context.Context, spec VMSpec) (*VM, error) {
// First, get next available VMID
// The HTTP client's Get method already extracts the "data" field, so we get the string directly
var nextIDStr string
if err := c.httpClient.Get(ctx, "/cluster/nextid", &nextIDStr); err != nil {
return nil, errors.Wrap(err, "failed to get next VMID")
}
var vmID int
if _, err := fmt.Sscanf(nextIDStr, "%d", &vmID); err != nil {
return nil, errors.Wrap(err, "failed to parse VMID")
}
// Handle image - determine if we need to clone, import, or use existing
var diskConfig string
var needsImageImport bool
var imageVolid string
if spec.Image != "" {
// Check if image is a template ID (numeric VMID to clone from)
if templateID, err := strconv.Atoi(spec.Image); err == nil {
// Clone from template
cloneConfig := map[string]interface{}{
"newid": vmID,
"name": spec.Name,
"target": spec.Node,
}
var cloneResult string
if err := c.httpClient.Post(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/clone", spec.Node, templateID), cloneConfig, &cloneResult); err != nil {
return nil, errors.Wrapf(err, "failed to clone template %d", templateID)
}
// NOTE(review): cloneResult is not inspected; the config update below is
// issued immediately after the clone request returns.
// After cloning, update the VM config
vmConfig := map[string]interface{}{
"cores": spec.CPU,
"memory": parseMemory(spec.Memory),
"agent": "1", // Enable QEMU guest agent
}
if spec.TenantID != "" {
vmConfig["tags"] = fmt.Sprintf("tenant_%s", spec.TenantID)
}
// Add cloud-init configuration if userData is provided
if spec.UserData != "" {
cloudInitStorage := spec.Storage
if cloudInitStorage == "" {
cloudInitStorage = "local"
}
vmConfig["ide2"] = fmt.Sprintf("%s:cloudinit", cloudInitStorage)
vmConfig["ciuser"] = "admin"
vmConfig["ipconfig0"] = "ip=dhcp"
}
if err := c.httpClient.Put(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/config", spec.Node, vmID), vmConfig, nil); err != nil {
return nil, errors.Wrap(err, "failed to update cloned VM config")
}
// Write cloud-init userData if provided
if spec.UserData != "" {
cloudInitConfig := map[string]interface{}{
"user": spec.UserData,
}
cloudInitPath := fmt.Sprintf("/nodes/%s/qemu/%d/cloudinit", spec.Node, vmID)
if err := c.httpClient.Post(ctx, cloudInitPath, cloudInitConfig, nil); err != nil {
// Log warning but don't fail - cloud-init can be configured later
// NOTE(review): nothing is actually logged here; the error is dropped.
}
}
// Get VM details and return
return c.getVMByID(ctx, spec.Node, vmID)
}
// Check if image is already a volid (format: storage:path/to/image)
if strings.Contains(spec.Image, ":") {
imageVolid = spec.Image
} else {
// Search for image in storage
foundVolid, err := c.findImageInStorage(ctx, spec.Node, spec.Image)
if err != nil {
// Image not found - return error instead of creating blank disk
// This prevents "Nothing to boot" errors
return nil, errors.Wrapf(err, "image '%s' not found in storage - cannot create VM without OS image", spec.Image)
}
imageVolid = foundVolid
}
// If we have an image volid, determine how to use it
if imageVolid != "" {
// For cloud images (.img, .qcow2), we need to import them as disks
if strings.HasSuffix(imageVolid, ".img") || strings.HasSuffix(imageVolid, ".qcow2") {
needsImageImport = true
// Create VM with blank disk first, then import the image
diskConfig = fmt.Sprintf("%s:%d,format=qcow2", spec.Storage, parseDisk(spec.Disk))
} else {
// Use existing template/disk directly (for templates, etc.)
diskConfig = fmt.Sprintf("%s,format=qcow2", imageVolid)
}
} else if diskConfig == "" {
// NOTE(review): this branch appears unreachable - imageVolid is set from a
// non-empty spec.Image or from a successful storage lookup above.
// No image found and no disk config set, create blank disk
diskConfig = fmt.Sprintf("%s:%d,format=raw", spec.Storage, parseDisk(spec.Disk))
}
} else {
// No image specified, create blank disk
diskConfig = fmt.Sprintf("%s:%d,format=raw", spec.Storage, parseDisk(spec.Disk))
}
// Create VM configuration
vmConfig := map[string]interface{}{
"vmid": vmID,
"name": spec.Name,
"cores": spec.CPU,
"memory": parseMemory(spec.Memory),
"net0": fmt.Sprintf("virtio,bridge=%s", spec.Network),
"scsi0": diskConfig,
"ostype": "l26", // Linux 2.6+ kernel
"agent": "1", // Enable QEMU guest agent
"boot": "order=scsi0", // Set boot order to scsi0 (primary disk)
}
// Add tenant labels if provided (for tenant isolation and billing)
if spec.TenantID != "" {
// Proxmox uses tags for labeling - use underscore instead of colon (Proxmox tag format)
// Tags must be alphanumeric with underscores and hyphens only
vmConfig["tags"] = fmt.Sprintf("tenant_%s", spec.TenantID)
}
// Add cloud-init configuration if userData is provided
if spec.UserData != "" {
// Determine cloud-init storage (use same storage as VM disk, or default to "local")
cloudInitStorage := spec.Storage
if cloudInitStorage == "" {
cloudInitStorage = "local"
}
// Proxmox cloud-init drive format: ide2=storage:cloudinit
vmConfig["ide2"] = fmt.Sprintf("%s:cloudinit", cloudInitStorage)
// Set default cloud-init user (can be overridden in userData)
vmConfig["ciuser"] = "admin"
// Set network configuration for cloud-init
vmConfig["ipconfig0"] = "ip=dhcp"
}
// Create VM
// The HTTP client's Post method already extracts the "data" field, so we get the string directly
var resultStr string
if err := c.httpClient.Post(ctx, fmt.Sprintf("/nodes/%s/qemu", spec.Node), vmConfig, &resultStr); err != nil {
return nil, errors.Wrap(err, "failed to create VM")
}
// If we need to import a cloud image, do it now using importdisk API
if needsImageImport && imageVolid != "" {
// Extract storage and filename from volid
parts := strings.SplitN(imageVolid, ":", 2)
if len(parts) != 2 {
return nil, errors.Errorf("invalid image volid format: %s (expected storage:path)", imageVolid)
}
storageName := parts[0]
imagePath := parts[1]
// CRITICAL: Stop VM before import (importdisk requires VM to be stopped)
var vmStatus struct {
Status string `json:"status"`
}
statusPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/current", spec.Node, vmID)
// A failed status lookup skips the stop step entirely and proceeds to the import.
if err := c.httpClient.Get(ctx, statusPath, &vmStatus); err == nil {
if vmStatus.Status == "running" {
// Stop VM
stopPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/stop", spec.Node, vmID)
if err := c.httpClient.Post(ctx, stopPath, nil, nil); err != nil {
return nil, errors.Wrap(err, "failed to stop VM for image import")
}
// Wait for VM to stop (up to 30 seconds)
// NOTE(review): time.Sleep here does not observe ctx cancellation.
for i := 0; i < 30; i++ {
time.Sleep(1 * time.Second)
if err := c.httpClient.Get(ctx, statusPath, &vmStatus); err == nil {
if vmStatus.Status == "stopped" {
break
}
}
}
if vmStatus.Status != "stopped" {
return nil, errors.New("VM did not stop within 30 seconds, cannot import image")
}
}
}
// Check if importdisk API is available before attempting to use it
supportsImportDisk, err := c.SupportsImportDisk(ctx)
if err != nil {
// If we can't determine support, log warning but try anyway
// The actual API call will fail with 501 if not supported
// NOTE(review): nothing is logged, and when err is non-nil supportsImportDisk
// is whatever was returned alongside it, so "try anyway" only holds if that
// value is true.
}
if !supportsImportDisk {
// Clean up the VM we just created since we can't import the image
// Attempt to unlock and delete
// Cleanup is best effort: both return values are discarded.
c.UnlockVM(ctx, vmID)
c.deleteVM(ctx, vmID)
return nil, errors.Errorf("importdisk API is not supported in this Proxmox version. VM %d has been cleaned up. Please use template cloning or pre-imported images instead", vmID)
}
// Use Proxmox importdisk API to import the image as a disk
// POST /nodes/{node}/qemu/{vmid}/importdisk
// Note: importdisk creates a NEW disk (usually scsi1), it doesn't replace scsi0
importConfig := map[string]interface{}{
"storage": storageName,
"format": "qcow2",
"filename": imagePath,
}
var importResult string
importPath := fmt.Sprintf("/nodes/%s/qemu/%d/importdisk", spec.Node, vmID)
if err := c.httpClient.Post(ctx, importPath, importConfig, &importResult); err != nil {
// Check if error is 501 (not implemented) - this means API doesn't exist
if strings.Contains(err.Error(), "501") || strings.Contains(err.Error(), "not implemented") {
// Clean up the VM we created
c.UnlockVM(ctx, vmID)
c.deleteVM(ctx, vmID)
return nil, errors.Errorf("importdisk API is not implemented in this Proxmox version. VM %d has been cleaned up. Please use template cloning (numeric VMID) or pre-imported images instead of cloud images", vmID)
}
// For other errors, attempt cleanup but return the original error
c.UnlockVM(ctx, vmID)
c.deleteVM(ctx, vmID)
return nil, errors.Wrapf(err, "failed to import image '%s' - VM %d has been cleaned up", imageVolid, vmID)
}
// CRITICAL FIX: Wait for importdisk task to complete before updating config
// The importResult contains a task UPID that we need to monitor
// Extract UPID from result (format: "UPID:node:timestamp:pid:type:user@realm:")
// The HTTP client already extracts the "data" field, so importResult should be the UPID string
taskUPID := strings.TrimSpace(importResult)
// Handle case where UPID might be in JSON format or empty
if taskUPID == "" || !strings.HasPrefix(taskUPID, "UPID:") {
// Try to extract UPID from JSON if present
if strings.Contains(importResult, "UPID:") {
// Extract UPID from JSON string
parts := strings.Split(importResult, "\"")
for _, part := range parts {
if strings.HasPrefix(part, "UPID:") {
taskUPID = part
break
}
}
}
}
if taskUPID != "" && strings.HasPrefix(taskUPID, "UPID:") {
// Monitor task until completion (with timeout)
maxWaitTime := 10 * time.Minute // 10 minutes max for large images
pollInterval := 3 * time.Second // Check every 3 seconds
startTime := time.Now()
for time.Since(startTime) < maxWaitTime {
// Check task status
var taskStatus struct {
Status string `json:"status"`
ExitStatus string `json:"exitstatus,omitempty"`
}
taskStatusPath := fmt.Sprintf("/nodes/%s/tasks/%s/status", spec.Node, taskUPID)
if err := c.httpClient.Get(ctx, taskStatusPath, &taskStatus); err != nil {
// If we can't get task status, wait a bit and retry
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(pollInterval):
continue
}
}
// Task status can be: "running", "stopped"
if taskStatus.Status == "stopped" {
// Task completed - check exit status
if taskStatus.ExitStatus != "OK" && taskStatus.ExitStatus != "" {
return nil, errors.Errorf("importdisk task failed with exit status: %s", taskStatus.ExitStatus)
}
// Task completed successfully
break
}
// Task still running, wait and check again
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(pollInterval):
// Continue loop
}
}
// Verify task completed (final check)
// This distinguishes "loop ended because the task stopped" from "loop ended
// because maxWaitTime elapsed". If this final lookup itself fails, the code
// proceeds as if the task had completed.
var finalTaskStatus struct {
Status string `json:"status"`
}
taskStatusPath := fmt.Sprintf("/nodes/%s/tasks/%s/status", spec.Node, taskUPID)
if err := c.httpClient.Get(ctx, taskStatusPath, &finalTaskStatus); err == nil {
if finalTaskStatus.Status != "stopped" {
return nil, errors.Errorf("importdisk task did not complete within timeout period (status: %s)", finalTaskStatus.Status)
}
}
// Give a moment for disk to be fully registered
// NOTE(review): fixed sleep, not ctx-aware.
time.Sleep(2 * time.Second)
} else {
// If no UPID returned, wait a conservative amount of time
// This is a fallback for cases where UPID extraction fails
// NOTE(review): fixed 30s sleep, not ctx-aware.
time.Sleep(30 * time.Second)
}
// Get VM config to find the imported disk
var vmConfigResponse struct {
Scsi0 string `json:"scsi0"`
Scsi1 string `json:"scsi1"`
Scsi2 string `json:"scsi2"`
}
configPath := fmt.Sprintf("/nodes/%s/qemu/%d/config", spec.Node, vmID)
if err := c.httpClient.Get(ctx, configPath, &vmConfigResponse); err != nil {
return nil, errors.Wrap(err, "failed to get VM config after import")
}
// Find the imported disk (usually scsi1, but could be scsi2, etc.)
// Only scsi1 and scsi2 are examined, and a slot matches when its value
// contains the storage name as a substring.
importedDisk := ""
if vmConfigResponse.Scsi1 != "" && strings.Contains(vmConfigResponse.Scsi1, storageName) {
importedDisk = vmConfigResponse.Scsi1
} else if vmConfigResponse.Scsi2 != "" && strings.Contains(vmConfigResponse.Scsi2, storageName) {
importedDisk = vmConfigResponse.Scsi2
}
if importedDisk == "" {
return nil, errors.New("imported disk not found in VM configuration after import")
}
// CRITICAL FIX: Replace blank scsi0 with imported disk
// First, remove the blank scsi0 disk
removeConfig := map[string]interface{}{
"scsi0": "",
}
if err := c.httpClient.Put(ctx, configPath, removeConfig, nil); err != nil {
// Log but continue - might already be removed
// NOTE(review): nothing is logged; the error is dropped.
}
// Now attach imported disk to scsi0 and set boot order
updateConfig := map[string]interface{}{
"scsi0": importedDisk, // Attach imported disk to primary slot
"boot": "order=scsi0",
}
if err := c.httpClient.Put(ctx, configPath, updateConfig, nil); err != nil {
return nil, errors.Wrap(err, "failed to attach imported disk to scsi0 and set boot order")
}
// Remove the imported disk from its original slot (scsi1, scsi2, etc.) to avoid duplicates
// This is done by setting the slot to empty string
cleanupConfig := map[string]interface{}{}
if vmConfigResponse.Scsi1 == importedDisk {
cleanupConfig["scsi1"] = ""
} else if vmConfigResponse.Scsi2 == importedDisk {
cleanupConfig["scsi2"] = ""
}
if len(cleanupConfig) > 0 {
// Best effort: the result of this request is discarded.
c.httpClient.Put(ctx, configPath, cleanupConfig, nil)
}
// Verify boot order is set correctly
var verifyConfig struct {
Boot string `json:"boot"`
Scsi0 string `json:"scsi0"`
}
if err := c.httpClient.Get(ctx, configPath, &verifyConfig); err == nil {
if verifyConfig.Boot != "order=scsi0" {
// Retry setting boot order
retryConfig := map[string]interface{}{
"boot": "order=scsi0",
}
// Best effort: the result of this request is discarded.
c.httpClient.Put(ctx, configPath, retryConfig, nil)
}
if verifyConfig.Scsi0 == "" {
return nil, errors.New("scsi0 is empty after attaching imported disk")
}
}
}
// Write cloud-init userData if provided
if spec.UserData != "" {
// Proxmox cloud-init API: POST /nodes/{node}/qemu/{vmid}/cloudinit
// The userData is written to the cloud-init drive
cloudInitConfig := map[string]interface{}{
"user": spec.UserData,
}
cloudInitPath := fmt.Sprintf("/nodes/%s/qemu/%d/cloudinit", spec.Node, vmID)
// Retry cloud-init write up to 3 times (sometimes fails on first attempt)
var cloudInitErr error
for attempt := 0; attempt < 3; attempt++ {
var err error
if err = c.httpClient.Post(ctx, cloudInitPath, cloudInitConfig, nil); err == nil {
cloudInitErr = nil
break
}
cloudInitErr = err
if attempt < 2 {
// NOTE(review): not ctx-aware.
time.Sleep(1 * time.Second)
}
}
// Log warning but don't fail VM creation - cloud-init can be configured later
// However, this should be rare and indicates a configuration issue
if cloudInitErr != nil {
// Note: In production, you might want to add a status condition here
// For now, we continue - VM is created but cloud-init may not work
// NOTE(review): cloudInitErr is neither logged nor returned.
}
}
// Get VM details and return
return c.getVMByID(ctx, spec.Node, vmID)
}
// getVMByID looks up a VM by ID in the node's VM listing.
//
// When the VM appears in the listing its reported name and status are
// returned. When it does not, a placeholder VM with name "unknown" and status
// "created" is returned instead of an error. An error is returned only when
// the listing request itself fails.
func (c *Client) getVMByID(ctx context.Context, node string, vmID int) (*VM, error) {
	var listing []struct {
		Vmid   int    `json:"vmid"`
		Name   string `json:"name"`
		Status string `json:"status"`
	}
	listPath := fmt.Sprintf("/nodes/%s/qemu", node)
	if err := c.httpClient.Get(ctx, listPath, &listing); err != nil {
		return nil, errors.Wrap(err, "failed to get VM details")
	}

	// Start from the placeholder and overwrite it if the VM is listed.
	found := &VM{
		ID:     vmID,
		Name:   "unknown",
		Status: "created",
		Node:   node,
	}
	for i := range listing {
		if entry := listing[i]; entry.Vmid == vmID {
			found.Name = entry.Name
			found.Status = entry.Status
			break
		}
	}
	return found, nil
}
// Helper functions for parsing

// parseMemory converts a memory size string to whole megabytes.
//
// Accepted forms (surrounding whitespace is ignored):
//   - "4Ti" / "4T"  -> value * 1024 * 1024
//   - "4Gi" / "4G"  -> value * 1024
//   - "512Mi" / "512M" -> value
//   - "2048Ki" / "2048K" -> value / 1024
//   - "4096" (no unit) -> taken as MB
//
// Fractional values are allowed with a unit ("0.5Gi" -> 512) and the result is
// truncated to an int. An empty or unparsable string yields the default of
// 4096 MB.
func parseMemory(memory string) int {
	const defaultMB = 4096

	memory = strings.TrimSpace(memory)
	if memory == "" {
		return defaultMB
	}

	// Two-letter suffixes are listed before their one-letter forms; every
	// multiplier is a power of two so the float arithmetic is exact.
	units := []struct {
		suffix string
		toMB   float64
	}{
		{"Ti", 1024 * 1024},
		{"T", 1024 * 1024},
		{"Gi", 1024},
		{"G", 1024},
		{"Mi", 1},
		{"M", 1},
		{"Ki", 1.0 / 1024},
		{"K", 1.0 / 1024},
	}
	for _, u := range units {
		if !strings.HasSuffix(memory, u.suffix) {
			continue
		}
		value, err := strconv.ParseFloat(strings.TrimSuffix(memory, u.suffix), 64)
		if err != nil {
			break // recognised unit but bad number: fall through to the default
		}
		return int(value * u.toMB)
	}

	// No unit: a plain integer is interpreted as MB.
	if value, err := strconv.Atoi(memory); err == nil {
		return value
	}
	return defaultMB
}
// parseDisk converts a disk size string to whole gigabytes.
//
// Accepted forms (surrounding whitespace is ignored):
//   - "50Gi" / "50G" -> value
//   - "2Ti" / "2T"   -> value * 1024
//   - "2048Mi" / "2048M" -> value / 1024
//   - "50" (no unit) -> taken as GB
//
// Fractional results are rounded UP to the next whole GB so the disk is never
// smaller than requested; in particular a sub-gigabyte request such as
// "512Mi" yields 1 rather than an invalid size of 0. An empty or unparsable
// string yields the default of 50 GB.
func parseDisk(disk string) int {
	const defaultGB = 50

	disk = strings.TrimSpace(disk)
	if disk == "" {
		return defaultGB
	}

	// Multipliers are powers of two, so the float arithmetic is exact.
	units := []struct {
		suffix string
		toGB   float64
	}{
		{"Gi", 1},
		{"G", 1},
		{"Ti", 1024},
		{"T", 1024},
		{"Mi", 1.0 / 1024},
		{"M", 1.0 / 1024},
	}
	for _, u := range units {
		if !strings.HasSuffix(disk, u.suffix) {
			continue
		}
		value, err := strconv.ParseFloat(strings.TrimSuffix(disk, u.suffix), 64)
		if err != nil {
			break // recognised unit but bad number: fall through to the default
		}
		gb := value * u.toGB
		whole := int(gb)
		if float64(whole) < gb {
			whole++ // round up any fractional remainder
		}
		return whole
	}

	// No unit: a plain integer is interpreted as GB.
	if value, err := strconv.Atoi(disk); err == nil {
		return value
	}
	return defaultGB
}
// UpdateVM applies spec to the VM with the given ID, retrying with the default
// retry configuration when the failure looks like a network or temporary
// error. It returns the VM from the last attempt together with the final
// error from Retry.
func (c *Client) UpdateVM(ctx context.Context, vmID int, spec VMSpec) (*VM, error) {
	var updated *VM
	attempt := func() error {
		var err error
		updated, err = c.updateVM(ctx, vmID, spec)
		if err != nil && (isNetworkError(err) || isTemporaryError(err)) {
			// Mark transient failures so Retry tries again.
			return &RetryableError{Err: err}
		}
		return err
	}
	err := Retry(ctx, attempt, DefaultRetryConfig())
	return updated, err
}
// updateVM performs a single update attempt.
//
// It searches every node for the VM (nodes whose VM list cannot be read are
// skipped), sends the new configuration to that node, then re-lists the
// node's VMs and returns the refreshed entry. "cores" is sent only when
// spec.CPU > 0 and "memory" only when spec.Memory is non-empty; the guest
// agent flag is always sent.
func (c *Client) updateVM(ctx context.Context, vmID int, spec VMSpec) (*VM, error) {
	nodeNames, err := c.ListNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}

	// Locate the node hosting the VM.
	owner := ""
	for _, name := range nodeNames {
		if owner != "" {
			break
		}
		onNode, listErr := c.ListVMs(ctx, name)
		if listErr != nil {
			continue
		}
		for i := range onNode {
			if onNode[i].ID == vmID {
				owner = name
				break
			}
		}
	}
	if owner == "" {
		return nil, fmt.Errorf("VM %d not found", vmID)
	}

	// Build the change set; the guest agent is always kept enabled, so the
	// request is never empty.
	changes := map[string]interface{}{"agent": "1"}
	if spec.CPU > 0 {
		changes["cores"] = spec.CPU
	}
	if spec.Memory != "" {
		changes["memory"] = parseMemory(spec.Memory)
	}

	var putResult struct {
		Data string `json:"data"`
	}
	cfgPath := fmt.Sprintf("/nodes/%s/qemu/%d/config", owner, vmID)
	if err := c.httpClient.Put(ctx, cfgPath, changes, &putResult); err != nil {
		return nil, errors.Wrap(err, "failed to update VM")
	}

	// Read the VM back from the node listing.
	refreshed, err := c.ListVMs(ctx, owner)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list VMs")
	}
	for i := range refreshed {
		if refreshed[i].ID == vmID {
			match := refreshed[i]
			return &match, nil
		}
	}
	return nil, fmt.Errorf("VM %d not found after update", vmID)
}
// findVMNode returns the name of the node whose VM listing contains vmID.
// Nodes are checked in the order ListNodes returns them; nodes that cannot be
// queried are skipped. An error is returned when the node list cannot be
// fetched or no node lists the VM.
func (c *Client) findVMNode(ctx context.Context, vmID int) (string, error) {
	nodeNames, err := c.ListNodes(ctx)
	if err != nil {
		return "", errors.Wrap(err, "failed to list nodes")
	}

	for _, name := range nodeNames {
		var entries []struct {
			Vmid int `json:"vmid"`
		}
		if c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu", name), &entries) != nil {
			continue // unreachable node: try the next one
		}
		for i := range entries {
			if entries[i].Vmid == vmID {
				return name, nil
			}
		}
	}
	return "", fmt.Errorf("VM %d not found on any node", vmID)
}
// UnlockVM asks Proxmox to unlock the VM via
// POST /nodes/{node}/qemu/{vmid}/unlock.
//
// An error is returned only when the VM's node cannot be determined. A failure
// of the unlock request itself is deliberately ignored, since the VM may not
// be locked at all.
func (c *Client) UnlockVM(ctx context.Context, vmID int) error {
	node, err := c.findVMNode(ctx, vmID)
	if err != nil {
		return errors.Wrap(err, "failed to find VM node")
	}

	// Best effort: the outcome of the unlock request does not affect the result.
	_ = c.httpClient.Post(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/unlock", node, vmID), nil, nil)
	return nil
}
// DeleteVM deletes the VM with the given ID, retrying with the default retry
// configuration when the failure looks like a network or temporary error.
func (c *Client) DeleteVM(ctx context.Context, vmID int) error {
	attempt := func() error {
		err := c.deleteVM(ctx, vmID)
		if err != nil && (isNetworkError(err) || isTemporaryError(err)) {
			// Mark transient failures so Retry tries again.
			return &RetryableError{Err: err}
		}
		return err
	}
	return Retry(ctx, attempt, DefaultRetryConfig())
}
// deleteVM performs a single delete attempt.
//
// Steps: locate the node hosting the VM (nodes whose VM list cannot be read
// are skipped), try to unlock the VM (up to 5 attempts, 1 second apart), stop
// it if it is running and wait up to 30 seconds for it to report "stopped",
// then issue DELETE with purge=1.
//
// All waits observe ctx: if the context is cancelled while waiting, ctx.Err()
// is returned instead of sleeping through the remaining time.
func (c *Client) deleteVM(ctx context.Context, vmID int) error {
	// pause blocks for d or until ctx is done, releasing the timer either way.
	pause := func(d time.Duration) error {
		timer := time.NewTimer(d)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	}

	// Find node with this VM
	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return errors.Wrap(err, "failed to list nodes")
	}
	var targetNode string
	for _, node := range nodes {
		vms, err := c.ListVMs(ctx, node)
		if err != nil {
			continue
		}
		for _, vm := range vms {
			if vm.ID == vmID {
				targetNode = node
				break
			}
		}
		if targetNode != "" {
			break
		}
	}
	if targetNode == "" {
		return fmt.Errorf("VM %d not found", vmID)
	}

	// Unlock VM first to prevent lock file issues. UnlockVM only fails when
	// the VM's node cannot be resolved, so retry a few times before moving on.
	for i := 0; i < 5; i++ {
		if err := c.UnlockVM(ctx, vmID); err == nil {
			break
		}
		if err := pause(time.Second); err != nil {
			return err
		}
	}

	// Stop VM first if running. A failed status lookup skips the stop step.
	var status struct {
		Status string `json:"status"`
	}
	statusPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/current", targetNode, vmID)
	if err := c.httpClient.Get(ctx, statusPath, &status); err == nil && status.Status == "running" {
		stopPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/stop", targetNode, vmID)
		if err := c.httpClient.Post(ctx, stopPath, nil, nil); err != nil {
			return errors.Wrap(err, "failed to stop VM")
		}
		// Wait for VM to stop (up to 30 seconds). The delete below is attempted
		// even if the VM has not reported "stopped" by then.
		for i := 0; i < 30; i++ {
			if err := pause(time.Second); err != nil {
				return err
			}
			if err := c.httpClient.Get(ctx, statusPath, &status); err == nil && status.Status == "stopped" {
				break
			}
		}
	}

	// Delete VM with purge to ensure complete cleanup
	deletePath := fmt.Sprintf("/nodes/%s/qemu/%d?purge=1", targetNode, vmID)
	if err := c.httpClient.Delete(ctx, deletePath); err != nil {
		return errors.Wrap(err, "failed to delete VM")
	}
	return nil
}
// GetVMStatus returns the status of the VM with the given ID, retrying with
// the default retry configuration when the failure looks like a network or
// temporary error. It returns the status from the last attempt together with
// the final error from Retry.
func (c *Client) GetVMStatus(ctx context.Context, vmID int) (*VMStatus, error) {
	var current *VMStatus
	attempt := func() error {
		var err error
		current, err = c.getVMStatus(ctx, vmID)
		if err != nil && (isNetworkError(err) || isTemporaryError(err)) {
			// Mark transient failures so Retry tries again.
			return &RetryableError{Err: err}
		}
		return err
	}
	err := Retry(ctx, attempt, DefaultRetryConfig())
	return current, err
}
// getVMStatus performs a single status lookup.
//
// It searches every node's VM list for vmID (nodes that cannot be listed are
// skipped). For the first match it reads status/current; if that request
// fails, only the state from the listing is returned. Otherwise the state,
// CPU and memory figures from status/current are returned, plus an IP address
// when one can be extracted from the VM's net0 config entry.
func (c *Client) getVMStatus(ctx context.Context, vmID int) (*VMStatus, error) {
	nodeNames, err := c.ListNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}

	for _, name := range nodeNames {
		listed, listErr := c.ListVMs(ctx, name)
		if listErr != nil {
			continue
		}
		for i := range listed {
			if listed[i].ID != vmID {
				continue
			}

			base := fmt.Sprintf("/nodes/%s/qemu/%d", name, vmID)

			// Detailed runtime status; fall back to the listing on failure.
			var live struct {
				Status string  `json:"status"`
				CPU    float64 `json:"cpu"`
				Mem    int64   `json:"mem"`
				NetIn  int64   `json:"netin"`
				NetOut int64   `json:"netout"`
			}
			if c.httpClient.Get(ctx, base+"/status/current", &live) != nil {
				return &VMStatus{
					State: listed[i].Status,
				}, nil
			}

			result := &VMStatus{
				State:  live.Status,
				CPU:    live.CPU,
				Memory: live.Mem,
			}

			// The IP address is optional: a config read failure leaves it empty.
			var cfg struct {
				Net0 string `json:"net0"`
			}
			if c.httpClient.Get(ctx, base+"/config", &cfg) == nil {
				result.IPAddress = extractIP(cfg.Net0)
			}
			return result, nil
		}
	}
	return nil, fmt.Errorf("VM %d not found", vmID)
}
// extractIP pulls an IP address out of a comma-separated net config string,
// e.g. "virtio,bridge=vmbr0,ip=192.168.1.100/24" yields "192.168.1.100".
//
// Each field is trimmed and checked for an "ip=" prefix. A value with a "/"
// after its first character is cut at the slash; the placeholder values
// "dhcp" and "static" are skipped and scanning continues with later fields.
// Returns "" when no address is found.
func extractIP(netConfig string) string {
	const key = "ip="

	for _, field := range strings.Split(netConfig, ",") {
		field = strings.TrimSpace(field)
		if !strings.HasPrefix(field, key) {
			continue
		}
		addr := field[len(key):]

		// Drop the subnet mask when present.
		if slash := strings.Index(addr, "/"); slash > 0 {
			return addr[:slash]
		}

		switch addr {
		case "dhcp", "static":
			continue // not a literal address; keep looking
		}
		return addr
	}
	return ""
}
// Helper functions

// isNetworkError reports whether err's message looks like a network-level
// failure, i.e. it mentions "network", "timeout" or "connection".
//
// Matching is case-insensitive so that messages such as
// "Client.Timeout exceeded while awaiting headers" or "Network is unreachable"
// are recognised. A nil error is never a network error.
func isNetworkError(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	for _, marker := range []string{"network", "timeout", "connection"} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
// isTemporaryError reports whether err's message looks like a transient
// failure, i.e. it mentions "temporary" or contains "503" or "502".
//
// The word match is case-insensitive so that messages such as
// "Temporary failure in name resolution" are recognised. A nil error is never
// temporary.
func isTemporaryError(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	for _, marker := range []string{"temporary", "503", "502"} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
// contains reports whether substr occurs within s. An empty substr is
// contained in every string. It delegates to strings.Contains rather than a
// hand-written scan.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}
// indexOfSubstring returns the byte index of the first occurrence of substr in
// s, or -1 when substr does not occur. An empty substr matches at index 0. It
// delegates to strings.Index rather than a hand-written scan.
func indexOfSubstring(s, substr string) int {
	return strings.Index(s, substr)
}
// VMSpec represents VM specification: the desired settings passed to
// CreateVM and UpdateVM.
type VMSpec struct {
// Node is the Proxmox node name used in the API paths when creating the VM.
Node string
// Name is sent as the VM's "name".
Name string
// CPU is sent as "cores"; UpdateVM only applies it when it is > 0.
CPU int
// Memory is a size string converted by parseMemory (e.g. "4Gi", "4096M";
// a bare number is taken as MB).
Memory string
// Disk is a size string converted by parseDisk (e.g. "50Gi", "50G"; a bare
// number is taken as GB).
Disk string
// Storage names the storage used for a newly created disk and for the
// cloud-init drive (the cloud-init drive falls back to "local" when empty).
Storage string
// Network is the bridge name placed in the VM's net0 entry.
Network string
// Image selects the disk source on create: a numeric value is treated as a
// template VMID to clone, a value containing ":" as a volid, any other
// non-empty value as a name to search for in storage; empty means a blank disk.
Image string
UserData string // Cloud-init user data
// NOTE(review): SSHKeys is not referenced in the visible part of this file.
SSHKeys []string // SSH public keys
TenantID string // Tenant ID; when set, the VM is tagged "tenant_<TenantID>"
}
// VM represents a virtual machine as returned by the client's VM operations.
type VM struct {
// ID is the Proxmox VMID.
ID int
// Name and Status are taken from the node's VM listing.
Name string
Status string
// NOTE(review): IP is not populated by the code visible in this part of the
// file - confirm where (or whether) it is set.
IP string
// Node is the node the VM was looked up on.
Node string
// NOTE(review): Created is not populated by the code visible in this part
// of the file - confirm.
Created time.Time
}
// VMStatus represents VM status as reported by GetVMStatus.
type VMStatus struct {
// State is the "status" string reported for the VM.
State string
// IPAddress is the address extracted from the VM's net0 config entry, or ""
// when none could be determined.
IPAddress string
// CPU is the "cpu" value from the VM's current status (units as reported by
// the API - not interpreted here).
CPU float64
// Memory is the "mem" value from the VM's current status (units as reported
// by the API - not interpreted here).
Memory int64
}
// Storage represents a storage pool.
type Storage struct {
// Name identifies the storage; it is used in storage content API paths.
Name string
Type string
// Content lists the content types the storage holds; findImageInStorage
// looks for "images", "iso" or "vztmpl" here.
Content []string
Shared bool
Enabled bool
// NOTE(review): units of Capacity and Used are not established by the code
// visible here - presumably bytes; confirm against the code that fills them.
Capacity int64
Used int64
}
// Network represents a network interface.
// NOTE(review): this type is not referenced by the code visible in this part
// of the file; field meanings below follow the field names only.
type Network struct {
Name string
Type string
Bridge string
VLAN int
Active bool
Autostart bool
}
// ClusterInfo represents cluster information.
// NOTE(review): this type is not referenced by the code visible in this part
// of the file; field meanings below follow the field names only.
type ClusterInfo struct {
Name string
// Nodes presumably holds node names - confirm against the code that fills it.
Nodes []string
Quorum bool
Version string
}
// GetPVEVersion fetches /version and returns the "version" field of the
// response. A request failure is returned wrapped.
func (c *Client) GetPVEVersion(ctx context.Context) (string, error) {
	var payload struct {
		Version string `json:"version"`
	}
	err := c.httpClient.Get(ctx, "/version", &payload)
	if err != nil {
		return "", errors.Wrap(err, "failed to get PVE version")
	}
	return payload.Version, nil
}
// SupportsImportDisk reports whether the Proxmox VE version looks new enough
// (major version 6 or later) to offer the importdisk operation.
//
// The version string from GetPVEVersion is accepted both in the long
// "pve-manager/X.Y.Z/..." form and as a bare "X.Y.Z" string; only the leading
// major number is examined, so future major versions (10, 11, ...) are
// handled too. This is a heuristic: callers should still be prepared for the
// actual import request to fail.
//
// The returned error is always nil. If the version cannot be fetched or
// parsed, false is returned to be safe.
func (c *Client) SupportsImportDisk(ctx context.Context) (bool, error) {
	version, err := c.GetPVEVersion(ctx)
	if err != nil {
		// If we can't get version, assume it's not supported to be safe
		return false, nil
	}

	// Strip everything up to and including an optional "pve-manager/" prefix.
	const prefix = "pve-manager/"
	v := strings.TrimSpace(version)
	if i := strings.Index(v, prefix); i >= 0 {
		v = v[i+len(prefix):]
	}

	// Take the leading run of digits as the major version.
	end := 0
	for end < len(v) && v[end] >= '0' && v[end] <= '9' {
		end++
	}
	major, err := strconv.Atoi(v[:end])
	if err != nil {
		return false, nil
	}
	return major >= 6, nil
}
// CheckNodeHealth verifies that the given node answers its status endpoint
// and reports itself as "online".
//
// It returns nil when the node is online, a wrapped error when the status
// request fails, and a descriptive error when the reported status is anything
// other than "online".
//
// NOTE(review): confirm that "/nodes/{node}/status" actually returns a
// "status" field; if it does not, the decoded value is empty and every call
// fails the online check.
func (c *Client) CheckNodeHealth(ctx context.Context, node string) error {
	var nodeStatus struct {
		Status string `json:"status"`
		Uptime int64  `json:"uptime"`
	}

	endpoint := "/nodes/" + node + "/status"
	err := c.httpClient.Get(ctx, endpoint, &nodeStatus)

	switch {
	case err != nil:
		return errors.Wrapf(err, "node %s is not reachable or unhealthy", node)
	case nodeStatus.Status != "online":
		return errors.Errorf("node %s is not online (status: %s)", node, nodeStatus.Status)
	default:
		return nil
	}
}
// ListNodes returns the names of all nodes reported by the "/nodes" endpoint,
// in the order the API returned them.
//
// On a request failure it returns a nil slice and a wrapped error.
func (c *Client) ListNodes(ctx context.Context) ([]string, error) {
	var entries []struct {
		Node   string `json:"node"`
		Status string `json:"status"`
		Type   string `json:"type"`
	}

	err := c.httpClient.Get(ctx, "/nodes", &entries)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}

	// Only the node name is exposed; status and type are decoded but unused.
	names := make([]string, 0, len(entries))
	for idx := range entries {
		names = append(names, entries[idx].Node)
	}
	return names, nil
}
// ListVMs lists the QEMU VMs on a node, optionally restricted to one tenant.
//
// tenantID is variadic for backward compatibility; only the first value is
// used, and an empty or missing value disables tenant filtering.
//
// When filtering, a VM is kept only if one of its tags is exactly
// "tenant:<id>". Tags are compared as whole tags (split on ';', ',' and
// spaces), never as substrings, so tenant "1" does not match a VM tagged
// "tenant:10". If the tag is not present in the listing, the VM's config is
// fetched and checked as well, which costs one extra request per such VM. A
// VM whose config cannot be read is skipped rather than returned unverified.
//
// The result is always a non-nil slice. Only ID, Name, Status and Node are
// populated on the returned VMs.
func (c *Client) ListVMs(ctx context.Context, node string, tenantID ...string) ([]VM, error) {
	var vms []struct {
		Vmid   int    `json:"vmid"`
		Name   string `json:"name"`
		Status string `json:"status"`
		Tags   string `json:"tags,omitempty"`
	}
	if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu", node), &vms); err != nil {
		return nil, errors.Wrap(err, "failed to list VMs")
	}

	filterTenantID := ""
	if len(tenantID) > 0 {
		filterTenantID = tenantID[0]
	}
	tenantTag := "tenant:" + filterTenantID

	// belongsToTenant reports whether the tag list contains the tenant tag as
	// a complete tag.
	belongsToTenant := func(tags string) bool {
		isSeparator := func(r rune) bool { return r == ';' || r == ',' || r == ' ' }
		for _, tag := range strings.FieldsFunc(tags, isSeparator) {
			if tag == tenantTag {
				return true
			}
		}
		return false
	}

	result := []VM{}
	for _, vm := range vms {
		if filterTenantID != "" && !belongsToTenant(vm.Tags) {
			// The listing may omit tags; fall back to the VM config.
			var config struct {
				Tags string `json:"tags"`
			}
			configPath := fmt.Sprintf("/nodes/%s/qemu/%d/config", node, vm.Vmid)
			if err := c.httpClient.Get(ctx, configPath, &config); err != nil {
				continue // Skip if we can't verify
			}
			if !belongsToTenant(config.Tags) {
				continue // Skip this VM - doesn't belong to tenant
			}
		}

		result = append(result, VM{
			ID:     vm.Vmid,
			Name:   vm.Name,
			Status: vm.Status,
			Node:   node,
		})
	}
	return result, nil
}
// ListStorages lists the storage pools reported by the "/storage" endpoint.
//
// The "content" field is accepted either as a comma-separated string (the
// form Proxmox uses, e.g. "iso,backup,vztmpl") or as a JSON array of strings.
// Decoding it strictly as an array would fail on the string form.
//
// On a request failure it returns a nil slice and a wrapped error.
//
// NOTE(review): "enabled", "total" and "used" are decoded as before; confirm
// that this endpoint actually returns them, otherwise Enabled, Capacity and
// Used are always zero values.
func (c *Client) ListStorages(ctx context.Context) ([]Storage, error) {
	var storages []struct {
		Storage string      `json:"storage"`
		Type    string      `json:"type"`
		Content interface{} `json:"content"`
		Shared  int         `json:"shared"`
		Enabled int         `json:"enabled"`
		Total   int64       `json:"total"`
		Used    int64       `json:"used"`
	}
	if err := c.httpClient.Get(ctx, "/storage", &storages); err != nil {
		return nil, errors.Wrap(err, "failed to list storages")
	}

	// contentKinds normalises the decoded "content" value to a string slice.
	contentKinds := func(raw interface{}) []string {
		switch v := raw.(type) {
		case string:
			var kinds []string
			for _, part := range strings.Split(v, ",") {
				if kind := strings.TrimSpace(part); kind != "" {
					kinds = append(kinds, kind)
				}
			}
			return kinds
		case []interface{}:
			kinds := make([]string, 0, len(v))
			for _, item := range v {
				if kind, ok := item.(string); ok {
					kinds = append(kinds, kind)
				}
			}
			return kinds
		default:
			// Field absent or of an unexpected type.
			return nil
		}
	}

	result := make([]Storage, len(storages))
	for i, s := range storages {
		result[i] = Storage{
			Name:     s.Storage,
			Type:     s.Type,
			Content:  contentKinds(s.Content),
			Shared:   s.Shared == 1,
			Enabled:  s.Enabled == 1,
			Capacity: s.Total,
			Used:     s.Used,
		}
	}
	return result, nil
}
// ListNetworks returns the network interfaces reported for the given node by
// the "/nodes/{node}/network" endpoint, in API order.
//
// The numeric "active" and "autostart" flags are converted to booleans
// (true only when the value is exactly 1). On a request failure it returns a
// nil slice and a wrapped error.
func (c *Client) ListNetworks(ctx context.Context, node string) ([]Network, error) {
	var ifaces []struct {
		Iface     string `json:"iface"`
		Type      string `json:"type"`
		Bridge    string `json:"bridge"`
		VLAN      int    `json:"vlan"`
		Active    int    `json:"active"`
		Autostart int    `json:"autostart"`
	}

	endpoint := "/nodes/" + node + "/network"
	err := c.httpClient.Get(ctx, endpoint, &ifaces)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list networks")
	}

	networks := make([]Network, 0, len(ifaces))
	for idx := range ifaces {
		entry := &ifaces[idx]
		networks = append(networks, Network{
			Name:      entry.Iface,
			Type:      entry.Type,
			Bridge:    entry.Bridge,
			VLAN:      entry.VLAN,
			Active:    entry.Active == 1,
			Autostart: entry.Autostart == 1,
		})
	}
	return networks, nil
}
// GetClusterInfo collects cluster-level information from three requests, in
// this order: "/cluster/status", "/version", and the node listing.
//
// Name is taken from the "cluster" entry of the cluster status when that
// entry carries a name. When there is no such entry (for example on a
// standalone node) the placeholder "proxmox-cluster" is used. Quorum is true
// when a "cluster" entry reports quorate == 1.
//
// If any of the three requests fails, it returns nil and a wrapped error.
func (c *Client) GetClusterInfo(ctx context.Context) (*ClusterInfo, error) {
	var status []struct {
		Name   string `json:"name"`
		Type   string `json:"type"`
		Quorum int    `json:"quorate"`
	}
	if err := c.httpClient.Get(ctx, "/cluster/status", &status); err != nil {
		return nil, errors.Wrap(err, "failed to get cluster status")
	}

	var version struct {
		Version string `json:"version"`
	}
	if err := c.httpClient.Get(ctx, "/version", &version); err != nil {
		return nil, errors.Wrap(err, "failed to get version")
	}

	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}

	// Fallback name for when the status carries no named cluster entry.
	name := "proxmox-cluster"
	named := false
	quorum := false
	for _, s := range status {
		if s.Type != "cluster" {
			continue
		}
		if !named && s.Name != "" {
			name = s.Name
			named = true
		}
		if s.Quorum == 1 {
			quorum = true
		}
	}

	return &ClusterInfo{
		Name:    name,
		Nodes:   nodes,
		Quorum:  quorum,
		Version: version.Version,
	}, nil
}