// Package proxmox implements a client for the Proxmox VE HTTP API covering
// the VM lifecycle (create, update, delete, status) and basic cluster
// inventory (nodes, storages, networks).
package proxmox

import (
	"context"
	"fmt"
	"strconv"
	"strings"
	"time"

	"github.com/pkg/errors"
	"github.com/sankofa/crossplane-provider-proxmox/pkg/utils"
)

// Client represents a Proxmox API client.
type Client struct {
	httpClient *HTTPClient
	endpoint   string
	username   string
	password   string
	token      string
	insecure   bool
}

// NewClient creates a new Proxmox API client that uses username/password
// authentication.
//
// When both username and password are non-empty the credentials are set on
// the underlying HTTP client and verified immediately (10 second timeout);
// an authentication failure is returned as an error. When either is empty no
// authentication is attempted.
func NewClient(endpoint, username, password string, insecureSkipTLS bool) (*Client, error) {
	httpClient := NewHTTPClient(endpoint, insecureSkipTLS)

	if username != "" && password != "" {
		httpClient.SetCredentials(username, password)

		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()

		if err := httpClient.Authenticate(ctx); err != nil {
			return nil, errors.Wrap(err, "failed to authenticate")
		}
	}

	return &Client{
		httpClient: httpClient,
		endpoint:   endpoint,
		username:   username,
		password:   password,
		insecure:   insecureSkipTLS,
	}, nil
}

// NewClientWithToken creates a new Proxmox API client with token
// authentication. No request is made to the server; the returned error is
// always nil.
func NewClientWithToken(endpoint, token string, insecureSkipTLS bool) (*Client, error) {
	httpClient := NewHTTPClient(endpoint, insecureSkipTLS)
	httpClient.SetToken(token)

	return &Client{
		httpClient: httpClient,
		endpoint:   endpoint,
		token:      token,
		insecure:   insecureSkipTLS,
	}, nil
}

// RetryConfig defines retry behavior.
type RetryConfig struct {
	// MaxRetries is the number of retries after the first attempt.
	// Negative values are treated as zero.
	MaxRetries int
	// BaseDelay is the wait before the first retry; it doubles on each
	// subsequent retry.
	BaseDelay time.Duration
	// MaxDelay caps the backoff delay.
	MaxDelay time.Duration
}

// DefaultRetryConfig returns the default retry configuration: 3 retries,
// starting at 1 second and capped at 30 seconds.
func DefaultRetryConfig() RetryConfig {
	return RetryConfig{
		MaxRetries: 3,
		BaseDelay:  time.Second,
		MaxDelay:   30 * time.Second,
	}
}

// RetryableError indicates an error that should be retried.
type RetryableError struct {
	// Err is the underlying failure.
	Err error
	// RetryAfter, when positive, is an additional wait applied before the
	// next attempt (on top of the regular backoff delay).
	RetryAfter time.Duration
}

// Error returns the message of the underlying error. It is safe to call on a
// value whose Err is nil.
func (e *RetryableError) Error() string {
	if e == nil || e.Err == nil {
		return "retryable error"
	}
	return e.Err.Error()
}

// Unwrap returns the underlying error so it remains visible to error
// inspection helpers.
func (e *RetryableError) Unwrap() error {
	if e == nil {
		return nil
	}
	return e.Err
}

// IsRetryable reports whether err is a *RetryableError.
func IsRetryable(err error) bool {
	if err == nil {
		return false
	}
	_, ok := err.(*RetryableError)
	return ok
}

// sleepWithContext blocks for d or until ctx is done, whichever comes first.
// It returns ctx.Err() when the context ended the wait and nil otherwise.
func sleepWithContext(ctx context.Context, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}

// wrapIfTransient marks network and temporary failures as retryable and
// returns every other error (including nil) unchanged.
func wrapIfTransient(err error) error {
	if err != nil && (isNetworkError(err) || isTemporaryError(err)) {
		return &RetryableError{Err: err}
	}
	return err
}

// Retry executes fn until it succeeds, returns a non-retryable error, the
// retries are exhausted, or ctx is done.
//
// fn is always called at least once. Before retry k (k >= 1) Retry waits
// BaseDelay * 2^(k-1), capped at MaxDelay. If the failed attempt returned a
// *RetryableError with a positive RetryAfter, that duration is waited as
// well. When all attempts fail the last error is returned wrapped with the
// retry count; when ctx ends a wait, ctx.Err() is returned.
func Retry(ctx context.Context, fn func() error, config RetryConfig) error {
	// A negative retry budget must still run fn once; without the clamp the
	// loop body would never execute and a nil error would be reported.
	maxRetries := config.MaxRetries
	if maxRetries < 0 {
		maxRetries = 0
	}

	var lastErr error
	for attempt := 0; attempt <= maxRetries; attempt++ {
		if attempt > 0 {
			// Exponential backoff computed by doubling (rather than by a
			// shift) so that large attempt counts cannot overflow.
			delay := config.BaseDelay
			for i := 1; i < attempt && delay > 0 && delay < config.MaxDelay; i++ {
				delay *= 2
			}
			if delay > config.MaxDelay {
				delay = config.MaxDelay
			}
			if err := sleepWithContext(ctx, delay); err != nil {
				return err
			}
		}

		err := fn()
		if err == nil {
			return nil
		}
		lastErr = err

		if !IsRetryable(err) {
			return err
		}

		if attempt < maxRetries {
			if retryErr, ok := err.(*RetryableError); ok && retryErr.RetryAfter > 0 {
				if err := sleepWithContext(ctx, retryErr.RetryAfter); err != nil {
					return err
				}
			}
		}
	}

	return errors.Wrapf(lastErr, "failed after %d retries", maxRetries)
}

// CreateVM creates a virtual machine, retrying network and temporary
// failures according to DefaultRetryConfig.
func (c *Client) CreateVM(ctx context.Context, spec VMSpec) (*VM, error) {
	var vm *VM
	err := Retry(ctx, func() error {
		var createErr error
		vm, createErr = c.createVM(ctx, spec)
		return wrapIfTransient(createErr)
	}, DefaultRetryConfig())
	return vm, err
}

// findImageInStorage searches the storages that can hold images, ISOs or
// container templates for a volume whose volid contains imageName and
// returns the first matching volid. Storages whose content cannot be listed
// are skipped.
func (c *Client) findImageInStorage(ctx context.Context, node, imageName string) (string, error) {
	storages, err := c.ListStorages(ctx)
	if err != nil {
		return "", errors.Wrap(err, "failed to list storages")
	}

	for _, storage := range storages {
		hasImages := false
		for _, content := range storage.Content {
			if content == "images" || content == "iso" || content == "vztmpl" {
				hasImages = true
				break
			}
		}
		if !hasImages {
			continue
		}

		var content []struct {
			Volid  string `json:"volid"`
			Format string `json:"format"`
		}
		path := fmt.Sprintf("/nodes/%s/storage/%s/content", node, storage.Name)
		if err := c.httpClient.Get(ctx, path, &content); err != nil {
			continue // Skip storages we cannot list.
		}

		for _, item := range content {
			if strings.Contains(item.Volid, imageName) {
				return item.Volid, nil
			}
		}
	}

	return "", fmt.Errorf("image '%s' not found in any storage", imageName)
}

// createVM performs the actual VM creation.
//
// spec.Image selects the strategy:
//   - a number in the valid VMID range (100-999999999) clones that template;
//   - a volid ("storage:path") or an image name resolved via
//     findImageInStorage is used as the boot disk; ".img" and ".qcow2"
//     images are imported into a freshly created VM, anything else is
//     referenced directly as scsi0.
//
// An empty image is rejected because a VM without an OS image cannot boot.
// Cloud-init user data is written on a best-effort basis: a failure there
// does not fail VM creation.
func (c *Client) createVM(ctx context.Context, spec VMSpec) (*VM, error) {
	if spec.Image == "" {
		return nil, errors.New("image is required - cannot create VM without OS image")
	}

	// The HTTP client's Get method already extracts the "data" field, so the
	// next free VMID arrives as a plain string.
	var nextIDStr string
	if err := c.httpClient.Get(ctx, "/cluster/nextid", &nextIDStr); err != nil {
		return nil, errors.Wrap(err, "failed to get next VMID")
	}
	vmID, err := strconv.Atoi(strings.TrimSpace(nextIDStr))
	if err != nil {
		return nil, errors.Wrap(err, "failed to parse VMID")
	}

	// Use one storage default for both the VM disk and the cloud-init drive
	// so an empty spec.Storage never yields an invalid ":<size>" disk spec.
	storage := spec.Storage
	if storage == "" {
		storage = "local-lvm"
	}

	// A purely numeric image within the VMID range is a template to clone.
	if templateID, parseErr := strconv.Atoi(spec.Image); parseErr == nil && templateID >= 100 && templateID <= 999999999 {
		return c.cloneFromTemplate(ctx, spec, templateID, vmID, storage)
	}

	// Resolve the image to a volid (format: storage:path/to/image).
	imageVolid := spec.Image
	if !strings.Contains(spec.Image, ":") {
		foundVolid, err := c.findImageInStorage(ctx, spec.Node, spec.Image)
		if err != nil {
			// Fail instead of creating a blank disk; this prevents
			// "Nothing to boot" VMs.
			return nil, errors.Wrapf(err, "image '%s' not found in storage - cannot create VM without OS image", spec.Image)
		}
		imageVolid = foundVolid
	}
	if imageVolid == "" {
		return nil, errors.Errorf("image '%s' not found in storage and no disk configuration provided. Cannot create VM without OS image", spec.Image)
	}

	// Cloud images (.img, .qcow2) have to be imported as disks: the VM is
	// created with a blank disk first and the image is imported afterwards.
	// Anything else is used directly as the boot disk.
	needsImageImport := strings.HasSuffix(imageVolid, ".img") || strings.HasSuffix(imageVolid, ".qcow2")
	var diskConfig string
	if needsImageImport {
		diskConfig = fmt.Sprintf("%s:%d,format=qcow2", storage, parseDisk(spec.Disk))
	} else {
		diskConfig = fmt.Sprintf("%s,format=qcow2", imageVolid)
	}

	vmConfig := map[string]interface{}{
		"vmid":   vmID,
		"name":   spec.Name,
		"cores":  spec.CPU,
		"memory": parseMemory(spec.Memory),
		"net0":   fmt.Sprintf("virtio,bridge=%s", spec.Network),
		"scsi0":  diskConfig,
		"ostype": "l26",         // Linux 2.6+ kernel
		"agent":  "1",           // Enable QEMU guest agent
		"boot":   "order=scsi0", // Boot from the primary disk
	}

	// Tenant label (for tenant isolation and billing). Underscore is used
	// instead of a colon to fit the Proxmox tag format.
	if spec.TenantID != "" {
		vmConfig["tags"] = fmt.Sprintf("tenant_%s", spec.TenantID)
	}

	// Cloud-init drive (ide2=storage:cloudinit), default user and DHCP.
	if spec.UserData != "" {
		vmConfig["ide2"] = fmt.Sprintf("%s:cloudinit", storage)
		vmConfig["ciuser"] = "admin"
		vmConfig["ipconfig0"] = "ip=dhcp"
	}

	// The HTTP client's Post method already extracts the "data" field.
	var resultStr string
	if err := c.httpClient.Post(ctx, fmt.Sprintf("/nodes/%s/qemu", spec.Node), vmConfig, &resultStr); err != nil {
		return nil, errors.Wrap(err, "failed to create VM")
	}

	if needsImageImport {
		if err := c.importImageAsBootDisk(ctx, spec.Node, vmID, imageVolid); err != nil {
			return nil, err
		}
	}

	if spec.UserData != "" {
		// Best effort: the VM is usable without it and cloud-init can be
		// configured later, so a failure here must not fail VM creation.
		_ = c.writeCloudInitUserData(ctx, spec.Node, vmID, spec.UserData, 3)
	}

	return c.getVMByID(ctx, spec.Node, vmID)
}

// cloneFromTemplate clones template templateID into a new VM vmID on
// spec.Node, applies CPU, memory, tenant tag and cloud-init settings to the
// clone and returns its details.
func (c *Client) cloneFromTemplate(ctx context.Context, spec VMSpec, templateID, vmID int, storage string) (*VM, error) {
	cloneConfig := map[string]interface{}{
		"newid":  vmID,
		"name":   spec.Name,
		"target": spec.Node,
	}
	var cloneResult string
	clonePath := fmt.Sprintf("/nodes/%s/qemu/%d/clone", spec.Node, templateID)
	if err := c.httpClient.Post(ctx, clonePath, cloneConfig, &cloneResult); err != nil {
		return nil, errors.Wrapf(err, "failed to clone template %d", templateID)
	}

	vmConfig := map[string]interface{}{
		"cores":  spec.CPU,
		"memory": parseMemory(spec.Memory),
		"agent":  "1", // Enable QEMU guest agent
	}
	if spec.TenantID != "" {
		vmConfig["tags"] = fmt.Sprintf("tenant_%s", spec.TenantID)
	}
	if spec.UserData != "" {
		vmConfig["ide2"] = fmt.Sprintf("%s:cloudinit", storage)
		vmConfig["ciuser"] = "admin"
		vmConfig["ipconfig0"] = "ip=dhcp"
	}

	configPath := fmt.Sprintf("/nodes/%s/qemu/%d/config", spec.Node, vmID)
	if err := c.httpClient.Put(ctx, configPath, vmConfig, nil); err != nil {
		return nil, errors.Wrap(err, "failed to update cloned VM config")
	}

	if spec.UserData != "" {
		// Best effort: cloud-init can be configured later.
		_ = c.writeCloudInitUserData(ctx, spec.Node, vmID, spec.UserData, 1)
	}

	return c.getVMByID(ctx, spec.Node, vmID)
}

// writeCloudInitUserData posts userData to the VM's cloud-init endpoint,
// trying up to attempts times with a one second pause between tries. It
// returns the last error, or ctx.Err() if the context ends a pause.
func (c *Client) writeCloudInitUserData(ctx context.Context, node string, vmID int, userData string, attempts int) error {
	cloudInitConfig := map[string]interface{}{
		"user": userData,
	}
	cloudInitPath := fmt.Sprintf("/nodes/%s/qemu/%d/cloudinit", node, vmID)

	var lastErr error
	for attempt := 0; attempt < attempts; attempt++ {
		if attempt > 0 {
			if err := sleepWithContext(ctx, time.Second); err != nil {
				return err
			}
		}
		if lastErr = c.httpClient.Post(ctx, cloudInitPath, cloudInitConfig, nil); lastErr == nil {
			return nil
		}
	}
	return lastErr
}

// cleanupFailedVM makes a best-effort attempt to unlock and delete a VM that
// was created but could not be fully provisioned.
func (c *Client) cleanupFailedVM(ctx context.Context, vmID int) {
	// Errors are deliberately ignored: the caller is already returning the
	// failure that triggered the cleanup.
	_ = c.UnlockVM(ctx, vmID)
	_ = c.deleteVM(ctx, vmID)
}

// importImageAsBootDisk imports the cloud image imageVolid into VM vmID via
// the importdisk endpoint, waits for the import to finish and moves the
// imported disk into the scsi0 slot with boot order set to scsi0.
//
// The VM is stopped first if it is running. If the import request itself
// fails, or importdisk is known to be unsupported, the VM is deleted again
// before the error is returned.
func (c *Client) importImageAsBootDisk(ctx context.Context, node string, vmID int, imageVolid string) error {
	parts := strings.SplitN(imageVolid, ":", 2)
	if len(parts) != 2 {
		return errors.Errorf("invalid image volid format: %s (expected storage:path)", imageVolid)
	}
	storageName, imagePath := parts[0], parts[1]

	// The import requires a stopped VM.
	var vmStatus struct {
		Status string `json:"status"`
	}
	statusPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/current", node, vmID)
	if err := c.httpClient.Get(ctx, statusPath, &vmStatus); err == nil && vmStatus.Status == "running" {
		stopPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/stop", node, vmID)
		if err := c.httpClient.Post(ctx, stopPath, nil, nil); err != nil {
			return errors.Wrap(err, "failed to stop VM for image import")
		}

		// Wait for the VM to stop (up to 30 seconds).
		for i := 0; i < 30; i++ {
			if err := sleepWithContext(ctx, time.Second); err != nil {
				return err
			}
			if err := c.httpClient.Get(ctx, statusPath, &vmStatus); err == nil && vmStatus.Status == "stopped" {
				break
			}
		}
		if vmStatus.Status != "stopped" {
			return errors.New("VM did not stop within 30 seconds, cannot import image")
		}
	}

	// Only give up early when the server is known not to support importdisk.
	// If support cannot be determined, try anyway; the call below reports a
	// 501 if the endpoint is missing.
	supported, err := c.SupportsImportDisk(ctx)
	if err == nil && !supported {
		c.cleanupFailedVM(ctx, vmID)
		return errors.Errorf("importdisk API is not supported in this Proxmox version. VM %d has been cleaned up. Please use template cloning or pre-imported images instead", vmID)
	}

	// POST /nodes/{node}/qemu/{vmid}/importdisk
	// Note: importdisk creates a NEW disk (usually scsi1), it doesn't
	// replace scsi0.
	importConfig := map[string]interface{}{
		"storage":  storageName,
		"format":   "qcow2",
		"filename": imagePath,
	}
	var importResult string
	importPath := fmt.Sprintf("/nodes/%s/qemu/%d/importdisk", node, vmID)
	if err := c.httpClient.Post(ctx, importPath, importConfig, &importResult); err != nil {
		c.cleanupFailedVM(ctx, vmID)
		// 501 (not implemented) means the endpoint does not exist.
		if strings.Contains(err.Error(), "501") || strings.Contains(err.Error(), "not implemented") {
			return errors.Errorf("importdisk API is not implemented in this Proxmox version. VM %d has been cleaned up. Please use template cloning (numeric VMID) or pre-imported images instead of cloud images", vmID)
		}
		return errors.Wrapf(err, "failed to import image '%s' - VM %d has been cleaned up", imageVolid, vmID)
	}

	// The result should be the task UPID; tolerate it being embedded in a
	// JSON string.
	taskUPID := strings.TrimSpace(importResult)
	if !strings.HasPrefix(taskUPID, "UPID:") {
		for _, part := range strings.Split(importResult, "\"") {
			if strings.HasPrefix(part, "UPID:") {
				taskUPID = part
				break
			}
		}
	}

	if strings.HasPrefix(taskUPID, "UPID:") {
		if err := c.waitForImportTask(ctx, node, taskUPID); err != nil {
			return err
		}
		// Give the disk a moment to be registered in the VM config.
		if err := sleepWithContext(ctx, 2*time.Second); err != nil {
			return err
		}
	} else {
		// No UPID to monitor: fall back to a conservative fixed wait.
		if err := sleepWithContext(ctx, 30*time.Second); err != nil {
			return err
		}
	}

	// Locate the imported disk (usually scsi1, possibly scsi2).
	var vmConfigResponse struct {
		Scsi1 string `json:"scsi1"`
		Scsi2 string `json:"scsi2"`
	}
	configPath := fmt.Sprintf("/nodes/%s/qemu/%d/config", node, vmID)
	if err := c.httpClient.Get(ctx, configPath, &vmConfigResponse); err != nil {
		return errors.Wrap(err, "failed to get VM config after import")
	}

	var importedDisk, importedSlot string
	switch {
	case vmConfigResponse.Scsi1 != "" && strings.Contains(vmConfigResponse.Scsi1, storageName):
		importedDisk, importedSlot = vmConfigResponse.Scsi1, "scsi1"
	case vmConfigResponse.Scsi2 != "" && strings.Contains(vmConfigResponse.Scsi2, storageName):
		importedDisk, importedSlot = vmConfigResponse.Scsi2, "scsi2"
	}
	if importedDisk == "" {
		return errors.New("imported disk not found in VM configuration after import")
	}

	// Replace the blank scsi0 with the imported disk. Removing the blank
	// disk is best effort: it might already be gone.
	_ = c.httpClient.Put(ctx, configPath, map[string]interface{}{"scsi0": ""}, nil)

	updateConfig := map[string]interface{}{
		"scsi0": importedDisk, // Attach imported disk to the primary slot
		"boot":  "order=scsi0",
	}
	if err := c.httpClient.Put(ctx, configPath, updateConfig, nil); err != nil {
		return errors.Wrap(err, "failed to attach imported disk to scsi0 and set boot order")
	}

	// Clear the slot the import originally used to avoid a duplicate
	// attachment (best effort).
	_ = c.httpClient.Put(ctx, configPath, map[string]interface{}{importedSlot: ""}, nil)

	// Verify the result; if the config cannot be read the check is skipped.
	var verifyConfig struct {
		Boot  string `json:"boot"`
		Scsi0 string `json:"scsi0"`
	}
	if err := c.httpClient.Get(ctx, configPath, &verifyConfig); err == nil {
		if verifyConfig.Boot != "order=scsi0" {
			// One more best-effort attempt to set the boot order.
			_ = c.httpClient.Put(ctx, configPath, map[string]interface{}{"boot": "order=scsi0"}, nil)
		}
		if verifyConfig.Scsi0 == "" {
			return errors.New("scsi0 is empty after attaching imported disk")
		}
	}

	return nil
}

// waitForImportTask polls the task identified by taskUPID every 3 seconds
// until it reports "stopped", failing if its exit status is neither "OK" nor
// empty, if it has not stopped after 10 minutes, or if ctx is done.
func (c *Client) waitForImportTask(ctx context.Context, node, taskUPID string) error {
	const (
		maxWaitTime  = 10 * time.Minute // Large images can take a while
		pollInterval = 3 * time.Second
	)

	taskStatusPath := fmt.Sprintf("/nodes/%s/tasks/%s/status", node, taskUPID)
	deadline := time.Now().Add(maxWaitTime)
	lastStatus := ""

	for {
		var taskStatus struct {
			Status     string `json:"status"`
			ExitStatus string `json:"exitstatus,omitempty"`
		}
		// A failed status request is treated like "still running".
		if err := c.httpClient.Get(ctx, taskStatusPath, &taskStatus); err == nil {
			lastStatus = taskStatus.Status
			if taskStatus.Status == "stopped" {
				if taskStatus.ExitStatus != "OK" && taskStatus.ExitStatus != "" {
					return errors.Errorf("importdisk task failed with exit status: %s", taskStatus.ExitStatus)
				}
				return nil
			}
		}

		if !time.Now().Before(deadline) {
			return errors.Errorf("importdisk task did not complete within timeout period (status: %s)", lastStatus)
		}
		if err := sleepWithContext(ctx, pollInterval); err != nil {
			return err
		}
	}
}

// getVMByID retrieves VM details by ID from the node's VM list. If the VM is
// not (yet) listed, a placeholder with name "unknown" and status "created"
// is returned instead of an error.
func (c *Client) getVMByID(ctx context.Context, node string, vmID int) (*VM, error) {
	var vmDetails []struct {
		Vmid   int    `json:"vmid"`
		Name   string `json:"name"`
		Status string `json:"status"`
	}
	if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu", node), &vmDetails); err != nil {
		return nil, errors.Wrap(err, "failed to get VM details")
	}

	for _, v := range vmDetails {
		if v.Vmid == vmID {
			return &VM{
				ID:     v.Vmid,
				Name:   v.Name,
				Status: v.Status,
				Node:   node,
			}, nil
		}
	}

	return &VM{
		ID:     vmID,
		Name:   "unknown",
		Status: "created",
		Node:   node,
	}, nil
}

// parseMemory converts a memory quantity string using the shared
// utils.ParseMemoryToMB helper.
func parseMemory(memory string) int {
	return utils.ParseMemoryToMB(memory)
}

// parseDisk converts a disk size string using the shared utils.ParseDiskToGB
// helper.
func parseDisk(disk string) int {
	return utils.ParseDiskToGB(disk)
}

// UpdateVM updates a virtual machine, retrying network and temporary
// failures according to DefaultRetryConfig.
func (c *Client) UpdateVM(ctx context.Context, vmID int, spec VMSpec) (*VM, error) {
	var vm *VM
	err := Retry(ctx, func() error {
		var updateErr error
		vm, updateErr = c.updateVM(ctx, vmID, spec)
		return wrapIfTransient(updateErr)
	}, DefaultRetryConfig())
	return vm, err
}

// updateVM locates the VM, applies CPU and memory from spec when set (the
// guest agent is always enabled) and returns the VM as listed afterwards.
func (c *Client) updateVM(ctx context.Context, vmID int, spec VMSpec) (*VM, error) {
	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}

	var targetNode string
	for _, node := range nodes {
		vms, err := c.ListVMs(ctx, node)
		if err != nil {
			continue
		}
		for _, vm := range vms {
			if vm.ID == vmID {
				targetNode = node
				break
			}
		}
		if targetNode != "" {
			break
		}
	}
	if targetNode == "" {
		return nil, fmt.Errorf("VM %d not found", vmID)
	}

	// The guest agent is always (re-)enabled, so there is always something
	// to send.
	vmConfig := map[string]interface{}{
		"agent": "1",
	}
	if spec.CPU > 0 {
		vmConfig["cores"] = spec.CPU
	}
	if spec.Memory != "" {
		vmConfig["memory"] = parseMemory(spec.Memory)
	}

	var result struct {
		Data string `json:"data"`
	}
	if err := c.httpClient.Put(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/config", targetNode, vmID), vmConfig, &result); err != nil {
		return nil, errors.Wrap(err, "failed to update VM")
	}

	vms, err := c.ListVMs(ctx, targetNode)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list VMs")
	}
	for i := range vms {
		if vms[i].ID == vmID {
			return &vms[i], nil
		}
	}

	return nil, fmt.Errorf("VM %d not found after update", vmID)
}

// findVMNode finds which node a VM is on by searching all nodes. Nodes that
// cannot be queried are skipped.
func (c *Client) findVMNode(ctx context.Context, vmID int) (string, error) {
	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return "", errors.Wrap(err, "failed to list nodes")
	}

	for _, node := range nodes {
		var vms []struct {
			Vmid int `json:"vmid"`
		}
		if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu", node), &vms); err != nil {
			continue // Skip nodes we can't query.
		}
		for _, vm := range vms {
			if vm.Vmid == vmID {
				return node, nil
			}
		}
	}

	return "", fmt.Errorf("VM %d not found on any node", vmID)
}

// UnlockVM unlocks a virtual machine. An error is returned only when the
// VM's node cannot be determined; a failing unlock request is ignored
// because the VM may simply not be locked.
func (c *Client) UnlockVM(ctx context.Context, vmID int) error {
	targetNode, err := c.findVMNode(ctx, vmID)
	if err != nil {
		return errors.Wrap(err, "failed to find VM node")
	}

	// POST /nodes/{node}/qemu/{vmid}/unlock
	unlockPath := fmt.Sprintf("/nodes/%s/qemu/%d/unlock", targetNode, vmID)
	// Deliberately ignored: the lock may not exist.
	_ = c.httpClient.Post(ctx, unlockPath, nil, nil)
	return nil
}

// DeleteVM deletes a virtual machine, retrying network and temporary
// failures according to DefaultRetryConfig.
func (c *Client) DeleteVM(ctx context.Context, vmID int) error {
	return Retry(ctx, func() error {
		return wrapIfTransient(c.deleteVM(ctx, vmID))
	}, DefaultRetryConfig())
}

// deleteVM locates the VM, unlocks it, stops it if it is running and deletes
// it with purge enabled.
func (c *Client) deleteVM(ctx context.Context, vmID int) error {
	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return errors.Wrap(err, "failed to list nodes")
	}

	var targetNode string
	for _, node := range nodes {
		vms, err := c.ListVMs(ctx, node)
		if err != nil {
			continue
		}
		for _, vm := range vms {
			if vm.ID == vmID {
				targetNode = node
				break
			}
		}
		if targetNode != "" {
			break
		}
	}
	if targetNode == "" {
		return fmt.Errorf("VM %d not found", vmID)
	}

	// Unlock first to prevent lock issues; try a few times to cope with
	// transient failures.
	for i := 0; i < 5; i++ {
		if err := c.UnlockVM(ctx, vmID); err == nil {
			break
		}
		if err := sleepWithContext(ctx, time.Second); err != nil {
			return err
		}
	}

	// Stop the VM first if it is running.
	var status struct {
		Status string `json:"status"`
	}
	statusPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/current", targetNode, vmID)
	if err := c.httpClient.Get(ctx, statusPath, &status); err == nil && status.Status == "running" {
		stopPath := fmt.Sprintf("/nodes/%s/qemu/%d/status/stop", targetNode, vmID)
		if err := c.httpClient.Post(ctx, stopPath, nil, nil); err != nil {
			return errors.Wrap(err, "failed to stop VM")
		}

		// Wait for the VM to stop (up to 30 seconds); the delete below is
		// attempted even if it has not stopped by then.
		for i := 0; i < 30; i++ {
			if err := sleepWithContext(ctx, time.Second); err != nil {
				return err
			}
			if err := c.httpClient.Get(ctx, statusPath, &status); err == nil && status.Status == "stopped" {
				break
			}
		}
	}

	// Delete with purge to ensure complete cleanup.
	deletePath := fmt.Sprintf("/nodes/%s/qemu/%d?purge=1", targetNode, vmID)
	if err := c.httpClient.Delete(ctx, deletePath); err != nil {
		return errors.Wrap(err, "failed to delete VM")
	}

	return nil
}

// GetVMStatus gets the status of a virtual machine, retrying network and
// temporary failures according to DefaultRetryConfig.
func (c *Client) GetVMStatus(ctx context.Context, vmID int) (*VMStatus, error) {
	var status *VMStatus
	err := Retry(ctx, func() error {
		var statusErr error
		status, statusErr = c.getVMStatus(ctx, vmID)
		return wrapIfTransient(statusErr)
	}, DefaultRetryConfig())
	return status, err
}

// getVMStatus searches all nodes for the VM and returns its current state,
// CPU and memory figures plus, when present in the net0 config, its IP
// address. If the detailed status cannot be fetched only the state from the
// VM list is returned.
func (c *Client) getVMStatus(ctx context.Context, vmID int) (*VMStatus, error) {
	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}

	for _, node := range nodes {
		vms, err := c.ListVMs(ctx, node)
		if err != nil {
			continue
		}

		for _, vm := range vms {
			if vm.ID != vmID {
				continue
			}

			var vmStatus struct {
				Status string  `json:"status"`
				CPU    float64 `json:"cpu"`
				Mem    int64   `json:"mem"`
				NetIn  int64   `json:"netin"`
				NetOut int64   `json:"netout"`
			}
			if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/status/current", node, vmID), &vmStatus); err != nil {
				return &VMStatus{
					State: vm.Status,
				}, nil
			}

			// Try to get the IP address from the net0 config.
			var config struct {
				Net0 string `json:"net0"`
			}
			ipAddress := ""
			if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu/%d/config", node, vmID), &config); err == nil {
				ipAddress = extractIP(config.Net0)
			}

			return &VMStatus{
				State:     vmStatus.Status,
				IPAddress: ipAddress,
				CPU:       vmStatus.CPU,
				Memory:    vmStatus.Mem,
			}, nil
		}
	}

	return nil, fmt.Errorf("VM %d not found", vmID)
}

// extractIP returns the address from the "ip=" parameter of a
// comma-separated net config string, without its subnet mask. It returns ""
// when there is no such parameter or its value is "dhcp" or "static".
//
// Examples: "virtio,bridge=vmbr0,ip=192.168.1.100/24" yields
// "192.168.1.100"; "virtio,bridge=vmbr0,firewall=1,ip=dhcp" yields "".
func extractIP(netConfig string) string {
	if len(netConfig) == 0 {
		return ""
	}

	for _, part := range strings.Split(netConfig, ",") {
		part = strings.TrimSpace(part)
		if !strings.HasPrefix(part, "ip=") {
			continue
		}
		ipPart := strings.TrimPrefix(part, "ip=")
		// Remove the subnet mask if present.
		if idx := strings.Index(ipPart, "/"); idx > 0 {
			return ipPart[:idx]
		}
		if ipPart != "dhcp" && ipPart != "static" {
			return ipPart
		}
	}

	return ""
}

// isNetworkError reports whether the error message mentions "network",
// "timeout" or "connection".
func isNetworkError(err error) bool {
	if err == nil {
		return false
	}
	errStr := err.Error()
	return contains(errStr, "network") || contains(errStr, "timeout") || contains(errStr, "connection")
}

// isTemporaryError reports whether the error message mentions "temporary",
// "503" or "502".
func isTemporaryError(err error) bool {
	if err == nil {
		return false
	}
	errStr := err.Error()
	return contains(errStr, "temporary") || contains(errStr, "503") || contains(errStr, "502")
}

// contains reports whether substr is within s.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}

// indexOfSubstring returns the index of the first occurrence of substr in s,
// or -1 if substr is not present.
func indexOfSubstring(s, substr string) int {
	return strings.Index(s, substr)
}

// VMSpec represents VM specification.
type VMSpec struct {
	Node     string
	Name     string
	CPU      int
	Memory   string
	Disk     string
	Storage  string
	Network  string
	Image    string
	UserData string   // Cloud-init user data
	SSHKeys  []string // SSH public keys
	TenantID string   // Tenant ID, written to the VM as a "tenant_<id>" tag
}

// VM represents a virtual machine.
type VM struct {
	ID      int
	Name    string
	Status  string
	IP      string
	Node    string
	Created time.Time
}

// VMStatus represents VM status.
type VMStatus struct {
	State     string
	IPAddress string
	CPU       float64
	Memory    int64
}

// Storage represents a storage pool.
type Storage struct {
	Name     string
	Type     string
	Content  []string
	Shared   bool
	Enabled  bool
	Capacity int64
	Used     int64
}

// Network represents a network interface.
type Network struct {
	Name      string
	Type      string
	Bridge    string
	VLAN      int
	Active    bool
	Autostart bool
}

// ClusterInfo represents cluster information.
type ClusterInfo struct {
	Name    string
	Nodes   []string
	Quorum  bool
	Version string
}

// GetPVEVersion gets the Proxmox VE version string reported by the
// "/version" endpoint.
func (c *Client) GetPVEVersion(ctx context.Context) (string, error) {
	var version struct {
		Version string `json:"version"`
	}
	if err := c.httpClient.Get(ctx, "/version", &version); err != nil {
		return "", errors.Wrap(err, "failed to get PVE version")
	}
	return version.Version, nil
}

// SupportsImportDisk reports whether the server's version suggests that the
// importdisk API is available (major version 6 or newer).
//
// This is a best-effort heuristic; actual support is only confirmed by the
// API call itself. If the version cannot be retrieved, false is returned
// together with the error so callers can tell "unsupported" apart from
// "unknown".
func (c *Client) SupportsImportDisk(ctx context.Context) (bool, error) {
	version, err := c.GetPVEVersion(ctx)
	if err != nil {
		return false, errors.Wrap(err, "failed to determine importdisk support")
	}
	return pveMajorVersion(version) >= 6, nil
}

// pveMajorVersion extracts the major version number from a version string
// such as "8.1.4" or "pve-manager/8.1.4/abcdef". It returns -1 when no
// leading number can be found.
func pveMajorVersion(version string) int {
	const prefix = "pve-manager/"

	v := strings.ToLower(strings.TrimSpace(version))
	if idx := strings.Index(v, prefix); idx >= 0 {
		v = v[idx+len(prefix):]
	}

	end := 0
	for end < len(v) && v[end] >= '0' && v[end] <= '9' {
		end++
	}
	major, err := strconv.Atoi(v[:end])
	if err != nil {
		return -1
	}
	return major
}

// CheckNodeHealth checks if a node is reachable and reports status "online".
func (c *Client) CheckNodeHealth(ctx context.Context, node string) error {
	var status struct {
		Status string `json:"status"`
		Uptime int64  `json:"uptime"`
	}
	statusPath := fmt.Sprintf("/nodes/%s/status", node)
	if err := c.httpClient.Get(ctx, statusPath, &status); err != nil {
		return errors.Wrapf(err, "node %s is not reachable or unhealthy", node)
	}
	if status.Status != "online" {
		return errors.Errorf("node %s is not online (status: %s)", node, status.Status)
	}
	return nil
}

// ListNodes lists the names of all nodes in the cluster.
func (c *Client) ListNodes(ctx context.Context) ([]string, error) {
	var nodes []struct {
		Node   string `json:"node"`
		Status string `json:"status"`
		Type   string `json:"type"`
	}
	if err := c.httpClient.Get(ctx, "/nodes", &nodes); err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}

	result := make([]string, len(nodes))
	for i, node := range nodes {
		result[i] = node.Node
	}
	return result, nil
}

// tagListContains reports whether the tag list contains want as a complete
// tag. Tags may be separated by ';', ',' or spaces.
func tagListContains(tags, want string) bool {
	isSeparator := func(r rune) bool {
		return r == ';' || r == ',' || r == ' '
	}
	for _, tag := range strings.FieldsFunc(tags, isSeparator) {
		if tag == want {
			return true
		}
	}
	return false
}

// ListVMs lists all VMs on a node, optionally filtered by tenant.
//
// When a non-empty tenant ID is passed, only VMs carrying the exact tag
// "tenant_<id>" are returned. If the tag is not present in the list
// response, the VM's config is consulted; VMs whose config cannot be read
// are left out.
func (c *Client) ListVMs(ctx context.Context, node string, tenantID ...string) ([]VM, error) {
	var vms []struct {
		Vmid   int    `json:"vmid"`
		Name   string `json:"name"`
		Status string `json:"status"`
		Tags   string `json:"tags,omitempty"`
	}
	if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/qemu", node), &vms); err != nil {
		return nil, errors.Wrap(err, "failed to list VMs")
	}

	// The tenant_{id} format (underscore) matches what createVM writes.
	tenantTag := ""
	if len(tenantID) > 0 && tenantID[0] != "" {
		tenantTag = fmt.Sprintf("tenant_%s", tenantID[0])
	}

	result := []VM{}
	for _, vm := range vms {
		if tenantTag != "" && !tagListContains(vm.Tags, tenantTag) {
			// Tags may be missing from the list response; check the config.
			var config struct {
				Tags string `json:"tags"`
			}
			configPath := fmt.Sprintf("/nodes/%s/qemu/%d/config", node, vm.Vmid)
			if err := c.httpClient.Get(ctx, configPath, &config); err != nil || !tagListContains(config.Tags, tenantTag) {
				continue // Not this tenant's VM, or it cannot be verified.
			}
		}

		result = append(result, VM{
			ID:     vm.Vmid,
			Name:   vm.Name,
			Status: vm.Status,
			Node:   node,
		})
	}

	return result, nil
}

// ListStorages lists all storage pools.
func (c *Client) ListStorages(ctx context.Context) ([]Storage, error) {
	var storages []struct {
		Storage string   `json:"storage"`
		Type    string   `json:"type"`
		Content []string `json:"content"`
		Shared  int      `json:"shared"`
		Enabled int      `json:"enabled"`
		Total   int64    `json:"total"`
		Used    int64    `json:"used"`
	}
	if err := c.httpClient.Get(ctx, "/storage", &storages); err != nil {
		return nil, errors.Wrap(err, "failed to list storages")
	}

	result := make([]Storage, len(storages))
	for i, s := range storages {
		result[i] = Storage{
			Name:     s.Storage,
			Type:     s.Type,
			Content:  s.Content,
			Shared:   s.Shared == 1,
			Enabled:  s.Enabled == 1,
			Capacity: s.Total,
			Used:     s.Used,
		}
	}
	return result, nil
}

// ListNetworks lists all networks on a node.
func (c *Client) ListNetworks(ctx context.Context, node string) ([]Network, error) {
	var networks []struct {
		Iface     string `json:"iface"`
		Type      string `json:"type"`
		Bridge    string `json:"bridge"`
		VLAN      int    `json:"vlan"`
		Active    int    `json:"active"`
		Autostart int    `json:"autostart"`
	}
	if err := c.httpClient.Get(ctx, fmt.Sprintf("/nodes/%s/network", node), &networks); err != nil {
		return nil, errors.Wrap(err, "failed to list networks")
	}

	result := make([]Network, len(networks))
	for i, n := range networks {
		result[i] = Network{
			Name:      n.Iface,
			Type:      n.Type,
			Bridge:    n.Bridge,
			VLAN:      n.VLAN,
			Active:    n.Active == 1,
			Autostart: n.Autostart == 1,
		}
	}
	return result, nil
}

// GetClusterInfo gets cluster information: node names, whether the cluster
// is quorate, and the server version. The cluster name is always reported as
// "proxmox-cluster".
func (c *Client) GetClusterInfo(ctx context.Context) (*ClusterInfo, error) {
	var status []struct {
		Name   string `json:"name"`
		Type   string `json:"type"`
		Quorum int    `json:"quorate"`
	}
	if err := c.httpClient.Get(ctx, "/cluster/status", &status); err != nil {
		return nil, errors.Wrap(err, "failed to get cluster status")
	}

	var version struct {
		Version string `json:"version"`
	}
	if err := c.httpClient.Get(ctx, "/version", &version); err != nil {
		return nil, errors.Wrap(err, "failed to get version")
	}

	nodes, err := c.ListNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list nodes")
	}

	quorum := false
	for _, s := range status {
		if s.Type == "cluster" && s.Quorum == 1 {
			quorum = true
			break
		}
	}

	return &ClusterInfo{
		Name:    "proxmox-cluster",
		Nodes:   nodes,
		Quorum:  quorum,
		Version: version.Version,
	}, nil
}