Apply Composer changes: comprehensive API updates, migrations, middleware, and infrastructure improvements
- Add comprehensive database migrations (001-024) for schema evolution
- Enhance API schema with expanded type definitions and resolvers
- Add new middleware: audit logging, rate limiting, MFA enforcement, security, tenant auth
- Implement new services: AI optimization, billing, blockchain, compliance, marketplace
- Add adapter layer for cloud integrations (Cloudflare, Kubernetes, Proxmox, storage)
- Update Crossplane provider with enhanced VM management capabilities
- Add comprehensive test suite for API endpoints and services
- Update frontend components with improved GraphQL subscriptions and real-time updates
- Enhance security configurations and headers (CSP, CORS, etc.)
- Update documentation and configuration files
- Add new CI/CD workflows and validation scripts
- Implement design system improvements and UI enhancements
This commit is contained in:
@@ -0,0 +1,339 @@
|
||||
package resourcediscovery
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log"
|
||||
|
||||
proxmoxv1alpha1 "github.com/sankofa/crossplane-provider-proxmox/apis/v1alpha1"
|
||||
"github.com/sankofa/crossplane-provider-proxmox/pkg/cloudflare"
|
||||
"github.com/sankofa/crossplane-provider-proxmox/pkg/discovery"
|
||||
"github.com/sankofa/crossplane-provider-proxmox/pkg/proxmox"
|
||||
)
|
||||
|
||||
// ResourceDiscoveryReconciler reconciles a ResourceDiscovery object
|
||||
type ResourceDiscoveryReconciler struct {
|
||||
client.Client
|
||||
Scheme *runtime.Scheme
|
||||
K8sClient kubernetes.Interface
|
||||
InventoryAPI string // API endpoint for resource inventory
|
||||
}
|
||||
|
||||
//+kubebuilder:rbac:groups=proxmox.sankofa.nexus,resources=resourcediscoveries,verbs=get;list;watch;create;update;patch;delete
|
||||
//+kubebuilder:rbac:groups=proxmox.sankofa.nexus,resources=resourcediscoveries/status,verbs=get;update;patch
|
||||
//+kubebuilder:rbac:groups=proxmox.sankofa.nexus,resources=resourcediscoveries/finalizers,verbs=update
|
||||
|
||||
// Reconcile is part of the main kubernetes reconciliation loop
|
||||
func (r *ResourceDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
||||
logger := log.FromContext(ctx)
|
||||
|
||||
var rd proxmoxv1alpha1.ResourceDiscovery
|
||||
if err := r.Get(ctx, req.NamespacedName, &rd); err != nil {
|
||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
}
|
||||
|
||||
// Check if discovery is enabled
|
||||
if !rd.Spec.Enabled {
|
||||
logger.Info("Resource discovery is disabled, skipping")
|
||||
return ctrl.Result{RequeueAfter: time.Duration(rd.Spec.SyncInterval) * time.Second}, nil
|
||||
}
|
||||
|
||||
// Check if we need to sync (based on sync interval)
|
||||
syncInterval := time.Duration(rd.Spec.SyncInterval) * time.Second
|
||||
if rd.Status.LastSyncTime != nil {
|
||||
timeSinceLastSync := time.Since(rd.Status.LastSyncTime.Time)
|
||||
if timeSinceLastSync < syncInterval {
|
||||
requeueAfter := syncInterval - timeSinceLastSync
|
||||
logger.Info("Sync interval not reached, requeuing", "requeueAfter", requeueAfter)
|
||||
return ctrl.Result{RequeueAfter: requeueAfter}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Update status to IN_PROGRESS
|
||||
rd.Status.SyncStatus = "IN_PROGRESS"
|
||||
rd.Status.LastSyncError = ""
|
||||
if err := r.Status().Update(ctx, &rd); err != nil {
|
||||
logger.Error(err, "failed to update status")
|
||||
}
|
||||
|
||||
// Perform discovery based on provider
|
||||
var discoveredResources []discovery.DiscoveredResource
|
||||
var err error
|
||||
|
||||
switch rd.Spec.Provider {
|
||||
case "PROXMOX":
|
||||
discoveredResources, err = r.discoverProxmoxResources(ctx, &rd)
|
||||
case "KUBERNETES":
|
||||
discoveredResources, err = r.discoverKubernetesResources(ctx, &rd)
|
||||
case "CLOUDFLARE":
|
||||
discoveredResources, err = r.discoverCloudflareResources(ctx, &rd)
|
||||
default:
|
||||
err = fmt.Errorf("unsupported provider: %s", rd.Spec.Provider)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Error(err, "discovery failed")
|
||||
rd.Status.SyncStatus = "FAILED"
|
||||
rd.Status.LastSyncError = err.Error()
|
||||
if updateErr := r.Status().Update(ctx, &rd); updateErr != nil {
|
||||
logger.Error(updateErr, "failed to update status with error")
|
||||
}
|
||||
return ctrl.Result{RequeueAfter: syncInterval}, err
|
||||
}
|
||||
|
||||
// Sync discovered resources to inventory API
|
||||
syncedCount, err := r.syncResourcesToAPI(ctx, discoveredResources, &rd)
|
||||
if err != nil {
|
||||
logger.Error(err, "failed to sync resources to API")
|
||||
rd.Status.SyncStatus = "FAILED"
|
||||
rd.Status.LastSyncError = err.Error()
|
||||
if updateErr := r.Status().Update(ctx, &rd); updateErr != nil {
|
||||
logger.Error(updateErr, "failed to update status with error")
|
||||
}
|
||||
return ctrl.Result{RequeueAfter: syncInterval}, err
|
||||
}
|
||||
|
||||
// Update status
|
||||
now := metav1.Now()
|
||||
rd.Status.LastSyncTime = &now
|
||||
rd.Status.ResourcesDiscovered = syncedCount
|
||||
rd.Status.SyncStatus = "SUCCESS"
|
||||
rd.Status.LastSyncError = ""
|
||||
|
||||
if err := r.Status().Update(ctx, &rd); err != nil {
|
||||
logger.Error(err, "failed to update status")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
logger.Info("Discovery completed successfully", "resources", syncedCount)
|
||||
return ctrl.Result{RequeueAfter: syncInterval}, nil
|
||||
}
|
||||
|
||||
func (r *ResourceDiscoveryReconciler) discoverProxmoxResources(
|
||||
ctx context.Context,
|
||||
rd *proxmoxv1alpha1.ResourceDiscovery,
|
||||
) ([]discovery.DiscoveredResource, error) {
|
||||
// Validate ProviderConfigReference
|
||||
if rd.Spec.ProviderConfigReference == nil {
|
||||
return []discovery.DiscoveredResource{}, errors.New("providerConfigRef is required")
|
||||
}
|
||||
if rd.Spec.ProviderConfigReference.Name == "" {
|
||||
return []discovery.DiscoveredResource{}, errors.New("providerConfigRef.name is required")
|
||||
}
|
||||
|
||||
// Get ProviderConfig
|
||||
var providerConfig proxmoxv1alpha1.ProviderConfig
|
||||
if err := r.Get(ctx, client.ObjectKey{Name: rd.Spec.ProviderConfigReference.Name}, &providerConfig); err != nil {
|
||||
return []discovery.DiscoveredResource{}, errors.Wrapf(err, "cannot get provider config")
|
||||
}
|
||||
|
||||
// Get credentials from ProviderConfig
|
||||
var credentials struct {
|
||||
Username string
|
||||
Password string
|
||||
Endpoint string
|
||||
}
|
||||
|
||||
// Try to get credentials from ProviderConfig
|
||||
if providerConfig.Spec.Credentials.SecretRef != nil {
|
||||
secretRef := providerConfig.Spec.Credentials.SecretRef
|
||||
secret := &corev1.Secret{}
|
||||
secretKey := client.ObjectKey{
|
||||
Namespace: secretRef.Namespace,
|
||||
Name: secretRef.Name,
|
||||
}
|
||||
|
||||
if err := r.Get(ctx, secretKey, secret); err == nil {
|
||||
if userData, ok := secret.Data["username"]; ok {
|
||||
credentials.Username = string(userData)
|
||||
}
|
||||
if passData, ok := secret.Data["password"]; ok {
|
||||
credentials.Password = string(passData)
|
||||
}
|
||||
if endpointData, ok := secret.Data["endpoint"]; ok {
|
||||
credentials.Endpoint = string(endpointData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find endpoint from ProviderConfig Sites
|
||||
var endpoint string
|
||||
var insecureSkipTLS bool
|
||||
if rd.Spec.Site != "" {
|
||||
// Find the site matching rd.Spec.Site
|
||||
for _, site := range providerConfig.Spec.Sites {
|
||||
if site.Name == rd.Spec.Site {
|
||||
endpoint = site.Endpoint
|
||||
insecureSkipTLS = site.InsecureSkipTLSVerify
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if len(providerConfig.Spec.Sites) > 0 {
|
||||
// Use first site if no site specified
|
||||
endpoint = providerConfig.Spec.Sites[0].Endpoint
|
||||
insecureSkipTLS = providerConfig.Spec.Sites[0].InsecureSkipTLSVerify
|
||||
}
|
||||
|
||||
// Override with endpoint from credentials if provided
|
||||
if credentials.Endpoint != "" {
|
||||
endpoint = credentials.Endpoint
|
||||
}
|
||||
|
||||
if endpoint == "" {
|
||||
return []discovery.DiscoveredResource{}, errors.New("no endpoint found in ProviderConfig sites or credentials")
|
||||
}
|
||||
|
||||
client, err := proxmox.NewClient(endpoint, credentials.Username, credentials.Password, insecureSkipTLS)
|
||||
if err != nil {
|
||||
return []discovery.DiscoveredResource{}, errors.Wrap(err, "failed to create Proxmox client")
|
||||
}
|
||||
|
||||
agent := discovery.NewProxmoxDiscoveryAgent(client, rd.Spec.Site, rd.Spec.Region)
|
||||
return agent.DiscoverAll(ctx)
|
||||
}
|
||||
|
||||
func (r *ResourceDiscoveryReconciler) discoverKubernetesResources(
|
||||
ctx context.Context,
|
||||
rd *proxmoxv1alpha1.ResourceDiscovery,
|
||||
) ([]discovery.DiscoveredResource, error) {
|
||||
if r.K8sClient == nil {
|
||||
return nil, fmt.Errorf("kubernetes client not configured")
|
||||
}
|
||||
|
||||
agent := discovery.NewKubernetesDiscoveryAgent(r.K8sClient, rd.Spec.Site, rd.Spec.Region)
|
||||
return agent.DiscoverAll(ctx)
|
||||
}
|
||||
|
||||
func (r *ResourceDiscoveryReconciler) discoverCloudflareResources(
|
||||
ctx context.Context,
|
||||
rd *proxmoxv1alpha1.ResourceDiscovery,
|
||||
) ([]discovery.DiscoveredResource, error) {
|
||||
// Get ProviderConfig
|
||||
var providerConfig proxmoxv1alpha1.ProviderConfig
|
||||
if err := r.Get(ctx, client.ObjectKey{Name: rd.Spec.ProviderConfigReference.Name}, &providerConfig); err != nil {
|
||||
return nil, errors.Wrapf(err, "cannot get provider config")
|
||||
}
|
||||
|
||||
// Get credentials from ProviderConfig
|
||||
var apiToken, accountID string
|
||||
|
||||
if providerConfig.Spec.Credentials.SecretRef != nil {
|
||||
secretRef := providerConfig.Spec.Credentials.SecretRef
|
||||
secret := &corev1.Secret{}
|
||||
secretKey := client.ObjectKey{
|
||||
Namespace: secretRef.Namespace,
|
||||
Name: secretRef.Name,
|
||||
}
|
||||
|
||||
if err := r.Get(ctx, secretKey, secret); err == nil {
|
||||
if tokenData, ok := secret.Data["apiToken"]; ok {
|
||||
apiToken = string(tokenData)
|
||||
}
|
||||
if accountData, ok := secret.Data["accountID"]; ok {
|
||||
accountID = string(accountData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
client := cloudflare.NewClient(apiToken, accountID)
|
||||
|
||||
agent := discovery.NewCloudflareDiscoveryAgent(client, rd.Spec.Site, rd.Spec.Region)
|
||||
return agent.DiscoverAll(ctx)
|
||||
}
|
||||
|
||||
// syncResourcesToAPI syncs discovered resources to the inventory API
|
||||
func (r *ResourceDiscoveryReconciler) syncResourcesToAPI(
|
||||
ctx context.Context,
|
||||
resources []discovery.DiscoveredResource,
|
||||
rd *proxmoxv1alpha1.ResourceDiscovery,
|
||||
) (int, error) {
|
||||
if r.InventoryAPI == "" {
|
||||
// If no API endpoint configured, just return count
|
||||
return len(resources), nil
|
||||
}
|
||||
|
||||
// Prepare resources for API
|
||||
type ResourceInventoryItem struct {
|
||||
ResourceType string `json:"resourceType"`
|
||||
Provider string `json:"provider"`
|
||||
ProviderID string `json:"providerId"`
|
||||
ProviderResourceID string `json:"providerResourceId"`
|
||||
Name string `json:"name"`
|
||||
Region string `json:"region"`
|
||||
SiteID string `json:"siteId"`
|
||||
Metadata map[string]interface{} `json:"metadata"`
|
||||
Tags []string `json:"tags"`
|
||||
}
|
||||
|
||||
items := make([]ResourceInventoryItem, len(resources))
|
||||
for i, res := range resources {
|
||||
items[i] = ResourceInventoryItem{
|
||||
ResourceType: res.ResourceType,
|
||||
Provider: res.Provider,
|
||||
ProviderID: res.ProviderID,
|
||||
ProviderResourceID: res.ProviderResourceID,
|
||||
Name: res.Name,
|
||||
Region: res.Region,
|
||||
SiteID: res.SiteID,
|
||||
Metadata: res.Metadata,
|
||||
Tags: res.Tags,
|
||||
}
|
||||
}
|
||||
|
||||
// Make API call to sync resources
|
||||
jsonData, err := json.Marshal(map[string]interface{}{
|
||||
"provider": rd.Spec.Provider,
|
||||
"resources": items,
|
||||
})
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to marshal resources")
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", r.InventoryAPI+"/sync", bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to create request")
|
||||
}
|
||||
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return 0, errors.Wrap(err, "failed to send request")
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return 0, fmt.Errorf("API returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Synced int `json:"synced"`
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
// If response doesn't match expected format, assume all were synced
|
||||
return len(resources), nil
|
||||
}
|
||||
|
||||
return result.Synced, nil
|
||||
}
|
||||
|
||||
// SetupWithManager sets up the controller with the Manager.
|
||||
func (r *ResourceDiscoveryReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
return ctrl.NewControllerManagedBy(mgr).
|
||||
For(&proxmoxv1alpha1.ResourceDiscovery{}).
|
||||
Complete(r)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
package virtualmachine
|
||||
|
||||
import (
	"strings"
	"time"
)
|
||||
|
||||
// ExponentialBackoff calculates the next retry delay for the given zero-based
// attempt number.
//
// The schedule is 30s, 1m, 2m, 5m, 10m. Attempts beyond the end of the
// schedule stay capped at 10m. A negative attempt is treated as attempt 0
// rather than indexing out of range and panicking.
func ExponentialBackoff(attempt int) time.Duration {
	delays := [...]time.Duration{
		30 * time.Second, // First retry: 30 seconds
		1 * time.Minute,  // Second retry: 1 minute
		2 * time.Minute,  // Third retry: 2 minutes
		5 * time.Minute,  // Fourth retry: 5 minutes
		10 * time.Minute, // Fifth and subsequent retries: 10 minutes (capped)
	}

	switch {
	case attempt < 0:
		return delays[0]
	case attempt >= len(delays):
		return delays[len(delays)-1] // Cap at maximum delay
	default:
		return delays[attempt]
	}
}
|
||||
|
||||
// GetRequeueDelay calculates requeue delay based on error type and attempt count
|
||||
func GetRequeueDelay(err error, attemptCount int) time.Duration {
|
||||
if err == nil {
|
||||
return 30 * time.Second // Default for successful operations
|
||||
}
|
||||
|
||||
errStr := err.Error()
|
||||
|
||||
// Non-retryable errors (configuration issues)
|
||||
if containsAny(errStr, []string{
|
||||
"cannot get provider config",
|
||||
"cannot get credentials",
|
||||
"cannot find site",
|
||||
"cannot create Proxmox client",
|
||||
}) {
|
||||
return 2 * time.Minute // Fixed delay for config errors
|
||||
}
|
||||
|
||||
// Quota errors - longer delay
|
||||
if containsAny(errStr, []string{
|
||||
"quota exceeded",
|
||||
"quota check failed",
|
||||
}) {
|
||||
return 5 * time.Minute
|
||||
}
|
||||
|
||||
// VM creation failures - use exponential backoff
|
||||
if containsAny(errStr, []string{
|
||||
"cannot create VM",
|
||||
"failed to import image",
|
||||
"importdisk",
|
||||
"not implemented",
|
||||
}) {
|
||||
return ExponentialBackoff(attemptCount)
|
||||
}
|
||||
|
||||
// Node health issues
|
||||
if containsAny(errStr, []string{
|
||||
"node",
|
||||
"unhealthy",
|
||||
"not reachable",
|
||||
}) {
|
||||
return 2 * time.Minute
|
||||
}
|
||||
|
||||
// Default: exponential backoff
|
||||
return ExponentialBackoff(attemptCount)
|
||||
}
|
||||
|
||||
// containsAny reports whether s contains at least one of substrings.
//
// It returns false when substrings is empty. An empty substring matches every
// s, including the empty string.
func containsAny(s string, substrings []string) bool {
	for _, substr := range substrings {
		if strings.Contains(s, substr) {
			return true
		}
	}
	return false
}
|
||||
|
||||
@@ -3,16 +3,22 @@ package virtualmachine
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log"
|
||||
|
||||
proxmoxv1alpha1 "github.com/yourorg/crossplane-provider-proxmox/apis/v1alpha1"
|
||||
"github.com/yourorg/crossplane-provider-proxmox/pkg/proxmox"
|
||||
proxmoxv1alpha1 "github.com/sankofa/crossplane-provider-proxmox/apis/v1alpha1"
|
||||
"github.com/sankofa/crossplane-provider-proxmox/pkg/proxmox"
|
||||
"github.com/sankofa/crossplane-provider-proxmox/pkg/quota"
|
||||
)
|
||||
|
||||
// ProxmoxVMReconciler reconciles a ProxmoxVM object
|
||||
@@ -21,9 +27,9 @@ type ProxmoxVMReconciler struct {
|
||||
Scheme *runtime.Scheme
|
||||
}
|
||||
|
||||
//+kubebuilder:rbac:groups=proxmox.yourorg.io,resources=proxmoxvms,verbs=get;list;watch;create;update;patch;delete
|
||||
//+kubebuilder:rbac:groups=proxmox.yourorg.io,resources=proxmoxvms/status,verbs=get;update;patch
|
||||
//+kubebuilder:rbac:groups=proxmox.yourorg.io,resources=proxmoxvms/finalizers,verbs=update
|
||||
//+kubebuilder:rbac:groups=proxmox.sankofa.nexus,resources=proxmoxvms,verbs=get;list;watch;create;update;patch;delete
|
||||
//+kubebuilder:rbac:groups=proxmox.sankofa.nexus,resources=proxmoxvms/status,verbs=get;update;patch
|
||||
//+kubebuilder:rbac:groups=proxmox.sankofa.nexus,resources=proxmoxvms/finalizers,verbs=update
|
||||
|
||||
// Reconcile is part of the main kubernetes reconciliation loop
|
||||
func (r *ProxmoxVMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
||||
@@ -34,6 +40,14 @@ func (r *ProxmoxVMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
}
|
||||
|
||||
// Validate ProviderConfigReference
|
||||
if vm.Spec.ProviderConfigReference == nil {
|
||||
return ctrl.Result{}, errors.New("providerConfigRef is required")
|
||||
}
|
||||
if vm.Spec.ProviderConfigReference.Name == "" {
|
||||
return ctrl.Result{}, errors.New("providerConfigRef.name is required")
|
||||
}
|
||||
|
||||
// Get ProviderConfig
|
||||
var providerConfig proxmoxv1alpha1.ProviderConfig
|
||||
providerConfigName := vm.Spec.ProviderConfigReference.Name
|
||||
@@ -41,18 +55,30 @@ func (r *ProxmoxVMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||
return ctrl.Result{}, errors.Wrapf(err, "cannot get provider config %s", providerConfigName)
|
||||
}
|
||||
|
||||
// Track retry attempts for exponential backoff
|
||||
attemptCount := 0
|
||||
if vm.Status.Conditions != nil {
|
||||
for _, condition := range vm.Status.Conditions {
|
||||
if condition.Type == "Failed" {
|
||||
attemptCount++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get credentials from secret
|
||||
creds, err := r.getCredentials(ctx, &providerConfig)
|
||||
if err != nil {
|
||||
logger.Error(err, "cannot get credentials")
|
||||
return ctrl.Result{RequeueAfter: 30 * time.Second}, errors.Wrap(err, "cannot get credentials")
|
||||
delay := GetRequeueDelay(err, attemptCount)
|
||||
return ctrl.Result{RequeueAfter: delay}, errors.Wrap(err, "cannot get credentials")
|
||||
}
|
||||
|
||||
// Find the site configuration
|
||||
site, err := r.findSite(&providerConfig, vm.Spec.ForProvider.Site)
|
||||
if err != nil {
|
||||
logger.Error(err, "cannot find site", "site", vm.Spec.ForProvider.Site)
|
||||
return ctrl.Result{RequeueAfter: 30 * time.Second}, errors.Wrapf(err, "cannot find site %s", vm.Spec.ForProvider.Site)
|
||||
delay := GetRequeueDelay(err, attemptCount)
|
||||
return ctrl.Result{RequeueAfter: delay}, errors.Wrapf(err, "cannot find site %s", vm.Spec.ForProvider.Site)
|
||||
}
|
||||
|
||||
// Create Proxmox client
|
||||
@@ -66,12 +92,77 @@ func (r *ProxmoxVMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||
return ctrl.Result{}, errors.Wrap(err, "cannot create Proxmox client")
|
||||
}
|
||||
|
||||
// Check node health before proceeding
|
||||
if err := proxmoxClient.CheckNodeHealth(ctx, vm.Spec.ForProvider.Node); err != nil {
|
||||
logger.Error(err, "node health check failed", "node", vm.Spec.ForProvider.Node)
|
||||
// Update status with error condition
|
||||
vm.Status.Conditions = append(vm.Status.Conditions, metav1.Condition{
|
||||
Type: "NodeUnhealthy",
|
||||
Status: "True",
|
||||
Reason: "HealthCheckFailed",
|
||||
Message: err.Error(),
|
||||
LastTransitionTime: metav1.Now(),
|
||||
})
|
||||
r.Status().Update(ctx, &vm)
|
||||
return ctrl.Result{RequeueAfter: 2 * time.Minute}, nil
|
||||
}
|
||||
|
||||
// Reconcile VM
|
||||
if vm.Status.VMID == 0 {
|
||||
// Create VM
|
||||
logger.Info("Creating VM", "name", vm.Name, "node", vm.Spec.ForProvider.Node)
|
||||
|
||||
vmConfig := proxmox.VMConfig{
|
||||
// Extract tenant_id from Kubernetes labels or annotations (for multi-tenancy)
|
||||
tenantID := ""
|
||||
if vm.Labels != nil {
|
||||
if tid, ok := vm.Labels["tenant.sankofa.nexus/id"]; ok {
|
||||
tenantID = tid
|
||||
} else if tid, ok := vm.Labels["tenant-id"]; ok {
|
||||
tenantID = tid
|
||||
}
|
||||
}
|
||||
if tenantID == "" && vm.Annotations != nil {
|
||||
if tid, ok := vm.Annotations["tenant.sankofa.nexus/id"]; ok {
|
||||
tenantID = tid
|
||||
} else if tid, ok := vm.Annotations["tenant-id"]; ok {
|
||||
tenantID = tid
|
||||
}
|
||||
}
|
||||
|
||||
// Enforce quota if tenant ID is present
|
||||
if tenantID != "" {
|
||||
apiURL := os.Getenv("SANKOFA_API_URL")
|
||||
apiToken := os.Getenv("SANKOFA_API_TOKEN")
|
||||
if apiURL != "" && apiToken != "" {
|
||||
quotaClient := quota.NewQuotaClient(apiURL, apiToken)
|
||||
|
||||
// Parse memory from string (e.g., "8Gi" -> 8)
|
||||
memoryGB := parseMemoryToGB(vm.Spec.ForProvider.Memory)
|
||||
diskGB := parseDiskToGB(vm.Spec.ForProvider.Disk)
|
||||
|
||||
resourceRequest := quota.ResourceRequest{
|
||||
Compute: "a.ComputeRequest{
|
||||
VCPU: &vm.Spec.ForProvider.CPU,
|
||||
Memory: &memoryGB,
|
||||
Instances: intPtr(1),
|
||||
},
|
||||
Storage: "a.StorageRequest{
|
||||
Size: &diskGB,
|
||||
},
|
||||
}
|
||||
|
||||
if err := quotaClient.EnforceQuota(ctx, tenantID, resourceRequest); err != nil {
|
||||
logger.Error(err, "Quota check failed", "tenantID", tenantID)
|
||||
return ctrl.Result{RequeueAfter: 60 * time.Second}, errors.Wrap(err, "quota exceeded")
|
||||
}
|
||||
logger.Info("Quota check passed", "tenantID", tenantID)
|
||||
} else {
|
||||
logger.Info("Skipping quota check - API URL or token not configured")
|
||||
}
|
||||
}
|
||||
|
||||
vmSpec := proxmox.VMSpec{
|
||||
Node: vm.Spec.ForProvider.Node,
|
||||
Name: vm.Spec.ForProvider.Name,
|
||||
CPU: vm.Spec.ForProvider.CPU,
|
||||
Memory: vm.Spec.ForProvider.Memory,
|
||||
@@ -81,16 +172,88 @@ func (r *ProxmoxVMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||
Image: vm.Spec.ForProvider.Image,
|
||||
UserData: vm.Spec.ForProvider.UserData,
|
||||
SSHKeys: vm.Spec.ForProvider.SSHKeys,
|
||||
TenantID: tenantID, // Pass tenant_id to Proxmox client
|
||||
}
|
||||
|
||||
createdVM, err := proxmoxClient.CreateVM(vm.Spec.ForProvider.Node, vmConfig)
|
||||
createdVM, err := proxmoxClient.CreateVM(ctx, vmSpec)
|
||||
if err != nil {
|
||||
return ctrl.Result{}, errors.Wrap(err, "cannot create VM")
|
||||
// CRITICAL: Check if VM was partially created (VMID assigned but creation failed)
|
||||
// This happens when importdisk fails after VM is created
|
||||
errorStr := err.Error()
|
||||
if strings.Contains(errorStr, "VM") && strings.Contains(errorStr, "has been cleaned up") {
|
||||
// VM was created but cleaned up due to error (e.g., importdisk not supported)
|
||||
// Categorize error and update status to prevent infinite retry loop
|
||||
errorCategory := categorizeError(errorStr)
|
||||
vm.Status.Conditions = append(vm.Status.Conditions, metav1.Condition{
|
||||
Type: errorCategory.Type,
|
||||
Status: "True",
|
||||
Reason: errorCategory.Reason,
|
||||
Message: errorStr,
|
||||
LastTransitionTime: metav1.Now(),
|
||||
})
|
||||
if updateErr := r.Status().Update(ctx, &vm); updateErr != nil {
|
||||
logger.Error(updateErr, "failed to update status after creation failure")
|
||||
}
|
||||
// Use exponential backoff based on attempt count
|
||||
delay := GetRequeueDelay(err, attemptCount)
|
||||
return ctrl.Result{RequeueAfter: delay}, errors.Wrap(err, "cannot create VM - VM was cleaned up")
|
||||
}
|
||||
|
||||
// For other errors, check if a VM was created but not returned
|
||||
// Try to find orphaned VMs by name
|
||||
vms, listErr := proxmoxClient.ListVMs(ctx, vm.Spec.ForProvider.Node)
|
||||
if listErr == nil {
|
||||
for _, existingVM := range vms {
|
||||
if existingVM.Name == vm.Spec.ForProvider.Name {
|
||||
// Found orphaned VM - attempt cleanup
|
||||
logger.Info("Found orphaned VM, attempting cleanup", "vmID", existingVM.ID, "name", existingVM.Name)
|
||||
cleanupErr := proxmoxClient.DeleteVM(ctx, existingVM.ID)
|
||||
if cleanupErr != nil {
|
||||
logger.Error(cleanupErr, "Failed to cleanup orphaned VM", "vmID", existingVM.ID)
|
||||
} else {
|
||||
logger.Info("Successfully cleaned up orphaned VM", "vmID", existingVM.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Categorize error and update status with appropriate condition
|
||||
errorCategory := categorizeError(errorStr)
|
||||
vm.Status.Conditions = append(vm.Status.Conditions, metav1.Condition{
|
||||
Type: errorCategory.Type,
|
||||
Status: "True",
|
||||
Reason: errorCategory.Reason,
|
||||
Message: errorStr,
|
||||
LastTransitionTime: metav1.Now(),
|
||||
})
|
||||
if updateErr := r.Status().Update(ctx, &vm); updateErr != nil {
|
||||
logger.Error(updateErr, "failed to update status after creation failure")
|
||||
}
|
||||
|
||||
// Use exponential backoff based on attempt count
|
||||
delay := GetRequeueDelay(err, attemptCount)
|
||||
return ctrl.Result{RequeueAfter: delay}, errors.Wrap(err, "cannot create VM")
|
||||
}
|
||||
|
||||
vm.Status.VMID = createdVM.ID
|
||||
vm.Status.State = createdVM.Status
|
||||
vm.Status.IPAddress = createdVM.IPAddress
|
||||
vm.Status.IPAddress = createdVM.IP
|
||||
|
||||
// Clear any previous failure conditions
|
||||
for i := len(vm.Status.Conditions) - 1; i >= 0; i-- {
|
||||
if vm.Status.Conditions[i].Type == "Failed" {
|
||||
vm.Status.Conditions = append(vm.Status.Conditions[:i], vm.Status.Conditions[i+1:]...)
|
||||
}
|
||||
}
|
||||
|
||||
// Add success condition
|
||||
vm.Status.Conditions = append(vm.Status.Conditions, metav1.Condition{
|
||||
Type: "Ready",
|
||||
Status: "True",
|
||||
Reason: "Created",
|
||||
Message: fmt.Sprintf("VM %d created successfully", createdVM.ID),
|
||||
LastTransitionTime: metav1.Now(),
|
||||
})
|
||||
|
||||
if err := r.Status().Update(ctx, &vm); err != nil {
|
||||
return ctrl.Result{}, errors.Wrap(err, "cannot update VM status")
|
||||
@@ -99,38 +262,50 @@ func (r *ProxmoxVMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
|
||||
}
|
||||
|
||||
// Update VM if needed
|
||||
currentVM, err := proxmoxClient.GetVM(vm.Spec.ForProvider.Node, vm.Status.VMID)
|
||||
// Update VM status
|
||||
vmStatus, err := proxmoxClient.GetVMStatus(ctx, vm.Status.VMID)
|
||||
if err != nil {
|
||||
return ctrl.Result{}, errors.Wrap(err, "cannot get VM")
|
||||
// If VM status can't be retrieved, try to get from ListVMs
|
||||
vms, listErr := proxmoxClient.ListVMs(ctx, vm.Spec.ForProvider.Node)
|
||||
if listErr != nil {
|
||||
return ctrl.Result{}, errors.Wrap(err, "cannot get VM status")
|
||||
}
|
||||
for _, v := range vms {
|
||||
if v.ID == vm.Status.VMID {
|
||||
vm.Status.State = v.Status
|
||||
vm.Status.IPAddress = v.IP
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vm.Status.State = vmStatus.State
|
||||
vm.Status.IPAddress = vmStatus.IPAddress
|
||||
}
|
||||
|
||||
// Update status
|
||||
vm.Status.State = currentVM.Status
|
||||
vm.Status.IPAddress = currentVM.IPAddress
|
||||
|
||||
if err := r.Status().Update(ctx, &vm); err != nil {
|
||||
return ctrl.Result{}, errors.Wrap(err, "cannot update VM status")
|
||||
}
|
||||
|
||||
// Check if VM needs to be updated
|
||||
needsUpdate := false
|
||||
if vm.Spec.ForProvider.CPU != 0 && currentVM.Config.CPU != vm.Spec.ForProvider.CPU {
|
||||
needsUpdate = true
|
||||
}
|
||||
if vm.Spec.ForProvider.Memory != "" && currentVM.Config.Memory != vm.Spec.ForProvider.Memory {
|
||||
needsUpdate = true
|
||||
if vmStatus != nil {
|
||||
// Compare with current status - for now, always check if spec differs
|
||||
// In a real implementation, you'd compare current VM config with spec
|
||||
needsUpdate = vm.Spec.ForProvider.CPU > 0 || vm.Spec.ForProvider.Memory != ""
|
||||
}
|
||||
|
||||
if needsUpdate {
|
||||
logger.Info("Updating VM", "name", vm.Name, "vmId", vm.Status.VMID)
|
||||
|
||||
vmConfig := proxmox.VMConfig{
|
||||
vmSpec := proxmox.VMSpec{
|
||||
Node: vm.Spec.ForProvider.Node,
|
||||
Name: vm.Spec.ForProvider.Name,
|
||||
CPU: vm.Spec.ForProvider.CPU,
|
||||
Memory: vm.Spec.ForProvider.Memory,
|
||||
}
|
||||
|
||||
if err := proxmoxClient.UpdateVM(vm.Spec.ForProvider.Node, vm.Status.VMID, vmConfig); err != nil {
|
||||
_, err := proxmoxClient.UpdateVM(ctx, vm.Status.VMID, vmSpec)
|
||||
if err != nil {
|
||||
return ctrl.Result{}, errors.Wrap(err, "cannot update VM")
|
||||
}
|
||||
|
||||
@@ -140,8 +315,108 @@ func (r *ProxmoxVMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
|
||||
}
|
||||
|
||||
// CleanupOrphanedVMs scans for and cleans up orphaned VMs on controller startup
|
||||
// Orphaned VMs are VMs in Proxmox that don't have corresponding Kubernetes resources
|
||||
func (r *ProxmoxVMReconciler) CleanupOrphanedVMs(ctx context.Context) error {
|
||||
logger := log.FromContext(ctx)
|
||||
logger.Info("Starting orphaned VM cleanup on controller startup")
|
||||
|
||||
// List all ProxmoxVM resources
|
||||
var vmList proxmoxv1alpha1.ProxmoxVMList
|
||||
if err := r.List(ctx, &vmList, &client.ListOptions{}); err != nil {
|
||||
logger.Error(err, "failed to list ProxmoxVM resources")
|
||||
return err
|
||||
}
|
||||
|
||||
// Build map of expected VMIDs
|
||||
expectedVMIDs := make(map[int]bool)
|
||||
for _, vm := range vmList.Items {
|
||||
if vm.Status.VMID > 0 {
|
||||
expectedVMIDs[vm.Status.VMID] = true
|
||||
}
|
||||
}
|
||||
|
||||
// Get all ProviderConfigs to check VMs on all sites
|
||||
var configList proxmoxv1alpha1.ProviderConfigList
|
||||
if err := r.List(ctx, &configList, &client.ListOptions{}); err != nil {
|
||||
logger.Error(err, "failed to list ProviderConfig resources")
|
||||
return err
|
||||
}
|
||||
|
||||
cleanedCount := 0
|
||||
for _, config := range configList.Items {
|
||||
// Get credentials
|
||||
creds, err := r.getCredentials(ctx, &config)
|
||||
if err != nil {
|
||||
logger.Error(err, "failed to get credentials for cleanup", "config", config.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check each site
|
||||
for _, site := range config.Spec.Sites {
|
||||
client, err := proxmox.NewClient(
|
||||
site.Endpoint,
|
||||
creds.Username,
|
||||
creds.Password,
|
||||
site.InsecureSkipTLSVerify,
|
||||
)
|
||||
if err != nil {
|
||||
logger.Error(err, "failed to create Proxmox client for cleanup", "site", site.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
// List VMs on this node
|
||||
vms, err := client.ListVMs(ctx, site.Node)
|
||||
if err != nil {
|
||||
logger.Error(err, "failed to list VMs for cleanup", "site", site.Name, "node", site.Node)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for orphaned VMs (VMs not in expected list)
|
||||
for _, vm := range vms {
|
||||
if !expectedVMIDs[vm.ID] {
|
||||
// Check if VM has a name that suggests it might be orphaned
|
||||
// (e.g., matches pattern of our VMs but no Kubernetes resource)
|
||||
logger.Info("Found potential orphaned VM", "vmID", vm.ID, "name", vm.Name, "node", site.Node)
|
||||
|
||||
// Only clean up if VM is stopped (safer)
|
||||
if vm.Status == "stopped" {
|
||||
logger.Info("Cleaning up orphaned stopped VM", "vmID", vm.ID)
|
||||
if err := client.DeleteVM(ctx, vm.ID); err != nil {
|
||||
logger.Error(err, "failed to cleanup orphaned VM", "vmID", vm.ID)
|
||||
} else {
|
||||
cleanedCount++
|
||||
logger.Info("Successfully cleaned up orphaned VM", "vmID", vm.ID)
|
||||
}
|
||||
} else {
|
||||
logger.Info("Skipping orphaned VM cleanup - VM is not stopped", "vmID", vm.ID, "status", vm.Status)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if cleanedCount > 0 {
|
||||
logger.Info("Orphaned VM cleanup completed", "cleanedCount", cleanedCount)
|
||||
} else {
|
||||
logger.Info("Orphaned VM cleanup completed - no orphaned VMs found")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetupWithManager sets up the controller with the Manager
|
||||
func (r *ProxmoxVMReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
// Run cleanup on startup (non-blocking, in background)
|
||||
go func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
defer cancel()
|
||||
logger := log.FromContext(ctx)
|
||||
if err := r.CleanupOrphanedVMs(ctx); err != nil {
|
||||
logger.Error(err, "orphaned VM cleanup failed on startup")
|
||||
}
|
||||
}()
|
||||
|
||||
return ctrl.NewControllerManagedBy(mgr).
|
||||
For(&proxmoxv1alpha1.ProxmoxVM{}).
|
||||
Complete(r)
|
||||
@@ -160,15 +435,46 @@ func (r *ProxmoxVMReconciler) getCredentials(ctx context.Context, config *proxmo
|
||||
|
||||
secretRef := config.Spec.Credentials.SecretRef
|
||||
|
||||
// In a real implementation, you would:
|
||||
// 1. Get the secret from Kubernetes
|
||||
// 2. Parse the credentials (JSON, username/password, etc.)
|
||||
// 3. Return the credentials
|
||||
// Get secret from Kubernetes
|
||||
secret := &corev1.Secret{}
|
||||
secretKey := client.ObjectKey{
|
||||
Namespace: secretRef.Namespace,
|
||||
Name: secretRef.Name,
|
||||
}
|
||||
|
||||
if err := r.Get(ctx, secretKey, secret); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot get secret")
|
||||
}
|
||||
|
||||
// Parse credentials from secret
|
||||
// Support both username/password and JSON token formats
|
||||
var username, password string
|
||||
|
||||
// Try username/password format first
|
||||
if userData, ok := secret.Data["username"]; ok {
|
||||
username = string(userData)
|
||||
}
|
||||
if passData, ok := secret.Data["password"]; ok {
|
||||
password = string(passData)
|
||||
}
|
||||
|
||||
// Try token format (for Proxmox API tokens)
|
||||
if tokenData, ok := secret.Data["token"]; ok {
|
||||
// For token-based auth, username is in format "user@realm!tokenid"
|
||||
// and password is the token secret
|
||||
if userData, ok := secret.Data["tokenid"]; ok {
|
||||
username = string(userData)
|
||||
}
|
||||
password = string(tokenData)
|
||||
}
|
||||
|
||||
if username == "" || password == "" {
|
||||
return nil, fmt.Errorf("username/password or token missing in secret")
|
||||
}
|
||||
|
||||
// This is a placeholder
|
||||
return &credentials{
|
||||
Username: "root@pam",
|
||||
Password: "placeholder",
|
||||
Username: username,
|
||||
Password: password,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -181,3 +487,68 @@ func (r *ProxmoxVMReconciler) findSite(config *proxmoxv1alpha1.ProviderConfig, s
|
||||
return nil, fmt.Errorf("site %s not found", siteName)
|
||||
}
|
||||
|
||||
// Helper functions for quota enforcement
|
||||
// parseMemoryToGB converts a memory quantity string into whole GiB for quota
// accounting.
//
// Matching is case-insensitive; surrounding whitespace and whitespace between
// the number and the unit are ignored. Accepted forms:
//
//   - "<n>Gi" or "<n>G": n GiB
//   - "<n>Mi" or "<n>M": n MiB, rounded UP to the next whole GiB so that
//     sub-GiB requests still count toward a quota instead of collapsing to 0
//   - "<n>Ti" or "<n>T": n TiB (saturating at the maximum int on overflow)
//   - "<n>": a bare number is taken as GiB
//
// Empty, unparseable, fractional or negative input yields 0.
func parseMemoryToGB(memory string) int {
	const (
		binaryStep = 1024 // MiB per GiB and GiB per TiB
		maxInt     = int(^uint(0) >> 1)
	)

	memory = strings.ToLower(strings.TrimSpace(memory))
	if memory == "" {
		return 0
	}

	// Split the trailing unit off the numeric part; no suffix means GiB.
	number, unit := memory, "g"
	for _, suffix := range []string{"gi", "g", "mi", "m", "ti", "t"} {
		if strings.HasSuffix(memory, suffix) {
			number = strings.TrimSpace(strings.TrimSuffix(memory, suffix))
			unit = suffix[:1]
			break
		}
	}

	value, err := strconv.Atoi(number)
	if err != nil || value < 0 {
		return 0
	}

	switch unit {
	case "m":
		// Ceiling division written without "value + 1023" to avoid overflow.
		gib := value / binaryStep
		if value%binaryStep != 0 {
			gib++
		}
		return gib
	case "t":
		if value > maxInt/binaryStep {
			return maxInt
		}
		return value * binaryStep
	default:
		return value
	}
}
|
||||
|
||||
// parseDiskToGB converts a disk size string into whole GiB for quota
// accounting.
//
// Matching is case-insensitive; surrounding whitespace and whitespace between
// the number and the unit are ignored. Accepted forms:
//
//   - "<n>Gi" or "<n>G": n GiB
//   - "<n>Ti" or "<n>T": n TiB (saturating at the maximum int on overflow)
//   - "<n>Mi" or "<n>M": n MiB, rounded UP to the next whole GiB, matching
//     the units accepted for memory so such sizes are not counted as 0
//   - "<n>": a bare number is taken as GiB
//
// Empty, unparseable, fractional or negative input yields 0.
func parseDiskToGB(disk string) int {
	const (
		binaryStep = 1024 // MiB per GiB and GiB per TiB
		maxInt     = int(^uint(0) >> 1)
	)

	disk = strings.ToLower(strings.TrimSpace(disk))
	if disk == "" {
		return 0
	}

	// Split the trailing unit off the numeric part; no suffix means GiB.
	number, unit := disk, "g"
	for _, suffix := range []string{"gi", "g", "ti", "t", "mi", "m"} {
		if strings.HasSuffix(disk, suffix) {
			number = strings.TrimSpace(strings.TrimSuffix(disk, suffix))
			unit = suffix[:1]
			break
		}
	}

	value, err := strconv.Atoi(number)
	if err != nil || value < 0 {
		return 0
	}

	switch unit {
	case "t":
		if value > maxInt/binaryStep {
			return maxInt
		}
		return value * binaryStep
	case "m":
		// Ceiling division written without "value + 1023" to avoid overflow.
		gib := value / binaryStep
		if value%binaryStep != 0 {
			gib++
		}
		return gib
	default:
		return value
	}
}
|
||||
|
||||
// intPtr returns a pointer to a newly allocated int holding the value i.
// Each call yields a distinct pointer, so callers may mutate the result
// without affecting other values obtained from intPtr.
func intPtr(i int) *int {
	p := new(int)
	*p = i
	return p
}
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"sigs.k8s.io/controller-runtime/pkg/client/fake"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
|
||||
proxmoxv1alpha1 "github.com/yourorg/crossplane-provider-proxmox/apis/v1alpha1"
|
||||
proxmoxv1alpha1 "github.com/sankofa/crossplane-provider-proxmox/apis/v1alpha1"
|
||||
)
|
||||
|
||||
func TestProxmoxVMReconciler_Reconcile(t *testing.T) {
|
||||
@@ -36,9 +36,9 @@ func TestProxmoxVMReconciler_Reconcile(t *testing.T) {
|
||||
Storage: "local-lvm",
|
||||
Network: "vmbr0",
|
||||
Image: "ubuntu-22.04-cloud",
|
||||
Site: "us-east-1",
|
||||
Site: "us-sfvalley",
|
||||
},
|
||||
ProviderConfigReference: proxmoxv1alpha1.ProviderConfigReference{
|
||||
ProviderConfigReference: &proxmoxv1alpha1.ProviderConfigReference{
|
||||
Name: "test-provider-config",
|
||||
},
|
||||
},
|
||||
@@ -88,8 +88,13 @@ func TestProxmoxVMReconciler_getCredentials(t *testing.T) {
|
||||
Name: "test-config",
|
||||
},
|
||||
Spec: proxmoxv1alpha1.ProviderConfigSpec{
|
||||
Credentials: proxmoxv1alpha1.ProviderCredentials{
|
||||
Source: proxmoxv1alpha1.CredentialsSourceSecret,
|
||||
Credentials: proxmoxv1alpha1.CredentialsSource{
|
||||
Source: "Secret",
|
||||
SecretRef: &proxmoxv1alpha1.SecretKeySelector{
|
||||
Name: "test-secret",
|
||||
Namespace: "default",
|
||||
Key: "username",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
package virtualmachine
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ErrorCategory represents a categorized error with appropriate condition type and reason
type ErrorCategory struct {
	// Type is the condition type assigned to the error class, e.g. "NetworkError".
	Type string
	// Reason is the condition reason paired with Type, e.g. "TransientNetworkFailure".
	Reason string
}

// categorizeError maps an error message to an ErrorCategory by case-insensitive
// substring matching. Rules are evaluated top to bottom and the first match
// wins:
//
//  1. APINotSupported    - "501", "not implemented", "importdisk"
//  2. ConfigurationError - provider config / credentials / site / client setup
//  3. QuotaExceeded      - "quota", or "exceeded" other than "deadline exceeded"
//  4. NodeUnhealthy      - "node" with "unhealthy", "not reachable" or "offline"
//  5. ImageNotFound      - "image" with "not found" or "cannot find"
//  6. LockError          - "lock"
//  7. NetworkError       - "network", "connection", "timeout",
//     "deadline exceeded", "502", "503"
//  8. CreationFailed     - "cannot create vm", "failed to create"
//
// Anything else yields {Type: "Failed", Reason: "UnknownError"}.
//
// A bare "timeout" is a NetworkError; it only becomes a LockError when the
// message also mentions "lock". Likewise Go's "context deadline exceeded" is
// treated as a transient network failure rather than a quota violation.
func categorizeError(errorStr string) ErrorCategory {
	msg := strings.ToLower(errorStr)

	// hasAny reports whether msg contains at least one of the given substrings.
	hasAny := func(needles ...string) bool {
		for _, needle := range needles {
			if strings.Contains(msg, needle) {
				return true
			}
		}
		return false
	}

	switch {
	// API not implemented errors
	case hasAny("501", "not implemented", "importdisk"):
		return ErrorCategory{Type: "APINotSupported", Reason: "ImportDiskAPINotImplemented"}

	// Configuration errors (non-retryable without manual intervention)
	case hasAny(
		"cannot get provider config",
		"cannot get credentials",
		"cannot find site",
		"cannot create proxmox client",
	):
		return ErrorCategory{Type: "ConfigurationError", Reason: "InvalidConfiguration"}

	// Quota errors. "deadline exceeded" is removed before looking for
	// "exceeded" so that context timeouts are not reported as quota problems.
	case hasAny("quota") ||
		strings.Contains(strings.ReplaceAll(msg, "deadline exceeded", ""), "exceeded"):
		return ErrorCategory{Type: "QuotaExceeded", Reason: "ResourceQuotaExceeded"}

	// Node health errors
	case hasAny("node") && hasAny("unhealthy", "not reachable", "offline"):
		return ErrorCategory{Type: "NodeUnhealthy", Reason: "NodeHealthCheckFailed"}

	// Image/Storage errors
	case hasAny("image") && hasAny("not found", "cannot find"):
		return ErrorCategory{Type: "ImageNotFound", Reason: "ImageNotFoundInStorage"}

	// Lock file errors. Plain timeouts are handled by the network rule below.
	case hasAny("lock"):
		return ErrorCategory{Type: "LockError", Reason: "LockFileTimeout"}

	// Network/Connection errors (retryable)
	case hasAny("network", "connection", "timeout", "deadline exceeded", "502", "503"):
		return ErrorCategory{Type: "NetworkError", Reason: "TransientNetworkFailure"}

	// Generic creation failure
	case hasAny("cannot create vm", "failed to create"):
		return ErrorCategory{Type: "CreationFailed", Reason: "VMCreationFailed"}
	}

	// Default: generic failure
	return ErrorCategory{Type: "Failed", Reason: "UnknownError"}
}
|
||||
|
||||
@@ -0,0 +1,230 @@
|
||||
package vmscaleset
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log"
|
||||
|
||||
proxmoxv1alpha1 "github.com/sankofa/crossplane-provider-proxmox/apis/v1alpha1"
|
||||
"github.com/sankofa/crossplane-provider-proxmox/pkg/metrics"
|
||||
"github.com/sankofa/crossplane-provider-proxmox/pkg/proxmox"
|
||||
"github.com/sankofa/crossplane-provider-proxmox/pkg/scaling"
|
||||
)
|
||||
|
||||
// ProxmoxVMScaleSetReconciler reconciles a ProxmoxVMScaleSet object
|
||||
type ProxmoxVMScaleSetReconciler struct {
|
||||
client.Client
|
||||
Scheme *runtime.Scheme
|
||||
}
|
||||
|
||||
//+kubebuilder:rbac:groups=proxmox.sankofa.nexus,resources=proxmoxvmscalesets,verbs=get;list;watch;create;update;patch;delete
|
||||
//+kubebuilder:rbac:groups=proxmox.sankofa.nexus,resources=proxmoxvmscalesets/status,verbs=get;update;patch
|
||||
|
||||
// Reconcile is part of the main kubernetes reconciliation loop
|
||||
func (r *ProxmoxVMScaleSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
||||
logger := log.FromContext(ctx)
|
||||
|
||||
var vmss proxmoxv1alpha1.ProxmoxVMScaleSet
|
||||
if err := r.Get(ctx, req.NamespacedName, &vmss); err != nil {
|
||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
}
|
||||
|
||||
// Validate ProviderConfigReference
|
||||
if vmss.Spec.ProviderConfigReference == nil {
|
||||
return ctrl.Result{}, errors.New("providerConfigRef is required")
|
||||
}
|
||||
if vmss.Spec.ProviderConfigReference.Name == "" {
|
||||
return ctrl.Result{}, errors.New("providerConfigRef.name is required")
|
||||
}
|
||||
|
||||
// Get ProviderConfig
|
||||
var providerConfig proxmoxv1alpha1.ProviderConfig
|
||||
if err := r.Get(ctx, client.ObjectKey{Name: vmss.Spec.ProviderConfigReference.Name}, &providerConfig); err != nil {
|
||||
return ctrl.Result{}, errors.Wrapf(err, "cannot get provider config")
|
||||
}
|
||||
|
||||
// Get credentials from secret (similar to virtualmachine controller)
|
||||
creds, err := r.getCredentials(ctx, &providerConfig)
|
||||
if err != nil {
|
||||
logger.Error(err, "cannot get credentials")
|
||||
return ctrl.Result{RequeueAfter: 30 * time.Second}, errors.Wrap(err, "cannot get credentials")
|
||||
}
|
||||
|
||||
// Find the site configuration (use first site or from spec if available)
|
||||
var site *proxmoxv1alpha1.ProxmoxSite
|
||||
if len(providerConfig.Spec.Sites) > 0 {
|
||||
site = &providerConfig.Spec.Sites[0]
|
||||
} else {
|
||||
return ctrl.Result{}, errors.New("no sites configured in provider config")
|
||||
}
|
||||
|
||||
// Create Proxmox client with proper credentials
|
||||
proxmoxClient, err := proxmox.NewClient(
|
||||
site.Endpoint,
|
||||
creds.Username,
|
||||
creds.Password,
|
||||
site.InsecureSkipTLSVerify,
|
||||
)
|
||||
if err != nil {
|
||||
return ctrl.Result{}, errors.Wrap(err, "cannot create Proxmox client")
|
||||
}
|
||||
|
||||
// Create metrics collector with Prometheus client
|
||||
// Get Prometheus endpoint from environment or ProviderConfig
|
||||
// For now, we'll use a default endpoint - in production this should come from config
|
||||
prometheusEndpoint := "http://prometheus:9090"
|
||||
if prometheusURL := os.Getenv("PROMETHEUS_ENDPOINT"); prometheusURL != "" {
|
||||
prometheusEndpoint = prometheusURL
|
||||
}
|
||||
prometheusClient := metrics.NewPrometheusAPIClient(prometheusEndpoint)
|
||||
metricsCollector := metrics.NewCollector(prometheusClient)
|
||||
|
||||
// Create policy engine
|
||||
policyEngine := scaling.NewPolicyEngine(metricsCollector)
|
||||
|
||||
// Create instance manager
|
||||
instanceManager := scaling.NewInstanceManager(proxmoxClient)
|
||||
|
||||
// Evaluate scaling policies
|
||||
decision, err := policyEngine.Evaluate(ctx, vmss.Spec, vmss.Status, vmss.Status.Instances)
|
||||
if err != nil {
|
||||
logger.Error(err, "failed to evaluate scaling policies")
|
||||
return ctrl.Result{RequeueAfter: 30 * time.Second}, err
|
||||
}
|
||||
|
||||
// Check cooldown period
|
||||
if vmss.Status.LastScaleTime != nil {
|
||||
cooldownPeriod := time.Duration(vmss.Spec.CooldownPeriod) * time.Second
|
||||
timeSinceLastScale := time.Since(vmss.Status.LastScaleTime.Time)
|
||||
if timeSinceLastScale < cooldownPeriod && decision.Action != "NO_ACTION" {
|
||||
logger.Info("Cooldown period active, skipping scaling", "timeSinceLastScale", timeSinceLastScale)
|
||||
return ctrl.Result{RequeueAfter: cooldownPeriod - timeSinceLastScale}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Execute scaling decision
|
||||
if decision.Action != "NO_ACTION" {
|
||||
logger.Info("Scaling decision", "action", decision.Action, "newReplicas", decision.NewReplicas, "reason", decision.Reason)
|
||||
|
||||
// Scale instances
|
||||
newInstances, err := instanceManager.ScaleTo(
|
||||
ctx,
|
||||
vmss.Spec.Template,
|
||||
vmss.Status.Instances,
|
||||
decision.NewReplicas,
|
||||
)
|
||||
if err != nil {
|
||||
logger.Error(err, "failed to scale instances")
|
||||
return ctrl.Result{RequeueAfter: 30 * time.Second}, err
|
||||
}
|
||||
|
||||
// Update status
|
||||
now := metav1.Now()
|
||||
vmss.Status.CurrentReplicas = len(newInstances)
|
||||
vmss.Status.DesiredReplicas = decision.NewReplicas
|
||||
vmss.Status.Instances = newInstances
|
||||
vmss.Status.LastScaleTime = &now
|
||||
|
||||
// Add scaling event
|
||||
event := proxmoxv1alpha1.ScalingEvent{
|
||||
Type: decision.Action,
|
||||
OldReplicas: len(vmss.Status.Instances),
|
||||
NewReplicas: decision.NewReplicas,
|
||||
Reason: decision.Reason,
|
||||
Timestamp: now,
|
||||
}
|
||||
vmss.Status.ScalingEvents = append(vmss.Status.ScalingEvents, event)
|
||||
|
||||
// Keep only last 10 events
|
||||
if len(vmss.Status.ScalingEvents) > 10 {
|
||||
vmss.Status.ScalingEvents = vmss.Status.ScalingEvents[len(vmss.Status.ScalingEvents)-10:]
|
||||
}
|
||||
|
||||
// Count ready replicas
|
||||
readyCount := 0
|
||||
for _, instance := range newInstances {
|
||||
if instanceManager.HealthCheck(ctx, instance) {
|
||||
readyCount++
|
||||
}
|
||||
}
|
||||
vmss.Status.ReadyReplicas = readyCount
|
||||
|
||||
if err := r.Status().Update(ctx, &vmss); err != nil {
|
||||
logger.Error(err, "failed to update status")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
}
|
||||
|
||||
// Requeue for periodic evaluation
|
||||
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
|
||||
}
|
||||
|
||||
// getCredentials retrieves credentials from the provider config secret
|
||||
func (r *ProxmoxVMScaleSetReconciler) getCredentials(ctx context.Context, config *proxmoxv1alpha1.ProviderConfig) (*credentials, error) {
|
||||
if config.Spec.Credentials.SecretRef == nil {
|
||||
return nil, fmt.Errorf("no secret reference in provider config")
|
||||
}
|
||||
|
||||
secretRef := config.Spec.Credentials.SecretRef
|
||||
|
||||
// Get secret from Kubernetes
|
||||
secret := &corev1.Secret{}
|
||||
secretKey := client.ObjectKey{
|
||||
Namespace: secretRef.Namespace,
|
||||
Name: secretRef.Name,
|
||||
}
|
||||
|
||||
if err := r.Get(ctx, secretKey, secret); err != nil {
|
||||
return nil, errors.Wrap(err, "cannot get secret")
|
||||
}
|
||||
|
||||
// Parse credentials from secret
|
||||
var username, password string
|
||||
|
||||
// Try username/password format first
|
||||
if userData, ok := secret.Data["username"]; ok {
|
||||
username = string(userData)
|
||||
}
|
||||
if passData, ok := secret.Data["password"]; ok {
|
||||
password = string(passData)
|
||||
}
|
||||
|
||||
// Try token format (for Proxmox API tokens)
|
||||
if tokenData, ok := secret.Data["token"]; ok {
|
||||
if userData, ok := secret.Data["tokenid"]; ok {
|
||||
username = string(userData)
|
||||
}
|
||||
password = string(tokenData)
|
||||
}
|
||||
|
||||
if username == "" || password == "" {
|
||||
return nil, fmt.Errorf("username/password or token missing in secret")
|
||||
}
|
||||
|
||||
return &credentials{
|
||||
Username: username,
|
||||
Password: password,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// credentials is the username/password pair handed to proxmox.NewClient.
// For token-based secrets getCredentials stores the token ID in Username and
// the token value in Password.
type credentials struct {
	Username, Password string
}
|
||||
|
||||
// SetupWithManager sets up the controller with the Manager.
|
||||
func (r *ProxmoxVMScaleSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
return ctrl.NewControllerManagedBy(mgr).
|
||||
For(&proxmoxv1alpha1.ProxmoxVMScaleSet{}).
|
||||
Complete(r)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user