feat(api): Enhance NodeDiscover with subnet auto-detection and discovery cancellation
- Updated NodeDiscover to accept an optional subnet parameter, with auto-detection of local networks if none is provided.
- Removed support for IP list format in NodeDiscover request body.
- Implemented discovery cancellation functionality with NodeDiscoveryCancel endpoint.
- Improved error handling and response messages for better clarity.

feat(cluster): Add operation tracking for cluster bootstrap process
- Integrated operations manager into cluster manager for tracking bootstrap progress.
- Refactored Bootstrap method to run asynchronously with detailed progress updates.
- Added methods to wait for various bootstrap steps (etcd health, VIP assignment, control plane readiness, etc.).

fix(discovery): Optimize node discovery process and improve maintenance mode detection
- Enhanced node discovery to run in parallel with a semaphore to limit concurrent scans.
- Updated probeNode to detect maintenance mode more reliably.
- Added functions to expand CIDR notation into individual IP addresses and retrieve local network interfaces.

refactor(node): Update node manager to handle instance-specific configurations
- Modified NewManager to accept instanceName for tailored talosconfig usage.
- Improved hardware detection logic to handle maintenance mode scenarios.

feat(operations): Implement detailed bootstrap progress tracking
- Introduced BootstrapProgress struct to track and report the status of bootstrap operations.
- Updated operation management to include bootstrap-specific details.

fix(tools): Improve talosctl command execution with context and error handling
- Added context with timeout to talosctl commands to prevent hanging on unreachable nodes.
- Enhanced error handling for version retrieval in maintenance mode.
This commit is contained in:
@@ -11,6 +11,9 @@ import (
|
||||
"github.com/wild-cloud/wild-central/daemon/internal/tools"
|
||||
)
|
||||
|
||||
// totalBootstrapSteps is the number of steps in the bootstrap sequence; steps are indexed 0 through totalBootstrapSteps-1.
|
||||
const totalBootstrapSteps = 7
|
||||
|
||||
// Manager handles async operation tracking
|
||||
type Manager struct {
|
||||
dataDir string
|
||||
@@ -23,18 +26,33 @@ func NewManager(dataDir string) *Manager {
|
||||
}
|
||||
}
|
||||
|
||||
// BootstrapProgress tracks detailed bootstrap progress: which step is
// currently running and which attempt of that step is in flight.
type BootstrapProgress struct {
	// CurrentStep is the zero-based index of the current step (0-6).
	CurrentStep int `json:"current_step"`
	// StepName is the name of the current step.
	StepName string `json:"step_name"`
	// Attempt is the attempt number for the current step.
	Attempt int `json:"attempt"`
	// MaxAttempts is the attempt limit for the current step.
	MaxAttempts int `json:"max_attempts"`
	// StepDescription is a description of the current step.
	StepDescription string `json:"step_description"`
}
|
||||
|
||||
// OperationDetails contains operation-specific details
|
||||
type OperationDetails struct {
|
||||
BootstrapProgress *BootstrapProgress `json:"bootstrap,omitempty"`
|
||||
}
|
||||
|
||||
// Operation represents a long-running operation
|
||||
type Operation struct {
|
||||
ID string `json:"id"`
|
||||
Type string `json:"type"` // discover, setup, download, bootstrap
|
||||
Target string `json:"target"`
|
||||
Instance string `json:"instance"`
|
||||
Status string `json:"status"` // pending, running, completed, failed, cancelled
|
||||
Message string `json:"message,omitempty"`
|
||||
Progress int `json:"progress"` // 0-100
|
||||
LogFile string `json:"logFile,omitempty"` // Path to output log file
|
||||
StartedAt time.Time `json:"started_at"`
|
||||
EndedAt time.Time `json:"ended_at,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Type string `json:"type"` // discover, setup, download, bootstrap
|
||||
Target string `json:"target"`
|
||||
Instance string `json:"instance"`
|
||||
Status string `json:"status"` // pending, running, completed, failed, cancelled
|
||||
Message string `json:"message,omitempty"`
|
||||
Progress int `json:"progress"` // 0-100
|
||||
Details *OperationDetails `json:"details,omitempty"` // Operation-specific details
|
||||
LogFile string `json:"logFile,omitempty"` // Path to output log file
|
||||
StartedAt time.Time `json:"started_at"`
|
||||
EndedAt time.Time `json:"ended_at,omitempty"`
|
||||
}
|
||||
|
||||
// GetOperationsDir returns the operations directory for an instance
|
||||
@@ -79,19 +97,6 @@ func (m *Manager) Start(instanceName, opType, target string) (string, error) {
|
||||
return opID, nil
|
||||
}
|
||||
|
||||
// Get returns operation status
|
||||
func (m *Manager) Get(opID string) (*Operation, error) {
|
||||
// Operation ID contains instance name, but we need to find it
|
||||
// For now, we'll scan all instances (not ideal but simple)
|
||||
// Better approach: encode instance in operation ID or maintain index
|
||||
|
||||
// Simplified: assume operation ID format is op_{type}_{target}_{timestamp}
|
||||
// We need to know which instance to look in
|
||||
// For now, return error if we can't find it
|
||||
|
||||
// This needs improvement in actual implementation
|
||||
return nil, fmt.Errorf("operation lookup not implemented - need instance context")
|
||||
}
|
||||
|
||||
// GetByInstance returns an operation for a specific instance
|
||||
func (m *Manager) GetByInstance(instanceName, opID string) (*Operation, error) {
|
||||
@@ -238,6 +243,31 @@ func (m *Manager) Cleanup(instanceName string, olderThan time.Duration) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateBootstrapProgress updates bootstrap-specific progress details
|
||||
func (m *Manager) UpdateBootstrapProgress(instanceName, opID string, step int, stepName string, attempt, maxAttempts int, stepDescription string) error {
|
||||
op, err := m.GetByInstance(instanceName, opID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if op.Details == nil {
|
||||
op.Details = &OperationDetails{}
|
||||
}
|
||||
|
||||
op.Details.BootstrapProgress = &BootstrapProgress{
|
||||
CurrentStep: step,
|
||||
StepName: stepName,
|
||||
Attempt: attempt,
|
||||
MaxAttempts: maxAttempts,
|
||||
StepDescription: stepDescription,
|
||||
}
|
||||
|
||||
op.Progress = (step * 100) / (totalBootstrapSteps - 1)
|
||||
op.Message = fmt.Sprintf("Step %d/%d: %s (attempt %d/%d)", step+1, totalBootstrapSteps, stepName, attempt, maxAttempts)
|
||||
|
||||
return m.writeOperation(op)
|
||||
}
|
||||
|
||||
// writeOperation writes operation to disk
|
||||
func (m *Manager) writeOperation(op *Operation) error {
|
||||
opsDir := m.GetOperationsDir(op.Instance)
|
||||
|
||||
Reference in New Issue
Block a user