Update documentation structure and enhance .gitignore

- Added generated index files and report directories to .gitignore to prevent unnecessary tracking of transient files.
- Updated README links to reflect new documentation paths for better navigation.
- Improved documentation organization by ensuring all links point to their new locations, making the docs easier to navigate.
defiQUG
2025-12-12 21:18:55 -08:00
parent 664707d912
commit fe0365757a
106 changed files with 4666 additions and 2294 deletions
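The exact .gitignore entries are not visible in this view, but based on the files the new scripts below generate, the ignored paths are likely of this form (illustrative sketch only; the actual entries and the report directories mentioned in the message are not shown here):

    # Generated documentation indexes (rebuilt by the markdown analysis scripts)
    docs/MARKDOWN_INDEX.json
    docs/MARKDOWN_REFERENCE.json
    docs/MARKDOWN_REFERENCE.md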


@@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""
Analyze Files for Pruning
Identifies files that could potentially be removed from the project.
"""
import os
import hashlib
from pathlib import Path
from collections import defaultdict
from datetime import datetime
def analyze_project():
"""Analyze project for files that can be pruned."""
root = Path('.')
results = {
'temp_files': [],
'duplicates': defaultdict(list),
'large_files': [],
'old_status_files': [],
'backup_files': [],
'build_artifacts': [],
'potentially_obsolete': []
}
# Patterns for files to check
temp_patterns = ['.tmp', '.swp', '.swo', '~', '.DS_Store', '.log']
backup_patterns = ['.backup', '.bak', '.old', '.orig']
# Directories to skip
skip_dirs = {'.git', 'node_modules', 'dist', 'build', '.next', 'coverage', '__pycache__'}
# Check all files
for root_dir, dirs, files in os.walk('.'):
# Skip certain directories
dirs[:] = [d for d in dirs if d not in skip_dirs]
root_path = Path(root_dir)
for file in files:
file_path = root_path / file
# Skip if in ignored directory
if any(skip in str(file_path) for skip in skip_dirs):
continue
# Check for temp files
if any(pattern in file for pattern in temp_patterns):
results['temp_files'].append(str(file_path))
# Check for backup files
if any(pattern in file for pattern in backup_patterns):
results['backup_files'].append(str(file_path))
# Check for large files (>5MB)
try:
size = file_path.stat().st_size
if size > 5 * 1024 * 1024: # 5MB
results['large_files'].append((str(file_path), size))
except OSError:
# Unreadable or vanished file; skip it
pass
# Check for old status/complete files in docs
if 'docs' in str(file_path) and file_path.suffix == '.md':
file_upper = file.upper()
if any(keyword in file_upper for keyword in ['COMPLETE', 'COMPLETION', 'FINAL_STATUS', 'ALL_STEPS_COMPLETE']):
if 'archive' not in str(file_path) and 'status' in str(file_path):
results['old_status_files'].append(str(file_path))
# Check for potentially obsolete documentation
if 'docs' in str(file_path) and file_path.suffix == '.md':
file_upper = file.upper()
# Files that might be superseded
obsolete_keywords = ['OLD_', 'DEPRECATED', 'LEGACY', 'UNUSED']
if any(keyword in file_upper for keyword in obsolete_keywords):
results['potentially_obsolete'].append(str(file_path))
return results
def find_duplicate_content():
"""Find files with duplicate content."""
duplicates = defaultdict(list)
skip_dirs = {'.git', 'node_modules', 'dist', 'build', '.next', 'coverage', '__pycache__'}
for root_dir, dirs, files in os.walk('.'):
dirs[:] = [d for d in dirs if d not in skip_dirs]
for file in files:
if not file.endswith(('.md', '.json', '.yaml', '.yml', '.txt')):
continue
file_path = Path(root_dir) / file
if any(skip in str(file_path) for skip in skip_dirs):
continue
try:
with open(file_path, 'rb') as f:
content_hash = hashlib.md5(f.read()).hexdigest()
duplicates[content_hash].append(str(file_path))
except OSError:
# Unreadable file; skip it
pass
# Filter to only actual duplicates (2+ files)
return {h: files for h, files in duplicates.items() if len(files) > 1}
def main():
print("="*60)
print("FILE PRUNING ANALYSIS")
print("="*60)
print()
results = analyze_project()
print("1. TEMPORARY FILES")
print("-" * 60)
if results['temp_files']:
print(f"Found {len(results['temp_files'])} temporary files:")
for f in sorted(results['temp_files'])[:20]:
print(f" - {f}")
if len(results['temp_files']) > 20:
print(f" ... and {len(results['temp_files']) - 20} more")
else:
print(" No temporary files found")
print()
print("2. BACKUP FILES")
print("-" * 60)
if results['backup_files']:
print(f"Found {len(results['backup_files'])} backup files:")
for f in sorted(results['backup_files']):
print(f" - {f}")
else:
print(" No backup files found")
print()
print("3. LARGE FILES (>5MB)")
print("-" * 60)
if results['large_files']:
print(f"Found {len(results['large_files'])} large files:")
for f, size in sorted(results['large_files'], key=lambda x: x[1], reverse=True)[:10]:
size_mb = size / (1024 * 1024)
print(f" - {f} ({size_mb:.2f} MB)")
else:
print(" No unusually large files found")
print()
print("4. OLD STATUS/COMPLETE FILES (outside archive)")
print("-" * 60)
if results['old_status_files']:
print(f"Found {len(results['old_status_files'])} status files that might be archived:")
for f in sorted(results['old_status_files']):
print(f" - {f}")
else:
print(" No old status files found outside archive")
print()
print("5. POTENTIALLY OBSOLETE FILES")
print("-" * 60)
if results['potentially_obsolete']:
print(f"Found {len(results['potentially_obsolete'])} potentially obsolete files:")
for f in sorted(results['potentially_obsolete']):
print(f" - {f}")
else:
print(" No obviously obsolete files found")
print()
print("6. DUPLICATE CONTENT")
print("-" * 60)
duplicates = find_duplicate_content()
if duplicates:
print(f"Found {len(duplicates)} groups of duplicate files:")
for i, (hash_val, files) in enumerate(list(duplicates.items())[:10], 1):
print(f"\n Group {i} ({len(files)} files):")
for f in files:
print(f" - {f}")
if len(duplicates) > 10:
print(f"\n ... and {len(duplicates) - 10} more duplicate groups")
else:
print(" No duplicate content found")
print()
# Summary
total_findings = (
len(results['temp_files']) +
len(results['backup_files']) +
len(results['large_files']) +
len(results['old_status_files']) +
len(results['potentially_obsolete'])
)
print("="*60)
print("SUMMARY")
print("="*60)
print(f"Total files that could be pruned: {total_findings}")
print(f"Duplicate file groups: {len(duplicates)}")
print()
print("Note: Review each category before deletion.")
print("Archive files are intentionally kept for historical reference.")
if __name__ == '__main__':
main()
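The diff header naming this file is not shown above, so the path below is an assumption; if the analyzer is saved under scripts/ like the other tools in this commit, a run from the repository root might look like:

    python3 scripts/analyze-files-for-pruning.py   # hypothetical filename; adjust to the actual path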

scripts/analyze-markdown.py (new file, 213 lines)

@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""
Markdown Analysis Script
Analyzes all Markdown files for duplicates and generates an index mapping content to files and line numbers.
"""
import os
import hashlib
import re
from pathlib import Path
from collections import defaultdict
from typing import Dict, List, Tuple, Set
import json
class MarkdownAnalyzer:
def __init__(self, root_dir: str = '.'):
self.root_dir = Path(root_dir)
self.md_files: List[Path] = []
self.content_index: Dict[str, Dict] = {}
self.duplicates: Dict[str, List[str]] = defaultdict(list)
self.file_structure: Dict[str, List[str]] = defaultdict(list)
def find_all_markdown(self):
"""Find all markdown files in the project."""
for md_file in self.root_dir.rglob('*.md'):
# Skip node_modules, .git, and other ignored directories
parts = md_file.parts
if any(ignore in parts for ignore in ['node_modules', '.git', 'dist', 'build', '.next']):
continue
self.md_files.append(md_file)
def analyze_duplicates(self):
"""Find duplicate files by content hash."""
content_hashes = defaultdict(list)
for md_file in self.md_files:
try:
with open(md_file, 'rb') as f:
content = f.read()
content_hash = hashlib.md5(content).hexdigest()
rel_path = str(md_file.relative_to(self.root_dir))
content_hashes[content_hash].append(rel_path)
except Exception as e:
print(f"Error reading {md_file}: {e}")
# Find duplicates
for content_hash, files in content_hashes.items():
if len(files) > 1:
self.duplicates[content_hash] = files
def index_content(self):
"""Create detailed index of markdown content with line numbers."""
for md_file in self.md_files:
rel_path = str(md_file.relative_to(self.root_dir))
try:
with open(md_file, 'r', encoding='utf-8', errors='ignore') as f:
lines = f.readlines()
# Extract metadata
title = None
headings = []
code_blocks = []
links = []
for line_num, line in enumerate(lines, 1):
# Find title (first H1)
if not title and line.strip().startswith('# '):
title = line.strip()[2:].strip()
# Find all headings
heading_match = re.match(r'^(#{1,6})\s+(.+)$', line.strip())
if heading_match:
level = len(heading_match.group(1))
heading_text = heading_match.group(2).strip()
headings.append({
'level': level,
'text': heading_text,
'line': line_num
})
# Find code blocks
if line.strip().startswith('```'):
code_blocks.append({
'line': line_num,
'type': 'code_block'
})
# Find links
link_pattern = r'\[([^\]]+)\]\(([^\)]+)\)'
for match in re.finditer(link_pattern, line):
links.append({
'text': match.group(1),
'url': match.group(2),
'line': line_num
})
self.content_index[rel_path] = {
'path': rel_path,
'title': title,
'line_count': len(lines),
'headings': headings,
'code_blocks': len(code_blocks),
'links': links,
'size_bytes': md_file.stat().st_size
}
except Exception as e:
print(f"Error indexing {md_file}: {e}")
def categorize_files(self):
"""Categorize files by location."""
for md_file in self.md_files:
rel_path = str(md_file.relative_to(self.root_dir))
parts = rel_path.split('/')
if len(parts) == 1:
category = 'root'
elif parts[0] == 'docs':
if len(parts) > 1:
category = f"docs/{parts[1]}"
else:
category = 'docs'
elif parts[0] in ['api', 'portal', 'scripts', 'crossplane-provider-proxmox']:
category = parts[0]
else:
category = 'other'
self.file_structure[category].append(rel_path)
def generate_report(self) -> Dict:
"""Generate comprehensive analysis report."""
return {
'total_files': len(self.md_files),
'unique_files': len(self.content_index),
'duplicate_groups': len(self.duplicates),
'duplicates': dict(self.duplicates),
'categories': {k: len(v) for k, v in self.file_structure.items()},
'index': self.content_index
}
def find_similar_content(self) -> Dict[str, List[str]]:
"""Find files with similar titles (potential duplicates)."""
similar = defaultdict(list)
for rel_path, data in self.content_index.items():
if data['title']:
title_key = data['title'].lower().strip()
similar[title_key].append(rel_path)
return {k: v for k, v in similar.items() if len(v) > 1}
def main():
analyzer = MarkdownAnalyzer('.')
print("Finding all Markdown files...")
analyzer.find_all_markdown()
print(f"Found {len(analyzer.md_files)} Markdown files\n")
print("Analyzing duplicates...")
analyzer.analyze_duplicates()
print(f"Found {len(analyzer.duplicates)} duplicate groups\n")
print("Indexing content...")
analyzer.index_content()
print(f"Indexed {len(analyzer.content_index)} files\n")
print("Categorizing files...")
analyzer.categorize_files()
print("Finding similar content...")
similar = analyzer.find_similar_content()
# Generate report
report = analyzer.generate_report()
# Print summary
print("\n" + "="*60)
print("MARKDOWN ANALYSIS SUMMARY")
print("="*60)
print(f"Total Markdown files: {report['total_files']}")
print(f"Unique files: {report['unique_files']}")
print(f"Duplicate groups: {report['duplicate_groups']}")
if report['duplicate_groups'] > 0:
print("\nDuplicate files:")
for hash_val, files in list(report['duplicates'].items())[:10]:
print(f"\n Hash: {hash_val[:16]}... ({len(files)} files)")
for f in files:
print(f" - {f}")
print(f"\nSimilar titles (potential duplicates): {len(similar)}")
for title, files in list(similar.items())[:10]:
print(f"\n '{title}':")
for f in files:
print(f" - {f}")
print("\nFiles by category:")
for category, count in sorted(report['categories'].items()):
print(f" {category}: {count} files")
# Save detailed report
output_file = 'docs/MARKDOWN_INDEX.json'
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nDetailed index saved to: {output_file}")
return analyzer, report
if __name__ == '__main__':
analyzer, report = main()
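A typical run, assuming it is executed from the repository root so the relative docs/ path resolves correctly:

    python3 scripts/analyze-markdown.py   # prints a summary and writes docs/MARKDOWN_INDEX.json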


@@ -0,0 +1,148 @@
#!/bin/bash
#
# Archive Old Status Files
# Moves old status and completion files to archive directories.
#
# Usage: ./scripts/cleanup-archive-old-status.sh [options]
# Options:
# --dry-run Show what would be moved without actually moving
# --help Show this help message
#
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
DRY_RUN=false
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--dry-run)
DRY_RUN=true
shift
;;
--help)
echo "Usage: $0 [options]"
echo "Options:"
echo " --dry-run Show what would be moved without actually moving"
echo " --help Show this help message"
exit 0
;;
*)
echo -e "${RED}Unknown option: $1${NC}"
echo "Use --help for usage information"
exit 1
;;
esac
done
FILES_MOVED=0
# Function to move file to archive
move_to_archive() {
local file="$1"
local archive_dir="$2"
local reason="$3"
if [[ ! -f "$file" ]]; then
echo -e "${YELLOW} ⚠ Skipping (not found): $file${NC}"
return
fi
local filename=$(basename "$file")
local dest="$archive_dir/$filename"
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW} [DRY RUN] Would move: $file${NC}"
echo -e " To: $dest"
echo -e " Reason: $reason"
FILES_MOVED=$((FILES_MOVED + 1))  # avoid ((var++)): it returns non-zero when the value is 0, aborting under set -e
return
fi
mkdir -p "$archive_dir"
# Check if destination already exists
if [[ -f "$dest" ]]; then
echo -e "${YELLOW} ⚠ Destination exists, skipping: $file${NC}"
return
fi
mv "$file" "$dest"
echo -e "${GREEN} ✓ Moved: $file${NC}"
echo -e " To: $dest"
FILES_MOVED=$((FILES_MOVED + 1))
}
echo "=========================================="
echo "Archive Old Status Files"
echo "=========================================="
echo ""
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW}DRY RUN MODE - No files will be moved${NC}"
echo ""
fi
echo "Archiving old status files..."
echo ""
# Files in docs/proxmox/status/ to archive
proxmox_status_files=(
"docs/proxmox/status/COMPLETE_STATUS.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/COMPLETE_STATUS_FINAL.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/COMPLETE_STATUS_REPORT.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/COMPLETE_SUMMARY.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/COMPLETION_SUMMARY.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/FINAL_STATUS.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/FINAL_STATUS_UPDATE.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/NEXT_STEPS_COMPLETED.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/TASK_COMPLETION_SUMMARY.md:docs/proxmox/archive:Old status file"
)
for entry in "${proxmox_status_files[@]}"; do
IFS=':' read -r file archive_dir reason <<< "$entry"
move_to_archive "$file" "$archive_dir" "$reason"
done
# Files in docs/status/implementation/ to archive
status_impl_files=(
"docs/status/implementation/ALL_TASKS_COMPLETE.md:docs/archive/status:Old status file"
"docs/status/implementation/IMPLEMENTATION_COMPLETE.md:docs/archive/status:Old status file"
"docs/status/implementation/NEXT_STEPS_COMPLETE.md:docs/archive/status:Old status file"
"docs/status/implementation/NEXT_STEPS_FINAL_STATUS.md:docs/archive/status:Old status file"
)
for entry in "${status_impl_files[@]}"; do
IFS=':' read -r file archive_dir reason <<< "$entry"
move_to_archive "$file" "$archive_dir" "$reason"
done
# Files in docs/status/ to archive
status_files=(
"docs/status/NEXT_STEPS_COMPLETION.md:docs/archive/status:Old status file"
)
for entry in "${status_files[@]}"; do
IFS=':' read -r file archive_dir reason <<< "$entry"
move_to_archive "$file" "$archive_dir" "$reason"
done
echo ""
echo "=========================================="
echo "Summary"
echo "=========================================="
echo -e "${GREEN}Files moved: $FILES_MOVED${NC}"
echo ""
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW}This was a dry run. Run without --dry-run to actually move files.${NC}"
fi
echo "Done!"

scripts/cleanup-prune-files.sh (new executable file, 196 lines)

@@ -0,0 +1,196 @@
#!/bin/bash
#
# Cleanup Script - Remove Prunable Files
# This script removes duplicate files, cache artifacts, and other files identified for pruning.
#
# Usage: ./scripts/cleanup-prune-files.sh [options]
# Options:
# --dry-run Show what would be deleted without actually deleting
# --backup Create backups before deleting
# --all Run all cleanup operations
# --duplicates Remove duplicate files only
# --cache Remove cache files only
# --help Show this help message
#
set -uo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Flags
DRY_RUN=false
BACKUP=false
RUN_ALL=false
RUN_DUPLICATES=false
RUN_CACHE=false
# Counters
FILES_DELETED=0
FILES_BACKED_UP=0
FILES_SKIPPED=0
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--dry-run)
DRY_RUN=true
shift
;;
--backup)
BACKUP=true
shift
;;
--all)
RUN_ALL=true
shift
;;
--duplicates)
RUN_DUPLICATES=true
shift
;;
--cache)
RUN_CACHE=true
shift
;;
--help)
echo "Usage: $0 [options]"
echo "Options:"
echo " --dry-run Show what would be deleted without actually deleting"
echo " --backup Create backups before deleting"
echo " --all Run all cleanup operations"
echo " --duplicates Remove duplicate files only"
echo " --cache Remove cache files only"
echo " --help Show this help message"
exit 0
;;
*)
echo -e "${RED}Unknown option: $1${NC}"
echo "Use --help for usage information"
exit 1
;;
esac
done
# If no specific operation selected, default to all
if [[ "$RUN_ALL" == false && "$RUN_DUPLICATES" == false && "$RUN_CACHE" == false ]]; then
RUN_ALL=true
fi
# Function to delete file with optional backup
delete_file() {
local file="$1"
local reason="$2"
if [[ ! -f "$file" ]]; then
echo -e "${YELLOW} ⚠ Skipping (not found): $file${NC}"
((FILES_SKIPPED++))
return
fi
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW} [DRY RUN] Would delete: $file${NC}"
echo -e " Reason: $reason"
((FILES_DELETED++))
return
fi
if [[ "$BACKUP" == true ]]; then
local backup_file="${file}.backup.$(date +%Y%m%d_%H%M%S)"
cp "$file" "$backup_file"
echo -e "${GREEN} ✓ Backed up: $backup_file${NC}"
((FILES_BACKED_UP++))
fi
rm -f "$file"
echo -e "${GREEN} ✓ Deleted: $file${NC}"
echo -e " Reason: $reason"
((FILES_DELETED++))
}
# Function to delete files matching pattern
delete_files_pattern() {
local pattern="$1"
local reason="$2"
while IFS= read -r -d '' file; do
delete_file "$file" "$reason"
done < <(find . -name "$pattern" -type f -print0 2>/dev/null)
}
echo "=========================================="
echo "File Cleanup Script"
echo "=========================================="
echo ""
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW}DRY RUN MODE - No files will be deleted${NC}"
echo ""
fi
if [[ "$BACKUP" == true ]]; then
echo -e "${YELLOW}BACKUP MODE - Backups will be created${NC}"
echo ""
fi
# 1. Remove duplicate infrastructure data files from public/
if [[ "$RUN_ALL" == true || "$RUN_DUPLICATES" == true ]]; then
echo "1. Removing duplicate infrastructure data files..."
echo " (Keeping versions in docs/infrastructure/data/)"
echo ""
duplicates=(
"public/docs/infrastructure/data/cost_estimates.json"
"public/docs/infrastructure/data/deployment_timeline.json"
"public/docs/infrastructure/data/compliance_requirements.json"
)
for file in "${duplicates[@]}"; do
delete_file "$file" "Duplicate - original exists in docs/infrastructure/data/"
done
echo ""
fi
# 2. Remove webpack cache .old files
if [[ "$RUN_ALL" == true || "$RUN_CACHE" == true ]]; then
echo "2. Removing webpack cache .old files..."
echo ""
delete_files_pattern "*.old" "Old webpack cache file (will be regenerated)"
# Also target specific webpack cache locations
webpack_cache_files=(
".next/cache/webpack/client-development/index.pack.gz.old"
".next/cache/webpack/server-development/index.pack.gz.old"
"portal/.next/cache/webpack/client-development/index.pack.gz.old"
"portal/.next/cache/webpack/server-development/index.pack.gz.old"
)
for file in "${webpack_cache_files[@]}"; do
delete_file "$file" "Old webpack cache file (will be regenerated)"
done
echo ""
fi
# Summary
echo "=========================================="
echo "Summary"
echo "=========================================="
echo -e "${GREEN}Files deleted: $FILES_DELETED${NC}"
if [[ "$BACKUP" == true ]]; then
echo -e "${GREEN}Files backed up: $FILES_BACKED_UP${NC}"
fi
if [[ $FILES_SKIPPED -gt 0 ]]; then
echo -e "${YELLOW}Files skipped: $FILES_SKIPPED${NC}"
fi
echo ""
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW}This was a dry run. Run without --dry-run to actually delete files.${NC}"
fi
echo "Done!"


@@ -0,0 +1,264 @@
#!/usr/bin/env python3
"""
Generate Markdown Reference Index
Creates a comprehensive reference mapping Markdown content to source files and line numbers.
"""
import json
import re
from pathlib import Path
from typing import Dict, List, Tuple
from collections import defaultdict
from datetime import datetime
def extract_headings_with_lines(content: str) -> List[Dict]:
"""Extract all headings with their line numbers."""
headings = []
for line_num, line in enumerate(content.split('\n'), 1):
match = re.match(r'^(#{1,6})\s+(.+)$', line.strip())
if match:
level = len(match.group(1))
text = match.group(2).strip()
headings.append({
'level': level,
'text': text,
'line': line_num
})
return headings
def extract_code_references(content: str) -> List[Dict]:
"""Extract code references (file paths, function names, etc.)."""
references = []
# Pattern for code references: file paths, function names, etc.
patterns = [
(r'`([^`]+\.(ts|tsx|js|jsx|go|py|sql|yaml|yml|json))`', 'file'),
(r'`([a-zA-Z_][a-zA-Z0-9_]*\([^)]*\))`', 'function'),
(r'\[([^\]]+)\]\(([^\)]+)\)', 'link'),
(r'`([A-Z_][A-Z0-9_]+)`', 'constant'),
]
for line_num, line in enumerate(content.split('\n'), 1):
for pattern, ref_type in patterns:
for match in re.finditer(pattern, line):
if ref_type == 'link':
references.append({
'type': ref_type,
'text': match.group(1),
'target': match.group(2),
'line': line_num
})
else:
references.append({
'type': ref_type,
'value': match.group(1),
'line': line_num
})
return references
def extract_sections(content: str, headings: List[Dict]) -> List[Dict]:
"""Extract content sections based on headings."""
sections = []
lines = content.split('\n')
for i, heading in enumerate(headings):
start_line = heading['line']
# Find end of section: the next heading of the same or a higher level, or end of file
end_line = len(lines)
for next_heading in headings[i + 1:]:
    if next_heading['level'] <= heading['level']:
        end_line = next_heading['line'] - 1
        break
section_content = '\n'.join(lines[start_line - 1:end_line])
sections.append({
'heading': heading['text'],
'level': heading['level'],
'start_line': start_line,
'end_line': end_line,
'line_count': end_line - start_line + 1,
'content_preview': section_content[:200] + '...' if len(section_content) > 200 else section_content
})
return sections
def generate_reference_mapping(index_file: str, output_file: str):
"""Generate comprehensive reference mapping."""
# Load existing index
with open(index_file, 'r', encoding='utf-8') as f:
index_data = json.load(f)
reference_map = {
'metadata': {
'total_files': len(index_data['index']),
'generated_at': datetime.now().isoformat()  # timestamp of this run, not the script's mtime
},
'by_file': {},
'by_heading': defaultdict(list),
'by_category': defaultdict(list),
'cross_references': defaultdict(list)
}
# Process each file
for file_path, file_data in index_data['index'].items():
file_path_obj = Path(file_path)
# Read full content for detailed analysis
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
full_content = f.read()
except Exception as e:
print(f"Warning: Could not read {file_path}: {e}")
continue
# Extract detailed information
headings = extract_headings_with_lines(full_content)
code_refs = extract_code_references(full_content)
sections = extract_sections(full_content, headings)
# Categorize file
category = 'other'
if file_path.startswith('docs/'):
parts = file_path.split('/')
if len(parts) > 1:
if parts[1] in ['api', 'architecture', 'proxmox', 'runbooks', 'status', 'archive']:
category = parts[1]
else:
category = 'docs'
else:
category = 'docs'
elif file_path.startswith('api/'):
category = 'api'
elif file_path.startswith('portal/'):
category = 'portal'
# Build file entry
file_entry = {
'path': file_path,
'title': file_data.get('title', ''),
'category': category,
'line_count': file_data['line_count'],
'size_bytes': file_data['size_bytes'],
'headings': headings,
'sections': sections,
'code_references': code_refs,
'links': file_data.get('links', []),
'code_blocks': file_data.get('code_blocks', 0)
}
reference_map['by_file'][file_path] = file_entry
# Index by heading
for heading in headings:
reference_map['by_heading'][heading['text'].lower()].append({
'file': file_path,
'line': heading['line'],
'level': heading['level']
})
# Index by category
reference_map['by_category'][category].append(file_path)
# Extract cross-references (links to other markdown files)
for link in file_data.get('links', []):
link_target = link.get('url', '')
if link_target.split('#', 1)[0].endswith('.md'):  # treat FILE.md and FILE.md#anchor links as markdown cross-references
# Normalize link target
if link_target.startswith('./'):
link_target = str(file_path_obj.parent / link_target[2:])
elif link_target.startswith('../'):
link_target = str(file_path_obj.parent.parent / link_target[3:])
reference_map['cross_references'][file_path].append({
'target': link_target,
'text': link.get('text', ''),
'line': link.get('line', 0)
})
# Save reference mapping
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(reference_map, f, indent=2, ensure_ascii=False)
# Generate human-readable report
report_file = output_file.replace('.json', '.md')
generate_markdown_report(reference_map, report_file)
print(f"Reference mapping saved to: {output_file}")
print(f"Human-readable report saved to: {report_file}")
return reference_map
def generate_markdown_report(reference_map: Dict, output_file: str):
"""Generate human-readable Markdown report."""
with open(output_file, 'w', encoding='utf-8') as f:
f.write("# Markdown Reference Index\n\n")
f.write(f"**Generated**: {reference_map['metadata']['generated_at']}\n")
f.write(f"**Total Files**: {reference_map['metadata']['total_files']}\n\n")
f.write("---\n\n")
# Files by category
f.write("## Files by Category\n\n")
for category in sorted(reference_map['by_category'].keys()):
files = reference_map['by_category'][category]
f.write(f"### {category} ({len(files)} files)\n\n")
for file_path in sorted(files)[:20]:
file_entry = reference_map['by_file'][file_path]
f.write(f"- [{file_entry['title'] or file_path}](./{file_path}) - {file_entry['line_count']} lines\n")
if len(files) > 20:
f.write(f" *... and {len(files) - 20} more files*\n")
f.write("\n")
# Heading index
f.write("## Heading Index\n\n")
f.write("*Top 50 most common headings*\n\n")
heading_counts = [(h, len(refs)) for h, refs in reference_map['by_heading'].items()]
heading_counts.sort(key=lambda x: x[1], reverse=True)
for heading, count in heading_counts[:50]:
refs = reference_map['by_heading'][heading]
f.write(f"### {heading} ({count} occurrences)\n\n")
for ref in refs[:5]:
f.write(f"- Line {ref['line']}: [{ref['file']}](./{ref['file']}#{heading.lower().replace(' ', '-')})\n")
if len(refs) > 5:
f.write(f" *... and {len(refs) - 5} more occurrences*\n")
f.write("\n")
# File details
f.write("## File Details\n\n")
f.write("*Files with headings and line numbers*\n\n")
for file_path in sorted(reference_map['by_file'].keys())[:30]:
file_entry = reference_map['by_file'][file_path]
f.write(f"### {file_path}\n\n")
f.write(f"**Title**: {file_entry['title'] or 'N/A'}\n")
f.write(f"**Lines**: {file_entry['line_count']}\n")
f.write(f"**Headings**: {len(file_entry['headings'])}\n\n")
if file_entry['headings']:
f.write("**Headings**:\n")
for heading in file_entry['headings'][:10]:
indent = ' ' * (heading['level'] - 1)
f.write(f"{indent}- Line {heading['line']}: {heading['text']}\n")
if len(file_entry['headings']) > 10:
f.write(f" *... and {len(file_entry['headings']) - 10} more headings*\n")
f.write("\n")
if __name__ == '__main__':
import sys
index_file = 'docs/MARKDOWN_INDEX.json'
output_file = 'docs/MARKDOWN_REFERENCE.json'
if len(sys.argv) > 1:
index_file = sys.argv[1]
if len(sys.argv) > 2:
output_file = sys.argv[2]
reference_map = generate_reference_mapping(index_file, output_file)
print("\nReference mapping generation complete!")


@@ -0,0 +1,129 @@
#!/usr/bin/env python3
"""
Update Markdown Links After Reorganization
Updates links to moved files in documentation.
"""
import os
import re
from pathlib import Path
# Mapping of old paths to new paths (relative to docs/)
FILE_MOVES = {
'AUDIT_SUMMARY.md': 'reports/AUDIT_SUMMARY.md',
'COMPREHENSIVE_AUDIT_REPORT.md': 'reports/COMPREHENSIVE_AUDIT_REPORT.md',
'PROXMOX_COMPREHENSIVE_AUDIT_REPORT.md': 'reports/PROXMOX_COMPREHENSIVE_AUDIT_REPORT.md',
'REPOSITORY_AUDIT_REPORT.md': 'reports/REPOSITORY_AUDIT_REPORT.md',
'PROJECT_COMPREHENSIVE_REVIEW.md': 'reports/PROJECT_COMPREHENSIVE_REVIEW.md',
'REVIEW_ITEMS_COMPLETED.md': 'reports/REVIEW_ITEMS_COMPLETED.md',
'DOCUMENTATION_DEEP_DIVE_ANALYSIS.md': 'reports/DOCUMENTATION_DEEP_DIVE_ANALYSIS.md',
'DOCUMENTATION_FIXES_APPLIED.md': 'reports/DOCUMENTATION_FIXES_APPLIED.md',
'DOCUMENTATION_COMPLETE_SUMMARY.md': 'summaries/DOCUMENTATION_COMPLETE_SUMMARY.md',
'IMPLEMENTATION_SUMMARY.md': 'summaries/IMPLEMENTATION_SUMMARY.md',
'BUILD_AND_DEPLOY_INSTRUCTIONS.md': 'guides/BUILD_AND_DEPLOY_INSTRUCTIONS.md',
'FORCE_UNLOCK_INSTRUCTIONS.md': 'guides/FORCE_UNLOCK_INSTRUCTIONS.md',
'QUICK_INSTALL_GUEST_AGENT.md': 'guides/QUICK_INSTALL_GUEST_AGENT.md',
'enable-guest-agent-manual.md': 'guides/enable-guest-agent-manual.md',
'GUEST_AGENT_CHECKLIST.md': 'guest-agent/GUEST_AGENT_CHECKLIST.md',
'GUEST_AGENT_CONFIGURATION_ANALYSIS.md': 'guest-agent/GUEST_AGENT_CONFIGURATION_ANALYSIS.md',
'VM_CREATION_PROCEDURE.md': 'vm/VM_CREATION_PROCEDURE.md',
'VM_DEPLOYMENT_CHECKLIST.md': 'vm/VM_DEPLOYMENT_CHECKLIST.md',
'VM_SPECIFICATIONS.md': 'vm/VM_SPECIFICATIONS.md',
'COPY_SCRIPT_TO_PROXMOX_NODES.md': 'reference/COPY_SCRIPT_TO_PROXMOX_NODES.md',
'SCRIPT_COPIED_TO_PROXMOX_NODES.md': 'reference/SCRIPT_COPIED_TO_PROXMOX_NODES.md',
'CODE_INCONSISTENCIES.md': 'reference/CODE_INCONSISTENCIES.md',
'DEPLOYMENT_NEXT_STEPS.md': 'deployment/DEPLOYMENT_NEXT_STEPS.md',
'DEPLOYMENT_READY.md': 'deployment/DEPLOYMENT_READY.md',
'PRE_DEPLOYMENT_CHECKLIST.md': 'deployment/PRE_DEPLOYMENT_CHECKLIST.md',
}
def calculate_relative_path(from_file: Path, to_file: str) -> str:
"""Calculate relative path from one file to another."""
from_dir = from_file.parent
to_path = Path('docs') / to_file
try:
rel_path = os.path.relpath(to_path, from_dir)
# Normalize path separators for markdown
return rel_path.replace('\\', '/')
except ValueError:
# Fall back to the mapped path if a relative path cannot be computed
return to_file
def update_links_in_file(file_path: Path, dry_run: bool = True):
"""Update links in a single file."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
original_content = content
updated = False
for old_file, new_file in FILE_MOVES.items():
# Pattern 1: [text](./FILE.md) or [text](FILE.md)
pattern1 = rf'(\[[^\]]+\]\()\.?/?{re.escape(old_file)}(#[^\)]+)?(\))'
def replace1(match):
new_path = calculate_relative_path(file_path, new_file)
anchor = match.group(2) or ''
return f"{match.group(1)}{new_path}{anchor}{match.group(3)}"
if re.search(pattern1, content):
content = re.sub(pattern1, replace1, content)
updated = True
# Pattern 2: [text](./FILE.md#anchor)
pattern2 = rf'(\[[^\]]+\]\(\./){re.escape(old_file)}(#[^\)]+)?(\))'
def replace2(match):
new_path = calculate_relative_path(file_path, new_file)
anchor = match.group(2) or ''
return f"{match.group(1)}{new_path}{anchor}{match.group(3)}"
if re.search(pattern2, content):
content = re.sub(pattern2, replace2, content)
updated = True
if updated and content != original_content:
if not dry_run:
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
return True
return False
except Exception as e:
print(f"Error processing {file_path}: {e}")
return False
def main():
import sys
dry_run = '--dry-run' in sys.argv or '-n' in sys.argv
if not dry_run:
response = input("This will modify files. Continue? (yes/no): ")
if response.lower() != 'yes':
print("Aborted.")
return
docs_dir = Path('docs')
md_files = list(docs_dir.rglob('*.md'))
updated_count = 0
for md_file in md_files:
# Skip the moved files themselves
if any(md_file.name == old_file for old_file in FILE_MOVES.keys()):
continue
if update_links_in_file(md_file, dry_run=dry_run):
updated_count += 1
if dry_run:
print(f"Would update: {md_file}")
else:
print(f"Updated: {md_file}")
if dry_run:
print(f"\nDry run complete. {updated_count} files would be updated.")
print("Run without --dry-run to apply changes.")
else:
print(f"\nUpdated {updated_count} files.")
if __name__ == '__main__':
main()
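As a worked example of the rewrite (the source file here is hypothetical): if docs/README.md contained the link [Audit Summary](./AUDIT_SUMMARY.md), the mapping sends AUDIT_SUMMARY.md to reports/AUDIT_SUMMARY.md, calculate_relative_path resolves that to reports/AUDIT_SUMMARY.md relative to docs/, and the link is rewritten to [Audit Summary](reports/AUDIT_SUMMARY.md). Running with --dry-run first lists the files that would change without modifying them.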