Update documentation structure and enhance .gitignore

- Added generated index files and report directories to .gitignore to prevent unnecessary tracking of transient files.
- Updated README links to reflect new documentation paths for better navigation.
- Improved documentation organization by ensuring all links point to their new locations, making the docs easier to navigate.
defiQUG
2025-12-12 21:18:55 -08:00
parent 664707d912
commit fe0365757a
106 changed files with 4666 additions and 2294 deletions
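The exact .gitignore entries are not visible in this view, but based on the files the new scripts below generate, the ignored paths are likely of this form (illustrative sketch only; the actual entries and the report directories mentioned in the message are not shown here):

    # Generated documentation indexes (rebuilt by the markdown analysis scripts)
    docs/MARKDOWN_INDEX.json
    docs/MARKDOWN_REFERENCE.json
    docs/MARKDOWN_REFERENCE.md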


@@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""
Analyze Files for Pruning
Identifies files that could potentially be removed from the project.
"""
import os
import hashlib
from pathlib import Path
from collections import defaultdict
from datetime import datetime
def analyze_project():
"""Analyze project for files that can be pruned."""
root = Path('.')
results = {
'temp_files': [],
'duplicates': defaultdict(list),
'large_files': [],
'old_status_files': [],
'backup_files': [],
'build_artifacts': [],
'potentially_obsolete': []
}
# Patterns for files to check
temp_patterns = ['.tmp', '.swp', '.swo', '~', '.DS_Store', '.log']
backup_patterns = ['.backup', '.bak', '.old', '.orig']
# Directories to skip
skip_dirs = {'.git', 'node_modules', 'dist', 'build', '.next', 'coverage', '__pycache__'}
# Check all files
for root_dir, dirs, files in os.walk('.'):
# Skip certain directories
dirs[:] = [d for d in dirs if d not in skip_dirs]
root_path = Path(root_dir)
for file in files:
file_path = root_path / file
# Skip if in ignored directory
if any(skip in str(file_path) for skip in skip_dirs):
continue
# Check for temp files
if any(pattern in file for pattern in temp_patterns):
results['temp_files'].append(str(file_path))
# Check for backup files
if any(pattern in file for pattern in backup_patterns):
results['backup_files'].append(str(file_path))
# Check for large files (>5MB)
try:
size = file_path.stat().st_size
if size > 5 * 1024 * 1024: # 5MB
results['large_files'].append((str(file_path), size))
except OSError:
# Unreadable or vanished file; skip it
pass
# Check for old status/complete files in docs
if 'docs' in str(file_path) and file_path.suffix == '.md':
file_upper = file.upper()
if any(keyword in file_upper for keyword in ['COMPLETE', 'COMPLETION', 'FINAL_STATUS', 'ALL_STEPS_COMPLETE']):
if 'archive' not in str(file_path) and 'status' in str(file_path):
results['old_status_files'].append(str(file_path))
# Check for potentially obsolete documentation
if 'docs' in str(file_path) and file_path.suffix == '.md':
file_upper = file.upper()
# Files that might be superseded
obsolete_keywords = ['OLD_', 'DEPRECATED', 'LEGACY', 'UNUSED']
if any(keyword in file_upper for keyword in obsolete_keywords):
results['potentially_obsolete'].append(str(file_path))
return results
def find_duplicate_content():
"""Find files with duplicate content."""
duplicates = defaultdict(list)
skip_dirs = {'.git', 'node_modules', 'dist', 'build', '.next', 'coverage', '__pycache__'}
for root_dir, dirs, files in os.walk('.'):
dirs[:] = [d for d in dirs if d not in skip_dirs]
for file in files:
if not file.endswith(('.md', '.json', '.yaml', '.yml', '.txt')):
continue
file_path = Path(root_dir) / file
if any(skip in str(file_path) for skip in skip_dirs):
continue
try:
with open(file_path, 'rb') as f:
content_hash = hashlib.md5(f.read()).hexdigest()
duplicates[content_hash].append(str(file_path))
except OSError:
# Unreadable file; skip it
pass
# Filter to only actual duplicates (2+ files)
return {h: files for h, files in duplicates.items() if len(files) > 1}
def main():
print("="*60)
print("FILE PRUNING ANALYSIS")
print("="*60)
print()
results = analyze_project()
print("1. TEMPORARY FILES")
print("-" * 60)
if results['temp_files']:
print(f"Found {len(results['temp_files'])} temporary files:")
for f in sorted(results['temp_files'])[:20]:
print(f" - {f}")
if len(results['temp_files']) > 20:
print(f" ... and {len(results['temp_files']) - 20} more")
else:
print(" No temporary files found")
print()
print("2. BACKUP FILES")
print("-" * 60)
if results['backup_files']:
print(f"Found {len(results['backup_files'])} backup files:")
for f in sorted(results['backup_files']):
print(f" - {f}")
else:
print(" No backup files found")
print()
print("3. LARGE FILES (>5MB)")
print("-" * 60)
if results['large_files']:
print(f"Found {len(results['large_files'])} large files:")
for f, size in sorted(results['large_files'], key=lambda x: x[1], reverse=True)[:10]:
size_mb = size / (1024 * 1024)
print(f" - {f} ({size_mb:.2f} MB)")
else:
print(" No unusually large files found")
print()
print("4. OLD STATUS/COMPLETE FILES (outside archive)")
print("-" * 60)
if results['old_status_files']:
print(f"Found {len(results['old_status_files'])} status files that might be archived:")
for f in sorted(results['old_status_files']):
print(f" - {f}")
else:
print(" No old status files found outside archive")
print()
print("5. POTENTIALLY OBSOLETE FILES")
print("-" * 60)
if results['potentially_obsolete']:
print(f"Found {len(results['potentially_obsolete'])} potentially obsolete files:")
for f in sorted(results['potentially_obsolete']):
print(f" - {f}")
else:
print(" No obviously obsolete files found")
print()
print("6. DUPLICATE CONTENT")
print("-" * 60)
duplicates = find_duplicate_content()
if duplicates:
print(f"Found {len(duplicates)} groups of duplicate files:")
for i, (hash_val, files) in enumerate(list(duplicates.items())[:10], 1):
print(f"\n Group {i} ({len(files)} files):")
for f in files:
print(f" - {f}")
if len(duplicates) > 10:
print(f"\n ... and {len(duplicates) - 10} more duplicate groups")
else:
print(" No duplicate content found")
print()
# Summary
total_findings = (
len(results['temp_files']) +
len(results['backup_files']) +
len(results['large_files']) +
len(results['old_status_files']) +
len(results['potentially_obsolete'])
)
print("="*60)
print("SUMMARY")
print("="*60)
print(f"Total files that could be pruned: {total_findings}")
print(f"Duplicate file groups: {len(duplicates)}")
print()
print("Note: Review each category before deletion.")
print("Archive files are intentionally kept for historical reference.")
if __name__ == '__main__':
main()
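The diff header naming this file is not shown above, so the path below is an assumption; if the analyzer is saved under scripts/ like the other tools in this commit, a run from the repository root might look like:

    python3 scripts/analyze-files-for-pruning.py   # hypothetical filename; adjust to the actual path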

scripts/analyze-markdown.py (new file, 213 lines)

@@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""
Markdown Analysis Script
Analyzes all Markdown files for duplicates and generates an index mapping content to files and line numbers.
"""
import os
import hashlib
import re
from pathlib import Path
from collections import defaultdict
from typing import Dict, List, Tuple, Set
import json
class MarkdownAnalyzer:
def __init__(self, root_dir: str = '.'):
self.root_dir = Path(root_dir)
self.md_files: List[Path] = []
self.content_index: Dict[str, Dict] = {}
self.duplicates: Dict[str, List[str]] = defaultdict(list)
self.file_structure: Dict[str, List[str]] = defaultdict(list)
def find_all_markdown(self):
"""Find all markdown files in the project."""
for md_file in self.root_dir.rglob('*.md'):
# Skip node_modules, .git, and other ignored directories
parts = md_file.parts
if any(ignore in parts for ignore in ['node_modules', '.git', 'dist', 'build', '.next']):
continue
self.md_files.append(md_file)
def analyze_duplicates(self):
"""Find duplicate files by content hash."""
content_hashes = defaultdict(list)
for md_file in self.md_files:
try:
with open(md_file, 'rb') as f:
content = f.read()
content_hash = hashlib.md5(content).hexdigest()
rel_path = str(md_file.relative_to(self.root_dir))
content_hashes[content_hash].append(rel_path)
except Exception as e:
print(f"Error reading {md_file}: {e}")
# Find duplicates
for content_hash, files in content_hashes.items():
if len(files) > 1:
self.duplicates[content_hash] = files
def index_content(self):
"""Create detailed index of markdown content with line numbers."""
for md_file in self.md_files:
rel_path = str(md_file.relative_to(self.root_dir))
try:
with open(md_file, 'r', encoding='utf-8', errors='ignore') as f:
lines = f.readlines()
# Extract metadata
title = None
headings = []
code_blocks = []
links = []
for line_num, line in enumerate(lines, 1):
# Find title (first H1)
if not title and line.strip().startswith('# '):
title = line.strip()[2:].strip()
# Find all headings
heading_match = re.match(r'^(#{1,6})\s+(.+)$', line.strip())
if heading_match:
level = len(heading_match.group(1))
heading_text = heading_match.group(2).strip()
headings.append({
'level': level,
'text': heading_text,
'line': line_num
})
# Find code blocks
if line.strip().startswith('```'):
code_blocks.append({
'line': line_num,
'type': 'code_block'
})
# Find links
link_pattern = r'\[([^\]]+)\]\(([^\)]+)\)'
for match in re.finditer(link_pattern, line):
links.append({
'text': match.group(1),
'url': match.group(2),
'line': line_num
})
self.content_index[rel_path] = {
'path': rel_path,
'title': title,
'line_count': len(lines),
'headings': headings,
'code_blocks': len(code_blocks),
'links': links,
'size_bytes': md_file.stat().st_size
}
except Exception as e:
print(f"Error indexing {md_file}: {e}")
def categorize_files(self):
"""Categorize files by location."""
for md_file in self.md_files:
rel_path = str(md_file.relative_to(self.root_dir))
parts = rel_path.split('/')
if len(parts) == 1:
category = 'root'
elif parts[0] == 'docs':
if len(parts) > 1:
category = f"docs/{parts[1]}"
else:
category = 'docs'
elif parts[0] in ['api', 'portal', 'scripts', 'crossplane-provider-proxmox']:
category = parts[0]
else:
category = 'other'
self.file_structure[category].append(rel_path)
def generate_report(self) -> Dict:
"""Generate comprehensive analysis report."""
return {
'total_files': len(self.md_files),
'unique_files': len(self.content_index),
'duplicate_groups': len(self.duplicates),
'duplicates': dict(self.duplicates),
'categories': {k: len(v) for k, v in self.file_structure.items()},
'index': self.content_index
}
def find_similar_content(self) -> Dict[str, List[str]]:
"""Find files with similar titles (potential duplicates)."""
similar = defaultdict(list)
for rel_path, data in self.content_index.items():
if data['title']:
title_key = data['title'].lower().strip()
similar[title_key].append(rel_path)
return {k: v for k, v in similar.items() if len(v) > 1}
def main():
analyzer = MarkdownAnalyzer('.')
print("Finding all Markdown files...")
analyzer.find_all_markdown()
print(f"Found {len(analyzer.md_files)} Markdown files\n")
print("Analyzing duplicates...")
analyzer.analyze_duplicates()
print(f"Found {len(analyzer.duplicates)} duplicate groups\n")
print("Indexing content...")
analyzer.index_content()
print(f"Indexed {len(analyzer.content_index)} files\n")
print("Categorizing files...")
analyzer.categorize_files()
print("Finding similar content...")
similar = analyzer.find_similar_content()
# Generate report
report = analyzer.generate_report()
# Print summary
print("\n" + "="*60)
print("MARKDOWN ANALYSIS SUMMARY")
print("="*60)
print(f"Total Markdown files: {report['total_files']}")
print(f"Unique files: {report['unique_files']}")
print(f"Duplicate groups: {report['duplicate_groups']}")
if report['duplicate_groups'] > 0:
print("\nDuplicate files:")
for hash_val, files in list(report['duplicates'].items())[:10]:
print(f"\n Hash: {hash_val[:16]}... ({len(files)} files)")
for f in files:
print(f" - {f}")
print(f"\nSimilar titles (potential duplicates): {len(similar)}")
for title, files in list(similar.items())[:10]:
print(f"\n '{title}':")
for f in files:
print(f" - {f}")
print("\nFiles by category:")
for category, count in sorted(report['categories'].items()):
print(f" {category}: {count} files")
# Save detailed report
output_file = 'docs/MARKDOWN_INDEX.json'
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\nDetailed index saved to: {output_file}")
return analyzer, report
if __name__ == '__main__':
analyzer, report = main()
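A typical run, assuming it is executed from the repository root so the relative docs/ path resolves correctly:

    python3 scripts/analyze-markdown.py   # prints a summary and writes docs/MARKDOWN_INDEX.json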


@@ -0,0 +1,148 @@
#!/bin/bash
#
# Archive Old Status Files
# Moves old status and completion files to archive directories.
#
# Usage: ./scripts/cleanup-archive-old-status.sh [options]
# Options:
# --dry-run Show what would be moved without actually moving
# --help Show this help message
#
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
DRY_RUN=false
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--dry-run)
DRY_RUN=true
shift
;;
--help)
echo "Usage: $0 [options]"
echo "Options:"
echo " --dry-run Show what would be moved without actually moving"
echo " --help Show this help message"
exit 0
;;
*)
echo -e "${RED}Unknown option: $1${NC}"
echo "Use --help for usage information"
exit 1
;;
esac
done
FILES_MOVED=0
# Function to move file to archive
move_to_archive() {
local file="$1"
local archive_dir="$2"
local reason="$3"
if [[ ! -f "$file" ]]; then
echo -e "${YELLOW} ⚠ Skipping (not found): $file${NC}"
return
fi
local filename=$(basename "$file")
local dest="$archive_dir/$filename"
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW} [DRY RUN] Would move: $file${NC}"
echo -e " To: $dest"
echo -e " Reason: $reason"
FILES_MOVED=$((FILES_MOVED + 1))  # avoid ((var++)): it returns non-zero when the value is 0, aborting under set -e
return
fi
mkdir -p "$archive_dir"
# Check if destination already exists
if [[ -f "$dest" ]]; then
echo -e "${YELLOW} ⚠ Destination exists, skipping: $file${NC}"
return
fi
mv "$file" "$dest"
echo -e "${GREEN} ✓ Moved: $file${NC}"
echo -e " To: $dest"
FILES_MOVED=$((FILES_MOVED + 1))
}
echo "=========================================="
echo "Archive Old Status Files"
echo "=========================================="
echo ""
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW}DRY RUN MODE - No files will be moved${NC}"
echo ""
fi
echo "Archiving old status files..."
echo ""
# Files in docs/proxmox/status/ to archive
proxmox_status_files=(
"docs/proxmox/status/COMPLETE_STATUS.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/COMPLETE_STATUS_FINAL.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/COMPLETE_STATUS_REPORT.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/COMPLETE_SUMMARY.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/COMPLETION_SUMMARY.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/FINAL_STATUS.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/FINAL_STATUS_UPDATE.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/NEXT_STEPS_COMPLETED.md:docs/proxmox/archive:Old status file"
"docs/proxmox/status/TASK_COMPLETION_SUMMARY.md:docs/proxmox/archive:Old status file"
)
for entry in "${proxmox_status_files[@]}"; do
IFS=':' read -r file archive_dir reason <<< "$entry"
move_to_archive "$file" "$archive_dir" "$reason"
done
# Files in docs/status/implementation/ to archive
status_impl_files=(
"docs/status/implementation/ALL_TASKS_COMPLETE.md:docs/archive/status:Old status file"
"docs/status/implementation/IMPLEMENTATION_COMPLETE.md:docs/archive/status:Old status file"
"docs/status/implementation/NEXT_STEPS_COMPLETE.md:docs/archive/status:Old status file"
"docs/status/implementation/NEXT_STEPS_FINAL_STATUS.md:docs/archive/status:Old status file"
)
for entry in "${status_impl_files[@]}"; do
IFS=':' read -r file archive_dir reason <<< "$entry"
move_to_archive "$file" "$archive_dir" "$reason"
done
# Files in docs/status/ to archive
status_files=(
"docs/status/NEXT_STEPS_COMPLETION.md:docs/archive/status:Old status file"
)
for entry in "${status_files[@]}"; do
IFS=':' read -r file archive_dir reason <<< "$entry"
move_to_archive "$file" "$archive_dir" "$reason"
done
echo ""
echo "=========================================="
echo "Summary"
echo "=========================================="
echo -e "${GREEN}Files moved: $FILES_MOVED${NC}"
echo ""
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW}This was a dry run. Run without --dry-run to actually move files.${NC}"
fi
echo "Done!"

scripts/cleanup-prune-files.sh (new executable file, 196 lines)

@@ -0,0 +1,196 @@
#!/bin/bash
#
# Cleanup Script - Remove Prunable Files
# This script removes duplicate files, cache artifacts, and other files identified for pruning.
#
# Usage: ./scripts/cleanup-prune-files.sh [options]
# Options:
# --dry-run Show what would be deleted without actually deleting
# --backup Create backups before deleting
# --all Run all cleanup operations
# --duplicates Remove duplicate files only
# --cache Remove cache files only
# --help Show this help message
#
set -uo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Flags
DRY_RUN=false
BACKUP=false
RUN_ALL=false
RUN_DUPLICATES=false
RUN_CACHE=false
# Counters
FILES_DELETED=0
FILES_BACKED_UP=0
FILES_SKIPPED=0
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--dry-run)
DRY_RUN=true
shift
;;
--backup)
BACKUP=true
shift
;;
--all)
RUN_ALL=true
shift
;;
--duplicates)
RUN_DUPLICATES=true
shift
;;
--cache)
RUN_CACHE=true
shift
;;
--help)
echo "Usage: $0 [options]"
echo "Options:"
echo " --dry-run Show what would be deleted without actually deleting"
echo " --backup Create backups before deleting"
echo " --all Run all cleanup operations"
echo " --duplicates Remove duplicate files only"
echo " --cache Remove cache files only"
echo " --help Show this help message"
exit 0
;;
*)
echo -e "${RED}Unknown option: $1${NC}"
echo "Use --help for usage information"
exit 1
;;
esac
done
# If no specific operation selected, default to all
if [[ "$RUN_ALL" == false && "$RUN_DUPLICATES" == false && "$RUN_CACHE" == false ]]; then
RUN_ALL=true
fi
# Function to delete file with optional backup
delete_file() {
local file="$1"
local reason="$2"
if [[ ! -f "$file" ]]; then
echo -e "${YELLOW} ⚠ Skipping (not found): $file${NC}"
((FILES_SKIPPED++))
return
fi
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW} [DRY RUN] Would delete: $file${NC}"
echo -e " Reason: $reason"
((FILES_DELETED++))
return
fi
if [[ "$BACKUP" == true ]]; then
local backup_file="${file}.backup.$(date +%Y%m%d_%H%M%S)"
cp "$file" "$backup_file"
echo -e "${GREEN} ✓ Backed up: $backup_file${NC}"
((FILES_BACKED_UP++))
fi
rm -f "$file"
echo -e "${GREEN} ✓ Deleted: $file${NC}"
echo -e " Reason: $reason"
((FILES_DELETED++))
}
# Function to delete files matching pattern
delete_files_pattern() {
local pattern="$1"
local reason="$2"
while IFS= read -r -d '' file; do
delete_file "$file" "$reason"
done < <(find . -name "$pattern" -type f -print0 2>/dev/null)
}
echo "=========================================="
echo "File Cleanup Script"
echo "=========================================="
echo ""
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW}DRY RUN MODE - No files will be deleted${NC}"
echo ""
fi
if [[ "$BACKUP" == true ]]; then
echo -e "${YELLOW}BACKUP MODE - Backups will be created${NC}"
echo ""
fi
# 1. Remove duplicate infrastructure data files from public/
if [[ "$RUN_ALL" == true || "$RUN_DUPLICATES" == true ]]; then
echo "1. Removing duplicate infrastructure data files..."
echo " (Keeping versions in docs/infrastructure/data/)"
echo ""
duplicates=(
"public/docs/infrastructure/data/cost_estimates.json"
"public/docs/infrastructure/data/deployment_timeline.json"
"public/docs/infrastructure/data/compliance_requirements.json"
)
for file in "${duplicates[@]}"; do
delete_file "$file" "Duplicate - original exists in docs/infrastructure/data/"
done
echo ""
fi
# 2. Remove webpack cache .old files
if [[ "$RUN_ALL" == true || "$RUN_CACHE" == true ]]; then
echo "2. Removing webpack cache .old files..."
echo ""
delete_files_pattern "*.old" "Old webpack cache file (will be regenerated)"
# Also target specific webpack cache locations
webpack_cache_files=(
".next/cache/webpack/client-development/index.pack.gz.old"
".next/cache/webpack/server-development/index.pack.gz.old"
"portal/.next/cache/webpack/client-development/index.pack.gz.old"
"portal/.next/cache/webpack/server-development/index.pack.gz.old"
)
for file in "${webpack_cache_files[@]}"; do
delete_file "$file" "Old webpack cache file (will be regenerated)"
done
echo ""
fi
# Summary
echo "=========================================="
echo "Summary"
echo "=========================================="
echo -e "${GREEN}Files deleted: $FILES_DELETED${NC}"
if [[ "$BACKUP" == true ]]; then
echo -e "${GREEN}Files backed up: $FILES_BACKED_UP${NC}"
fi
if [[ $FILES_SKIPPED -gt 0 ]]; then
echo -e "${YELLOW}Files skipped: $FILES_SKIPPED${NC}"
fi
echo ""
if [[ "$DRY_RUN" == true ]]; then
echo -e "${YELLOW}This was a dry run. Run without --dry-run to actually delete files.${NC}"
fi
echo "Done!"


@@ -0,0 +1,264 @@
#!/usr/bin/env python3
"""
Generate Markdown Reference Index
Creates a comprehensive reference mapping Markdown content to source files and line numbers.
"""
import json
import re
from pathlib import Path
from typing import Dict, List, Tuple
from collections import defaultdict
from datetime import datetime
def extract_headings_with_lines(content: str) -> List[Dict]:
"""Extract all headings with their line numbers."""
headings = []
for line_num, line in enumerate(content.split('\n'), 1):
match = re.match(r'^(#{1,6})\s+(.+)$', line.strip())
if match:
level = len(match.group(1))
text = match.group(2).strip()
headings.append({
'level': level,
'text': text,
'line': line_num
})
return headings
def extract_code_references(content: str) -> List[Dict]:
"""Extract code references (file paths, function names, etc.)."""
references = []
# Pattern for code references: file paths, function names, etc.
patterns = [
(r'`([^`]+\.(ts|tsx|js|jsx|go|py|sql|yaml|yml|json))`', 'file'),
(r'`([a-zA-Z_][a-zA-Z0-9_]*\([^)]*\))`', 'function'),
(r'\[([^\]]+)\]\(([^\)]+)\)', 'link'),
(r'`([A-Z_][A-Z0-9_]+)`', 'constant'),
]
for line_num, line in enumerate(content.split('\n'), 1):
for pattern, ref_type in patterns:
for match in re.finditer(pattern, line):
if ref_type == 'link':
references.append({
'type': ref_type,
'text': match.group(1),
'target': match.group(2),
'line': line_num
})
else:
references.append({
'type': ref_type,
'value': match.group(1),
'line': line_num
})
return references
def extract_sections(content: str, headings: List[Dict]) -> List[Dict]:
"""Extract content sections based on headings."""
sections = []
lines = content.split('\n')
for i, heading in enumerate(headings):
start_line = heading['line']
# Find end of section: the next heading of the same or a higher level, or end of file
end_line = len(lines)
for next_heading in headings[i + 1:]:
    if next_heading['level'] <= heading['level']:
        end_line = next_heading['line'] - 1
        break
section_content = '\n'.join(lines[start_line - 1:end_line])
sections.append({
'heading': heading['text'],
'level': heading['level'],
'start_line': start_line,
'end_line': end_line,
'line_count': end_line - start_line + 1,
'content_preview': section_content[:200] + '...' if len(section_content) > 200 else section_content
})
return sections
def generate_reference_mapping(index_file: str, output_file: str):
"""Generate comprehensive reference mapping."""
# Load existing index
with open(index_file, 'r', encoding='utf-8') as f:
index_data = json.load(f)
reference_map = {
'metadata': {
'total_files': len(index_data['index']),
'generated_at': datetime.now().isoformat()  # timestamp of this run, not the script's mtime
},
'by_file': {},
'by_heading': defaultdict(list),
'by_category': defaultdict(list),
'cross_references': defaultdict(list)
}
# Process each file
for file_path, file_data in index_data['index'].items():
file_path_obj = Path(file_path)
# Read full content for detailed analysis
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
full_content = f.read()
except Exception as e:
print(f"Warning: Could not read {file_path}: {e}")
continue
# Extract detailed information
headings = extract_headings_with_lines(full_content)
code_refs = extract_code_references(full_content)
sections = extract_sections(full_content, headings)
# Categorize file
category = 'other'
if file_path.startswith('docs/'):
parts = file_path.split('/')
if len(parts) > 1:
if parts[1] in ['api', 'architecture', 'proxmox', 'runbooks', 'status', 'archive']:
category = parts[1]
else:
category = 'docs'
else:
category = 'docs'
elif file_path.startswith('api/'):
category = 'api'
elif file_path.startswith('portal/'):
category = 'portal'
# Build file entry
file_entry = {
'path': file_path,
'title': file_data.get('title', ''),
'category': category,
'line_count': file_data['line_count'],
'size_bytes': file_data['size_bytes'],
'headings': headings,
'sections': sections,
'code_references': code_refs,
'links': file_data.get('links', []),
'code_blocks': file_data.get('code_blocks', 0)
}
reference_map['by_file'][file_path] = file_entry
# Index by heading
for heading in headings:
reference_map['by_heading'][heading['text'].lower()].append({
'file': file_path,
'line': heading['line'],
'level': heading['level']
})
# Index by category
reference_map['by_category'][category].append(file_path)
# Extract cross-references (links to other markdown files)
for link in file_data.get('links', []):
link_target = link.get('url', '')
if link_target.split('#', 1)[0].endswith('.md'):  # treat FILE.md and FILE.md#anchor links as markdown cross-references
# Normalize link target
if link_target.startswith('./'):
link_target = str(file_path_obj.parent / link_target[2:])
elif link_target.startswith('../'):
link_target = str(file_path_obj.parent.parent / link_target[3:])
reference_map['cross_references'][file_path].append({
'target': link_target,
'text': link.get('text', ''),
'line': link.get('line', 0)
})
# Save reference mapping
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(reference_map, f, indent=2, ensure_ascii=False)
# Generate human-readable report
report_file = output_file.replace('.json', '.md')
generate_markdown_report(reference_map, report_file)
print(f"Reference mapping saved to: {output_file}")
print(f"Human-readable report saved to: {report_file}")
return reference_map
def generate_markdown_report(reference_map: Dict, output_file: str):
"""Generate human-readable Markdown report."""
with open(output_file, 'w', encoding='utf-8') as f:
f.write("# Markdown Reference Index\n\n")
f.write(f"**Generated**: {reference_map['metadata']['generated_at']}\n")
f.write(f"**Total Files**: {reference_map['metadata']['total_files']}\n\n")
f.write("---\n\n")
# Files by category
f.write("## Files by Category\n\n")
for category in sorted(reference_map['by_category'].keys()):
files = reference_map['by_category'][category]
f.write(f"### {category} ({len(files)} files)\n\n")
for file_path in sorted(files)[:20]:
file_entry = reference_map['by_file'][file_path]
f.write(f"- [{file_entry['title'] or file_path}](./{file_path}) - {file_entry['line_count']} lines\n")
if len(files) > 20:
f.write(f" *... and {len(files) - 20} more files*\n")
f.write("\n")
# Heading index
f.write("## Heading Index\n\n")
f.write("*Top 50 most common headings*\n\n")
heading_counts = [(h, len(refs)) for h, refs in reference_map['by_heading'].items()]
heading_counts.sort(key=lambda x: x[1], reverse=True)
for heading, count in heading_counts[:50]:
refs = reference_map['by_heading'][heading]
f.write(f"### {heading} ({count} occurrences)\n\n")
for ref in refs[:5]:
f.write(f"- Line {ref['line']}: [{ref['file']}](./{ref['file']}#{heading.lower().replace(' ', '-')})\n")
if len(refs) > 5:
f.write(f" *... and {len(refs) - 5} more occurrences*\n")
f.write("\n")
# File details
f.write("## File Details\n\n")
f.write("*Files with headings and line numbers*\n\n")
for file_path in sorted(reference_map['by_file'].keys())[:30]:
file_entry = reference_map['by_file'][file_path]
f.write(f"### {file_path}\n\n")
f.write(f"**Title**: {file_entry['title'] or 'N/A'}\n")
f.write(f"**Lines**: {file_entry['line_count']}\n")
f.write(f"**Headings**: {len(file_entry['headings'])}\n\n")
if file_entry['headings']:
f.write("**Headings**:\n")
for heading in file_entry['headings'][:10]:
indent = ' ' * (heading['level'] - 1)
f.write(f"{indent}- Line {heading['line']}: {heading['text']}\n")
if len(file_entry['headings']) > 10:
f.write(f" *... and {len(file_entry['headings']) - 10} more headings*\n")
f.write("\n")
if __name__ == '__main__':
import sys
index_file = 'docs/MARKDOWN_INDEX.json'
output_file = 'docs/MARKDOWN_REFERENCE.json'
if len(sys.argv) > 1:
index_file = sys.argv[1]
if len(sys.argv) > 2:
output_file = sys.argv[2]
reference_map = generate_reference_mapping(index_file, output_file)
print("\nReference mapping generation complete!")


@@ -0,0 +1,129 @@
#!/usr/bin/env python3
"""
Update Markdown Links After Reorganization
Updates links to moved files in documentation.
"""
import os
import re
from pathlib import Path
# Mapping of old paths to new paths (relative to docs/)
FILE_MOVES = {
'AUDIT_SUMMARY.md': 'reports/AUDIT_SUMMARY.md',
'COMPREHENSIVE_AUDIT_REPORT.md': 'reports/COMPREHENSIVE_AUDIT_REPORT.md',
'PROXMOX_COMPREHENSIVE_AUDIT_REPORT.md': 'reports/PROXMOX_COMPREHENSIVE_AUDIT_REPORT.md',
'REPOSITORY_AUDIT_REPORT.md': 'reports/REPOSITORY_AUDIT_REPORT.md',
'PROJECT_COMPREHENSIVE_REVIEW.md': 'reports/PROJECT_COMPREHENSIVE_REVIEW.md',
'REVIEW_ITEMS_COMPLETED.md': 'reports/REVIEW_ITEMS_COMPLETED.md',
'DOCUMENTATION_DEEP_DIVE_ANALYSIS.md': 'reports/DOCUMENTATION_DEEP_DIVE_ANALYSIS.md',
'DOCUMENTATION_FIXES_APPLIED.md': 'reports/DOCUMENTATION_FIXES_APPLIED.md',
'DOCUMENTATION_COMPLETE_SUMMARY.md': 'summaries/DOCUMENTATION_COMPLETE_SUMMARY.md',
'IMPLEMENTATION_SUMMARY.md': 'summaries/IMPLEMENTATION_SUMMARY.md',
'BUILD_AND_DEPLOY_INSTRUCTIONS.md': 'guides/BUILD_AND_DEPLOY_INSTRUCTIONS.md',
'FORCE_UNLOCK_INSTRUCTIONS.md': 'guides/FORCE_UNLOCK_INSTRUCTIONS.md',
'QUICK_INSTALL_GUEST_AGENT.md': 'guides/QUICK_INSTALL_GUEST_AGENT.md',
'enable-guest-agent-manual.md': 'guides/enable-guest-agent-manual.md',
'GUEST_AGENT_CHECKLIST.md': 'guest-agent/GUEST_AGENT_CHECKLIST.md',
'GUEST_AGENT_CONFIGURATION_ANALYSIS.md': 'guest-agent/GUEST_AGENT_CONFIGURATION_ANALYSIS.md',
'VM_CREATION_PROCEDURE.md': 'vm/VM_CREATION_PROCEDURE.md',
'VM_DEPLOYMENT_CHECKLIST.md': 'vm/VM_DEPLOYMENT_CHECKLIST.md',
'VM_SPECIFICATIONS.md': 'vm/VM_SPECIFICATIONS.md',
'COPY_SCRIPT_TO_PROXMOX_NODES.md': 'reference/COPY_SCRIPT_TO_PROXMOX_NODES.md',
'SCRIPT_COPIED_TO_PROXMOX_NODES.md': 'reference/SCRIPT_COPIED_TO_PROXMOX_NODES.md',
'CODE_INCONSISTENCIES.md': 'reference/CODE_INCONSISTENCIES.md',
'DEPLOYMENT_NEXT_STEPS.md': 'deployment/DEPLOYMENT_NEXT_STEPS.md',
'DEPLOYMENT_READY.md': 'deployment/DEPLOYMENT_READY.md',
'PRE_DEPLOYMENT_CHECKLIST.md': 'deployment/PRE_DEPLOYMENT_CHECKLIST.md',
}
def calculate_relative_path(from_file: Path, to_file: str) -> str:
"""Calculate relative path from one file to another."""
from_dir = from_file.parent
to_path = Path('docs') / to_file
try:
rel_path = os.path.relpath(to_path, from_dir)
# Normalize path separators for markdown
return rel_path.replace('\\', '/')
except ValueError:
# Fall back to the mapped path if a relative path cannot be computed
return to_file
def update_links_in_file(file_path: Path, dry_run: bool = True):
"""Update links in a single file."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
original_content = content
updated = False
for old_file, new_file in FILE_MOVES.items():
# Pattern 1: [text](./FILE.md) or [text](FILE.md)
pattern1 = rf'(\[[^\]]+\]\()\.?/?{re.escape(old_file)}(#[^\)]+)?(\))'
def replace1(match):
new_path = calculate_relative_path(file_path, new_file)
anchor = match.group(2) or ''
return f"{match.group(1)}{new_path}{anchor}{match.group(3)}"
if re.search(pattern1, content):
content = re.sub(pattern1, replace1, content)
updated = True
# Pattern 2: [text](./FILE.md#anchor)
pattern2 = rf'(\[[^\]]+\]\(\./){re.escape(old_file)}(#[^\)]+)?(\))'
def replace2(match):
new_path = calculate_relative_path(file_path, new_file)
anchor = match.group(2) or ''
return f"{match.group(1)}{new_path}{anchor}{match.group(3)}"
if re.search(pattern2, content):
content = re.sub(pattern2, replace2, content)
updated = True
if updated and content != original_content:
if not dry_run:
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
return True
return False
except Exception as e:
print(f"Error processing {file_path}: {e}")
return False
def main():
import sys
dry_run = '--dry-run' in sys.argv or '-n' in sys.argv
if not dry_run:
response = input("This will modify files. Continue? (yes/no): ")
if response.lower() != 'yes':
print("Aborted.")
return
docs_dir = Path('docs')
md_files = list(docs_dir.rglob('*.md'))
updated_count = 0
for md_file in md_files:
# Skip the moved files themselves
if any(md_file.name == old_file for old_file in FILE_MOVES.keys()):
continue
if update_links_in_file(md_file, dry_run=dry_run):
updated_count += 1
if dry_run:
print(f"Would update: {md_file}")
else:
print(f"Updated: {md_file}")
if dry_run:
print(f"\nDry run complete. {updated_count} files would be updated.")
print("Run without --dry-run to apply changes.")
else:
print(f"\nUpdated {updated_count} files.")
if __name__ == '__main__':
main()
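As a worked example of the rewrite (the source file here is hypothetical): if docs/README.md contained the link [Audit Summary](./AUDIT_SUMMARY.md), the mapping sends AUDIT_SUMMARY.md to reports/AUDIT_SUMMARY.md, calculate_relative_path resolves that to reports/AUDIT_SUMMARY.md relative to docs/, and the link is rewritten to [Audit Summary](reports/AUDIT_SUMMARY.md). Running with --dry-run first lists the files that would change without modifying them.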