Implement standardized playbook organization with master orchestrator and
Ansible collections requirements for extended functionality.

Playbook Structure:

    playbooks/
    ├── gather_system_info.yml   # System inventory gathering
    ├── deploy_vm.yml            # VM deployment (placeholder)
    ├── security_audit.yml       # Security compliance checking (placeholder)
    ├── maintenance.yml          # Routine maintenance tasks (placeholder)
    ├── backup.yml               # Backup operations (placeholder)
    └── disaster_recovery.yml    # DR procedures (placeholder)

Master Playbook (site.yml):
- Entry point for all infrastructure operations
- Import structure for modular playbook organization
- Tag-based execution for selective operations
- Pre-flight checks and validations
- Comprehensive documentation and usage examples

Collections Requirements (collections/requirements.yml):
- community.general: Essential utilities and modules
- community.libvirt: KVM/libvirt management
- ansible.posix: POSIX system administration
- amazon.aws: AWS infrastructure management (optional)
- Community versions for open-source compatibility

Implemented Playbooks:

1. gather_system_info.yml:
   - Comprehensive system information gathering
   - Uses system_info role
   - Statistics export to ./stats/machines/
   - Health checks and validation
   - Tag support: install, gather, export, validate, health-check

2. Placeholder Playbooks (documented structure):
   - deploy_vm.yml: VM provisioning with deploy_linux_vm role
   - security_audit.yml: CIS benchmark compliance checking
   - maintenance.yml: Updates, cleanup, optimization
   - backup.yml: Backup operations orchestration
   - disaster_recovery.yml: DR procedures and testing

site.yml Master Playbook Features:
- Central orchestration point
- Import-based playbook inclusion
- Tag inheritance and selective execution
- Environment-aware (development, staging, production)
- Pre-flight validation checks
- Error handling and rollback support
- Comprehensive inline documentation

Usage Examples:

```bash
# Run all playbooks
ansible-playbook site.yml

# Run specific playbook
ansible-playbook site.yml --tags gather_info

# Gather system information only
ansible-playbook playbooks/gather_system_info.yml

# Check syntax
ansible-playbook site.yml --syntax-check

# Dry run
ansible-playbook site.yml --check

# Limit to specific hosts
ansible-playbook site.yml -l webservers
```

Collections Management:
- Install: ansible-galaxy collection install -r collections/requirements.yml
- Update: ansible-galaxy collection install -r collections/requirements.yml --upgrade
- Location: ./collections/ (local) and ~/.ansible/collections (user)
- Version pinning for stability
- Community alternatives for RHEL-free deployments

CLAUDE.md Compliance:
✅ Playbooks in ./playbooks/ directory
✅ Master playbook (site.yml) at root
✅ Tag-based execution support
✅ Modular organization with import_playbook
✅ Collections requirements documented
✅ Clear separation: playbooks (lasting) vs plays (temporary)

Benefits:
- Standardized playbook organization
- Easy-to-navigate structure
- Tag-based selective execution
- Collection dependency management
- Scalable to 100+ playbooks
- Clear entry point (site.yml)
- Environment isolation

Next Steps:
1. Install collections: ansible-galaxy collection install -r collections/requirements.yml
2. Implement placeholder playbooks as needed
3. Add role-specific playbooks to playbooks/ directory
4. Create temporary plays in plays/ directory (per CLAUDE.md)
5. Test site.yml orchestration: ansible-playbook site.yml --check

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
446 lines
14 KiB
YAML
446 lines
14 KiB
YAML
---
# =============================================================================
# System Maintenance Playbook
# =============================================================================
#
# This playbook performs routine system maintenance tasks including package
# updates, log rotation, disk cleanup, and system optimization.
#
# Usage:
#   ansible-playbook playbooks/maintenance.yml
#   ansible-playbook playbooks/maintenance.yml --limit staging
#   ansible-playbook playbooks/maintenance.yml --tags updates,cleanup
#   ansible-playbook playbooks/maintenance.yml --check  # Dry-run
#   ansible-playbook playbooks/maintenance.yml --tags purge  # Destructive cleanup
#
# Tags:
#   updates   - Package updates (security only by default)
#   cleanup   - Disk cleanup and log rotation (non-destructive: reports only)
#   purge     - Opt-in deletion of stale files and container data. Never runs
#               unless requested explicitly with --tags purge.
#   optimize  - System optimization
#   verify    - Post-maintenance verification
#   reboot    - System reboot (use with caution)
#
# Notes:
#   * A task tagged [<tag>, never] still runs when <tag> is requested, so
#     destructive tasks carry only [never, purge] and are NOT part of `cleanup`.
#   * Full (non-security) upgrades are tagged [updates, never]: they run only
#     with --tags updates AND -e maintenance_security_only=false.
#   * Read-only probes set `check_mode: false` so --check still has their
#     output available for the conditions and the summary that depend on it.
#
# =============================================================================

- name: System Maintenance
  hosts: all
  become: true
  gather_facts: true
  serial: "{{ maintenance_serial | default('100%') }}"  # Control parallelism

  vars:
    # Start-of-run timestamp (taken from gathered facts).
    maintenance_timestamp: "{{ ansible_date_time.iso8601 }}"
    # Controller-side directory that receives one report per host.
    maintenance_log_dir: "./logs/maintenance/{{ ansible_date_time.date }}"
    # true  -> apply security updates only; false -> allow full upgrades.
    maintenance_security_only: true
    # Remove packages that are no longer required after updating.
    maintenance_autoremove: true
    # Not referenced by the tasks in this playbook; kept for compatibility.
    maintenance_reboot_required: false
    # true when any update task (security-only or full) reported a change.
    # Tasks skipped by tag leave their register undefined, hence the defaults.
    maintenance_updates_applied: >-
      {{
        (maintenance_debian_updates.changed | default(false))
        or (maintenance_debian_full_upgrade.changed | default(false))
        or (maintenance_rhel_updates.changed | default(false))
        or (maintenance_rhel_full_upgrade.changed | default(false))
      }}

  pre_tasks:
    - name: Create maintenance log directory
      ansible.builtin.file:
        path: "{{ maintenance_log_dir }}"
        state: directory
        mode: '0755'
      delegate_to: localhost
      become: false
      run_once: true
      tags: [always]

    # NOTE(review): `environment` is also an Ansible play/task keyword; confirm
    # the inventory variable of that name resolves as intended, or rename it.
    - name: Display maintenance banner
      ansible.builtin.debug:
        msg:
          - "========================================="
          - "System Maintenance Starting"
          - "========================================="
          - "Host: {{ inventory_hostname }}"
          - "Environment: {{ environment | default('unknown') }}"
          - "Timestamp: {{ maintenance_timestamp }}"
          - "Security Updates Only: {{ maintenance_security_only }}"
          - "========================================="
      tags: [always]

    - name: Verify host connectivity
      ansible.builtin.ping:
      tags: [always]

  tasks:
    # =========================================================================
    # Pre-Maintenance Health Check
    # =========================================================================

    # Shell is required for the redirect and the `||` fallback.
    - name: Check current disk usage
      ansible.builtin.shell: df -h / /var /tmp 2>/dev/null || df -h /
      register: maintenance_disk_before
      changed_when: false
      check_mode: false
      tags: [always, verify]

    - name: Check current memory usage
      ansible.builtin.command: free -h
      register: maintenance_memory_before
      changed_when: false
      check_mode: false
      tags: [always, verify]

    - name: Display pre-maintenance system state
      ansible.builtin.debug:
        msg:
          - "=== Pre-Maintenance System State ==="
          - "{{ maintenance_disk_before.stdout_lines | default([]) }}"
          - ""
          - "{{ maintenance_memory_before.stdout_lines | default([]) }}"
      tags: [always, verify]

    # =========================================================================
    # Package Updates (Debian/Ubuntu)
    # =========================================================================

    - name: Update package cache (Debian/Ubuntu)
      ansible.builtin.apt:
        update_cache: true
        cache_valid_time: 3600
      when: ansible_os_family == "Debian"
      tags: [updates]

    # `apt: upgrade=dist` upgrades every package regardless of origin, so it
    # cannot implement "security only". unattended-upgrade applies only the
    # origins allowed by the host's policy.
    # NOTE(review): this relies on the distribution-default origin policy
    # (security origins) in /etc/apt/apt.conf.d/50unattended-upgrades; confirm
    # on hosts where that file has been customised.
    - name: Ensure unattended-upgrades is installed (Debian/Ubuntu)
      ansible.builtin.apt:
        name: unattended-upgrades
        state: present
      when:
        - ansible_os_family == "Debian"
        - maintenance_security_only | bool
      tags: [updates]

    - name: Perform security updates only (Debian/Ubuntu)
      ansible.builtin.command: unattended-upgrade -v
      environment:
        LC_ALL: C  # Keep the message matched by changed_when untranslated
      register: maintenance_debian_updates
      changed_when: >-
        'Packages that will be upgraded' in
        ((maintenance_debian_updates.stdout | default(''))
        ~ (maintenance_debian_updates.stderr | default('')))
      when:
        - ansible_os_family == "Debian"
        - maintenance_security_only | bool
      tags: [updates]

    - name: Perform full system upgrade (Debian/Ubuntu)
      ansible.builtin.apt:
        upgrade: dist
      when:
        - ansible_os_family == "Debian"
        - not (maintenance_security_only | bool)
      register: maintenance_debian_full_upgrade
      tags: [updates, never]

    - name: Autoremove unused packages (Debian/Ubuntu)
      ansible.builtin.apt:
        autoremove: true
        purge: true
      when:
        - ansible_os_family == "Debian"
        - maintenance_autoremove | bool
      tags: [updates, cleanup]

    - name: Clean package cache (Debian/Ubuntu)
      ansible.builtin.apt:
        autoclean: true
      when: ansible_os_family == "Debian"
      tags: [cleanup]

    # =========================================================================
    # Package Updates (RHEL/CentOS/Rocky/Alma)
    # =========================================================================

    - name: Perform security updates only (RHEL)
      ansible.builtin.dnf:
        name: "*"
        state: latest
        security: true
        update_only: true
      when:
        - ansible_os_family == "RedHat"
        - maintenance_security_only | bool
      register: maintenance_rhel_updates
      tags: [updates]

    - name: Perform full system upgrade (RHEL)
      ansible.builtin.dnf:
        name: "*"
        state: latest
      when:
        - ansible_os_family == "RedHat"
        - not (maintenance_security_only | bool)
      register: maintenance_rhel_full_upgrade
      tags: [updates, never]

    - name: Autoremove unused packages (RHEL)
      ansible.builtin.dnf:
        autoremove: true
      when:
        - ansible_os_family == "RedHat"
        - maintenance_autoremove | bool
      tags: [updates, cleanup]

    - name: Clean package cache (RHEL)
      ansible.builtin.command: dnf clean all
      when: ansible_os_family == "RedHat"
      changed_when: false
      tags: [cleanup]

    # =========================================================================
    # Log Rotation and Cleanup
    # =========================================================================

    # Best effort: a logrotate failure must not abort maintenance.
    - name: Force log rotation
      ansible.builtin.command: logrotate -f /etc/logrotate.conf
      changed_when: false
      failed_when: false
      tags: [cleanup]

    - name: Find old log files (30+ days)
      ansible.builtin.find:
        paths:
          - /var/log
        patterns:
          - "*.gz"
          - "*.old"
          - "*.1"
        age: 30d
        recurse: true
      register: maintenance_old_logs
      tags: [cleanup]

    - name: Display old log files count
      ansible.builtin.debug:
        msg: "Found {{ maintenance_old_logs.files | default([]) | length }} old log files (30+ days)"
      tags: [cleanup]

    - name: Find old compressed logs (90+ days)
      ansible.builtin.find:
        paths:
          - /var/log
        patterns: "*.gz"
        age: 90d
        recurse: true
      register: maintenance_very_old_logs
      tags: [cleanup, purge]

    - name: Remove old compressed logs (90+ days)
      ansible.builtin.file:
        path: "{{ item.path }}"
        state: absent
      loop: "{{ maintenance_very_old_logs.files | default([]) }}"
      loop_control:
        label: "{{ item.path }}"
      tags: [never, purge]  # Deletes data: explicit --tags purge only

    # =========================================================================
    # Temporary Files Cleanup
    # =========================================================================

    - name: Find stale entries in /tmp (older than 10 days)
      ansible.builtin.find:
        paths: /tmp
        age: 10d
        file_type: any
      register: maintenance_tmp_files
      tags: [cleanup, purge]

    - name: Find stale entries in /var/tmp (older than 30 days)
      ansible.builtin.find:
        paths: /var/tmp
        age: 30d
        file_type: any
      register: maintenance_var_tmp_files
      tags: [cleanup, purge]

    - name: Remove old temporary files
      ansible.builtin.file:
        path: "{{ item.path }}"
        state: absent
      loop: >-
        {{
          (maintenance_tmp_files.files | default([]))
          + (maintenance_var_tmp_files.files | default([]))
        }}
      loop_control:
        label: "{{ item.path }}"
      tags: [never, purge]  # Never auto-delete: explicit --tags purge only

    # =========================================================================
    # Journal Cleanup (systemd)
    # =========================================================================

    # failed_when: false lets hosts without journalctl continue; the vacuum
    # task below is then skipped because the expected text is absent.
    - name: Check journal disk usage
      ansible.builtin.command: journalctl --disk-usage
      register: maintenance_journal_usage
      changed_when: false
      failed_when: false
      check_mode: false
      tags: [cleanup]

    - name: Display journal disk usage
      ansible.builtin.debug:
        msg: "{{ maintenance_journal_usage.stdout | default('') }}"
      tags: [cleanup]

    - name: Vacuum journal logs (keep 30 days)
      ansible.builtin.command: journalctl --vacuum-time=30d
      register: maintenance_journal_vacuum
      when: "'Archived and active journals take up' in (maintenance_journal_usage.stdout | default(''))"
      tags: [cleanup]

    # =========================================================================
    # Docker/Container Cleanup (if applicable)
    # =========================================================================

    - name: Check if Docker is installed
      ansible.builtin.command: which docker
      register: maintenance_docker_check
      changed_when: false
      failed_when: false
      check_mode: false
      tags: [cleanup, purge]

    # Removes stopped containers, unused images AND unused volumes.
    - name: Clean Docker system (if installed)
      ansible.builtin.command: docker system prune -af --volumes
      when: (maintenance_docker_check.rc | default(1)) == 0
      tags: [never, purge]

    - name: Check if Podman is installed
      ansible.builtin.command: which podman
      register: maintenance_podman_check
      changed_when: false
      failed_when: false
      check_mode: false
      tags: [cleanup, purge]

    # Removes stopped containers, unused images AND unused volumes.
    - name: Clean Podman system (if installed)
      ansible.builtin.command: podman system prune -af --volumes
      when: (maintenance_podman_check.rc | default(1)) == 0
      tags: [never, purge]

    # =========================================================================
    # System Optimization
    # =========================================================================

    # Best effort: updatedb may not be installed.
    - name: Update locate database
      ansible.builtin.command: updatedb
      when: ansible_os_family in ["Debian", "RedHat"]
      changed_when: false
      failed_when: false
      tags: [optimize]

    - name: Sync filesystem caches
      ansible.builtin.command: sync
      changed_when: false
      tags: [optimize]

    # =========================================================================
    # Check if Reboot Required
    # =========================================================================

    - name: Check if reboot is required (Debian/Ubuntu)
      ansible.builtin.stat:
        path: /var/run/reboot-required
      register: maintenance_reboot_required_debian
      when: ansible_os_family == "Debian"
      tags: [verify, reboot]

    # Exit status 1 is treated as "reboot needed" by the fact below, so the
    # non-zero status must not fail the task.
    - name: Check if reboot is required (RHEL)
      ansible.builtin.command: needs-restarting -r
      register: maintenance_reboot_required_rhel
      changed_when: false
      failed_when: false
      check_mode: false
      when: ansible_os_family == "RedHat"
      tags: [verify, reboot]

    - name: Set reboot required fact
      ansible.builtin.set_fact:
        maintenance_reboot_needed: >-
          {{
            (
              (ansible_os_family == "Debian"
               and (maintenance_reboot_required_debian.stat.exists | default(false)))
              or
              (ansible_os_family == "RedHat"
               and (maintenance_reboot_required_rhel.rc | default(0)) == 1)
            ) | bool
          }}
      tags: [verify, reboot]

    # =========================================================================
    # Post-Maintenance Verification
    # =========================================================================

    - name: Check disk usage after maintenance
      ansible.builtin.shell: df -h / /var /tmp 2>/dev/null || df -h /
      register: maintenance_disk_after
      changed_when: false
      check_mode: false
      tags: [verify]

    - name: Check memory usage after maintenance
      ansible.builtin.command: free -h
      register: maintenance_memory_after
      changed_when: false
      check_mode: false
      tags: [verify]

    # check_mode: true makes this report-only; no service is started.
    - name: Verify critical services are running
      ansible.builtin.systemd:
        name: "{{ item }}"
        state: started
      loop:
        - sshd
        - "{{ 'chronyd' if ansible_os_family == 'RedHat' else 'chrony' }}"
      check_mode: true
      tags: [verify]

  post_tasks:
    # Runs for every tag selection, so each value registered by a tagged task
    # is read through default().
    - name: Display maintenance summary
      ansible.builtin.debug:
        msg:
          - "========================================="
          - "Maintenance Summary"
          - "========================================="
          - "Host: {{ inventory_hostname }}"
          - "Environment: {{ environment | default('unknown') }}"
          - "Completed: {{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}"
          - ""
          - "=== Updates ==="
          - "Packages updated: {{ maintenance_updates_applied }}"
          - ""
          - "=== Cleanup ==="
          - "Old logs found: {{ maintenance_old_logs.files | default([]) | length }}"
          - >-
            Journal cleaned: {{ 'Yes' if (maintenance_journal_vacuum is defined
            and maintenance_journal_vacuum is not skipped) else 'No' }}
          - ""
          - "=== System State ==="
          - >-
            Disk usage after: {{ maintenance_disk_after.stdout_lines[1]
            if (maintenance_disk_after.stdout_lines | default([]) | length > 1)
            else 'N/A' }}
          - ""
          - "=== Reboot Status ==="
          - "Reboot required: {{ maintenance_reboot_needed | default(false) }}"
          - "{% if maintenance_reboot_needed | default(false) %}ACTION REQUIRED: Schedule system reboot{% endif %}"
          - ""
          - "========================================="
      tags: [always]

    - name: Save maintenance log
      ansible.builtin.copy:
        content: |
          Maintenance Report
          ==================
          Host: {{ inventory_hostname }}
          Environment: {{ environment | default('unknown') }}
          Timestamp: {{ maintenance_timestamp }}

          Updates Applied: {{ maintenance_updates_applied }}
          Reboot Required: {{ maintenance_reboot_needed | default(false) }}

          Pre-Maintenance Disk Usage:
          {{ maintenance_disk_before.stdout | default('Not collected') }}

          Post-Maintenance Disk Usage:
          {{ maintenance_disk_after.stdout | default('Not collected') }}
        dest: "{{ maintenance_log_dir }}/{{ inventory_hostname }}_maintenance.log"
      delegate_to: localhost
      become: false
      tags: [always]

# =============================================================================
# Optional: Reboot Hosts (Use with Extreme Caution!)
# =============================================================================

# Skipped unless the `reboot` (or `never`) tag is requested. Hosts are handled
# one at a time, and only those whose `maintenance_reboot_needed` fact is true
# are rebooted.
- name: Reboot Hosts (If Required)
  hosts: all
  gather_facts: false
  become: true
  serial: 1  # Reboot one host at a time
  tags: [never, reboot]

  tasks:
    # All three steps share one condition, applied to each task via the block.
    - when: maintenance_reboot_needed | default(false)
      block:
        - name: Reboot the system
          ansible.builtin.reboot:
            msg: "Reboot initiated by Ansible maintenance playbook"
            pre_reboot_delay: 5
            reboot_timeout: 600

        - name: Wait for system to come back online
          ansible.builtin.wait_for_connection:
            delay: 30
            timeout: 600

        - name: Verify system is responsive
          ansible.builtin.ping:

# =============================================================================
# Maintenance Logs
# =============================================================================
# Logs are saved to: ./logs/maintenance/<date>/<hostname>_maintenance.log
# =============================================================================