---
# =============================================================================
# Disaster Recovery Playbook
# =============================================================================
#
# This playbook orchestrates disaster recovery procedures including system
# restoration, configuration recovery, and service restoration.
#
# WARNING: This playbook performs destructive operations. Use with caution!
#
# Usage:
#   ansible-playbook playbooks/disaster_recovery.yml --limit <hostname>
#   ansible-playbook playbooks/disaster_recovery.yml --tags assess
#   ansible-playbook playbooks/disaster_recovery.yml --tags restore_config,restore_data --extra-vars "dr_backup_date=2025-01-10"
#
# Tags:
#   assess         - Assess system state and damage
#   prepare        - Prepare for recovery
#   restore_config - Restore configuration files
#   restore_data   - Restore data from backups
#   verify         - Verify restoration
#   services       - Restart services
#
# Variables (override with --extra-vars):
#   dr_backup_date - Date suffix of the backup archives to restore.
#                    With the default 'latest', the archive restore tasks
#                    are skipped (they run only for an explicit date).
#   dr_verify_only - When true, skip the confirmation prompt and every
#                    phase that modifies the host.
#
# =============================================================================

- name: Disaster Recovery Procedures
  hosts: all
  become: true
  gather_facts: true
  serial: 1  # Process one host at a time

  vars:
    dr_timestamp: "{{ ansible_date_time.iso8601 }}"
    dr_log_dir: "./logs/disaster_recovery/{{ ansible_date_time.date }}"
    dr_backup_source: "/var/backups"
    dr_restore_target: "/"
    # Literal default. A self-referencing "{{ dr_backup_date | default(...) }}"
    # here triggers a recursive-template error whenever the extra-var is absent;
    # extra-vars still take precedence over this play var.
    dr_backup_date: "latest"
    dr_verify_only: false

  pre_tasks:
    - name: Create disaster recovery log directory
      file:
        path: "{{ dr_log_dir }}"
        state: directory
        mode: '0755'
      delegate_to: localhost
      become: false
      run_once: true
      tags: [always]

    # NOTE(review): `environment` is also the name of an Ansible play/task
    # keyword; confirm it resolves to the intended inventory variable here.
    - name: Display disaster recovery warning
      debug:
        msg:
          - "========================================="
          - "!! DISASTER RECOVERY MODE !!"
          - "========================================="
          - "Host: {{ inventory_hostname }}"
          - "Environment: {{ environment | default('unknown') }}"
          - "Timestamp: {{ dr_timestamp }}"
          - "Backup Date: {{ dr_backup_date }}"
          - ""
          - "WARNING: This playbook performs destructive operations!"
          - "Ensure you have confirmed the recovery plan."
          - "========================================="
      tags: [always]

    # `| bool` is required: a value passed as --extra-vars "dr_verify_only=false"
    # arrives as a non-empty string, which would otherwise be truthy.
    - name: Confirm disaster recovery initiation
      pause:
        prompt: |
          !! DISASTER RECOVERY CONFIRMATION !!

          You are about to initiate disaster recovery for: {{ inventory_hostname }}

          This may overwrite existing data and configurations.

          Type 'RECOVER' to continue or Ctrl+C to abort
      register: dr_confirmation
      when: not (dr_verify_only | bool)
      tags: [always]

    - name: Validate confirmation
      assert:
        that:
          - dr_confirmation.user_input == "RECOVER"
        fail_msg: "Disaster recovery aborted - incorrect confirmation"
      when: not (dr_verify_only | bool)
      tags: [always]

  tasks:
    # =========================================================================
    # Assessment Phase
    # =========================================================================
    # Read-only checks; tagged `always` so later phases and the final report
    # can rely on the registered results.
    - name: Assess current system state
      tags: [assess, always]
      block:
        - name: Check system accessibility
          ping:

        - name: Gather system facts
          setup:

        - name: Check critical filesystems
          shell: df -h / /var /home /opt 2>/dev/null || df -h
          register: dr_filesystem_status
          changed_when: false
          failed_when: false

        # No `state` is given, so this only queries the unit status.
        - name: Check critical services status
          systemd:
            name: "{{ item }}"
          loop:
            - sshd
            - "{{ 'chronyd' if ansible_os_family == 'RedHat' else 'chrony' }}"
          register: dr_services_status
          failed_when: false

        # Must be `shell`: the `command` module does not interpret pipes.
        - name: Check for corrupted files
          shell: dmesg | grep -i "error\|fail\|corrupt" | tail -20
          register: dr_dmesg_errors
          changed_when: false
          failed_when: false

        - name: Display assessment results
          debug:
            msg:
              - "=== System Assessment ==="
              - "OS: {{ ansible_distribution }} {{ ansible_distribution_version }}"
              - "Uptime: {{ ansible_uptime_seconds | default(0) // 3600 }} hours"
              - "Filesystems: {{ dr_filesystem_status.stdout_lines[:5] }}"
              - ""
              - "Recent errors:"
              - "{{ dr_dmesg_errors.stdout_lines[:10] }}"

    # =========================================================================
    # Preparation Phase
    # =========================================================================
    - name: Prepare for recovery
      when: not (dr_verify_only | bool)
      tags: [prepare]
      block:
        - name: Create recovery snapshot timestamp
          set_fact:
            dr_recovery_snapshot: "{{ ansible_date_time.epoch }}"

        # These services are not restarted by this playbook afterwards; see
        # "Next Steps" in the recovery summary.
        - name: Stop non-critical services
          systemd:
            name: "{{ item }}"
            state: stopped
          loop:
            - "{{ 'httpd' if ansible_os_family == 'RedHat' else 'apache2' }}"
            - nginx
            - docker
          failed_when: false

        # The archive contains sensitive files (e.g. /etc/shadow) and lives in
        # /tmp, so restrict it to the owner.
        - name: Create pre-recovery backup
          archive:
            path: /etc
            dest: "/tmp/pre_recovery_etc_{{ dr_recovery_snapshot }}.tar.gz"
            mode: '0600'
          failed_when: false

        - name: Sync filesystems
          command: sync
          changed_when: false

    # =========================================================================
    # Configuration Restoration
    # =========================================================================
    # Archive sources are read from the control node (no `remote_src`) and
    # extracted on the target host. Failures are tolerated (best effort).
    - name: Restore system configuration
      when: not (dr_verify_only | bool)
      tags: [restore_config]
      block:
        # NOTE(review): this looks for config_backup_*.tar.gz, while the restore
        # tasks below read etc_backup_<date>.tar.gz - confirm the naming scheme.
        - name: Find available configuration backups
          find:
            paths: "{{ dr_backup_source }}/config"
            patterns: "config_backup_*.tar.gz"
          register: dr_config_backups
          delegate_to: localhost
          become: false

        - name: Display available backups
          debug:
            msg: "Found {{ dr_config_backups.files | length }} configuration backups"

        - name: Restore /etc configuration
          unarchive:
            src: "{{ dr_backup_source }}/config/etc_backup_{{ dr_backup_date }}.tar.gz"
            dest: /
          when: dr_backup_date != 'latest'
          failed_when: false

        - name: Restore SSH configuration
          copy:
            src: "{{ dr_backup_source }}/config/ssh_config_backup.tar.gz"
            dest: /tmp/ssh_config.tar.gz
            mode: '0600'
          failed_when: false

        - name: Extract SSH configuration
          unarchive:
            src: /tmp/ssh_config.tar.gz
            dest: /etc/ssh
            remote_src: true
          failed_when: false

    # =========================================================================
    # Data Restoration (Placeholder - Customize per infrastructure)
    # =========================================================================
    - name: Restore application data
      when: not (dr_verify_only | bool)
      tags: [restore_data]
      block:
        - name: Restore /opt applications
          unarchive:
            src: "{{ dr_backup_source }}/data/opt_backup_{{ dr_backup_date }}.tar.gz"
            dest: /
          when: dr_backup_date != 'latest'
          failed_when: false

        - name: Restore /var/lib application data
          unarchive:
            src: "{{ dr_backup_source }}/data/var_lib_backup_{{ dr_backup_date }}.tar.gz"
            dest: /
          when: dr_backup_date != 'latest'
          failed_when: false

        # Runs on the target host, so the dump is looked up there (unlike the
        # archives above). The path is shell-quoted before interpolation.
        - name: Restore database dumps (if present)
          shell: |
            dump={{ (dr_backup_source ~ '/databases/mysql_dump_' ~ dr_backup_date ~ '.sql.gz') | quote }}
            if [ -f "$dump" ]; then
              gunzip < "$dump" | mysql
            fi
          failed_when: false

    # =========================================================================
    # File Permissions and Ownership
    # =========================================================================
    - name: Fix file permissions and ownership
      when: not (dr_verify_only | bool)
      tags: [restore_config, restore_data]
      block:
        - name: Restore /etc permissions
          file:
            path: /etc
            mode: '0755'
            state: directory

        - name: Restore SSH directory permissions
          file:
            path: /etc/ssh
            mode: '0755'
            owner: root
            group: root
            state: directory

        # A missing key type makes the task fail for that item; tolerated.
        - name: Restore SSH key permissions
          file:
            path: "/etc/ssh/ssh_host_{{ item }}_key"
            mode: '0600'
            owner: root
            group: root
          loop:
            - rsa
            - ecdsa
            - ed25519
          failed_when: false

        - name: Run SELinux relabel (RHEL)
          command: restorecon -R /etc /var
          when: ansible_os_family == "RedHat"
          failed_when: false

    # =========================================================================
    # Service Restoration
    # =========================================================================
    # Guarded like the other mutating phases so that verify-only runs do not
    # restart anything.
    - name: Restart critical services
      when: not (dr_verify_only | bool)
      tags: [services]
      block:
        - name: Reload systemd daemon
          systemd:
            daemon_reload: true

        - name: Restart SSH service
          systemd:
            name: sshd
            state: restarted
            enabled: true

        - name: Restart time synchronization
          systemd:
            name: "{{ 'chronyd' if ansible_os_family == 'RedHat' else 'chrony' }}"
            state: restarted
            enabled: true

        - name: Restart auditd
          systemd:
            name: auditd
            state: restarted
            enabled: true
          failed_when: false

        - name: Restart firewall
          systemd:
            name: "{{ 'firewalld' if ansible_os_family == 'RedHat' else 'ufw' }}"
            state: restarted
            enabled: true
          failed_when: false

    # =========================================================================
    # Verification Phase
    # =========================================================================
    - name: Verify system recovery
      tags: [verify, always]
      block:
        # Probe the address and port Ansible actually connects to; the inventory
        # name is only a fallback because it may be an alias.
        - name: Test SSH connectivity
          wait_for:
            host: "{{ ansible_host | default(inventory_hostname) }}"
            port: "{{ ansible_port | default(22) }}"
            timeout: 60
          delegate_to: localhost
          become: false

        # `state: started` starts a stopped unit rather than only reporting it.
        - name: Verify critical services
          systemd:
            name: "{{ item }}"
            state: started
          loop:
            - sshd
            - "{{ 'chronyd' if ansible_os_family == 'RedHat' else 'chrony' }}"
            - auditd
          register: dr_service_verification

        - name: Check filesystem integrity
          command: df -h
          register: dr_fs_verification
          changed_when: false

        - name: Verify NTP synchronization
          command: timedatectl status
          register: dr_ntp_verification
          changed_when: false

        - name: Run configuration tests
          command: "{{ item }}"
          loop:
            - sshd -t
            - "{{ 'firewall-cmd --check-config' if ansible_os_family == 'RedHat' else 'ufw status' }}"
          register: dr_config_tests
          changed_when: false
          failed_when: false

  post_tasks:
    # "Yes" means the phase was selected for this run (explicit tag, or an
    # untagged run where ansible_run_tags is ['all']) and was not suppressed by
    # dr_verify_only. Individual tasks inside a phase may still have failed
    # silently or been skipped.
    # NOTE(review): the firewall line is hard-coded and does not reflect any
    # registered result.
    - name: Display recovery summary
      vars:
        dr_run_all: "{{ 'all' in ansible_run_tags }}"
        dr_mutating: "{{ not (dr_verify_only | bool) }}"
      debug:
        msg:
          - "========================================="
          - "Disaster Recovery Summary"
          - "========================================="
          - "Host: {{ inventory_hostname }}"
          - "Environment: {{ environment | default('unknown') }}"
          # now() is evaluated when this task runs; the gathered date facts
          # would report the time of fact collection instead.
          - "Recovery Completed: {{ now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ') }}"
          - ""
          - "=== Restoration Status ==="
          - "Configuration restored: {{ 'Yes' if (dr_mutating | bool) and (dr_run_all | bool or 'restore_config' in ansible_run_tags) else 'Skipped' }}"
          - "Data restored: {{ 'Yes' if (dr_mutating | bool) and (dr_run_all | bool or 'restore_data' in ansible_run_tags) else 'Skipped' }}"
          - "Services restarted: {{ 'Yes' if (dr_mutating | bool) and (dr_run_all | bool or 'services' in ansible_run_tags) else 'Skipped' }}"
          - ""
          - "=== Service Status ==="
          - "SSH: {{ 'Running' if dr_service_verification is defined else 'Unknown' }}"
          - "Firewall: Running"
          # Older timedatectl prints "NTP synchronized: yes"; newer releases
          # print "System clock synchronized: yes". Accept both.
          - "NTP: {{ 'Synchronized' if (dr_ntp_verification.stdout | default('')) is search('(NTP|System clock) synchronized: yes') else 'Not synchronized' }}"
          - ""
          - "=== Next Steps ==="
          - "1. Verify application-specific services"
          - "2. Test application functionality"
          - "3. Monitor system logs for errors"
          - "4. Update documentation"
          - "5. Conduct post-recovery review"
          - ""
          - "========================================="
      tags: [always]

    - name: Save recovery log
      copy:
        content: |
          Disaster Recovery Report
          =========================
          Host: {{ inventory_hostname }}
          Environment: {{ environment | default('unknown') }}
          Recovery Timestamp: {{ dr_timestamp }}
          Backup Date Used: {{ dr_backup_date }}

          Assessment:
          {{ dr_filesystem_status.stdout | default('Not performed') }}

          Service Verification:
          {{ dr_service_verification | default('Not performed') }}

          Configuration Tests:
          {{ dr_config_tests | default('Not performed') }}

          Recovery Status: {% if dr_verify_only | bool %}Verification Only{% else %}Complete{% endif %}
        dest: "{{ dr_log_dir }}/{{ inventory_hostname }}_recovery.log"
        mode: '0644'
      delegate_to: localhost
      become: false
      tags: [always]

# =============================================================================
# Disaster Recovery Logs
# =============================================================================
# Logs are saved to: ./logs/disaster_recovery/<date>/<hostname>_recovery.log
# =============================================================================