10.1 Web应用部署项目

10.1.1 项目概述

本项目演示如何使用 Ansible 自动化部署一个完整的 Web 应用栈,包括:

- Nginx 负载均衡器
- Node.js 应用服务器
- MySQL 数据库
- Redis 缓存
- SSL 证书配置
- 监控和日志收集

10.1.2 项目结构

web-app-deployment/
├── inventories/
│   ├── production/
│   │   ├── hosts.yml
│   │   ├── group_vars/
│   │   │   ├── all.yml
│   │   │   ├── webservers.yml
│   │   │   ├── databases.yml
│   │   │   └── loadbalancers.yml
│   │   └── host_vars/
│   └── staging/
│       ├── hosts.yml
│       └── group_vars/
├── roles/
│   ├── common/
│   ├── nginx/
│   ├── nodejs/
│   ├── mysql/
│   ├── redis/
│   ├── ssl/
│   └── monitoring/
├── playbooks/
│   ├── site.yml
│   ├── deploy.yml
│   ├── rollback.yml
│   └── maintenance.yml
├── templates/
├── files/
├── scripts/
└── ansible.cfg

10.1.3 Inventory 配置

# inventories/production/hosts.yml
# Production inventory. Hosts are grouped by tier; every entry maps an
# inventory alias to its address and carries its per-host settings.
---
all:
  children:
    # Nginx load balancers
    loadbalancers:
      hosts:
        lb-01:
          ansible_host: "10.0.1.10"
          nginx_worker_processes: 4
        lb-02:
          ansible_host: "10.0.1.11"
          nginx_worker_processes: 4

    # Node.js application servers
    webservers:
      hosts:
        web-01:
          ansible_host: "10.0.2.10"
          nodejs_port: 3000
          nodejs_instances: 4
        web-02:
          ansible_host: "10.0.2.11"
          nodejs_port: 3000
          nodejs_instances: 4
        web-03:
          ansible_host: "10.0.2.12"
          nodejs_port: 3000
          nodejs_instances: 4

    # MySQL servers (one master, one slave)
    databases:
      hosts:
        db-01:
          ansible_host: "10.0.3.10"
          mysql_server_id: 1
          mysql_role: "master"
        db-02:
          ansible_host: "10.0.3.11"
          mysql_server_id: 2
          mysql_role: "slave"

    # Redis servers (one master, one slave)
    cache:
      hosts:
        redis-01:
          ansible_host: "10.0.4.10"
          redis_port: 6379
          redis_role: "master"
        redis-02:
          ansible_host: "10.0.4.11"
          redis_port: 6379
          redis_role: "slave"

    # Monitoring host
    monitoring:
      hosts:
        monitor-01:
          ansible_host: "10.0.5.10"
# inventories/production/group_vars/all.yml
# Variables shared by every host in the production inventory.
---
# Application settings
app_name: mywebapp
# Default release identifier; override per run with `-e app_version=<version>`.
# The value must not reference app_version itself: a self-referencing
# "{{ app_version | default('latest') }}" makes templating fail with a
# recursive-loop error whenever no override is supplied.
app_version: latest
app_user: webapp
app_group: webapp
app_home: "/opt/{{ app_name }}"

# Environment settings
# NOTE(review): `environment` is also an Ansible play/task keyword, and Ansible
# warns about variables that use reserved names. Consider renaming it (for
# example to `deploy_environment`) together with every consumer.
environment: production
domain_name: mywebapp.com
ssl_enabled: true

# Database settings
db_name: "{{ app_name }}_prod"
db_user: "{{ app_name }}_user"
# Inventory names such as db-01 are aliases that need not resolve on the
# application hosts, so use the host's ansible_host address and fall back to
# the inventory name only when no address is set.
db_host: "{{ hostvars[groups['databases'][0]]['ansible_host'] | default(groups['databases'][0]) }}"

# Redis settings (same address resolution as db_host)
redis_host: "{{ hostvars[groups['cache'][0]]['ansible_host'] | default(groups['cache'][0]) }}"
redis_db: 0

# Monitoring settings
monitoring_enabled: true
log_level: info

# Security settings
firewall_enabled: true
selinux_enabled: true

# Backup settings
backup_enabled: true
backup_retention_days: 30

# Notification settings
notification_email: ops@mywebapp.com
slack_webhook_url: "{{ vault_slack_webhook_url }}"
# inventories/production/group_vars/webservers.yml
# Variables for the webservers group.
---
# Node.js runtime
nodejs_version: '18.x'
# Per-host nodejs_instances wins; otherwise two PM2 instances are used.
nodejs_pm2_instances: "{{ nodejs_instances | default(2) }}"
nodejs_max_memory: '512M'

# Application
# Per-host nodejs_port wins; otherwise the app listens on 3000.
app_port: "{{ nodejs_port | default(3000) }}"
# Environment variables handed to the application process. The vault_*
# values are expected to come from an encrypted vars file.
app_env_vars:
  NODE_ENV: production
  PORT: "{{ app_port }}"
  DB_HOST: "{{ db_host }}"
  DB_NAME: "{{ db_name }}"
  DB_USER: "{{ db_user }}"
  DB_PASSWORD: "{{ vault_db_password }}"
  REDIS_HOST: "{{ redis_host }}"
  REDIS_PORT: "{{ redis_port | default(6379) }}"
  SESSION_SECRET: "{{ vault_session_secret }}"
  JWT_SECRET: "{{ vault_jwt_secret }}"

# Logging
log_directory: "/var/log/{{ app_name }}"
log_rotation_size: '100M'
log_rotation_count: 10

# Health check
health_check_url: "http://localhost:{{ app_port }}/health"
health_check_timeout: 30

10.1.4 Common Role

# roles/common/tasks/main.yml
# Baseline system configuration shared by every host.
---
# Full package upgrade is opt-in via update_packages.
- name: Update system packages
  package:
    name: '*'
    state: latest
  when: update_packages | default(false)
  tags: [packages]

- name: Install essential packages
  package:
    name: "{{ essential_packages }}"
    state: present
  vars:
    essential_packages:
      - curl
      - wget
      - git
      - htop
      - vim
      - unzip
      - logrotate
      - rsync
      - fail2ban
  tags: [packages]

# The user task below assigns app_group as the primary group, which fails
# unless the group already exists, so create it first.
- name: Create application group
  group:
    name: "{{ app_group }}"
    system: true
    state: present
  tags: [users]

- name: Create application user
  user:
    name: "{{ app_user }}"
    group: "{{ app_group }}"
    home: "{{ app_home }}"
    shell: /bin/bash
    system: true
    create_home: true
  tags: [users]

# log_directory is only defined for the webservers group, while this role
# runs on every host; fall back to /var/log/<app_name> elsewhere.
- name: Create application directories
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: '0755'
  loop:
    - "{{ app_home }}"
    - "{{ app_home }}/releases"
    - "{{ app_home }}/shared"
    - "{{ app_home }}/shared/logs"
    - "{{ app_home }}/shared/config"
    - "{{ log_directory | default('/var/log/' ~ app_name) }}"
  tags: [directories]

- name: Configure timezone
  timezone:
    name: "{{ system_timezone | default('UTC') }}"
  tags: [system]

# Tags on include_tasks select only the include statement itself; `apply`
# passes the same tags to the included tasks so `--tags <tag>` runs them.
- name: Configure NTP
  include_tasks:
    file: ntp.yml
    apply:
      tags: [ntp]
  tags: [ntp]

- name: Configure firewall
  include_tasks:
    file: firewall.yml
    apply:
      tags: [firewall]
  when: firewall_enabled | default(true)
  tags: [firewall]

- name: Configure fail2ban
  include_tasks:
    file: fail2ban.yml
    apply:
      tags: [security]
  tags: [security]

- name: Setup log rotation
  include_tasks:
    file: logrotate.yml
    apply:
      tags: [logging]
  tags: [logging]
# roles/common/tasks/firewall.yml
# Firewall configuration: firewalld on RedHat-family hosts, UFW on
# Debian-family hosts. Entries of custom_firewall_ports may be written as
# "8080" or "8080/udp"; the protocol defaults to tcp.
---
- name: Install firewall packages (RedHat)
  package:
    name: firewalld
    state: present
  when: ansible_os_family == 'RedHat'

- name: Install firewall packages (Debian)
  package:
    name: ufw
    state: present
  when: ansible_os_family == 'Debian'

- name: Configure firewalld (RedHat)
  block:
    - name: Start and enable firewalld
      systemd:
        name: firewalld
        state: started
        enabled: true

    - name: Configure firewalld rules
      firewalld:
        service: "{{ item }}"
        permanent: true
        state: enabled
        immediate: true
      loop:
        - ssh
        - http
        - https
      notify: reload firewalld

    # firewalld expects "port/protocol"; append /tcp when no protocol is given.
    - name: Open custom ports
      firewalld:
        port: "{{ item if '/' in (item | string) else (item | string) ~ '/tcp' }}"
        permanent: true
        state: enabled
        immediate: true
      loop: "{{ custom_firewall_ports | default([]) }}"
      notify: reload firewalld

  when: ansible_os_family == 'RedHat'

- name: Configure UFW (Debian)
  block:
    # The allow rules are added before the firewall is switched on with a
    # default-deny policy, so the SSH session Ansible relies on is never
    # locked out.
    - name: Allow SSH
      ufw:
        rule: allow
        port: '22'
        proto: tcp

    - name: Allow HTTP/HTTPS
      ufw:
        rule: allow
        port: "{{ item }}"
        proto: tcp
      loop:
        - '80'
        - '443'

    # ufw takes port and protocol separately; split "port/proto" entries.
    - name: Allow custom ports
      ufw:
        rule: allow
        port: "{{ (item | string).split('/') | first }}"
        proto: "{{ ((item | string).split('/') + ['tcp'])[1] }}"
      loop: "{{ custom_firewall_ports | default([]) }}"

    - name: Enable UFW
      ufw:
        state: enabled
        policy: deny
        direction: incoming

  when: ansible_os_family == 'Debian'

10.1.5 Nginx Role

# roles/nginx/tasks/main.yml
# Installs Nginx and renders the load balancer configuration.
---
- name: Install Nginx
  package:
    name: nginx
    state: present
  tags: [nginx, packages]

# conf.d is listed as well because the upstream configuration below is
# written there.
- name: Create Nginx directories
  file:
    path: "{{ item }}"
    state: directory
    mode: '0755'
  loop:
    - /etc/nginx/conf.d
    - /etc/nginx/sites-available
    - /etc/nginx/sites-enabled
    - /etc/nginx/ssl
    - /var/log/nginx
  tags: [nginx, directories]

- name: Generate Nginx main configuration
  template:
    src: nginx.conf.j2
    dest: /etc/nginx/nginx.conf
    backup: true
  notify: restart nginx
  tags: [nginx, config]

- name: Generate upstream configuration
  template:
    src: upstream.conf.j2
    dest: /etc/nginx/conf.d/upstream.conf
  notify: reload nginx
  tags: [nginx, config]

- name: Generate site configuration
  template:
    src: site.conf.j2
    dest: "/etc/nginx/sites-available/{{ app_name }}"
  notify: reload nginx
  tags: [nginx, config]

- name: Enable site
  file:
    src: "/etc/nginx/sites-available/{{ app_name }}"
    dest: "/etc/nginx/sites-enabled/{{ app_name }}"
    state: link
  notify: reload nginx
  tags: [nginx, config]

- name: Remove default site
  file:
    path: /etc/nginx/sites-enabled/default
    state: absent
  notify: reload nginx
  tags: [nginx, config]

# Tags on include_tasks select only the include statement itself; `apply`
# passes them on to the included tasks so `--tags ssl` actually runs them.
- name: Configure SSL certificates
  include_tasks:
    file: ssl.yml
    apply:
      tags: [nginx, ssl]
  when: ssl_enabled | default(false)
  tags: [nginx, ssl]

- name: Start and enable Nginx
  systemd:
    name: nginx
    state: started
    enabled: true
  tags: [nginx, service]

- name: Configure log rotation for Nginx
  template:
    src: nginx.logrotate.j2
    dest: /etc/logrotate.d/nginx
  tags: [nginx, logging]
{# roles/nginx/templates/nginx.conf.j2 #}
{# Nginx main configuration file #}
{# Tunables: nginx_user, nginx_worker_processes, nginx_worker_connections,
   nginx_keepalive_timeout, ssl_enabled. The worker account is a variable
   because packages differ: set nginx_user to www-data on Debian-family
   hosts; the default stays "nginx". #}
user {{ nginx_user | default('nginx') }};
worker_processes {{ nginx_worker_processes | default('auto') }};
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections {{ nginx_worker_connections | default(1024) }};
    use epoll;
    multi_accept on;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Logging format
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    '$request_time $upstream_response_time';

    access_log /var/log/nginx/access.log main;

    # Basic settings
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout {{ nginx_keepalive_timeout | default(65) }};
    types_hash_max_size 2048;
    server_tokens off;

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_proxied any;
    gzip_comp_level 6;
{# RSS feeds are served as application/rss+xml; "application/xml+rss" is a
   common typo that never matches a real response. #}
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/json
        application/javascript
        application/rss+xml
        application/atom+xml
        image/svg+xml;

    # Rate limiting
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
    limit_req_zone $binary_remote_addr zone=login:10m rate=1r/s;

    # Security headers
    add_header X-Frame-Options DENY always;
    add_header X-Content-Type-Options nosniff always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header Referrer-Policy "strict-origin-when-cross-origin" always;

    # SSL configuration
{# default(false) keeps the template renderable when ssl_enabled is unset,
   matching how the role's tasks test the same variable. #}
{% if ssl_enabled | default(false) %}
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384;
    ssl_prefer_server_ciphers on;
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;
{% endif %}

    # Include additional configurations
    include /etc/nginx/conf.d/*.conf;
    include /etc/nginx/sites-enabled/*;
}
{# roles/nginx/templates/upstream.conf.j2 #}
{# Upstream server pools built from the webservers inventory group #}
upstream {{ app_name }}_backend {
    # Load balancing method
    {{ nginx_load_balancing_method | default('least_conn') }};

    # Backend servers
{% for host in groups['webservers'] %}
    server {{ hostvars[host]['ansible_host'] }}:{{ hostvars[host]['nodejs_port'] | default(3000) }}
           max_fails=3 fail_timeout=30s;
{% endfor %}

    # Idle keepalive connections cached per worker
    keepalive 32;
}

# API upstream (if different from main app)
{# Without a per-host api_port the API is assumed to be served by the main
   application process, so the port falls back to nodejs_port (then 3000)
   instead of a port nothing in this project listens on. #}
upstream {{ app_name }}_api {
    least_conn;
{% for host in groups['webservers'] %}
    server {{ hostvars[host]['ansible_host'] }}:{{ hostvars[host]['api_port'] | default(hostvars[host]['nodejs_port'] | default(3000)) }}
           max_fails=3 fail_timeout=30s;
{% endfor %}
    keepalive 16;
}
{# roles/nginx/templates/site.conf.j2 #}
{# Site (virtual host) configuration #}
{# ssl_enabled is read with default(false) so the template renders when the
   variable is unset, matching how the role's tasks test it. #}
# HTTP to HTTPS redirect
{% if ssl_enabled | default(false) %}
server {
    listen 80;
    server_name {{ domain_name }} www.{{ domain_name }};
    return 301 https://$server_name$request_uri;
}
{% endif %}

# Main server block
server {
{% if ssl_enabled | default(false) %}
    listen 443 ssl http2;
    ssl_certificate /etc/nginx/ssl/{{ domain_name }}.crt;
    ssl_certificate_key /etc/nginx/ssl/{{ domain_name }}.key;
{% else %}
    listen 80;
{% endif %}

    server_name {{ domain_name }} www.{{ domain_name }};

    # Security headers
{# nginx inherits add_header directives from the enclosing level only when
   the current level defines none. Declaring any header here would drop the
   http-level set, so the complete set is declared at this level. HSTS is
   emitted only when the site is actually served over TLS. #}
    add_header X-Frame-Options DENY always;
    add_header X-Content-Type-Options nosniff always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
{% if ssl_enabled | default(false) %}
    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
{% endif %}

    # Static files
{# NOTE(review): the add_header below means this location does not inherit
   the server-level security headers; repeat them here if static responses
   must carry them too. #}
    location /static/ {
        alias {{ app_home }}/current/public/;
        expires 1y;
        add_header Cache-Control "public, immutable";

        # Gzip static files
        gzip_static on;
    }

    # API endpoints
    location /api/ {
        # Rate limiting
        limit_req zone=api burst=20 nodelay;

        proxy_pass http://{{ app_name }}_api;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_cache_bypass $http_upgrade;

        # Timeouts
        proxy_connect_timeout 5s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;
    }

    # Login endpoint (stricter rate limiting)
    location /auth/login {
        limit_req zone=login burst=5 nodelay;

        proxy_pass http://{{ app_name }}_backend;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_cache_bypass $http_upgrade;
    }

    # Health check endpoint
    location /health {
        access_log off;
        proxy_pass http://{{ app_name }}_backend;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
    }

    # Main application
    location / {
        proxy_pass http://{{ app_name }}_backend;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_cache_bypass $http_upgrade;

        # Timeouts
        proxy_connect_timeout 5s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;

        # Buffer settings
        proxy_buffering on;
        proxy_buffer_size 4k;
        proxy_buffers 8 4k;
    }

    # Error pages
    error_page 404 /404.html;
    error_page 500 502 503 504 /50x.html;

    location = /50x.html {
        root /usr/share/nginx/html;
    }
}

10.1.6 Node.js Role

# roles/nodejs/tasks/main.yml
# Installs Node.js and PM2, deploys a release of the application and starts
# it under PM2.
---
# nodejs_version may be written as "18" or "18.x"; a trailing ".x" is stripped
# before the suffix is appended so the URL is always setup_<major>.x (the
# unnormalized form rendered setup_18.x.x for "18.x").
- name: Install Node.js repository (RedHat)
  shell: |
    curl -fsSL https://rpm.nodesource.com/setup_{{ nodejs_version | string | regex_replace('[.]x$', '') }}.x | bash -
  when: ansible_os_family == 'RedHat'
  tags: [nodejs, repository]

- name: Install Node.js repository (Debian)
  shell: |
    curl -fsSL https://deb.nodesource.com/setup_{{ nodejs_version | string | regex_replace('[.]x$', '') }}.x | bash -
  when: ansible_os_family == 'Debian'
  tags: [nodejs, repository]

- name: Install Node.js
  package:
    name: nodejs
    state: present
  tags: [nodejs, packages]

- name: Install PM2 globally
  npm:
    name: pm2
    global: true
    state: present
  tags: [nodejs, pm2]

- name: Create PM2 startup script
  shell: pm2 startup systemd -u {{ app_user }} --hp {{ app_home }}
  become: true
  tags: [nodejs, pm2]

- name: Create application release directory
  file:
    path: "{{ app_home }}/releases/{{ app_version }}"
    state: directory
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: '0755'
  tags: [nodejs, deployment]

# Source delivery: a git checkout when app_git_repo is defined, an rsync from
# the controller when app_source_path is defined.
- name: Download application code
  git:
    repo: "{{ app_git_repo }}"
    dest: "{{ app_home }}/releases/{{ app_version }}"
    version: "{{ app_git_branch | default('main') }}"
    force: true
  become_user: "{{ app_user }}"
  when: app_git_repo is defined
  tags: [nodejs, deployment]

- name: Copy application code (alternative)
  synchronize:
    src: "{{ app_source_path }}/"
    dest: "{{ app_home }}/releases/{{ app_version }}/"
    delete: true
    rsync_opts:
      - "--exclude=node_modules"
      - "--exclude=.git"
      - "--exclude=logs"
  become_user: "{{ app_user }}"
  when: app_source_path is defined
  tags: [nodejs, deployment]

- name: Install application dependencies
  npm:
    path: "{{ app_home }}/releases/{{ app_version }}"
    production: true
    ci: true
  become_user: "{{ app_user }}"
  tags: [nodejs, dependencies]

- name: Build application
  command: npm run build
  args:
    chdir: "{{ app_home }}/releases/{{ app_version }}"
  become_user: "{{ app_user }}"
  when: nodejs_build_required | default(true)
  tags: [nodejs, build]

- name: Create symlink to current release
  file:
    src: "{{ app_home }}/releases/{{ app_version }}"
    dest: "{{ app_home }}/current"
    state: link
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
  tags: [nodejs, deployment]

# The ecosystem template renders app_env_vars, which carries the database
# password and the session/JWT secrets, so the file is readable by its owner
# only (the same protection as the .env file below).
- name: Generate PM2 ecosystem file
  template:
    src: ecosystem.config.js.j2
    dest: "{{ app_home }}/ecosystem.config.js"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: '0600'
  tags: [nodejs, pm2]

- name: Generate environment file
  template:
    src: app.env.j2
    dest: "{{ app_home }}/.env"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: '0600'
  tags: [nodejs, config]

- name: Start application with PM2
  shell: |
    cd {{ app_home }}
    pm2 startOrRestart ecosystem.config.js --env production
    pm2 save
  become_user: "{{ app_user }}"
  tags: [nodejs, service]

- name: Configure log rotation for application
  template:
    src: app.logrotate.j2
    dest: "/etc/logrotate.d/{{ app_name }}"
  tags: [nodejs, logging]

- name: Setup health check script
  template:
    src: health_check.sh.j2
    dest: "{{ app_home }}/health_check.sh"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: '0755'
  tags: [nodejs, monitoring]

# Runs the health check script every five minutes as the application user.
- name: Setup health check cron job
  cron:
    name: "{{ app_name }} health check"
    minute: "*/5"
    job: "{{ app_home }}/health_check.sh"
    user: "{{ app_user }}"
  tags: [nodejs, monitoring]
// roles/nodejs/templates/ecosystem.config.js.j2
// PM2 生态系统配置文件
module.exports = {
  apps: [{
    name: '{{ app_name }}',
    script: '{{ app_home }}/current/{{ nodejs_entry_point | default("app.js") }}',
    cwd: '{{ app_home }}/current',
    instances: {{ nodejs_pm2_instances }},
    exec_mode: 'cluster',
    
    // Environment variables
    env: {
{% for key, value in app_env_vars.items() %}
      {{ key }}: '{{ value }}',
{% endfor %}
    },
    
    // Logging
    log_file: '{{ log_directory }}/{{ app_name }}.log',
    out_file: '{{ log_directory }}/{{ app_name }}-out.log',
    error_file: '{{ log_directory }}/{{ app_name }}-error.log',
    log_date_format: 'YYYY-MM-DD HH:mm:ss Z',
    
    // Memory management
    max_memory_restart: '{{ nodejs_max_memory }}',
    
    // Restart policy
    restart_delay: 4000,
    max_restarts: 10,
    min_uptime: '10s',
    
    // Monitoring
    watch: false,
    ignore_watch: ['node_modules', 'logs'],
    
    // Advanced options
    node_args: ['--max-old-space-size={{ nodejs_max_old_space_size | default("512") }}'],
    
    // Health check
    health_check_grace_period: 3000,
    
    // Graceful shutdown
    kill_timeout: 5000,
    listen_timeout: 3000,
    
    // Source map support
    source_map_support: {{ nodejs_source_map_support | default(false) | lower }}
  }]
};

10.1.7 MySQL Role

# roles/mysql/tasks/main.yml
# Installs MySQL, creates the application database and user, and wires up
# replication, backup and monitoring.
---
- name: Install MySQL server
  package:
    name: "{{ mysql_packages }}"
    state: present
  vars:
    mysql_packages:
      - mysql-server
      - mysql-client
      - python3-pymysql
  tags: [mysql, packages]

- name: Start and enable MySQL
  systemd:
    name: mysql
    state: started
    enabled: true
  tags: [mysql, service]

- name: Set MySQL root password
  mysql_user:
    name: root
    password: "{{ vault_mysql_root_password }}"
    login_unix_socket: /var/run/mysqld/mysqld.sock
    state: present
  tags: [mysql, security]

- name: Create MySQL configuration file
  template:
    src: my.cnf.j2
    dest: /etc/mysql/mysql.conf.d/custom.cnf
    backup: true
  notify: restart mysql
  tags: [mysql, config]

# Handlers normally run at the end of the play. Flushing here restarts MySQL
# right away so server_id and binary/relay logging from the rendered
# configuration are active before replication is set up below.
- name: Apply pending MySQL configuration changes
  meta: flush_handlers

- name: Create application database
  mysql_db:
    name: "{{ db_name }}"
    state: present
    login_user: root
    login_password: "{{ vault_mysql_root_password }}"
  tags: [mysql, database]

- name: Create application user
  mysql_user:
    name: "{{ db_user }}"
    password: "{{ vault_db_password }}"
    priv: "{{ db_name }}.*:ALL"
    host: "%"
    state: present
    login_user: root
    login_password: "{{ vault_mysql_root_password }}"
  tags: [mysql, users]

# mysql_role is a per-host inventory variable; default('') lets hosts without
# it skip replication instead of failing on an undefined variable. `apply`
# forwards the tags to the included tasks, which do not inherit them from
# the include statement.
- name: Configure MySQL replication (master)
  include_tasks:
    file: replication_master.yml
    apply:
      tags: [mysql, replication]
  when: mysql_role | default('') == 'master'
  tags: [mysql, replication]

- name: Configure MySQL replication (slave)
  include_tasks:
    file: replication_slave.yml
    apply:
      tags: [mysql, replication]
  when: mysql_role | default('') == 'slave'
  tags: [mysql, replication]

- name: Setup MySQL backup
  include_tasks:
    file: backup.yml
    apply:
      tags: [mysql, backup]
  when: backup_enabled | default(true)
  tags: [mysql, backup]

- name: Configure MySQL monitoring
  include_tasks:
    file: monitoring.yml
    apply:
      tags: [mysql, monitoring]
  when: monitoring_enabled | default(true)
  tags: [mysql, monitoring]
# roles/mysql/templates/my.cnf.j2
# MySQL configuration file
[mysqld]
# Basic settings
bind-address = 0.0.0.0
port = {{ mysql_port | default(3306) }}
socket = /var/run/mysqld/mysqld.sock
pid-file = /var/run/mysqld/mysqld.pid
datadir = {{ mysql_datadir | default('/var/lib/mysql') }}

# Character set
character-set-server = utf8mb4
collation-server = utf8mb4_unicode_ci

# InnoDB settings
innodb_buffer_pool_size = {{ mysql_innodb_buffer_pool_size | default('128M') }}
innodb_log_file_size = {{ mysql_innodb_log_file_size | default('64M') }}
innodb_file_per_table = 1
innodb_flush_log_at_trx_commit = 2
innodb_flush_method = O_DIRECT

{# The query cache was removed in MySQL 8.0, where these options stop mysqld
   from starting. They are therefore emitted only when
   mysql_query_cache_enabled is set to true (MySQL 5.7 and older). #}
{% if mysql_query_cache_enabled | default(false) %}
# Query cache
query_cache_type = 1
query_cache_size = {{ mysql_query_cache_size | default('16M') }}
query_cache_limit = {{ mysql_query_cache_limit | default('1M') }}

{% endif %}
# Connection settings
max_connections = {{ mysql_max_connections | default(100) }}
max_connect_errors = 1000
connect_timeout = 60
wait_timeout = 28800
interactive_timeout = 28800

# Logging
log_error = /var/log/mysql/error.log
slow_query_log = 1
slow_query_log_file = /var/log/mysql/slow.log
long_query_time = {{ mysql_slow_query_time | default(2) }}
log_queries_not_using_indexes = 1

# Binary logging (for replication)
{# mysql_role is a per-host inventory variable; default('') renders a
   standalone configuration for hosts that do not define it. #}
{% if mysql_role | default('') == 'master' %}
log_bin = /var/log/mysql/mysql-bin.log
server_id = {{ mysql_server_id }}
binlog_format = ROW
expire_logs_days = {{ mysql_binlog_expire_days | default(7) }}
max_binlog_size = {{ mysql_max_binlog_size | default('100M') }}
{% endif %}

# Replication settings
{% if mysql_role | default('') == 'slave' %}
server_id = {{ mysql_server_id }}
relay_log = /var/log/mysql/relay-bin.log
read_only = 1
{% endif %}

# Security
skip_name_resolve = 1
sql_mode = STRICT_TRANS_TABLES,NO_ZERO_DATE,NO_ZERO_IN_DATE,ERROR_FOR_DIVISION_BY_ZERO

[mysql]
default-character-set = utf8mb4

[client]
default-character-set = utf8mb4
port = {{ mysql_port | default(3306) }}
socket = /var/run/mysqld/mysqld.sock

10.1.8 部署 Playbook

# playbooks/site.yml
# Main deployment playbook.
---
# Baseline play: applies the common role to every host, in batches sized by
# deployment_batch_size and tolerating max_failure_percentage failures.
- name: Deploy web application infrastructure
  hosts: all
  become: true
  serial: "{{ deployment_batch_size | default('100%') }}"
  max_fail_percentage: "{{ max_failure_percentage | default(0) }}"

  pre_tasks:
    - name: Verify connectivity
      ping:
      tags:
        - always

    - name: Gather system facts
      setup:
        gather_subset: [hardware, network, virtual]
      tags:
        - always

    - name: Check system requirements
      include_tasks: tasks/pre_checks.yml
      tags:
        - pre_checks

  roles:
    - role: common
      tags:
        - common

  post_tasks:
    # Sent once per batch rather than once per host.
    - name: Send deployment start notification
      include_tasks: tasks/notifications.yml
      vars:
        notification_type: deployment_start
      run_once: true
      tags:
        - notifications

# Database servers
# Hosts are processed one at a time (serial: 1); after the mysql role has run,
# a trivial query is executed with the application credentials.
- name: Configure database servers
  hosts: databases
  become: true
  serial: 1

  roles:
    - role: mysql
      tags:
        - mysql
        - database

  post_tasks:
    - name: Verify database connectivity
      mysql_query:
        login_host: localhost
        login_user: "{{ db_user }}"
        login_password: "{{ vault_db_password }}"
        query: "SELECT 1 as test"
      tags:
        - verification

# Cache servers
- name: Configure cache servers
  hosts: cache
  become: true

  roles:
    - role: redis
      tags: [redis, cache]

  post_tasks:
    # The redis module only supports config/flush/replica commands and has no
    # "ping", so connectivity is probed with redis-cli. The task is read-only
    # (never "changed") and fails unless the server answers PONG.
    - name: Verify Redis connectivity
      command: "redis-cli -h localhost -p {{ redis_port | default(6379) }} ping"
      register: redis_ping
      changed_when: false
      failed_when: redis_ping.rc != 0 or 'PONG' not in redis_ping.stdout
      tags: [verification]

# Web application servers
# Rolled out in batches of web_deployment_batch_size hosts (default: one).
- name: Configure web servers
  hosts: webservers
  become: true
  serial: "{{ web_deployment_batch_size | default(1) }}"

  roles:
    - role: nodejs
      tags: [nodejs, application]

  post_tasks:
    # `until` is stated explicitly: Ansible releases that only honour
    # `retries` together with `until` would otherwise try once and fail
    # before the application has finished starting.
    - name: Wait for application to start
      uri:
        url: "{{ health_check_url }}"
        method: GET
        status_code: 200
      register: app_health
      until: app_health is succeeded
      retries: 30
      delay: 10
      tags: [verification]

    - name: Run application tests
      include_tasks: tasks/app_tests.yml
      tags: [testing]

# Load balancers
- name: Configure load balancers
  hosts: loadbalancers
  become: true

  roles:
    - role: nginx
      tags: [nginx, loadbalancer]

    - role: ssl
      when: ssl_enabled | default(false)
      tags: [ssl, security]

  post_tasks:
    # Inventory names such as lb-01 are aliases that need not resolve, so the
    # probe targets the host's ansible_host address and falls back to the
    # inventory name only when no address is set.
    - name: Verify load balancer
      uri:
        url: "http://{{ ansible_host | default(inventory_hostname) }}/health"
        method: GET
        status_code: 200
      tags: [verification]

# Monitoring servers
# `when` is not a valid play-level keyword (Ansible rejects the playbook), so
# the monitoring_enabled condition is attached to the role instead.
- name: Configure monitoring
  hosts: monitoring
  become: true

  roles:
    - role: monitoring
      when: monitoring_enabled | default(true)
      tags: [monitoring]

# Final verification and notifications
# Runs on the control node only, without fact gathering.
- name: Final deployment verification
  hosts: localhost
  gather_facts: false

  tasks:
    - name: Run end-to-end tests
      include_tasks: tasks/e2e_tests.yml
      tags:
        - testing
        - e2e

    - name: Send deployment success notification
      include_tasks: tasks/notifications.yml
      vars:
        notification_type: deployment_success
      tags:
        - notifications

    - name: Update deployment tracking
      include_tasks: tasks/deployment_tracking.yml
      tags:
        - tracking

10.1.9 部署脚本

#!/bin/bash
# scripts/deploy.sh
# Deployment script

# Abort on errors, on unset variables and on failures inside pipelines.
set -euo pipefail

# Configuration
# SCRIPT_DIR is the directory containing this script; PROJECT_DIR is its parent.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
# First argument: target environment (defaults to staging).
ENVIRONMENT="${1:-staging}"
# Second argument: application version (defaults to the current timestamp).
APP_VERSION="${2:-$(date +%Y%m%d_%H%M%S)}"
# Both flags can be preset through the environment and default to false.
VERBOSE="${VERBOSE:-false}"
DRY_RUN="${DRY_RUN:-false}"

# Colored output (ANSI escape sequences)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers
# Each helper prints one timestamped line to stdout: the timestamp (and, where
# present, the status symbol) is colored, followed by the message in $1.

# Prints the current time in the format shared by all log lines.
_log_timestamp() {
    date +'%Y-%m-%d %H:%M:%S'
}

# Informational message (blue timestamp).
log() {
    local stamp
    stamp="$(_log_timestamp)"
    echo -e "${BLUE}[${stamp}]${NC} $1"
}

# Success message (green timestamp and check mark).
log_success() {
    local stamp
    stamp="$(_log_timestamp)"
    echo -e "${GREEN}[${stamp}] ✓${NC} $1"
}

# Warning message (yellow timestamp and warning sign).
log_warning() {
    local stamp
    stamp="$(_log_timestamp)"
    echo -e "${YELLOW}[${stamp}] ⚠${NC} $1"
}

# Error message (red timestamp and cross).
log_error() {
    local stamp
    stamp="$(_log_timestamp)"
    echo -e "${RED}[${stamp}] ✗${NC} $1"
}

# Error handling
# Logs the message in $1 as an error and terminates the script with status 1.
error_exit() {
    log_error "$1"
    exit 1
}

# Dependency check
# Verifies that the required command-line tools are installed and that the
# inventory file and vault password file for $ENVIRONMENT exist. Exits through
# error_exit on the first missing item.
check_dependencies() {
    log "Checking dependencies..."

    local tool
    for tool in ansible-playbook git; do
        if ! command -v "$tool" >/dev/null 2>&1; then
            error_exit "$tool is required but not installed."
        fi
    done

    # Report the Ansible version (second field of the first --version line)
    local ansible_version
    ansible_version=$(ansible-playbook --version | head -n1 | awk '{print $2}')
    log "Ansible version: $ansible_version"

    # The inventory for the selected environment must exist
    local inventory_file="$PROJECT_DIR/inventories/$ENVIRONMENT/hosts.yml"
    if [[ ! -f "$inventory_file" ]]; then
        error_exit "Inventory file not found: $inventory_file"
    fi

    # So must the vault password file
    local vault_file="$HOME/.vault_pass_$ENVIRONMENT"
    if [[ ! -f "$vault_file" ]]; then
        error_exit "Vault password file not found: $vault_file"
    fi

    log_success "Dependencies check passed"
}

# Pre-deployment checks
# Pings every host, warns about root filesystems that are more than 85% full,
# and syntax-checks the site playbook. Any failing step exits through
# error_exit; high disk usage only produces a warning.
pre_deployment_checks() {
    log "Running pre-deployment checks..."

    local inventory="inventories/$ENVIRONMENT/hosts.yml"
    local vault_file="$HOME/.vault_pass_$ENVIRONMENT"
    local line host usage

    # Host connectivity
    ansible all -i "$inventory" -m ping \
        --vault-password-file "$vault_file" \
        || error_exit "Host connectivity check failed"

    # Disk space. With --one-line every host yields a single line whose first
    # field is the host name and whose last field is the command output.
    # Lines without a numeric last field (e.g. an error report) are flagged
    # instead of being fed to an arithmetic test, and a failing pipeline is
    # reported explicitly rather than aborting silently under `set -e`.
    ansible all -i "$inventory" \
        --vault-password-file "$vault_file" \
        -m shell -a "df -h / | tail -1 | awk '{print \$5}' | sed 's/%//'" \
        --one-line | while read -r line; do
            [[ -n "$line" ]] || continue
            host="${line%% *}"
            usage="${line##* }"

            if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
                log_warning "Could not determine disk usage on $host"
                continue
            fi

            # 10# forces base 10 so a leading zero is not read as octal.
            if (( 10#$usage > 85 )); then
                log_warning "High disk usage on $host: ${usage}%"
            fi
        done || error_exit "Disk space check failed"

    # Playbook syntax
    ansible-playbook playbooks/site.yml \
        -i "$inventory" \
        --vault-password-file "$vault_file" \
        --syntax-check || error_exit "Playbook syntax check failed"

    log_success "Pre-deployment checks passed"
}

# Back up the current version
# Dumps the application database to /tmp on the database hosts and writes a
# deployment_info.txt (environment, time, previously recorded version) into a
# timestamped directory under $PROJECT_DIR/backups. A failed database dump is
# reported as a warning and does not stop the deployment.
backup_current_version() {
    log "Creating backup of current version..."

    # One timestamp names both the local backup directory and the remote dump.
    local timestamp
    timestamp="$(date +%Y%m%d_%H%M%S)"
    local backup_dir="$PROJECT_DIR/backups/$ENVIRONMENT/$timestamp"
    local info_file="$backup_dir/deployment_info.txt"
    mkdir -p "$backup_dir"

    # Database dump. The password and database name go through the `quote`
    # filter so shell metacharacters in them cannot break or alter the remote
    # command, and `umask 077` keeps the dump private to the remote user.
    ansible databases -i "inventories/$ENVIRONMENT/hosts.yml" \
        --vault-password-file "$HOME/.vault_pass_$ENVIRONMENT" \
        -m shell -a "umask 077 && mysqldump -u root -p{{ vault_mysql_root_password | quote }} {{ db_name | quote }} > /tmp/backup_{{ db_name }}_${timestamp}.sql" \
        || log_warning "Database backup failed"

    # Record what was deployed before this run
    echo "Environment: $ENVIRONMENT" > "$info_file"
    echo "Timestamp: $(date)" >> "$info_file"
    echo "Previous Version: $(cat "$PROJECT_DIR/.last_deployment_$ENVIRONMENT" 2>/dev/null || echo 'unknown')" >> "$info_file"

    log_success "Backup completed: $backup_dir"
}

# Run the deployment
# Runs playbooks/site.yml for $ENVIRONMENT with $APP_VERSION. After a real
# (non dry-run) success it records the deployed version and runs the
# post-deployment tests; a dry run changes nothing, so it records nothing.
# Exits through error_exit when the playbook fails.
run_deployment() {
    log "Starting deployment to $ENVIRONMENT environment..."
    log "Application version: $APP_VERSION"

    local ansible_args=(
        "-i" "inventories/$ENVIRONMENT/hosts.yml"
        "--vault-password-file" "$HOME/.vault_pass_$ENVIRONMENT"
        "-e" "app_version=$APP_VERSION"
        "-e" "environment=$ENVIRONMENT"
    )

    if [[ "$VERBOSE" == "true" ]]; then
        ansible_args+=("-vvv")
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
        ansible_args+=("--check" "--diff")
        log_warning "Running in dry-run mode"
    fi

    if ! ansible-playbook playbooks/site.yml "${ansible_args[@]}"; then
        error_exit "Deployment failed"
    fi
    log_success "Deployment completed successfully"

    # A dry run deployed nothing: do not overwrite the version markers and do
    # not test hosts that were left untouched.
    if [[ "$DRY_RUN" == "true" ]]; then
        log_warning "Dry-run mode: skipping version tracking and post-deployment tests"
        return 0
    fi

    # Keep the version that was live before this run so it stays available
    # after the marker below is overwritten.
    local marker="$PROJECT_DIR/.last_deployment_$ENVIRONMENT"
    local previous_marker="$PROJECT_DIR/.previous_deployment_$ENVIRONMENT"
    if [[ -f "$marker" ]] && [[ "$(cat "$marker")" != "$APP_VERSION" ]]; then
        cp -f "$marker" "$previous_marker"
    fi

    # Record the version that is now deployed
    echo "$APP_VERSION" > "$marker"

    run_post_deployment_tests
}

# Post-deployment tests
# Requests /health from every load balancer (any failure exits through
# error_exit) and then runs tests/integration_tests.sh when that file exists;
# failing integration tests only produce a warning.
run_post_deployment_tests() {
    log "Running post-deployment tests..."

    # Health check. Inventory names such as lb-01 are aliases that need not
    # resolve, so the request targets the host's ansible_host address and
    # falls back to the inventory name only when no address is set.
    ansible loadbalancers -i "inventories/$ENVIRONMENT/hosts.yml" \
        --vault-password-file "$HOME/.vault_pass_$ENVIRONMENT" \
        -m uri -a "url=http://{{ ansible_host | default(inventory_hostname) }}/health method=GET status_code=200" \
        || error_exit "Health check failed"

    # Application-level tests
    local integration_tests="$PROJECT_DIR/tests/integration_tests.sh"
    if [[ -f "$integration_tests" ]]; then
        log "Running integration tests..."
        bash "$integration_tests" "$ENVIRONMENT" || log_warning "Integration tests failed"
    fi

    log_success "Post-deployment tests completed"
}

# Rollback
# Runs playbooks/rollback.yml for $ENVIRONMENT. The target version is read
# from .previous_deployment_<env> when that marker exists; otherwise from
# .last_deployment_<env>, which holds the most recently recorded deployment.
# Exits through error_exit when no version is recorded or the playbook fails.
rollback() {
    local previous_version=""
    local marker

    # First readable marker wins.
    for marker in \
        "$PROJECT_DIR/.previous_deployment_$ENVIRONMENT" \
        "$PROJECT_DIR/.last_deployment_$ENVIRONMENT"; do
        if [[ -s "$marker" ]]; then
            previous_version=$(cat "$marker" 2>/dev/null || echo '')
            break
        fi
    done

    if [[ -z "$previous_version" ]]; then
        error_exit "No previous version found for rollback"
    fi

    log_warning "Rolling back to version: $previous_version"

    ansible-playbook playbooks/rollback.yml \
        -i "inventories/$ENVIRONMENT/hosts.yml" \
        --vault-password-file "$HOME/.vault_pass_$ENVIRONMENT" \
        -e "rollback_version=$previous_version" \
        -e "environment=$ENVIRONMENT" \
        || error_exit "Rollback failed"

    log_success "Rollback completed"
}

# Print the usage text (arguments, options, environment variables, examples)
# to stdout. The script name is taken from $0.
show_help() {
    local prog="$0"

    cat << USAGE
Usage: ${prog} [ENVIRONMENT] [VERSION] [OPTIONS]

Arguments:
  ENVIRONMENT    Target environment (staging, production)
  VERSION        Application version (default: timestamp)

Options:
  --dry-run      Run in dry-run mode (no actual changes)
  --verbose      Enable verbose output
  --rollback     Rollback to previous version
  --help         Show this help message

Environment Variables:
  VERBOSE        Enable verbose output (true/false)
  DRY_RUN        Run in dry-run mode (true/false)

Examples:
  ${prog} staging
  ${prog} production v1.2.3
  ${prog} staging --dry-run
  ${prog} production --rollback
  VERBOSE=true ${prog} staging
USAGE
}

# Main entry point.
# Parses the command line, validates the target environment, then runs either
# a rollback or the full deployment pipeline.
#
# Arguments: the script's "$@" - [ENVIRONMENT] [VERSION] [OPTIONS], where
# options may appear anywhere on the command line.
#
# Fix: the previous loop stopped at the first positional argument, so the
# documented invocations "$0 staging --dry-run" and "$0 production --rollback"
# never saw their options. All arguments are now scanned, and the rollback is
# deferred until the environment has been validated.
main() {
    cd "$PROJECT_DIR"

    local do_rollback="false"
    local -a positional=()

    # Parse command-line arguments
    while [[ $# -gt 0 ]]; do
        case $1 in
            --dry-run)
                DRY_RUN="true"
                ;;
            --verbose)
                VERBOSE="true"
                ;;
            --rollback)
                do_rollback="true"
                ;;
            --help)
                show_help
                exit 0
                ;;
            -*)
                error_exit "Unknown option: $1"
                ;;
            *)
                positional+=("$1")
                ;;
        esac
        shift
    done

    # Positional arguments win over whatever the script header derived.
    # NOTE(review): assumes the header defaults ENVIRONMENT/APP_VERSION from
    # the raw $1/$2 - confirm against the top of the script.
    if [[ ${#positional[@]} -ge 1 ]]; then
        ENVIRONMENT="${positional[0]}"
    fi
    if [[ ${#positional[@]} -ge 2 ]]; then
        APP_VERSION="${positional[1]}"
    elif [[ "${APP_VERSION:-}" == -* || "${APP_VERSION:-}" == "${positional[0]:-}" ]]; then
        # An option token or the environment name leaked into the version slot;
        # fall back to a timestamp as the help text documents.
        # NOTE(review): the timestamp format is a guess - align with the header.
        APP_VERSION="$(date +%Y%m%d%H%M%S)"
    fi

    # Validate the environment argument
    if [[ ! "$ENVIRONMENT" =~ ^(staging|production)$ ]]; then
        error_exit "Invalid environment: $ENVIRONMENT. Must be 'staging' or 'production'"
    fi

    if [[ "$do_rollback" == "true" ]]; then
        rollback
        exit 0
    fi

    log "Starting deployment process..."
    log "Environment: $ENVIRONMENT"
    log "Version: $APP_VERSION"

    # Run the deployment pipeline
    check_dependencies
    pre_deployment_checks

    # A dry run changes nothing, so there is nothing to back up
    if [[ "$DRY_RUN" != "true" ]]; then
        backup_current_version
    fi

    run_deployment

    log_success "Deployment process completed successfully!"
}

# Trap interrupt/termination signals: log the interruption and exit with status 1
trap 'log_error "Deployment interrupted"; exit 1' INT TERM

# Run the main function with the script's original arguments
main "$@"

10.2 容器化部署项目

10.2.1 项目概述

本项目演示如何使用 Ansible 管理 Docker 容器化应用的部署,包括:

- Docker 环境配置
- 容器镜像管理
- Docker Compose 服务编排
- 容器监控和日志
- 滚动更新和回滚

10.2.2 Docker Role

# roles/docker/tasks/main.yml
# Docker environment setup: package repository, engine, daemon configuration,
# user access, user-defined networks and log rotation.
#
# Optional variables read by this file:
#   docker_repo_distribution  distribution path segment on download.docker.com
#                             (defaults to the lower-cased ansible_distribution)
#   docker_users              accounts to add to the `docker` group
#   docker_networks           list of {name, driver?, subnet?}
---
- name: Install Docker dependencies
  package:
    name: "{{ docker_dependencies }}"
    state: present
  vars:
    docker_dependencies:
      - apt-transport-https
      - ca-certificates
      - curl
      - gnupg
      - lsb-release
  when: ansible_os_family == 'Debian'
  tags: [docker, packages]

# The condition matches the whole Debian family, so the repository path is
# derived from the actual distribution instead of being pinned to "ubuntu".
- name: Add Docker GPG key
  apt_key:
    url: "https://download.docker.com/linux/{{ docker_repo_distribution | default(ansible_distribution | lower) }}/gpg"
    state: present
  when: ansible_os_family == 'Debian'
  tags: [docker, repository]

- name: Add Docker repository
  apt_repository:
    repo: >-
      deb [arch=amd64]
      https://download.docker.com/linux/{{ docker_repo_distribution | default(ansible_distribution | lower) }}
      {{ ansible_distribution_release }} stable
    state: present
  when: ansible_os_family == 'Debian'
  tags: [docker, repository]

- name: Install Docker
  package:
    name: "{{ docker_packages }}"
    state: present
  vars:
    docker_packages:
      - docker-ce
      - docker-ce-cli
      - containerd.io
      - docker-compose-plugin
  tags: [docker, packages]

- name: Start and enable Docker
  systemd:
    name: docker
    state: started
    enabled: true
  tags: [docker, service]

- name: Add users to docker group
  user:
    name: "{{ item }}"
    groups: docker
    append: true
  loop: "{{ docker_users | default([]) }}"
  tags: [docker, users]

- name: Configure Docker daemon
  template:
    src: daemon.json.j2
    dest: /etc/docker/daemon.json
    mode: "0644"
    backup: true
  notify: restart docker
  tags: [docker, config]

- name: Install Docker Compose
  pip:
    name: docker-compose
    state: present
  tags: [docker, compose]

# `omit` is only honoured as the value of a whole module argument; nested in a
# list element it would be passed through as a literal placeholder string.
# The complete ipam_config argument is therefore built (or omitted) at once.
- name: Create Docker networks
  docker_network:
    name: "{{ item.name }}"
    driver: "{{ item.driver | default('bridge') }}"
    ipam_config: "{{ [{'subnet': item.subnet}] if item.subnet is defined else omit }}"
  loop: "{{ docker_networks | default([]) }}"
  tags: [docker, networks]

- name: Configure log rotation for Docker
  template:
    src: docker.logrotate.j2
    dest: /etc/logrotate.d/docker
    mode: "0644"
  tags: [docker, logging]
{# roles/docker/templates/daemon.json.j2 #}
{# Docker daemon configuration rendered as JSON. #}
{# Every docker_* variable is optional; the default() filter supplies a fallback. #}
{# Boolean settings go through `lower` so they render as JSON true/false literals. #}
{# The two optional trailing keys emit their own leading comma, so the document #}
{# stays valid JSON whether or not they are defined. #}
{
  "log-driver": "{{ docker_log_driver | default('json-file') }}",
  "log-opts": {
    "max-size": "{{ docker_log_max_size | default('10m') }}",
    "max-file": "{{ docker_log_max_file | default('3') }}"
  },
  "storage-driver": "{{ docker_storage_driver | default('overlay2') }}",
  "data-root": "{{ docker_data_root | default('/var/lib/docker') }}",
  "exec-opts": ["native.cgroupdriver=systemd"],
  "live-restore": {{ docker_live_restore | default(true) | lower }},
  "userland-proxy": {{ docker_userland_proxy | default(false) | lower }},
  "experimental": {{ docker_experimental | default(false) | lower }},
  "metrics-addr": "{{ docker_metrics_addr | default('0.0.0.0:9323') }}",
  "default-ulimits": {
    "nofile": {
      "Name": "nofile",
      "Hard": {{ docker_ulimit_nofile_hard | default(65536) }},
      "Soft": {{ docker_ulimit_nofile_soft | default(65536) }}
    }
  }{% if docker_registry_mirrors is defined %},
  "registry-mirrors": {{ docker_registry_mirrors | to_json }}{% endif %}{% if docker_insecure_registries is defined %},
  "insecure-registries": {{ docker_insecure_registries | to_json }}{% endif %}
}

10.2.3 容器化应用部署

# roles/containerized_app/tasks/main.yml
# Containerized application deployment: directories, rendered configuration,
# image pull, Compose start-up, health verification and optional add-ons.
#
# Required variables: app_home, app_user, app_group, docker_images
# Optional variables: app_config_files, health_checks, force_pull_images,
#   pull_images_on_start, recreate_containers, container_monitoring_enabled,
#   backup_enabled
---
- name: Create application directories
  file:
    path: "{{ item }}"
    state: directory
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: "0755"
  loop:
    - "{{ app_home }}"
    - "{{ app_home }}/data"
    - "{{ app_home }}/logs"
    - "{{ app_home }}/config"
    - "{{ app_home }}/backups"
  tags: [app, directories]

# The rendered Compose file embeds database and Redis passwords, so it gets
# the same owner-only permissions as the .env file.
- name: Generate Docker Compose file
  template:
    src: docker-compose.yml.j2
    dest: "{{ app_home }}/docker-compose.yml"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: "0600"
  tags: [app, compose]

- name: Generate environment file
  template:
    src: .env.j2
    dest: "{{ app_home }}/.env"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: "0600"
  tags: [app, config]

- name: Generate application configuration
  template:
    src: "{{ item.src }}"
    dest: "{{ app_home }}/config/{{ item.dest }}"
    owner: "{{ app_user }}"
    group: "{{ app_group }}"
    mode: "{{ item.mode | default('0644') }}"
  loop: "{{ app_config_files | default([]) }}"
  tags: [app, config]

- name: Pull Docker images
  docker_image:
    name: "{{ item.image }}"
    tag: "{{ item.tag | default('latest') }}"
    source: pull
    force_source: "{{ force_pull_images | default(false) }}"
  loop: "{{ docker_images }}"
  tags: [app, images]

# become_user only takes effect when privilege escalation is enabled, so it is
# switched on explicitly instead of relying on a play-level setting.
- name: Start application services
  docker_compose:
    project_src: "{{ app_home }}"
    state: present
    pull: "{{ pull_images_on_start | default(false) }}"
    recreate: "{{ recreate_containers | default('smart') }}"
  become: true
  become_user: "{{ app_user }}"
  tags: [app, services]

# retries/delay need an `until` condition to poll reliably on every Ansible
# version; each endpoint is polled for up to 30 x 10 s until it answers 200.
- name: Wait for services to be healthy
  uri:
    url: "{{ item.health_check_url }}"
    method: GET
    status_code: 200
  register: health_result
  until: health_result.status == 200
  retries: 30
  delay: 10
  loop: "{{ health_checks | default([]) }}"
  tags: [app, health_check]

- name: Setup container monitoring
  include_tasks: monitoring.yml
  when: container_monitoring_enabled | default(true)
  tags: [app, monitoring]

- name: Setup log collection
  include_tasks: logging.yml
  tags: [app, logging]

- name: Setup backup jobs
  include_tasks: backup.yml
  when: backup_enabled | default(true)
  tags: [app, backup]
# roles/containerized_app/templates/docker-compose.yml.j2
# Docker Compose stack: web application, MySQL, Redis and an Nginx front end,
# all attached to one bridge network. Rendered by Ansible; secrets come from
# the vault_* variables.
version: '3.8'

services:
  # Web application
  web:
    image: {{ app_image }}:{{ app_version | default('latest') }}
    container_name: {{ app_name }}_web
    restart: unless-stopped
    ports:
      - "{{ app_port }}:3000"
    environment:
      - NODE_ENV={{ environment }}
      - PORT=3000
      - DB_HOST=database
      - DB_NAME={{ db_name }}
      - DB_USER={{ db_user }}
      - DB_PASSWORD={{ vault_db_password }}
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      # Redis is started with --requirepass, so the client needs the password.
      # NOTE(review): assumes the application reads REDIS_PASSWORD - confirm.
      - REDIS_PASSWORD={{ vault_redis_password }}
    volumes:
      - ./logs:/app/logs
      - ./config:/app/config:ro
    depends_on:
      - database
      - redis
    networks:
      - app_network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    deploy:
      resources:
        limits:
          memory: {{ app_memory_limit | default('512M') }}
          cpus: '{{ app_cpu_limit | default("0.5") }}'
        reservations:
          memory: {{ app_memory_reservation | default('256M') }}
          cpus: '{{ app_cpu_reservation | default("0.25") }}'

  # Database
  database:
    image: mysql:8.0
    container_name: {{ app_name }}_db
    restart: unless-stopped
    environment:
      - MYSQL_ROOT_PASSWORD={{ vault_mysql_root_password }}
      - MYSQL_DATABASE={{ db_name }}
      - MYSQL_USER={{ db_user }}
      - MYSQL_PASSWORD={{ vault_db_password }}
    volumes:
      - db_data:/var/lib/mysql
      - ./config/mysql.cnf:/etc/mysql/conf.d/custom.cnf:ro
      - ./backups:/backups
    networks:
      - app_network
    healthcheck:
      test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
      interval: 30s
      timeout: 10s
      retries: 5
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Redis cache
  redis:
    image: redis:7-alpine
    container_name: {{ app_name }}_redis
    restart: unless-stopped
    command: redis-server --appendonly yes --requirepass {{ vault_redis_password }}
    environment:
      # redis-cli picks the password up from REDISCLI_AUTH, which lets the
      # health check authenticate without putting the secret on its command line
      - REDISCLI_AUTH={{ vault_redis_password }}
    volumes:
      - redis_data:/data
      - ./config/redis.conf:/usr/local/etc/redis/redis.conf:ro
    networks:
      - app_network
    healthcheck:
      # Require an actual PONG; an unauthenticated ping only returns NOAUTH
      test: ["CMD-SHELL", "redis-cli ping | grep -q PONG"]
      interval: 30s
      timeout: 10s
      retries: 3
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  # Nginx load balancer
  nginx:
    image: nginx:alpine
    container_name: {{ app_name }}_nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./config/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./config/ssl:/etc/nginx/ssl:ro
      - ./logs/nginx:/var/log/nginx
    depends_on:
      - web
    networks:
      - app_network
    healthcheck:
      test: ["CMD", "nginx", "-t"]
      interval: 30s
      timeout: 10s
      retries: 3
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

volumes:
  db_data:
    driver: local
  redis_data:
    driver: local

networks:
  app_network:
    driver: bridge
    ipam:
      config:
        - subnet: {{ docker_subnet | default('172.20.0.0/16') }}

10.8 监控配置

10.8.1 Prometheus 配置

文件:roles/monitoring/templates/prometheus.yml.j2

# Prometheus server configuration (Jinja2 template).
# Scrape targets are generated from the `web` and `db` inventory groups using
# each host's ansible_default_ipv4 address, so facts must be available for them.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "rules/*.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
          - alertmanager:9093

scrape_configs:
  # Prometheus self-monitoring
  - job_name: "prometheus"
    static_configs:
      - targets:
        - "localhost:9090"

  # Node Exporter on every web host, then every database host
  - job_name: "node-exporter"
    static_configs:
      - targets:
{% for node in groups['web'] + groups['db'] %}
        - "{{ hostvars[node].ansible_default_ipv4.address }}:9100"
{% endfor %}

  # MySQL Exporter on the database hosts
  - job_name: "mysql-exporter"
    static_configs:
      - targets:
{% for node in groups['db'] %}
        - "{{ hostvars[node].ansible_default_ipv4.address }}:9104"
{% endfor %}

  # Nginx Exporter on the web hosts
  - job_name: "nginx-exporter"
    static_configs:
      - targets:
{% for node in groups['web'] %}
        - "{{ hostvars[node].ansible_default_ipv4.address }}:9113"
{% endfor %}

  # Application metrics endpoint
  - job_name: "webapp"
    static_configs:
      - targets:
{% for node in groups['web'] %}
        - "{{ hostvars[node].ansible_default_ipv4.address }}:{{ app_metrics_port | default(3001) }}"
{% endfor %}
    metrics_path: "/metrics"
    scrape_interval: 30s

10.8.2 Grafana 仪表板配置

文件:roles/monitoring/templates/grafana-dashboard.json.j2

{# Grafana dashboard definition with four graph panels: request rate, latency, DB connections, load. #}
{# Grafana's own legend placeholders are wrapped in raw blocks so that Jinja2 #}
{# emits the double-brace text literally instead of evaluating an undefined variable. #}
{
  "dashboard": {
    "id": null,
    "title": "{{ app_name | title }} Application Dashboard",
    "tags": ["{{ app_name }}", "production"],
    "timezone": "browser",
    "panels": [
      {
        "id": 1,
        "title": "HTTP Requests per Second",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(http_requests_total[5m])",
            "legendFormat": "{% raw %}{{instance}}{% endraw %}"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}
      },
      {
        "id": 2,
        "title": "Response Time",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))",
            "legendFormat": "95th percentile"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
      },
      {
        "id": 3,
        "title": "Database Connections",
        "type": "graph",
        "targets": [
          {
            "expr": "mysql_global_status_threads_connected",
            "legendFormat": "Active Connections"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}
      },
      {
        "id": 4,
        "title": "System Load",
        "type": "graph",
        "targets": [
          {
            "expr": "node_load1",
            "legendFormat": "{% raw %}{{instance}}{% endraw %} - 1m"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}
      }
    ],
    "time": {
      "from": "now-1h",
      "to": "now"
    },
    "refresh": "30s"
  }
}

10.9 部署脚本

10.9.1 自动化部署脚本

文件:scripts/deploy.sh

#!/bin/bash

# Automated deployment script for the web application
# Usage: ./deploy.sh [environment] [version]

# Abort on the first command that fails
set -e

# Configuration variables
# ENVIRONMENT: first argument, defaults to "staging"
# VERSION:     second argument, defaults to "latest"
# PROJECT_DIR: parent directory of the directory containing this script
# LOG_FILE:    timestamped log under /var/log (the caller needs write access there)
ENVIRONMENT=${1:-staging}
VERSION=${2:-latest}
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
LOG_FILE="/var/log/ansible-deploy-$(date +%Y%m%d-%H%M%S).log"

# ANSI colour codes for terminal output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Logging helpers.
# Each message is timestamped, colourised, printed to stdout and appended to
# $LOG_FILE. error() additionally terminates the script with status 1.

# Shared formatter: $1 = colour code, $2 = label placed after the timestamp,
# $3 = message text.
_emit() {
    echo -e "${1}[$(date +'%Y-%m-%d %H:%M:%S')]${2}${NC} $3" | tee -a "$LOG_FILE"
}

log() {
    _emit "$GREEN" "" "$1"
}

warn() {
    _emit "$YELLOW" " WARNING:" "$1"
}

error() {
    _emit "$RED" " ERROR:" "$1"
    exit 1
}

# Verify prerequisites: the required command-line tools are on PATH and the
# inventory file for the selected environment exists. Aborts via error().
check_dependencies() {
    local tool
    local inventory="$PROJECT_DIR/inventories/$ENVIRONMENT/hosts.yml"

    log "检查部署依赖..."

    for tool in ansible-playbook git; do
        command -v "$tool" >/dev/null 2>&1 || error "$tool 未安装"
    done

    [ -f "$inventory" ] || error "环境配置文件不存在: $ENVIRONMENT"

    log "依赖检查完成"
}

# Back up the currently deployed version by running playbooks/backup.yml.
# A failed backup is logged as a warning and does not stop the deployment.
#
# Fix: this step runs before any other function changes directory, so the
# relative inventory, playbook and vault paths only resolved when the script
# was started from the project root. Otherwise the backup failed and was
# merely warned about. The function now enters $PROJECT_DIR itself.
backup_current() {
    log "备份当前版本..."

    cd "$PROJECT_DIR" || error "无法进入项目目录: $PROJECT_DIR"

    ansible-playbook -i "inventories/$ENVIRONMENT/hosts.yml" \
        playbooks/backup.yml \
        --extra-vars "backup_version=$VERSION" \
        --vault-password-file .vault_pass || warn "备份失败,继续部署"
}

# Deploy the application in three stages: syntax check, check-mode dry run,
# then the real run. Any failing stage aborts the script via error().
#
# Fix: the syntax check now receives the vault password file like the other
# invocations; without it, loading vault-encrypted variable files makes the
# check fail before any playbook syntax is examined.
deploy_application() {
    log "开始部署应用 (环境: $ENVIRONMENT, 版本: $VERSION)..."

    cd "$PROJECT_DIR"

    # Arguments shared by all three invocations
    local -a base_args=(
        -i "inventories/$ENVIRONMENT/hosts.yml"
        site.yml
        --vault-password-file .vault_pass
    )

    # Syntax check
    log "执行 Playbook 语法检查..."
    ansible-playbook "${base_args[@]}" --syntax-check || error "Playbook 语法错误"

    # Dry run: report the changes that would be made without applying them
    log "执行干运行检查..."
    ansible-playbook "${base_args[@]}" --check --diff \
        --extra-vars "app_version=$VERSION" || error "干运行检查失败"

    # Actual deployment
    log "执行实际部署..."
    ansible-playbook "${base_args[@]}" \
        --extra-vars "app_version=$VERSION" || error "部署失败"

    log "应用部署完成"
}

# Run the health-check playbook against the selected environment.
# A failure aborts the script via error().
health_check() {
    local inventory="inventories/$ENVIRONMENT/hosts.yml"

    log "执行健康检查..."

    if ! ansible-playbook -i "$inventory" playbooks/health-check.yml \
            --vault-password-file .vault_pass; then
        error "健康检查失败"
    fi

    log "健康检查通过"
}

# Announce the successful deployment through the notification playbook.
# A failure here is only warned about; it never fails the deployment.
send_notification() {
    local inventory="inventories/$ENVIRONMENT/hosts.yml"
    local notify_vars="deployment_status=success deployment_version=$VERSION"

    log "发送部署通知..."

    if ! ansible-playbook -i "$inventory" playbooks/notification.yml \
            --extra-vars "$notify_vars" \
            --vault-password-file .vault_pass; then
        warn "通知发送失败"
    fi
}

# Main entry point: log the run parameters, then execute the deployment
# stages in order.
main() {
    local banner step

    for banner in "开始自动化部署流程" "环境: $ENVIRONMENT" "版本: $VERSION" "日志文件: $LOG_FILE"; do
        log "$banner"
    done

    for step in check_dependencies backup_current deploy_application health_check send_notification; do
        "$step"
    done

    log "部署流程完成!"
}

# Error handling: report any unhandled command failure and point at the log file.
# NOTE(review): bash does not pass an ERR trap into shell functions unless
# errtrace (`set -E`) is enabled, and all work here happens inside functions -
# confirm whether this trap is expected to fire for failures within them.
trap 'error "部署过程中发生错误,请检查日志: $LOG_FILE"' ERR

# Run the main function with the script's original arguments
main "$@"

10.9.2 回滚脚本

文件:scripts/rollback.sh

#!/bin/bash

# Application rollback script
# Usage: ./rollback.sh [environment] [target version]

# Abort on the first command that fails
set -e

# ENVIRONMENT:    first argument, defaults to "staging"
# TARGET_VERSION: second argument; no default, checked further down
# PROJECT_DIR:    parent directory of the directory containing this script
ENVIRONMENT=${1:-staging}
TARGET_VERSION=${2}
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."

# ANSI colour codes for terminal output (YELLOW is defined but not used below)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Print a green, timestamped informational message to stdout.
log() {
    local stamp
    stamp="$(date +'%Y-%m-%d %H:%M:%S')"
    echo -e "${GREEN}[${stamp}]${NC} $1"
}

# Print a red, timestamped error message to stdout, then exit with status 1.
error() {
    local stamp
    stamp="$(date +'%Y-%m-%d %H:%M:%S')"
    echo -e "${RED}[${stamp}] ERROR:${NC} $1"
    exit 1
}

# Argument check: a target version is mandatory
[[ -n "$TARGET_VERSION" ]] || error "请指定目标版本"

# Interactive confirmation: a single keypress, anything other than y/Y cancels
read -p "确认回滚到版本 $TARGET_VERSION? (y/N): " -n 1 -r
echo
case "$REPLY" in
    [Yy])
        ;;
    *)
        log "回滚已取消"
        exit 0
        ;;
esac

log "开始回滚到版本: $TARGET_VERSION"

# The relative paths below are resolved from the project root
cd "$PROJECT_DIR"

# Run the rollback playbook
if ! ansible-playbook -i "inventories/$ENVIRONMENT/hosts.yml" \
        playbooks/rollback.yml \
        --extra-vars "rollback_version=$TARGET_VERSION" \
        --vault-password-file .vault_pass; then
    error "回滚失败"
fi

log "回滚完成"

10.10 项目总结

10.10.1 架构优势

  1. 高可用性

    • 负载均衡器分发请求
    • 多实例应用服务器
    • 数据库主从复制
    • 健康检查和自动恢复
  2. 可扩展性

    • 水平扩展应用服务器
    • 数据库读写分离
    • 缓存层减少数据库压力
    • 容器化部署便于扩容
  3. 安全性

    • Ansible Vault 加密敏感信息
    • 防火墙规则限制访问
    • SSL/TLS 加密传输
    • 定期安全更新
  4. 可维护性

    • 标准化的项目结构
    • 详细的文档和注释
    • 自动化部署和回滚
    • 完善的监控和日志

10.10.2 最佳实践总结

  1. 项目组织

    项目结构最佳实践:
    ├── inventories/          # 环境特定配置
    ├── group_vars/          # 组变量
    ├── host_vars/           # 主机变量
    ├── roles/               # 角色定义
    ├── playbooks/           # 特定任务 Playbook
    ├── scripts/             # 部署脚本
    ├── docs/                # 项目文档
    └── tests/               # 测试文件
    
  2. 变量管理

    • 使用 group_vars 和 host_vars 组织变量
    • 敏感信息使用 Ansible Vault 加密
    • 提供合理的默认值
    • 使用描述性的变量名
  3. Role 设计

    • 单一职责原则
    • 可重用和可配置
    • 完整的文档说明
    • 适当的依赖管理
  4. 部署策略

    • 蓝绿部署减少停机时间
    • 滚动更新保证服务连续性
    • 自动化测试验证部署
    • 快速回滚机制
  5. 监控和日志

    • 应用性能监控
    • 基础设施监控
    • 集中化日志管理
    • 告警和通知机制

10.10.3 性能优化建议

1. **Ansible 性能优化**

   ```ini
   # ansible.cfg 优化配置
   [defaults]
   host_key_checking = False
   pipelining = True
   forks = 20
   gathering = smart
   fact_caching = jsonfile
   fact_caching_connection = /tmp/ansible_facts_cache
   fact_caching_timeout = 86400

   [ssh_connection]
   ssh_args = -o ControlMaster=auto -o ControlPersist=60s
   control_path = /tmp/ansible-ssh-%%h-%%p-%%r
   ```


2. **应用性能优化**
   - 启用 HTTP/2
   - 配置适当的缓存策略
   - 优化数据库查询
   - 使用 CDN 加速静态资源

3. **系统性能优化**
   - 调整内核参数
   - 优化文件系统
   - 配置适当的资源限制
   - 定期清理日志和临时文件

#### 10.10.4 故障排除指南

1. **常见问题**
   - **连接失败**: 检查 SSH 配置和防火墙规则
   - **权限错误**: 验证用户权限和 sudo 配置
   - **服务启动失败**: 检查配置文件和依赖服务
   - **性能问题**: 分析监控数据和日志

2. **调试技巧**
   ```bash
   # 详细输出
   ansible-playbook -vvv site.yml
   
   # 调试特定任务
   ansible-playbook site.yml --start-at-task="任务名称"
   
   # 检查变量值
   ansible-playbook site.yml --extra-vars "debug=true"
   ```

3. **日志分析**
   - 应用日志: /var/log/app/
   - 系统日志: /var/log/syslog
   - Nginx 日志: /var/log/nginx/
   - MySQL 日志: /var/log/mysql/

10.11 扩展功能

10.11.1 CI/CD 集成

文件:.gitlab-ci.yml

# GitLab CI pipeline: lint the playbook, build and push the image, then deploy
# to staging automatically and to production on manual approval.
stages:
  - test
  - build
  - deploy

variables:
  ANSIBLE_HOST_KEY_CHECKING: "false"
  ANSIBLE_FORCE_COLOR: "true"

# Test stage: syntax check and lint of the main playbook
test_playbook:
  stage: test
  script:
    - ansible-playbook --syntax-check site.yml
    - ansible-lint site.yml
  only: [merge_requests, master]

# Build stage: build the image tagged with the commit SHA and push it
build_application:
  stage: build
  script:
    - docker build -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .
    - docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
  only: [master]

# Deploy to the staging environment
deploy_staging:
  stage: deploy
  script:
    - >-
      ansible-playbook -i inventories/staging/hosts.yml site.yml
      --extra-vars "app_version=$CI_COMMIT_SHA"
      --vault-password-file $VAULT_PASSWORD_FILE
  environment:
    name: staging
    url: https://staging.example.com
  only: [master]

# Deploy to the production environment (manual trigger)
deploy_production:
  stage: deploy
  script:
    - >-
      ansible-playbook -i inventories/production/hosts.yml site.yml
      --extra-vars "app_version=$CI_COMMIT_SHA"
      --vault-password-file $VAULT_PASSWORD_FILE
  environment:
    name: production
    url: https://www.example.com
  when: manual
  only: [master]

10.11.2 多环境管理

文件:inventories/environments.yml

# Per-environment configuration.
# Each entry carries the public domain, replica count, per-instance resources
# and database sizing/backup retention for one environment.
# Numeric-looking values such as cpu are quoted so they stay strings.
environments:
  # Development
  development:
    domain: dev.example.com
    replicas: 1
    resources:
      cpu: "0.5"
      memory: "512Mi"
    database:
      size: "10Gi"
      backup_retention: 7
  
  # Staging
  staging:
    domain: staging.example.com
    replicas: 2
    resources:
      cpu: "1"
      memory: "1Gi"
    database:
      size: "50Gi"
      backup_retention: 14
  
  # Production
  production:
    domain: www.example.com
    replicas: 3
    resources:
      cpu: "2"
      memory: "2Gi"
    database:
      size: "200Gi"
      backup_retention: 30

本章总结

本章通过一个完整的 Web 应用部署项目,展示了 Ansible 在实际生产环境中的应用。我们学习了:

  1. 项目架构设计:如何设计一个高可用、可扩展的 Web 应用架构
  2. Ansible 项目组织:标准化的目录结构和文件组织方式
  3. 多环境管理:如何管理开发、测试、生产等不同环境
  4. Role 开发:创建可重用的 Ansible Role
  5. 模板使用:使用 Jinja2 模板生成配置文件
  6. 安全管理:使用 Ansible Vault 保护敏感信息
  7. 监控集成:集成 Prometheus 和 Grafana 监控
  8. 自动化部署:编写部署和回滚脚本
  9. 最佳实践:总结了 Ansible 项目的最佳实践
  10. 故障排除:常见问题的解决方法

通过这个实战项目,你应该能够:

- 独立设计和实施 Ansible 自动化项目
- 编写高质量的 Playbook 和 Role
- 管理复杂的多环境部署
- 集成监控和日志系统
- 实施 CI/CD 流水线
- 解决常见的部署问题

练习题

基础练习

  1. 项目初始化

    • 创建一个新的 Ansible 项目
    • 设置标准的目录结构
    • 配置 ansible.cfg 文件
  2. Inventory 配置

    • 创建开发和生产环境的 Inventory
    • 配置主机组和变量
    • 测试主机连接
  3. 基础 Role 开发

    • 创建一个 common role
    • 实现基础系统配置
    • 添加防火墙和安全配置

进阶练习

  1. 应用部署 Role

    • 创建 Web 应用部署 Role
    • 实现多版本管理
    • 添加健康检查
  2. 数据库管理

    • 创建 MySQL 部署 Role
    • 实现数据库备份
    • 配置主从复制
  3. 负载均衡配置

    • 配置 Nginx 负载均衡
    • 实现 SSL 终止
    • 添加健康检查

高级练习

  1. 监控集成

    • 部署 Prometheus 监控
    • 配置 Grafana 仪表板
    • 设置告警规则
  2. CI/CD 集成

    • 集成 GitLab CI/CD
    • 实现自动化测试
    • 配置自动部署
  3. 容器化部署

    • 使用 Docker Compose
    • 实现容器编排
    • 配置容器监控
  4. 性能优化

    • 优化 Ansible 执行性能
    • 实现并行部署
    • 配置缓存策略

实战项目

  1. 完整项目实施

    • 选择一个实际应用
    • 设计完整的部署方案
    • 实施自动化部署
    • 集成监控和日志
    • 编写完整文档
  2. 故障演练

    • 模拟各种故障场景
    • 测试自动恢复机制
    • 验证备份和恢复流程
    • 优化故障响应时间