# Driver install/uninstall tasks: prepare the log directory, push the driver
# script to the host, run it, verify the result (install only), and report the
# outcome to the central logging API.
#
# Variables read here and defined elsewhere: log_base_dir, script_dest,
# driver (name, install_script, service_check, default_version), operations,
# operation, and optionally target_version.

- name: 创建角色专属日志目录
  file:
    path: "{{ log_base_dir }}/{{ driver.name }}"
    state: directory
    mode: "0750"

# The work is wrapped in block/always so that the logging task also runs when
# a step fails; without it a failed task stops the host before the report is
# sent and the "failed" status could never be emitted. The failure itself is
# not rescued, so the host is still marked failed after `always` completes.
- name: 执行驱动操作并上报结果
  block:
    - name: 同步驱动脚本到目标服务器
      copy:
        src: "{{ driver.install_script }}"
        dest: "{{ script_dest }}/{{ driver.install_script }}"
        mode: "0755"
        force: true  # always overwrite so the latest script is used

    - name: 执行驱动操作(安装/卸载)
      # Folded scalar: the lines below are joined with spaces into one command.
      # `--version` is appended only when target_version is set and non-empty;
      # its value is shell-quoted because it is interpolated into a shell line.
      shell: >-
        {{ script_dest }}/{{ driver.install_script }}
        {{ operations[operation] }}
        {% if target_version is defined and target_version != "" %}--version {{ target_version | quote }}{% endif %}
      register: script_result
      environment:
        # Pass the host's GPU model to the script.
        GPU_MODEL: "{{ hostvars[inventory_hostname]['gpu_model'] }}"
      # Explicit `until`: older Ansible releases ignore `retries` without it.
      until: script_result is succeeded
      retries: 3
      delay: 30  # seconds between attempts
      become: true  # run with privilege escalation

    - name: 验证操作结果(安装时)
      when: operation == "install"
      shell: "{{ driver.service_check }}"
      register: verify_result
      changed_when: false
      # `failed_when` replaces the default return-code check, so the rc of the
      # check command is tested explicitly here.
      # NOTE(review): the "GPU count: 0" match reads the install script's
      # stderr, not this check command's output - confirm that is intended.
      failed_when: >-
        verify_result.rc != 0
        or 'GPU count: 0' in script_result.stderr

  always:
    - name: 记录操作日志(企业级可观测性)
      uri:
        url: "http://logging.internal.com/api/ansible"
        method: POST
        body_format: json
        body:
          host: "{{ inventory_hostname }}"
          component: "{{ driver.name }}_driver"
          operation: "{{ operation }}"
          # default(..., true) also falls back when target_version is an empty
          # string, matching how the shell command above treats that case.
          version: "{{ target_version | default(driver.default_version, true) }}"
          # "success" only when the script ran successfully and verification
          # did not fail. The defaults cover results that were never
          # registered because an earlier task in the block failed.
          status: >-
            {{ 'success' if (script_result | default({'failed': true})) is succeeded
            and (verify_result | default({})) is succeeded else 'failed' }}