ansible-devops/roles/gpu_drive/tasks/main.yml

43 lines
1.5 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Ensure the per-driver log directory exists beneath the shared log root.
# The directory name comes from `driver.name`; access is restricted to
# owner (rwx) and group (r-x).
- name: 创建角色专属日志目录
  file:
    state: directory
    mode: "0750"
    path: "{{ log_base_dir }}/{{ driver.name }}"
# Upload the driver install script to the managed host and make it
# executable, so that it can be invoked from `script_dest`.
- name: 同步驱动脚本到目标服务器
  copy:
    src: "{{ driver.install_script }}"
    dest: "{{ script_dest }}/{{ driver.install_script }}"
    mode: "0755"
    # Always overwrite the remote copy so the latest script is used.
    # Written as a canonical boolean rather than the YAML 1.1-only `yes`.
    force: true
# Run the uploaded driver script with the argument(s) mapped from the
# requested `operation` (install / uninstall). `--version` is appended only
# when `target_version` is defined and non-empty. The result is registered
# as `script_result`.
- name: 执行驱动操作(安装/卸载)
  shell: |
    {{ script_dest }}/{{ driver.install_script }} \
    {{ operations[operation] }} \
    {% if target_version is defined and target_version != "" %}--version {{ target_version }}{% endif %}
  register: script_result
  environment:
    # Expose this host's `gpu_model` host variable to the script.
    GPU_MODEL: "{{ hostvars[inventory_hostname]['gpu_model'] }}"
  # An explicit `until` is required for the retry settings to take effect on
  # ansible-core releases before 2.16, which ignore `retries` without it.
  until: script_result is succeeded
  # Retry up to 3 times, then give up and fail the task.
  retries: 3
  # Wait 30 seconds between attempts.
  delay: 30
  # Run with privilege escalation (sudo).
  become: true
# After an install, run the driver's service check command and fail the
# host if the check itself fails or the installer reported zero GPUs.
# The check is read-only, so it never reports "changed".
- name: 验证操作结果(安装时)
  shell: "{{ driver.service_check }}"
  register: service_check_result
  changed_when: false
  # The whole condition must be a single YAML scalar: a value that starts
  # with a double quote ends at the closing quote, so the previous
  # `"GPU count: 0" in ...` form was a YAML syntax error.
  # `failed_when` replaces the default rc-based failure, so the check's own
  # return code is tested explicitly to avoid silently ignoring a failed check.
  # NOTE(review): the marker is looked up in the installer's stderr
  # (`script_result`), as before — confirm whether the service check output
  # should be inspected instead.
  failed_when: >-
    service_check_result.rc != 0
    or 'GPU count: 0' in script_result.stderr
  when: operation == "install"
# Report the outcome of the driver operation to the central logging API
# as a JSON document (host, component, operation, version, status).
# NOTE(review): unless failures of the driver script are tolerated elsewhere
# (e.g. ignore_errors or a rescue block), this task is only reached when the
# script succeeded, so the "failed" status may never be sent — confirm.
- name: 记录操作日志(企业级可观测性)
  uri:
    url: "http://logging.internal.com/api/ansible"
    method: POST
    body_format: json
    body:
      host: "{{ inventory_hostname }}"
      component: "{{ driver.name }}_driver"
      operation: "{{ operation }}"
      # The second argument `true` makes `default` also apply when
      # `target_version` is defined but empty, matching the script invocation,
      # which treats an empty `target_version` as "no version requested".
      version: "{{ target_version | default(driver.default_version, true) }}"
      # Derived from the exit code of the registered driver script run.
      status: "{% if script_result.rc == 0 %}success{% else %}failed{% endif %}"