diff --git a/.github/workflows/make_docs.yml b/.github/workflows/make_docs.yml index a295cad..e6adb71 100644 --- a/.github/workflows/make_docs.yml +++ b/.github/workflows/make_docs.yml @@ -2,30 +2,40 @@ name: publish document on: push: - tags: - - v* + branches: + - main jobs: build: runs-on: ubuntu-latest + container: fedora:latest steps: - - uses: actions/checkout@v3 - - name: Install requirements - run: sudo apt-get install asciidoctor ruby-asciidoctor-pdf + run: sudo dnf install -y git graphviz make plantuml rubygem-asciidoctor rubygem-asciidoctor-pdf rubygem-rouge - - name: Install requirements + - name: Repository checkout + uses: actions/checkout@v3 + + - name: Move to the correct folder run: cd $GITHUB_WORKSPACE - - - name: render HTML + + - name: Ensure git folder is considered safe + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Render HTML run: make release - - - uses: stefanzweifel/git-auto-commit-action@v4 + + - name: Git add and force push to docs + run: git add -f docs + + - name: Push changes + uses: stefanzweifel/git-auto-commit-action@v4 with: commit_message: publish release file_pattern: docs/* + add_options: '-A --force' branch: docs # main branch is protected, make sure this one is used for GitHub pages # the following options are necessary to forcefully overwrite each time the branch skip_fetch: true diff --git a/.github/workflows/test_docs.yml b/.github/workflows/test_docs.yml index 7d2d0b7..4bec534 100644 --- a/.github/workflows/test_docs.yml +++ b/.github/workflows/test_docs.yml @@ -7,15 +7,20 @@ jobs: test-build-docs: runs-on: ubuntu-latest + container: fedora:latest steps: - - uses: actions/checkout@v3 - - name: Install requirements - run: sudo apt-get install asciidoctor ruby-asciidoctor-pdf + run: sudo dnf install -y git graphviz make plantuml rubygem-asciidoctor rubygem-asciidoctor-pdf rubygem-rouge - - name: Install requirements + - name: Repository checkout + uses: actions/checkout@v3 + + - name: Move 
to the correct folder run: cd $GITHUB_WORKSPACE - - name: render HTML + - name: Ensure git folder is considered safe + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + + - name: Render HTML run: make release diff --git a/.github/workflows/test_markdown_links.yml b/.github/workflows/test_markdown_links.yml new file mode 100644 index 0000000..3281e66 --- /dev/null +++ b/.github/workflows/test_markdown_links.yml @@ -0,0 +1,21 @@ +--- +# This action checks all Markdown files in the repository for broken links. +# (Uses https://github.com/tcort/markdown-link-check) +name: markdown link check + + +on: + push: + pull_request: + +jobs: + markdown-link-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: gaurav-nelson/github-action-markdown-link-check@v1 + with: + use-quiet-mode: 'yes' + use-verbose-mode: 'yes' + config-file: '.mlc_config.json' +... diff --git a/.gitignore b/.gitignore index 8f8f394..9708199 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ # generated files -*.pdf *.html -!docs/*.html +*.pdf +*.png +*.svg +docs # temporary files .*.swp diff --git a/Makefile b/Makefile index 2b63802..bad9e19 100644 --- a/Makefile +++ b/Makefile @@ -51,13 +51,20 @@ push: clean pull: $(VCS) pull -release: +plantuml: + for f in images/*.plantuml; do \ + plantuml $${f} -tsvg; \ + done + +release: plantuml + mkdir -p docs $(ADOCHTML) -D docs --out-file index.html $(INFILE) $(ADOCHTML) -D docs --out-file CONTRIBUTE.html $(INFILE2) mkdir -p docs/images cp -v images/*.svg docs/images preview: + mkdir -p docs $(ADOCPDF) --out-file docs/preview/$(OUTFILE).pdf $(INFILE) $(ADOCPDF) --out-file docs/preview/$(OUTFILE2).pdf $(INFILE2) $(ADOCHTML) --out-file docs/preview/$(OUTFILE).html $(INFILE) diff --git a/_style/render.adoc b/_style/render.adoc index 2dcc586..f206df4 100644 --- a/_style/render.adoc +++ b/_style/render.adoc @@ -11,6 +11,9 @@ :listing-caption: Listing :imagesdir: images/ +:revnumber: {gitdate} (commit: 
{githash}) +:!last-update-label: + // The following lines could become relevant in the future //// diff --git a/coding_style/README.adoc b/coding_style/README.adoc index 2c8766b..c7c5df3 100644 --- a/coding_style/README.adoc +++ b/coding_style/README.adoc @@ -89,7 +89,7 @@ TIP: use the sign `>-` if it is important that the last line return code doesn't + [%collapsible] ==== -Rationale:: Ansible will `and` the list elements together (https://docs.ansible.coansible/latest/user_guidplaybooks_conditionalhtml#the-when-statement[Ansible UseGuide » Conditionals]). +Rationale:: Ansible will `and` the list elements together (https://docs.ansible.com/ansible/latest/user_guide/playbooks_conditionals.html#basic-conditionals-with-when[Ansible UseGuide » Conditionals]). Multiple conditions that all need to be true (a logical `and`) can also be specified as a list, but beware of bare variables in `when:`. Examples:: + @@ -198,7 +198,7 @@ Dot notation will fail in some cases (such as when a variable name includes a hy Additionally, some key names collide with attributes and methods of Python dictionaries such as `count`, `copy`, `title`, and others (refer to the https://docs.ansible.com/ansible/latest/user_guide/playbooks_variables.html#referencing-key-value-dictionary-variables[Ansible User Guide] for an extended list) Example:: -This https://blog.networktocode.com/post/Exploring-Jinja-Variable-Syntax-in-Ansible[post] provdes an excellent demonstration of how using dot notation syntax can impact your playbooks. +This https://blog.networktocode.com/post/Exploring-Jinja-Variable-Syntax-in-Ansible[post] provides an excellent demonstration of how using dot notation syntax can impact your playbooks. ==== * Do not use `meta: end_play`. @@ -209,7 +209,38 @@ Rationale:: It aborts the whole play instead of a given host (with multiple host If absolutely necessary, consider using `meta: end_host`. 
==== -* Task names can be made dynamic by using variables (wrapped in Jinja2 templates), this helps with reading the logs. +* Task names can be made dynamic by using variables wrapped in Jinja2 templates at the end of the string ++ +[%collapsible] +==== +Rationale:: This can help with reading the logs. +For example, if the task is managing one of several devices, and you want the task name output to show the device being managed. +However, the template must come at the *end* of the string - see (https://ansible-lint.readthedocs.io/rules/name/[Ansible Lint name template rule]). +Note that in some cases, it can make it harder for users to correlate the logs to the code. +For example, if there is a log message like "Manage the disk device /dev/dsk/0001", and the user tries to do something like `grep "Manage the disk device /dev/dsk/0001" rolename/tasks/*.yml` to figure out which task this comes from, they will not find it. +If the template comes at the end of the string, the user will know to omit the device name from `grep`. +A better way to debug is to use `ansible-playbook -vv`, which will show the exact file and line number of the task. + +Example:: +.Do this: +[source,yaml] +---- +tasks: + - name: Manage the disk device {{ storage_device_name }} + some.module: + device: "{{ storage_device_name }}" +---- + +.Don't do this: +[source,yaml] +---- +tasks: + - name: Manage {{ storage_device_name }}, the disk device + some.module: + device: "{{ storage_device_name }}" +---- +==== + * Do not use variables (wrapped in Jinja2 templates) for play names; variables don't get expanded properly there. The same applies to loop variables (by default `item`) in task names within a loop. They, too, don't get properly expanded and hence are not to be used there. @@ -334,7 +365,7 @@ Grouping sets of similar files into a subdirectory of `templates` is allowable, In many cases, if the platform is different, the package name is also different so that using `package` doesn't help a lot. 
Prefer then the more specific `yum`, `dnf` or `apt` module if you anyway need to differentiate. -* Use `float`, `int`, and `bool` filters to "cast" public API variables used in numeric operations in Jinja templates +* Use `float`, `int`, and `bool` filters to "cast" public API variables to ensure type safety, especially for numeric operations in Jinja. + [%collapsible] ==== diff --git a/docs/CONTRIBUTE.html b/docs/CONTRIBUTE.html deleted file mode 100644 index 6ce041a..0000000 --- a/docs/CONTRIBUTE.html +++ /dev/null @@ -1,934 +0,0 @@ - - - - - - - -Contribution guidelines - - - - - - - -
-
-
-
-

Before you suggest automation guidelines, please consider the contribution guidelines layed out in this document.

-
-
-
-
-

1. Writing

-
-
-
    -
  1. -

    The guidelines are written in asciidoc as described by Asciidoctor.

    -
  2. -
  3. -

    each guideline is made of one sentence, as easy to remember as possible, followed by a collapsible description, made of:

    -
    -
      -
    • -

      explanations

      -
    • -
    • -

      rationale

      -
    • -
    • -

      examples

      -
      -

      The result looks then as the following template shows (you may copy & paste):

      -
      -
      -
      -
      == Do this and do not do that is the guideline
      -[%collapsible]
      -====
      -Explanations:: These are explanations
      -
      -Rationale:: This is the rationale
      -
      -Examples:: These are examples
      -+
      -.A mini playbook example
      -[source,yaml]
      -----
      -- name: a mini example of playbook
      -  hosts: all
      -  gather_facts: false
      -  become: false
      -
      -  tasks:
      -
      -  - name: say what we all think
      -    debug:
      -      msg: asciidoctor is {{ my_private_thoughts }}
      -----
      -+
      -Even more examples...
      -====
      -
      -
      -
      - - - - - -
      - - -see how it looks like in the Example section below. -
      -
      -
    • -
    -
    -
  4. -
  5. -

    Those guidelines are grouped into sections and optionally sub-sections, as far as required for maintainability.

    -
  6. -
  7. -

    Those (sub-)sections can be written in their own source file, but then are included with include::directory/file.adoc[leveloffset=1] in the parent section’s file. -This makes sure that all source files are interlinked and can be rendered all together by rendering the top README.adoc, either with asciidoctor or with asciidoctor-pdf.

    -
    - - - - - -
    - - -this contribution file is obviously not meant for inclusion in the overall document. -
    -
    -
  8. -
  9. -

    Each source file has a single title (the line starting with one equal sign) and can be rendered individually (the leveloffset is set such that it fits in the overall headings structure when included).

    -
  10. -
  11. -

    The source code is written as readable as possible in its raw form, without impacting maintainability.

    -
  12. -
  13. -

    We follow the Asciidoc recommended practices.

    -
  14. -
  15. -

    Sentences are written in the present tense form, avoid "should", "must", etc. -For example, "Sentences are written", not "Sentences should be written" or "Sentences must be written". This avoids filler words.

    -
  16. -
  17. -

    The singular "they" is used to avoid the unreadable "he/she/it" construct and still be neutral.

    -
  18. -
-
-
-
-
-

2. Contributing

-
-
-
    -
  1. -

    Just fork the repository, create a Pull Request (PR) and offer your changes.

    -
  2. -
  3. -

    Feel free to review existing PR and give your opinion

    -
  4. -
  5. -

    Also an issue against one of the recommendations is a valid approach

    -
  6. -
-
-
-
-
-

3. Example

-
-
-

This is how one guideline as shown above looks like once rendered:

-
-
-

3.1. Do this and do not do that is the guideline

-
-Details -
-
-
-
Explanations
-
-

These are explanations

-
-
Rationale
-
-

This is the rationale

-
-
Examples
-
-

These are examples

-
-
Listing 1. A mini playbook example
-
-
- name: a mini example of playbook
-  hosts: all
-  gather_facts: false
-  become: false
-
-  tasks:
-
-  - name: say what we all think
-    debug:
-      msg: asciidoctor is {{ my_private_thoughts }}
-
-
-
-

Even more examples…​

-
-
-
-
-
-
-
-
-
-
-

4. Publish for the website

-
-
-

Use for now the following manual command to publish to -the website:

-
-
-
-
asciidoctor -a toc=left -D docs -o index.html README.adoc
-asciidoctor -a toc=left -D docs CONTRIBUTE.adoc
-mkdir -p docs/images
-cp -v images/*.svg docs/images
-
-
-
- - - - - -
- - -it doesn’t seem that there is any much better way to keep links to images correct according to the HTML generation / managed images chapter. -
-
-
-
-
-

5. Creating a PDF

-
-
-

If you run (a current) Fedora Linux, -then you can use the Makefile.

-
-
-
    -
  • -

    make view generates both PDF files and displays the GPA guide

    -
  • -
  • -

    make print prints to your default printer

    -
  • -
  • -

    make spell runs hunspell for spellchecking

    -
  • -
  • -

    -
  • -
-
-
-

Alternatively, use the following manual commands to generate the 2 PDFs:

-
-
-
-
asciidoctor-pdf \
-  --attribute=gitdate=$(git log -1 --date=short --pretty=format:%cd) \
-  --attribute=githash=$(git rev-parse --verify HEAD) \
-  --out-file Good_Practices_for_Ansible.pdf \
-  README.adoc
-asciidoctor-pdf \
-  --attribute=gitdate=$(git log -1 --date=short --pretty=format:%cd) \
-  --attribute=githash=$(git rev-parse --verify HEAD) \
-  --out-file Contributing-to-GPA.pdf \
-  CONTRIBUTE.adoc
-
-
-
-
-
- - - diff --git a/docs/images/ansible_structures.svg b/docs/images/ansible_structures.svg deleted file mode 100644 index ad5ba7a..0000000 --- a/docs/images/ansible_structures.svg +++ /dev/null @@ -1,40 +0,0 @@ -LandscapeWorkflowPlaybook of playbooksTypePlaybook_FunctionRole_ComponentTask fileRole \ No newline at end of file diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index a5acb06..0000000 --- a/docs/index.html +++ /dev/null @@ -1,2845 +0,0 @@ - - - - - - - -Good Practices for Ansible - GPA - - - - - - - -
-
-

1. Introduction

-
-
-

Ansible is simple, flexible, and powerful. Like any powerful tool, there are many ways to use it, some better than others.

-
-
-

This document aims to gather good practices from the field of Ansible practitioners at Red Hat, consultants, developers, and others. -And thus it strives to give any Red Hat employee, partner or customer (or any Ansible user) a guideline from which to start in good conditions their automation journey.

-
-
-

Those are opinionated guidelines based on the experience of many people. -They are not meant to be followed blindly if they don’t fit the reader’s specific use case, organization or needs; -there is a reason why they are called good and not best practices.

-
-
-

The reader of this document is expected to have working practice of Ansible. -If they are new to Ansible, the Getting started section of -the official Ansible documentation is a better place to start.

-
-
-

This document is split in six main sections. -Each section covers a different aspect of automation using Ansible (and in a broader term the whole Red Hat Ansible Automation Platform, including Ansible Tower):

-
-
-
    -
  1. -

    structures: we need to know what to use for which purpose before we can delve into the details, this section explains this.

    -
  2. -
  3. -

    roles: as we recommend to use roles to host the most actual Ansible code, this is also where we’ll cover the more low level aspects of code (tasks, variables, etc…​).

    -
  4. -
  5. -

    collections

    -
  6. -
  7. -

    playbooks

    -
  8. -
  9. -

    inventories

    -
  10. -
  11. -

    plugins

    -
  12. -
-
-
-

Each section is then made of guidelines, one sentence hopefully easy to remember, followed by description, rationale and examples. -The HTML version of this document makes the content collapsable so that all guidelines can be seen at once in a very overseeable way, for the reader to uncollapse the content of guidelines they is interested in.

-
-
-

A rationale is expected for each good practice, with a reference if applicable. -It is really helpful to know not only how to do certain things, but why to do them in this way. -It will also help with further revisions of the standards as some items may become obsolete or no longer applicable. -If the reason is not included, there is a risk of keeping items that are no longer applicable, or alternatively blindly removing items that should be kept. -It also has great educational value for understanding how things actually work (or how they don’t).

-
-
-

1.1. Where to get and maintain this document

-
-

This document is published to https://redhat-cop.github.io/automation-good-practices/, it is open source and its source code is maintained at https://github.com/redhat-cop/automation-good-practices/.

-
-
-
-
-
-

2. Automation structures

-
-
-

Before we start to describe roles, playbooks, etc, we need to decide which one we use for what. -This section is meant for topics which span across multiple structures and don’t fit nicely within one.

-
-
-

2.1. Define which structure to use for which purpose

-
-Details -
-
-
-
Explanations
-
-

define for which use case to use roles, playbooks, potentially workflows (in Ansible Tower/AWX), and how to split the code you write.

-
-
Rationale
-
-

especially when writing automation in a team, it is important to have a certain level of consistence and make sure everybody has the same understanding. -By lack of doing so, your automation becomes unreadable and difficult to grasp for new members or even for existing members.

-
-

This structure will also help you to have a consistent level of modelization so that re-usability becomes easier. -If one team member uses roles where another one uses playbooks, they will both struggle to reuse the code of each other. -Metaphorically speaking, only if stones have been cut at roughly the same size, can they be properly used to build a house.

-
-
-
Examples
-
-

The following is only one example of how to structure your content but has proven robust enough on multiple occasions.

-
-
-a hierarchy of landscape type function and component -
-
Figure 1. Structure of Automation
-
-
-
    -
  • -

    a landscape is anything you want to deploy at once, the underlay of your cloud, a three tiers application, a complete application cluster…​ -This level is represented at best by a Tower/AWX workflow, potentially by a "playbook of playbooks", i.e. a playbook made of imported type playbooks, as introduced next.

    -
  • -
  • -

    a type must be defined such that each managed host has one and only one type, applicable using a unique playbook.

    -
  • -
  • -

    each type is then made of multiple functions, represented by roles, so that the same function used by the same type can be re-used, written only once.

    -
  • -
  • -

    and finally components are used to split a function in maintainable bits. By default a component is a task file within the function-role, if the role becomes too big, there is a case for splitting the function role into multiple component roles.

    -
    - - - - - -
    - - -if functions are defined mostly for re-usability purposes, components are more used for maintainability / readability purposes. A re-usable component might be a candidate for promotion to a function. -
    -
    -
    -

    Let’s have a more concrete example to clarify:

    -
    -
  • -
  • -

    as already written, a landscape could be a three tier application with web-front-end, middleware and database. -We would probably create a workflow to deploy this landscape at once.

    -
  • -
  • -

    our types would be relatively obvious here as we would have "web-front-end server", "middleware server" and "database server". -Each type can be fully deployed by one and only one playbook (avoid having numbered playbooks and instructions on how to call them one after the other).

    -
  • -
  • -

    each server type is then made up of one or more functions, each implemented as a role. -For example, the middleware server type could be made of a "virtual machine" (to create the virtual machine hosting the middleware server), a "base Linux OS" and a "JBoss application server" function.

    -
  • -
  • -

    and then the base OS role could be made of multiple components (DNS, NTP, SSH, etc), each represented by a separate tasks/{component}.yml file, included or imported from the tasks/main.yml file of the function-role. -If a component becomes too big to fit within one task file, it might make sense that it gets its own component-role, included from the function-role.

    -
    - - - - - -
    - - -in terms of re-usability, see how you could simply create a new "integrated three tiers server" type (e.g. for test purposes), by just re-combining the "virtual machine", "base Linux OS", "JBoss application server", "PostgreSQL database" and "Apache web-server" function-roles into one new playbook. -
    -
    -
  • -
-
-
-
-
-
-

Beware that those rules, once defined, shouldn’t be applied too strictly. -There can always be reasons for breaking the rules, and sometimes the discussion you can have in the team to decide what is what is more important. -For example if a "hardened Linux OS" and a "normal Linux OS" are two different functions, or the same function with different parameters. You could consider SSH to be a function on its own and not a component of the base OS. -Also, external re-usable roles and collections, obviously not respecting your rules, might force you to bend them. -Important is to break the rules not by ignorance of those but because of good and practical reasons. -Respecting the rules is to know and acknowledge them, not to follow them blindly even if they don’t make sense. -As long as exceptions are discussed openly in the team, they won’t hurt.

-
-
-
-
-
-
-
-

3. Roles Good Practices for Ansible

-
-
- - - - - -
- - -this section has been imported "as-is" from the OASIS metastandards repository and still requires re-formatting to fit the overall structure. -
-
-
-

3.1. Background

-
-Details -
-
-

The goal of the Ansible Metateam project (specifically, the Linux System Roles project) is to provide a stable and consistent user interface to multiple operating systems (multiple versions of RHEL in the downstream RHEL System Roles package, additionally CentOS, Fedora at least). -Stable and consistent means that the same Ansible playbook will be usable to manage the equivalent functionality in the supported versions without the administrator (the user of the role) being forced to change anything in the playbook (the roles should serve as abstractions to shield the administrator from differences). -Of course, this means that the interface of the roles should be itself stable (i.e. changing only in a backward compatible way). -This implies a great responsibility in the design of the interface, because the interface, unlike the underlying implementation, can not be easily changed.

-
-
-

The differences in the underlying operating systems that the roles need to compensate for are basically of two types:

-
-
-
    -
  • -

    Trivial differences like changed names of packages, services, changed location of configuration files. -Roles must deals with those by using internal variables based on the OS defaults. -This is fairly simple, but still it brings value to the user, because they then do not have to worry about keeping up with such trivial changes.

    -
  • -
  • -

    Change of the underlying implementation of a given functionality. -Quite often, there are multiple packages/components implementing the same functionality. -Classic examples are the various MTAs (sendmail, postfix, qmail, exim), FTP daemons, etc. In the context of Linux System Roles, we call them “providers”. -The goal of the roles is to abstract even such differences, so that when the OS changes to a different component (provider), the role continues to work. -An example is time synchronization, where RHEL used to use the ntpd package, then chrony was introduced and became the default, but both components have been shipped in RHEL 6 and RHEL 7, until finally ntpd was dropped from RHEL 8, leaving only chrony. -A role covering time synchronization should therefore support both components with the same interface, and on systems which ship both components, both should be supported. -The appropriate supported component should be automatically selected on systems that ship only one of them. -This covers several related use cases:

    -
    -
      -
    • -

      Users that want to manage multiple major releases of the system simultaneously with a single playbook.

      -
    • -
    • -

      Users that want to migrate to a new version of the system without changing their automation (playbook).

      -
    • -
    • -

      Users who want to switch to a different provider in the same version of the OS (like switching from ntpd to chrony to RHEL 7) and keep the same playbook.

      -
    • -
    -
    -
  • -
-
-
-

Designing the interface in the latter case is difficult because it has to be sufficiently abstract to cover different providers. -We, for example, do not provide an email role in the Linux System Roles project, only a postfix role, because the underlying implementations (sendmail, postfix) were deemed to be too divergent. -Generally, an abstract interface should be something that should be always aimed for though, especially if there are multiple providers in use already, and in -particular when the default provider is changing or is known to be likely to change in the next major releases.

-
-
-
-
-
-

3.2. Basics

-
-Details -
-
-
    -
  • -

    Every repository in the AGP-roles namespace should be a valid Ansible Galaxy compatible role with the exception of any whose names begin with "meta_", such as this one.

    -
  • -
  • -

    New roles should be initiated in line with the skeleton directory, which has standard boilerplate code for a Galaxy-compatible Ansible role and some enforcement around these standards

    -
  • -
  • -

    Use semantic versioning for Git release tags. - Use 0.y.z before the role is declared stable (interface-wise). - Although it has not been a problem so far for linux system roles, since they use strict X.Y.Z versioning, you should be aware that there are some -restrictions for Ansible -Galaxy and Automation Hub. - The versioning must be in strict X.Y.Z[ab][W] format, where X, Y, and Z are integers.

    -
  • -
-
-
-
-
-
-

3.3. Interface design considerations

-
-

What should a role do and how can a user tell it what to do.

-
-
-

3.3.1. Basic design

-
-Details -
-
-

Try to design the interface focused on the functionality, not on the software implementation behind it. -This will help abstracting differences between different providers (see above), and help the user to focus on the functionality, not on technical details.

-
-
-
-
-
-

3.3.2. Naming things

-
-Details -
-
-
    -
  • -

    All defaults and all arguments to a role should have a name that begins with the role name to help avoid collision with other names. -Avoid names like packages in favor of a name like foo_packages.

    -
    -
    -
    Rationale
    -
    -

    Ansible has no namespaces, doing so reduces the potential for conflicts and makes clear what role a given variable belongs to.)

    -
    -
    -
    -
  • -
  • -

    Same argument applies for modules provided in the roles, they also need a $ROLENAME_ prefix: -foo_module. While they are usually implementation details and not intended for direct use in playbooks, the unfortunate fact is that importing a role makes them available to the rest of the playbook and therefore creates opportunities for name collisions.

    -
  • -
  • -

    Moreover, internal variables (those that are not expected to be set by users) are to be prefixed by two underscores: __foo_variable.

    -
    -
    -
    Rationale
    -
    -

    role variables, registered variables, custom facts are usually intended to be local to the role, but in reality are not local to the role - as such a concept does not exist, and pollute the global namespace. -Using the name of the role reduces the potential for name conflicts and using the underscores clearly marks the variables as internals and not part of the common interface. -The two underscores convention has prior art in some popular roles like -geerlingguy.ansible-role-apache). -This includes variables set by set_fact and register, because they persist in the namespace after the role has finished!

    -
    -
    -
    -
  • -
  • -

    Prefix all tags within a role with the role name or, alternatively, a "unique enough" but descriptive prefix.

    -
  • -
-
-
-
-
-
-

3.3.3. Providers

-
-Details -
-
-

When there are multiple implementations of the same functionality, we call them “providers”. -A role supporting multiple providers should have an input variable called $ROLENAME_provider. -If this variable is not defined, the role should detect the currently running provider on the system, and respect it.

-
-
-
-
Rationale
-
-

users can be surprised if the role changes the provider if they are running one already. -If there is no provider currently running, the role should select one according to the OS version.

-
-
Example
-
-

on RHEL 7, chrony should be selected as the provider of time synchronization, unless there is ntpd already running on the system, or user requests it specifically. -Chrony should be chosen on RHEL 8 as well, because it is the only provider available.

-
-
-
-
-

The role should set a variable or custom fact called $ROLENAME_provider_os_default to the appropriate default value for the given OS version.

-
-
-
-
Rationale
-
-

users may want to set all their managed systems to a consistent -state, regardless of the provider that has been used previously. -Setting $ROLENAME_provider would achieve it, but is suboptimal, because it requires selecting the appropriate value by the user, and if the user has multiple system versions managed by a single playbook, a common value supported by all of them may not even exist. -Moreover, after a major upgrade of their systems, it may force the users to change their playbooks to change their $ROLENAME_provider setting, if the previous value is not supported anymore. -Exporting $ROLENAME_provider_os_default allows the users to set $ROLENAME_provider: "{{ $ROLENAME_provider_os_default }}" (thanks to the lazy variable evaluation in Ansible) and thus get a consistent setting for all the systems of the given OS version without having to decide what the actual value is - the decision is delegated to the role).

-
-
-
-
-
-
-
-
-

3.4. Implementation considerations

-
-

3.4.1. Role Structure

-
-Details -
-
-

Avoid testing for distribution and version in tasks. -Rather add a variable file to "vars/" for each supported distribution and version with the variables that need to change according to the distribution and version. -This way it is easy to add support to a new distribution by -simply dropping a new file in to "vars/", see below -Supporting multiple distributions and versions. -See also Vars vs Defaults which mandates "Avoid embedding large lists or 'magic values' directly into the playbook." -Since distribution-specific values are kind of "magic values", it applies to them. -The same logic applies for providers: a role can load a provider-specific variable file, include a provider-specific task file, or both, as needed. -Consider making paths to templates internal variables if you need different templates for different distributions.

-
-
-
-
-
-

3.4.2. Check Mode and Idempotency Issues

-
-Details -
-
-
    -
  • -

    The role should work in check mode, meaning that first of all, they should not fail check mode, and they should also not report changes when there are no changes to be done. -If it is not possible to support it, please state the fact and provide justification in the documentation. -This applies to the first run of the role.

    -
  • -
  • -

    Reporting changes properly is related to the other requirement: idempotency. -Roles should not perform changes when applied a second time to the same system with the same parameters, and they should not report that changes have been done if they have not been done. -Due to this, using command: is problematic, as it always reports changes. -Therefore, override the result by using changed_when:

    -
  • -
  • -

    Concerning check mode, one usual obstacle to supporting it are registered variables. -If there is a task which registers a variable and this task does not get executed (e.g. because it is a command: or another task which is not properly idempotent), the variable will not get registered and further accesses to it will fail (or worse, use the previous value, if the role has been applied before in the play, because variables are global and there is no way to unregister them). -To fix, either use a properly idempotent module to obtain the information (e.g. instead of using command: cat to read file into a registered variable, use slurp and apply .content|b64decode to the result like here), or apply proper check_mode: and changed_when: attributes to the task. -more_info.

    -
  • -
  • -

    Another problem are commands that you need to execute to make changes. -In check mode, you need to test for changes without actually applying them. -If the command has some kind of "--dry-run" flag to enable executing without making actual changes, use it in check_mode (use the variable ansible_check_mode to determine whether we are in check mode). -But you then need to set changed_when: according to the command status or output to indicate changes. -See (https://github.com/linux-system-roles/selinux/pull/38/files#diff-2444ad0870f91f17ca6c2a5e96b26823L101) for an example.

    -
  • -
  • -

    Another problem is using commands that get installed during the install phase, which is skipped in check mode. -This will make check mode fail if the role has not been executed before (and the packages are not there), but does the right thing if check mode is executed after normal mode.

    -
  • -
  • -

    To view the reasoning for why supporting check mode in the first execution may not be worthwhile: see here. -If this is to be supported, see hhaniel’s proposal, which seems to properly guard even against such cases.

    -
  • -
-
-
-
-
-
-

3.4.3. Supporting multiple distributions and versions

-
-
3.4.3.1. Platform specific variables
-
-Details -
-
-

You normally use vars/main.yml (automatically included) to set variables -used by your role. -If some variables need to be parameterized according to distribution and version (name of packages, configuration file paths, names of services), use this in the beginning of your tasks/main.yml:

-
-
-
-
- name: Set platform/version specific variables
-  include_vars: "{{ __rolename_vars_file }}"
-  loop:
-    - "{{ ansible_facts['os_family'] }}.yml"
-    - "{{ ansible_facts['distribution'] }}.yml"
-    - "{{ ansible_facts['distribution'] }}_{{ ansible_facts['distribution_major_version'] }}.yml"
-    - "{{ ansible_facts['distribution'] }}_{{ ansible_facts['distribution_version'] }}.yml"
-  vars:
-    __rolename_vars_file: "{{ role_path }}/vars/{{ item }}"
-  when: __rolename_vars_file is file
-
-
-
-

The files in the loop are in order from least specific to most specific:

-
-
-
    -
  • -

    os_family covers a group of closely related platforms (e.g. RedHat covers RHEL, CentOS, Fedora)

    -
  • -
  • -

    distribution (e.g. Fedora) is more specific than os_family

    -
  • -
  • -

    distribution_distribution_major_version (e.g. RedHat_8) is more specific than distribution

    -
  • -
  • -

    distribution_distribution_version (e.g. RedHat_8.3) is the most specific

    -
  • -
-
-
-

See Commonly Used Facts for an explanation of the facts and their common values.

-
-
-

Each file in the loop list will allow you to add or override variables to specialize the values for platform and/or version. -Using the when: __rolename_vars_file is file test means that you do not have to provide all of the vars/ files, only the ones you need. -For example, if every platform except Fedora uses srv_name for the service name, you can define myrole_service: srv_name in vars/main.yml then define myrole_service: srv2_name in vars/Fedora.yml. -In cases where this would lead to duplicate vars files for similar distributions (e.g. CentOS 7 and RHEL 7), use symlinks to avoid the duplication.

-
-
- - - - - -
- - -With this setup, files can be loaded twice. -For example, on Fedora, the distribution_major_version is the same as distribution_version so the file vars/Fedora_31.yml will be loaded twice if you are managing a Fedora 31 host. -If distribution is RedHat then os_family will also be RedHat, -and vars/RedHat.yml will be loaded twice. -This is usually not a problem - you will be replacing the variable with the same value, and the performance hit is negligible. -If this is a problem, construct the file list as a list variable, and filter the variable passed to loop using the unique filter (which preserves the order): -
-
-
-
-
- name: Set vars file list
-  set_fact:
-    __rolename_vars_file_list:
-      - "{{ ansible_facts['os_family'] }}.yml"
-      - "{{ ansible_facts['distribution'] }}.yml"
-      - "{{ ansible_facts['distribution'] }}_{{ ansible_facts['distribution_major_version'] }}.yml"
-      - "{{ ansible_facts['distribution'] }}_{{ ansible_facts['distribution_version'] }}.yml"
-
-- name: Set platform/version specific variables
-  include_vars: "{{ __rolename_vars_file }}"
-  loop: "{{ __rolename_vars_file_list | unique | list }}"
-  vars:
-    __rolename_vars_file: "{{ role_path }}/vars/{{ item }}"
-  when: __rolename_vars_file is file
-
-
-
-

Or define your __rolename_vars_file_list in your vars/main.yml.

-
-
-
-
-
-
3.4.3.2. Platform specific tasks
-
-Details -
-
-

Platform specific tasks, however, are different. -You probably want to perform platform specific tasks once, for the most specific match. -In that case, use lookup('first_found') with the file list in order of most specific to least specific, including a "default":

-
-
-
-
- name: Perform platform/version specific tasks
-  include_tasks: "{{ lookup('first_found', __rolename_ff_params) }}"
-  vars:
-    __rolename_ff_params:
-      files:
-        - "{{ ansible_facts['distribution'] }}_{{ ansible_facts['distribution_version'] }}.yml"
-        - "{{ ansible_facts['distribution'] }}_{{ ansible_facts['distribution_major_version'] }}.yml"
-        - "{{ ansible_facts['distribution'] }}.yml"
-        - "{{ ansible_facts['os_family'] }}.yml"
-        - "default.yml"
-      paths:
-        - "{{ role_path }}/tasks/setup"
-
-
-
-

Then you would provide tasks/setup/default.yml to do the generic setup, and e.g. tasks/setup/Fedora.yml to do the Fedora specific setup. -The tasks/setup/default.yml is required in order to use lookup('first_found'), which will give an error if no file is found.

-
-
-

If you want to have the "use first file found" semantics, but do not want to have to provide a default file, add skip: true:

-
-
-
-
- name: Perform platform/version specific tasks
-  include_tasks: "{{ lookup('first_found', __rolename_ff_params) }}"
-  vars:
-    __rolename_ff_params:
-      files:
-        - "{{ ansible_facts['distribution'] }}_{{ ansible_facts['distribution_version'] }}.yml"
-        - "{{ ansible_facts['os_family'] }}.yml"
-      paths:
-        - "{{ role_path }}/tasks/setup"
-      skip: true
-
-
-
-

NOTE:

-
-
-
    -
  • -

    Use include_tasks or include_vars with lookup('first_found') instead of with_first_found. -loop is not needed - the include forms take a string or a list directly.

    -
  • -
  • -

    Always specify the explicit, absolute path to the files to be included, -using {{ role_path }}/vars or {{ role_path }}/tasks, when using these -idioms. - See below "Ansible Best Practices" for more information.

    -
  • -
  • -

    Use the ansible_facts['name'] bracket notation rather than the ansible_facts.name or ansible_name form. -For example, use ansible_facts['distribution'] instead of ansible_distribution or ansible.distribution. -The ansible_name form relies on fact injection, which can break if there is already a fact of that name. -Also, the bracket notation is what is used in Ansible documentation such as Commonly Used Facts and Operating System and Distribution Variance.

    -
  • -
-
-
-
-
-
-
-

3.4.4. Supporting multiple providers

-
-Details -
-
-

Use a task file per provider and include it from the main task file, like this example from storage:

-
-
-
-
- name: include the appropriate provider tasks
-  include_tasks: "main_{{ storage_provider }}.yml"
-
-
-
-

The same process should be used for variables (not defaults, as defaults can -not be loaded according to a variable). -You should guarantee that a file exists for each provider supported, or use an explicit, absolute path using role_path. -See below "Ansible Best Practices" for more information.

-
-
-
-
-
-

3.4.5. Generating files from templates

-
-Details -
-
-
    -
  • -

    Add {{ ansible_managed | comment }} at the top of the template file to indicate that the file is managed by Ansible roles, while making sure that multi-line values are properly commented. -For more information, see Adding comments to files.

    -
  • -
  • -

    When commenting, don’t include anything like "Last modified: {{ date }}". -This would change the file at every application of the role, even if it doesn’t need to be changed for other reasons, and thus break proper change reporting.

    -
  • -
  • -

    Use standard module parameters for backups, keep it on unconditionally (backup: true), until there is a user request to have it configurable.

    -
  • -
  • -

    Make prominently clear in the HOWTO (at the top) what settings/configuration files are replaced by the role instead of just modified.

    -
  • -
  • -

    Use {{ role_path }}/subdir/ as the filename prefix when including files if the name has a variable in it.

    -
    -
    -
    Rationale
    -
    -

    your role may be included by another role, and if you specify a relative path, the file could be found in the including role. -For example, if you have something like include_vars: "{{ ansible_facts['distribution'] }}.yml" and you do not provide every possible vars/{{ ansible_facts['distribution'] }}.yml in your role, Ansible will look in the including role for this file. -Instead, to ensure that only your role will be referenced, use include_vars: "{{role_path}}/vars/{{ ansible_facts['distribution'] }}.yml". -Same with other file based includes such as include_tasks. -See Ansible Developer Guide » Ansible architecture » The Ansible Search Path for more information.

    -
    -
    -
    -
  • -
-
-
-
-
-
-

3.4.6. Vars vs Defaults

-
-Details -
-
-
    -
  • -

    Avoid embedding large lists or "magic values" directly into the playbook. -Such static lists should be placed into the vars/main.yml file and named appropriately

    -
  • -
  • -

    Every argument accepted from outside of the role should be given a default value in defaults/main.yml. -This allows a single place for users to look to see what inputs are expected. -Document these variables in the role’s README.md file copiously

    -
  • -
  • -

    Use the defaults/main.yml file in order to avoid use of the default Jinja2 filter within a playbook. -Using the default filter is fine for optional keys on a dictionary, but the variable itself should be defined in defaults/main.yml so that it can have documentation written about it there and so that all arguments can easily be located and identified.

    -
  • -
  • -

    Don’t define defaults in defaults/main.yml if there is no meaningful default. -It is better to have the role fail if the variable isn’t defined than have it do something dangerously wrong. -Still do add the variable to defaults/main.yml but commented out, so that there is one single source of input variables.

    -
  • -
  • -

    Avoid giving default values in vars/main.yml as such values are very high in the precedence order and are difficult for users and consumers of a role to override.

    -
  • -
  • -

    As an example, if a role requires a large number of packages to install, but could also accept a list of additional packages, then the required packages should be placed in vars/main.yml with a name such as foo_packages, and the extra packages should be passed in a variable named foo_extra_packages, which should default to an empty array in defaults/main.yml and be documented as such.

    -
  • -
-
-
-
-
-
-

3.4.7. Documentation conventions

-
-Details -
-
-
    -
  • -

    Use fully qualified role names in examples, like: linux-system-roles.$ROLENAME (with the Galaxy prefix).

    -
  • -
  • -

    Use RFC 5737, 7042 and 3849 addresses in examples.

    -
  • -
  • -

    Modules should have complete metadata, documentation, example and return blocks as described in the Ansible docs.

    -
  • -
-
-
-
-
-
-

3.4.8. Don’t use host group names or at least make them a parameter

-
-Details -
-
-
-
Explanations
-
-

It is relatively common to use (inventory) group names in roles:

-
-
    -
  • -

    either to loop through the hosts in the group, generally in a cluster context

    -
  • -
  • -

    or to validate that a host is in a specific group

    -
    -

    Instead, store the host name(s) in a (list) variable, or at least make the group name a parameter of your role. -You can always set the variable at group level to avoid repetitions.

    -
    -
  • -
-
-
-
Rationale
-
-

Groups are a feature of the data in your inventory, meaning that you mingle data with code when you use those groups in your code. -Rely on the inventory-parsing process to provide your code with the variables it needs instead of enforcing a specific structure of the inventory. -Not all inventory sources are flexible enough to provide exactly the expected group name. -Even more importantly, in a cluster context for example, if the group name is fixed, you can’t describe (and hence automate) more than one cluster in each inventory. -You can’t possibly have multiple groups with the same name in the same inventory. -On the other hand, variables can have any kind of value for each host, so that you can have as many clusters as you want.

-
-
Examples
-
-

Assuming we have the following inventory (not according to recommended practices for sake of simplicity):

-
-
Listing 1. An inventory with two clusters
-
-
[cluster_group_A]
-host1 ansible_host=localhost
-host2 ansible_host=localhost
-host3 ansible_host=localhost
-
-[cluster_group_B]
-host4 ansible_host=localhost
-host5 ansible_host=localhost
-host6 ansible_host=localhost
-
-[cluster_group_A:vars]
-cluster_group_name=cluster_group_A
-
-[cluster_group_B:vars]
-cluster_group_name=cluster_group_B
-
-
-
-

We can then use one of the following three approaches in our role (here as playbook, again for sake of simplicity):

-
-
-
Listing 2. A playbook showing how to loop through a group
-
-
---
-- name: show how to loop through a set of groups
-  hosts: cluster_group_?
-  gather_facts: false
-  become: false
-
-  tasks:
-    - name: the loop happens for each host, might be too much
-      debug:
-        msg: do something with {{ item }}
-      loop: "{{ groups[cluster_group_name] }}"
-    - name: the loop happens only for the first host in each group
-      debug:
-        msg: do something with {{ item }}
-      loop: "{{ groups[cluster_group_name] }}"
-      when: inventory_hostname == groups[cluster_group_name][0]
-    - name: make the first host of each group fail to simulate non-availability
-      assert:
-        that: inventory_hostname != groups[cluster_group_name][0]
-    - name: the loop happens only for the first _available_ host in each group
-      debug:
-        msg: do something with {{ item }}
-      loop: "{{ groups[cluster_group_name] }}"
-      when: >-
-        inventory_hostname == (groups[cluster_group_name]
-        | intersect(ansible_play_hosts))[0]
-
-
-
-

The first approach is probably best to create a cluster configuration file listing all cluster’s hosts. -The other approaches are good to make sure each action is performed only once, but this comes at the price of many skips. -The second one fails if the first host isn’t reachable (which might be what you’d want anyway), and the last one has the best chance to be executed once and only once, even if some hosts aren’t available.

-
-
- - - - - -
- - -the variable cluster_group_name could have a default group name value in your role, of course properly documented, for simple use cases. -
-
-
-

Overall, it is best to avoid this kind of constructs if the use case permits, as they are clumsy.

-
-
-
-
-
-
-
-
-
-

3.5. References

-
-Details -
-
-

Links that contain additional standardization information that provide context, -inspiration or contrast to the standards described above.

-
- -
-
-
-
-
-
-

4. Collections good practices

-
-
- - - - - -
- - -Work in Progress…​ -
-
-
-
-
-

5. Playbooks good practices

-
-
-

5.1. Keep your playbooks as simple as possible

-
-Details -
-
-
-
Explanations
-
-

Don’t put too much logic in your playbook, put it in your roles (or even in custom modules), and try to limit your playbooks to a list of roles.

-
-
Rationale
-
-

Roles are meant to be re-used and the structure helps you to make your code re-usable. -The more code you put in roles, the higher the chances you, or others, can reuse it. -Also, if you follow the type-function pattern, you can very easily create new (type) playbooks by just re-shuffling the roles. -This way you can create a playbook for each purpose without having to duplicate a lot of code. -This, in turn, also helps with the maintainability as there is only a single place where necessary changes need to be implemented, and that is in the role.

-
-
Examples
-
-
-
Listing 3. An example of playbook containing only roles
-
-
- name: a playbook can solely be a list of roles
-  hosts: all
-  gather_facts: false
-  become: false
-
-  roles:
-    - role1
-    - role2
-    - role3
-
-
-
- - - - - -
- - -we’ll explain later why there might be a case for using include_role/import_role tasks instead of the role section. -
-
-
-
-
-
-
-
-
-

5.2. Use either the tasks or roles section in playbooks, not both

-
-Details -
-
-
-
Explanations
-
-

A playbook can contain pre_tasks, roles, tasks and post_tasks sections. -Avoid using both roles and tasks sections, the latter possibly containing import_role or include_role tasks.

-
-
Rationale
-
-

The order of execution between roles and tasks isn’t obvious, and hence mixing them should be avoided.

-
-
Examples
-
-

Either you need only static importing of roles and you can use the roles section, or you need dynamic inclusion and you should use only the tasks section. -Of course, for very simple cases, you can just use tasks without roles.

-
-
-
-
-
-
-
-

5.3. Use tags cautiously either for roles or for complete purposes

-
-Details -
-
-
-
Explanations
-
-

limit your usage of tags to two aspects:

-
-
    -
  1. -

    either tags called like the roles to switch on/off single roles,

    -
  2. -
  3. -

    or specific tags to reach a meaningful purpose

    -
  4. -
-
-
-
-
-
-

Don’t set tags which can’t be used on their own, or can be destructive if used on their own.

-
-
-

Also document tags and their purpose(s).

-
-
-
-
Rationale
-
-

there is nothing worse than tags which can’t be used alone; they bear the risk of destroying something by being called standalone. -An acceptable exception is the pattern to use the role name as tag name, which can be useful while developing the playbook to test, or exclude, individual roles.

-
-

Important is that your users don’t need to learn the right sequence of tags necessary to get a meaningful result, one tag should be enough.

-
-
-
Examples
-
-
-
Listing 4. An example of playbook importing roles with tags
-
-
- name: a playbook can be a list of roles imported with tags
-  hosts: all
-  gather_facts: false
-  become: false
-
-  tasks:
-    - name: import role1
-      import_role:
-        name: role1
-      tags:
-        - role1
-        - deploy
-    - name: import role2
-      import_role:
-        name: role2
-      tags:
-        - role2
-        - deploy
-        - configure
-    - name: import role3
-      import_role:
-        name: role3
-      tags:
-        - role3
-        - configure
-
-
-
-

You see that each role can be skipped/run individually, but also that the tags deploy and configure can be used to do something we’ll assume to be meaningful, without having to explain at length what they do.

-
-
-

The same approach is also possible with include_role but requires additionally to apply the same tags to the role’s tasks, which doesn’t make the code easier to read:

-
-
-
Listing 5. An example of playbook including roles with tags
-
-
- name: a playbook can be a list of roles included with tags applied
-  hosts: all
-  gather_facts: false
-  become: false
-
-  tasks:
-    - name: include role1
-      include_role:
-        name: role1
-        apply:
-          tags:
-            - role1
-            - deploy
-      tags:
-        - role1
-        - deploy
-    - name: include role2
-      include_role:
-        name: role2
-        apply:
-          tags:
-            - role2
-            - deploy
-            - configure
-      tags:
-        - role2
-        - deploy
-        - configure
-    - name: include role3
-      include_role:
-        name: role3
-        apply:
-          tags:
-            - role3
-            - configure
-      tags:
-        - role3
-        - configure
-
-
-
-
-
-
-
-
-
-
-
-

6. Inventories Good Practices for Ansible

-
-
-

6.1. Identify your Single Source(s) of Truth and use it/them in your inventory

-
-Details -
-
-
-
Explanations
-
-

A Single Source of Truth (SSOT) is the place where the "ultimate" truth about a certain data is generated, stored and maintained. -There can be more than one SSOT, each for a different piece of information, but they shouldn’t overlap and even less conflict. -As you create your inventory, you identify these SSOTs and combine them into one inventory using dynamic inventory sources (we’ll see how later on). -Only the aspects which are not already provided by other sources are kept statically in your inventory. -Doing this, your inventory becomes another source of truth, but only for the data it holds statically, because there is no other place to keep it.

-
-
Rationale
-
-

You limit your effort to maintain your inventory to its absolute minimum and you avoid generating potentially conflicting information with the rest of your IT.

-
-
Examples
-
-

You can typically identify three kinds of candidates as SSOTs:

-
-
    -
  • -

    technical ones, where your managed devices live anyway, like a cloud or virtual manager (OpenStack, RHV, Public Cloud API, …​) or management systems (Satellite, monitoring systems, …​). Those sources provide you with technical information like IP addresses, OS type, etc.

    -
  • -
  • -

    managed ones, like a Configuration Management Database (CMDB), where your IT anyway manages a lot of information of use in an inventory. A CMDB provides you with more organizational information, like owner or location, but also with "to-be" technical information.

    -
  • -
  • -

    the inventory itself, only for the data which doesn’t exist anywhere else.

    -
    -

    Ansible provides a lot of inventory plugins to pull data from those sources and they can be combined into one big inventory. -This gives you a complete model of the environment to be automated, with limited effort to maintain it, and no confusion about where to modify it to get the result you need.

    -
    -
  • -
-
-
-
-
-
-
-
-
-

6.2. Differentiate clearly between "As-Is" and "To-Be" information

-
-Details -
-
-
-
Explanations
-
-

As you combine multiple sources, some will represent:

-
-
    -
  • -

    discovered information grabbed from the existing environment, this is the "As-Is" information.

    -
  • -
  • -

    managed information entered in a tool, expressing the state to be reached, hence the "To-Be" information.

    -
    -

    In general, the focus of an inventory is on the managed information because it represents the desired state you want to reach with your automation. This said, some discovered information is required for the automation to work.

    -
    -
  • -
-
-
-
Rationale
-
-

Mixing up these two kind of information can lead to your automation taking the wrong course of action by thinking that the current situation is aligned with the desired state. -That can make your automation go awry and your automation engineers confused. -There is a reason why Ansible makes the difference between "facts" (As-Is) and "variables" (To-Be), and so should you. -In the end, automation is making sure that the As-Is situation complies to the To-Be description.

-
- - - - - -
- - -many CMDBs have failed because they don’t respect this principle. -This and the lack of automation leads to a mix of unmaintained As-Is and To-Be information with no clear guideline on how to keep them up-to-date, and no real motivation to do so. -
-
-
-
Examples
-
-

The technical tools typically contain a lot of discovered information, like an IP address or the RAM size of a VM. -In a typical cloud environment, the IP address isn’t part of the desired state, it is assigned on the fly by the cloud management layer, so you can only get it dynamically from the cloud API and you won’t manage it. -In a more traditional environment nevertheless, the IP address will be static, managed more or less manually, so it will become part of your desired state. -In this case, you shouldn’t use the discovered information or you might not realize that there is a discrepancy between As-Is and To-Be.

-
-

The RAM size of a VM will always be present in two flavours, e.g. As-Is coming from the technical source and To-Be coming from the CMDB, or your static inventory, and you shouldn’t confuse them. -If you fail to do so, your automation might not correct the size of the VM where it should have aligned the As-Is with the To-Be.

-
-
-
-
-
-
-
-
-

6.3. Define your inventory as structured directory instead of single file

-
-Details -
-
-
-
Explanations
-
-

Everybody has started with a single file inventory in ini-format (the courageous ones among us in YAML format), combining list of hosts, groups and variables. -An inventory can nevertheless be also a directory containing:

-
-
    -
  • -

    list(s) of hosts

    -
  • -
  • -

    list(s) of groups, with sub-groups and hosts belonging to those groups

    -
  • -
  • -

    dynamic inventory plug-ins configuration files

    -
  • -
  • -

    dynamic inventory scripts (deprecated but still simple to use)

    -
  • -
  • -

    structured host_vars directories

    -
  • -
  • -

    structured group_vars directories

    -
    -

    The recommendation is to start with such a structure and extend it step by step.

    -
    -
  • -
-
-
-
Rationale
-
-

It is the only way to simply combine multiple sources into one inventory, without the trouble of calling ansible with multiple -i {inventory_file} parameters, and keep the door open for extending it with dynamic elements.

-
-

It is also simpler to maintain in a Git repository with multiple maintainers as the chance to get a conflict is reduced because the information is spread among multiple files. -You can drop roles' defaults/main.yml file into the structure and adapt it to your needs very quickly.

-
-
-

And finally it gives you a better overview of what is in your inventory without having to dig deeply into it, because already the structure (as revealed with tree or find) gives you a first idea of where to search what. This makes on-boarding of new maintainers a lot easier.

-
-
-
Examples
-
-

The following is a complete inventory as described before. -You don’t absolutely need to start at this level of complexity, but the experience shows that once you get used to it, it is actually a lot easier to understand and maintain than a single file.

-
-
Listing 6. Tree of a structured inventory directory
-
-
inventory_example/  (1)
-├── dynamic_inventory_plugin.yml  (2)
-├── dynamic_inventory_script.py  (3)
-├── groups_and_hosts  (4)
-├── group_vars/  (5)
-│   ├── alephs/
-│   │   └── capital_letter.yml
-│   ├── all/
-│   │   └── ansible.yml
-│   ├── alphas/
-│   │   ├── capital_letter.yml
-│   │   └── small_caps_letter.yml
-│   ├── betas/
-│   │   └── capital_letter.yml
-│   ├── greek_letters/
-│   │   └── small_caps_letter.yml
-│   └── hebrew_letters/
-│       └── small_caps_letter.yml
-└── host_vars/  (6)
-    ├── host1.example.com/
-    │   └── ansible.yml
-    ├── host2.example.com/
-    │   └── ansible.yml
-    └── host3.example.com/
-        ├── ansible.yml
-        └── capital_letter.yml
-
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - -
1this is your inventory directory
2a configuration file for a dynamic inventory plug-in
3a dynamic inventory script, old style and deprecated but still used (and supported)
4a file containing a static list of hosts and groups, the name isn’t important (often called hosts but some might confuse it with /etc/hosts and it also contains groups). -See below for an example.
5the group_vars directory to define group variables. -Notice how each group is represented by a directory of its name containing one or more variable files.
6the host_vars directory to define host variables. -Notice how each host is represented by a directory of its name containing one or more variable files.
-
-
-

The groups and hosts file could look as follows; it is important not to put any variable definitions in this file.

-
-
-
Listing 7. Content of the groups_and_hosts file
-
-
[all]
-host1.example.com
-host2.example.com
-host3.example.com
-
-[alphas]
-host1.example.com
-
-[betas]
-host2.example.com
-
-[greek_letters:children]
-alphas
-betas
-
-[alephs]
-host3.example.com
-
-[hebrew_letters:children]
-alephs
-
-
-
-

Listing the hosts under [all] isn’t really required but makes sure that no host is forgotten, should it not belong to any other group. -The ini-format isn’t an obligation either, but it seems easier to read than YAML, as long as no variable is involved, and makes it easier to maintain in an automated manner using lineinfile (without needing to care for the indentation).

-
-
-

Regarding the group and host variables, the name of the variable files is actually irrelevant, you can verify it by calling ansible-inventory -i inventory_example --list: -you will not see the name capital_letter or small_caps_letter anywhere (you might see ansible though, but for other reasons…​). -We nevertheless follow the convention to name our variable files after the role they are steering (so we assume the roles capital_letter and small_caps_letter). -If correctly written, the defaults/main.yml file from those roles can be simply "dropped" into our inventory structure and adapted accordingly to our needs. -We reserve the name ansible.yml for the Ansible related variables (user, connection, become, etc).

-
-
- - - - - -
- - -you can even create a sub-directory in a host’s or group’s variable directory and put there the variable files. -This is useful if you have many variables related to the same topic you want to group together but maintain in separate files. -For example Satellite requires many variables to be fully configured, so you can have a structure as follows (again, the name of the sub-directory satellite and of the files doesn’t matter): -
-
-
-
Listing 8. Example of a complex tree of variables with sub-directory
-
-
inventory_satellite/
-├── groups_and_hosts
-└── host_vars/
-    └── sat6.example.com/
-        ├── ansible.yml
-        └── satellite/
-            ├── content_views.yml
-            ├── hostgroups.yml
-            └── locations.yml
-
-
-
-
-
-
-
-
-
-

6.4. Rely on your inventory to loop over hosts, don’t create lists of hosts

-
-Details -
-
-
-
Explanations
-
-

To perform the same task on multiple hosts, don’t create a variable with a list of hosts and loop over it. -Instead use as much as possible the capabilities of your inventory, which is already a kind of list of hosts.

-
-

The anti-pattern is especially obvious in the example of provisioning hosts on some kind of manager. -Commonly seen automation tasks of this kind are spinning up a list of VMs via a hypervisor manager like oVirt/RHV or vCenter, or calling a management tool like Foreman/Satellite or even our beloved AWX/Tower/controller.

-
-
-
Rationale
-
-

There are 4 main reasons for following this advice:

-
-
    -
  1. -

    a list of hosts is more difficult to maintain than an inventory structure, and tends to become very quickly difficult to oversee. -This is especially true as you generally need to maintain your hosts also in your inventory. -This brings us to the 2nd advantage:

    -
  2. -
  3. -

    you avoid duplicating information, as you often need the same kind of information in your inventory that you also need in order to provision your VMs. -In your inventory, you can also use groups to define group variables, automatically inherited by hosts. -You can try to implement a similar inheritance pattern with your list of hosts, but it quickly becomes difficult and hand-crafted.

    -
  4. -
  5. -

    as you loop through the hosts of an inventory, Ansible helps you with parallelization, throttling, etc, all of which you can’t do easily with your own list (technically, you can combine async and loop to reach something like this, but it’s a lot more complex to handle than letting Ansible do the heavy lifting for you).

    -
  6. -
  7. -

    you can very simply limit the play to certain hosts, using for example the --limit parameter of ansible-playbook (or the 'limit' field in Tower/controller), even using groups and patterns. -You can’t really do this with your own list of hosts.

    -
  8. -
-
-
-
Examples
-
-

Our first idea could be to define managers and hosts first in an inventory:

-
-
Listing 9. Content of the "bad" groups_and_hosts file
-
-
[managers]
-manager_a
-manager_b
-
-[managed_hosts]
-host1
-host2
-host3
-
-
-
-

Each manager has a list of hosts, which can look like this:

-
-
-
Listing 10. List of hosts in inventory_bad/host_vars/manager_a/provision.yml
-
-
provision_list_of_hosts:
-  - name: host1
-    provision_value: uno
-  - name: host2
-    provision_value: due
-
-
-
-

So that we can loop over the list in this way:

-
-
-
Listing 11. The "bad" way to loop over hosts
-
-
- name: provision hosts in a bad way
-  hosts: managers
-  gather_facts: false
-  become: false
-
-  tasks:
-    - name: create some file to simulate an API call to provision a host
-      copy:
-        content: "{{ item.provision_value }}\n"
-        dest: "/tmp/bad_{{ inventory_hostname }}_{{ item.name }}.txt"
-        force: true
-      loop: "{{ provision_list_of_hosts }}"
-
-
-
- - - - - -
- - -check the resulting files using e.g. head -n-0 /tmp/bad_*. -
-
-
-

As said, no way to limit the hosts provisioned, and no parallelism. -Compare then with the recommended approach, with a slightly different structure:

-
-
-
Listing 12. Content of the "good" groups_and_hosts file
-
-
[managers]
-manager_a
-manager_b
-
-[managed_hosts_a]
-host1
-host2
-
-[managed_hosts_b]
-host3
-
-[managed_hosts:children]
-managed_hosts_a
-managed_hosts_b
-
-
-
-

It is now the hosts and their groups which carry the relevant information, it is not anymore parked in one single list (and can be used for other purposes):

-
-
-
Listing 13. The "good" variable structure
-
-
$ cat inventory_good/host_vars/host1/provision.yml
-provision_value: uno
-$ cat inventory_good/group_vars/managed_hosts_a/provision.yml
-manager_hostname: manager_a
-
-
-
-

And the provisioning playbook now runs in parallel and can be limited to specific hosts:

-
-
-
Listing 14. The "good" way to loop over hosts
-
-
- name: provision hosts in a good way
-  hosts: managed_hosts
-  gather_facts: false
-  become: false
-
-  tasks:
-    - name: create some file to simulate an API call to provision a host
-      copy:
-        content: "{{ provision_value }}\n"
-        dest: "/tmp/good_{{ manager_hostname }}_{{ inventory_hostname }}.txt"
-        force: true
-
-
-
-

The result isn’t overwhelming in this simple setup, but you would of course appreciate the difference better if the provisioning took half an hour instead of a fraction of a second:

-
-
-
Listing 15. Comparison of the execution times between the "good" and the "bad" implementation
-
-
$ ANSIBLE_STDOUT_CALLBACK=profile_tasks \
-	ansible-playbook -i inventory_bad playbook_bad.yml
-Saturday 23 October 2021  13:11:45 +0200 (0:00:00.040)       0:00:00.040 ******
-Saturday 23 October 2021  13:11:45 +0200 (0:00:00.858)       0:00:00.899 ******
-===============================================================================
-create some file to simulate an API call to provision a host ------------ 0.86s
-$ ANSIBLE_STDOUT_CALLBACK=profile_tasks \
-	ansible-playbook -i inventory_good playbook_good.yml
-Saturday 23 October 2021  13:11:55 +0200 (0:00:00.040)       0:00:00.040 ******
-Saturday 23 October 2021  13:11:56 +0200 (0:00:00.569)       0:00:00.610 ******
-===============================================================================
-create some file to simulate an API call to provision a host ------------ 0.57s
-
-
-
- - - - - -
- - -if for some reason, you can’t follow the recommendation, you can at least avoid duplicating too much information by indirectly referencing the hosts' variables as in "{{ hostvars[item.name]['provision_value'] }}". Not so bad…​ -
-
-
-
-
-
-
-
-
-
-
-

7. Plugins good practices

-
-
- - - - - -
- - -Work in Progress…​ -
-
-
-

7.1. Python Guidelines

-
-
    -
  • -

    Review Ansible guidelines for modules and development.

    -
  • -
  • -

    Use PEP8.

    -
  • -
  • -

    File headers and functions should have comments for their intent.

    -
  • -
-
-
-
-
-
-

8. Coding Style Good Practices for Ansible

-
-
-

It has proven useful to agree on certain guiding principles as early as possible in any automation project. -Doing so makes it much easier to onboard new Ansible developers. -Project guidelines can also be shared with other departments working on automation which in turn improves the re-usability of playbooks, roles, modules, and documentation.

-
-
-

Another major benefit is that it makes code review process less time-consuming and more reliable; making both the developer and reviewer more likely to engage in a constructive review conversation.

-
-
-

This section contains suggestions for such coding-style guidelines. -The list is neither complete nor are all of the guidelines necessary in every automation project. -Experience shows that it makes sense to start with a minimum set of guidelines because the longer the list the lower the chance of people actually reading through it. -Additional guidelines can always be added later should the situation warrant it.

-
-
-

8.1. Naming things

-
-
    -
  • -

    Use valid Python identifiers following standard naming conventions of being in snake_case_naming_schemes for all YAML or Python files, variables, arguments, repositories, and other such names (like dictionary keys).

    -
  • -
  • -

    Do not use special characters other than underscore in variable names, even if YAML/JSON allow them.

    -
    -Details -
    -
    -
    -
    Explanation
    -
    -

    Using such variables in Jinja2 or Python would then be very confusing and probably not functional.

    -
    -
    Rationale
    -
    -

    even when Ansible currently allows names that are not valid identifiers, it may stop allowing them in the future, as has already happened in the past. -Making all names valid identifiers will avoid encountering problems in the future. Dictionary keys that are not valid identifiers are also less intuitive to use in Jinja2 (a dot in a dictionary key would be particularly confusing).

    -
    -
    -
    -
    -
    -
  • -
  • -

    Use mnemonic and descriptive names and do not shorten more than necessary. -A pattern object[_feature]_action has proven useful as it guarantees a proper sorting in the file system for roles and playbooks. -Systems support long identifier names, so use them!

    -
  • -
  • -

    Avoid numbering roles and playbooks, you’ll never know how they’ll be used in the future.

    -
  • -
-
-
-
-

8.2. YAML and Jinja2 Syntax

-
-
    -
  • -

    Indent at two spaces

    -
  • -
  • -

    Indent list contents beyond the list definition

    -
    -Details -
    -
    -
    Listing 16. Do this:
    -
    -
    example_list:
    -  - example_element_1
    -  - example_element_2
    -  - example_element_3
    -  - example_element_4
    -
    -
    -
    -
    Listing 17. Don’t do this:
    -
    -
    example_list:
    -- example_element_1
    -- example_element_2
    -- example_element_3
    -- example_element_4
    -
    -
    -
    -
    -
  • -
  • -

    Split long expressions into multiple lines.

    -
    -Details -
    -
    -
    -
    Rationale
    -
    -

    long lines are difficult to read, many teams even ask for a line length limit around 120-150 characters.

    -
    -
    Examples
    -
    -

    there are multiple ways to avoid long lines but the most generic one is to use the YAML folding sign (>):

    -
    -
    Listing 18. Usage of the YAML folding sign
    -
    -
    - name: call a very long command line
    -  command: >
    -    echo Lorem ipsum dolor sit amet, consectetur adipiscing elit.
    -    Maecenas mollis, ante in cursus congue, mauris orci tincidunt nulla,
    -    non gravida tortor mi non nunc.
    -- name: set a very long variable
    -  set_fact:
    -    meaningless_variable: >-
    -      Ut ac neque sit amet turpis ullamcorper auctor.
    -      Cras placerat dolor non ipsum posuere malesuada at ac ipsum.
    -      Duis a neque fermentum nulla imperdiet blandit.
    -
    -
    -
    - - - - - -
    - - -use the sign >- if it is important that the trailing newline doesn’t become part of the string (e.g. when defining a string variable). -
    -
    -
    -
    -
    -
    -
    -
  • -
  • -

    If the when: condition results in a line that is too long, and is an and expression, then break it into a list of conditions.

    -
    -Details -
    -
    -
    -
    Rationale
    -
    -

    Ansible will logically and the list elements together (Ansible User Guide » Conditionals). -Multiple conditions that all need to be true (a logical and) can also be specified as a list, but beware of bare variables in when:.

    -
    -
    Examples
    -
    -
    -
    Listing 19. Do this
    -
    -
    when:
    -  - myvar is defined
    -  - myvar | bool
    -
    -
    -
    -
    Listing 20. instead of this
    -
    -
    when: myvar is defined and myvar | bool
    -
    -
    -
    -
    -
    -
    -
    -
  • -
  • -

    All roles need to, minimally, pass a basic ansible-playbook syntax check run

    -
  • -
  • -

    Spell out all task arguments in YAML style and do not use key=value type of arguments

    -
    -Details -
    -
    -
    Listing 21. Do this:
    -
    -
    tasks:
    -- name: Print a message
    -  ansible.builtin.debug:
    -    msg: This is how it's done.
    -
    -
    -
    -
    Listing 22. Don’t do this:
    -
    -
    tasks:
    -- name: Print a message
    -  ansible.builtin.debug: msg="This is the exact opposite of how it's done."
    -
    -
    -
    -
    -
  • -
  • -

    Use true and false for boolean values in playbooks.

    -
    -Details -
    -
    -
    -
    Explanation
    -
    -

    Do not use the Ansible-specific yes and no as boolean values in YAML as these are completely custom extensions used by Ansible and are not part of the YAML spec and also avoid the use of the Python-style True and False for boolean values in playbooks.

    -
    -
    Rationale
    -
    -

    YAML 1.1 allows all variants whereas YAML 1.2 allows only true/false, and we want to be ready for when it becomes the default, and avoid a massive migration effort.

    -
    -
    -
    -
    -
    -
  • -
  • -

    Avoid comments in playbooks when possible. -Instead, ensure that the task name value is descriptive enough to tell what a task does. -Variables are commented in the defaults and vars directories and, therefore, do not need explanation in the playbooks themselves.

    -
  • -
  • -

    Use a single space separating the template markers from the variable name inside all Jinja2 template points. -For instance, always write it as {{ variable_name_here }}. -The same goes if the value is an expression. {{ variable_name | default('hiya, doc') }}

    -
  • -
  • -

    When naming files, use the .yml extension and not .yaml. -.yml is what ansible-galaxy init does when creating a new role template.

    -
  • -
  • -

    Use double quotes for YAML strings with the exception of Jinja2 strings which will use single quotes.

    -
  • -
  • -

    Do not use quotes unless you have to, especially for short module-keyword-like strings like present, absent, etc. -But do use quotes for user-side strings such as descriptions, names, and messages.

    -
  • -
  • -

    Even if JSON is valid YAML and Ansible understands it, only use JSON syntax if it makes sense (e.g. a variable file automatically generated) or adds to the readability. -When in doubt, nobody expects JSON so stick to YAML.

    -
  • -
-
-
-
-

8.3. Ansible Guidelines

-
-
    -
  • -

    Ensure that all tasks are idempotent.

    -
  • -
  • -

    Ansible variables use lazy evaluation.

    -
  • -
  • -

    Prefer the command module over the shell module unless you explicitly need shell functionality such as, e.g., piping. -Even better, use a dedicated module, if it exists. -If not, see the section about idempotency and check mode and make sure that your task is idempotent and supports check mode properly (your task will likely need options such as changed_when: and maybe check_mode:).

    -
  • -
  • -

    Anytime command or shell modules are used, add a comment in the code with justification to help with future maintenance.

    -
  • -
  • -

    Use the | bool filter when using bare variables (expressions consisting of just one variable reference without any operator) in when.

    -
  • -
  • -

    Do not use meta: end_play.

    -
    -Details -
    -
    -
    -
    Rationale
    -
    -

    It aborts the whole play instead of a given host (with multiple hosts in the inventory). -If absolutely necessary, consider using meta: end_host.

    -
    -
    -
    -
    -
    -
  • -
  • -

    Task names can be made dynamic by using variables (wrapped in Jinja2 templates), this helps with reading the logs.

    -
  • -
  • -

    Do not use variables (wrapped in Jinja2 templates) for play names; variables don’t get expanded properly there. -The same applies to loop variables (by default item) in task names within a loop. -They, too, don’t get properly expanded and hence are not to be used there.

    -
  • -
  • -

    Do not override role defaults or vars or input parameters using set_fact. -Use a different name instead.

    -
    -Details -
    -
    -
    -
    Rationale
    -
    -

    a fact set using set_fact cannot be unset and it will override the role default or role variable in all subsequent invocations of the role in the same playbook. -A fact has a different priority than other variables and not the highest, so in some cases overriding a given parameter will not work because the parameter has a higher priority (Ansible User Guide » Using Variables).

    -
    -
    -
    -
    -
    -
  • -
  • -

    Use the smallest scope for variables. -Facts are global for playbook run, so it is preferable to use other types of variables. Therefore limit (preferably avoid) the use of set_fact. -Role variables are exposed to the whole play when the role is applied using roles: or import_role:. A more restricted scope such as task or block variables is preferred.

    -
  • -
  • -

    Beware of ignore_errors: true; especially in tests. -If you set on a block, it will ignore all the asserts in the block ultimately making them pointless.

    -
  • -
  • -

    Do not use the eq (introduced in Jinja 2.10) or equalto Jinja operators.

    -
  • -
  • -

    Avoid the use of when: foo_result is changed whenever possible. -Use handlers, and, if necessary, handler chains to achieve this same result.

    -
  • -
  • -

    Use the various include/import statements in Ansible.

    -
    -Details -
    -
    -
    -
    Explanation
    -
    -

    Doing so can lead to simplified code and a reduction of repetition. -This is the closest that Ansible comes to callable sub-routines, so use judgment about callable routines to know when to similarly include a sub playbook. -Some examples of good times to do so are

    -
    -
      -
    • -

      When a set of multiple commands share a single when conditional

      -
    • -
    • -

      When a set of multiple commands are being looped together over a list of items

      -
    • -
    • -

      When a single large role is doing many complicated tasks and cannot easily be broken into multiple roles, but the process proceeds in multiple related stages

      -
    • -
    -
    -
    -
    -
    -
    -
    -
  • -
  • -

    Avoid calling the package module iteratively with the {{ item }} argument, as this is impressively slower than calling it with the line name: "{{ foo_packages }}". -The same goes for many other modules that can be given an entire list of items all at once.

    -
  • -
  • -

    Use meta modules when possible.

    -
    -Details -
    -
    -
    -
    Rationale
    -
    -

    This will allow our playbooks to run on the widest selection of operating systems possible without having to modify any more tasks than is necessary.

    -
    -
    Examples
    -
    -
    -
      -
    • -

      Instead of using the upstart and systemd modules, use the service module when at all possible.

      -
    • -
    • -

      Similarly for package management, use package instead of yum or dnf or similar.

      -
    • -
    -
    -
    -
    -
    -
    -
    -
  • -
  • -

    Avoid the use of lineinfile wherever that might be feasible.

    -
    -Details -
    -
    -
    -
    Rationale
    -
    -

    Slight miscalculations in how it is used can lead to a loss of idempotence. -Modifying config files with it can cause the Ansible code to become arcane and difficult to read, especially for someone not familiar with the file in question. -Try editing files directly using other built-in modules (e.g. ini_file, blockinfile, xml), or reading and parsing. -If you are modifying more than a tiny number of lines or in a manner more than trivially complex, try leveraging the template module, instead. -This will allow the entire structure of the file to be seen by later users and maintainers. -The use of lineinfile should include a comment with justification.

    -
    -
    -
    -
    -
    -
  • -
  • -

    Limit use of the copy module to copying remote files and to uploading binary blobs. -For all other file pushes, use the template module. -Even if there is nothing in the file that is being templated at the current moment, having the file handled by the template module now makes adding that functionality much simpler than if the file is initially handled by the copy and then needs to be moved before it can be edited.

    -
  • -
  • -

    When using the template module, append .j2 to the template file name.

    -
    -Details -
    -
    -
    -
    Example
    -
    -

    If you want to use the ansible.builtin.template module to create a file called example.conf somewhere on the managed host, name the template for this file templates/example.conf.j2.

    -
    -
    Rationale
    -
    -

    When you are at the stage of writing a template file you usually already know how the file should end up looking on the file system, so at that point it is convenient to use Jinja2 syntax highlighting to make sure your templating syntax checks out. -Should you need syntax highlighting for whatever language the target file should be in, it is very easy to define in your editor settings to use, e.g., HTML syntax highlighting for all files ending in .html.j2. -It is much less straightforward to automatically enable Jinja2 syntax highlighting for some files ending in .html.

    -
    -
    -
    -
    -
    -
  • -
  • -

    Keep filenames and templates as close to the name on the destination system as possible.

    -
    -Details -
    -
    -
    -
    Rationale
    -
    -

    This will help with both editor highlighting as well as identifying source and destination versions of the file at a glance. -Avoid duplicating the remote full path in the role directory, however, as that creates unnecessary depth in the file tree for the role. -Grouping sets of similar files into a subdirectory of templates is allowable, but avoid unnecessary depth to the hierarchy.

    -
    -
    -
    -
    -
    -
  • -
  • -

    Using agnostic modules like package only makes sense if the features required are very limited. -In many cases, if the platform is different, the package name is also different so that using package doesn’t help a lot. -Prefer then the more specific yum, dnf or apt module if you anyway need to differentiate.

    -
  • -
-
-
-
-
-
- - - diff --git a/images/ansible_structures.svg b/images/ansible_structures.svg deleted file mode 100644 index ad5ba7a..0000000 --- a/images/ansible_structures.svg +++ /dev/null @@ -1,40 +0,0 @@ -LandscapeWorkflowPlaybook of playbooksTypePlaybook_FunctionRole_ComponentTask fileRole \ No newline at end of file diff --git a/images/variable_precedences.plantuml b/images/variable_precedences.plantuml index baf2533..3e34ab0 100644 --- a/images/variable_precedences.plantuml +++ b/images/variable_precedences.plantuml @@ -7,12 +7,12 @@ split :role vars; split again :scoped vars - block→task; + block <&arrow-right> task; :runtime vars set_fact+register; end split :scoped params - role→include; + role <&arrow-right> include; split again :host facts; end split diff --git a/images/variable_precedences.png b/images/variable_precedences.png deleted file mode 100644 index 4018c1e..0000000 Binary files a/images/variable_precedences.png and /dev/null differ diff --git a/inventories/README.adoc b/inventories/README.adoc index 951627c..d77c5bc 100644 --- a/inventories/README.adoc +++ b/inventories/README.adoc @@ -50,7 +50,7 @@ Examples:: The technical tools typically contain a lot of discovered information, like an IP address or the RAM size of a VM. In a typical cloud environment, the IP address isn't part of the desired state, it is assigned on the fly by the cloud management layer, so you can only get it dynamically from the cloud API and you won't manage it. In a more traditional environment nevertheless, the IP address will be static, managed more or less manually, so it will become part of your desired state. -In this case, you shouldn't use the discovered information or you might not realize that there is a discrepancy betweeen As-Is and To-Be. +In this case, you shouldn't use the discovered information or you might not realize that there is a discrepancy between As-Is and To-Be. + The RAM size of a VM will be always present in two flavours, e.g. 
As-Is coming from the technical source and To-Be coming from the CMDB, or your static inventory, and you shouldn't confuse them. By lack of doing so, your automation might not correct the size of the VM where it should have aligned the As-Is with the To-Be. @@ -268,7 +268,7 @@ Explanations:: Rationale:: There are https://docs.ansible.com/ansible/latest/user_guide/playbooks_variables.html#understanding-variable-precedence[22 levels of variable precedence]. -This is almost impossible to keep in mind for a "normal" human and can lead to all kind of weird behaviours if not under control. +This is almost impossible to keep in mind for a "normal" human and can lead to all kind of weird behaviors if not under control. In addition, the use of play(book) variables is not recommended as it blurs the separation between code and data. The same applies to all constructs including specific variable files as part of the play (i.e. `include_vars`). By reducing the number of variable types, you end up with a more simple and overseeable list of variables. @@ -289,7 +289,7 @@ NOTE: we didn't explicitly consider https://docs.ansible.com/automation-controll The following picture summarizes this list in a simplified and easier to keep in mind way, highlighting which variables are meant to overwrite others: .Flow of variable precedences -image::variable_precedences.png[flow of variable precedences in 3 lanes] +image::variable_precedences.svg[flow of variable precedences in 3 lanes] CAUTION: even if we write that variables _shouldn't_ overwrite each other, they still all share the same namespace and _can_ potentially overwrite each other. It is your responsibility as automation author to make sure they don't. diff --git a/plugins/README.adoc b/plugins/README.adoc index ae2ed59..9e7bd79 100644 --- a/plugins/README.adoc +++ b/plugins/README.adoc @@ -15,7 +15,7 @@ NOTE: Work in Progress... 
Explanations:: All plugins, regardless of type, need documentation that describes the input parameters, outputs, and practical examples of how to use it. -Examples:: See the Ansible Developer Guide sections on https://docs.ansible.com/ansible/latest/dev_guide/developing_plugins.html#plugin-configuration-documentation-standards[Plugin Configuration and Documentation Standards] and https://docs.ansible.com/ansible/latest/dev_guide/developing_modules_documenting.html#module-documenting[Module Documentating] for more details. +Examples:: See the Ansible Developer Guide sections on https://docs.ansible.com/ansible/latest/dev_guide/developing_plugins.html#plugin-configuration-documentation-standards[Plugin Configuration and Documentation Standards] and https://docs.ansible.com/ansible/latest/dev_guide/developing_modules_documenting.html#module-documenting[Module Documenting] for more details. ==== == Use sphinx (reST) formatted docstrings in Python code @@ -104,7 +104,7 @@ def test_split_pre_existing_dir_one_level_exists(directory, expected, mocker): [%collapsible] ==== Explanations:: -Ensure a consistent approach to the way commplex argument_specs are formatted within a collection. +Ensure a consistent approach to the way complex argument_specs are formatted within a collection. Rationale:: When hand-writing a complex argspec, the author may choose to build up to data structure from multiple dictionaries or vars. diff --git a/roles/README.adoc b/roles/README.adoc index 927a81e..a232a58 100644 --- a/roles/README.adoc +++ b/roles/README.adoc @@ -174,7 +174,7 @@ Inside a collection, you can share custom plugins across all roles in the collec Collections give your roles a namespace, which removes the potential for naming collisions when developing new roles. Example:: -See the Ansible documentation on (https://docs.ansible.com/ansible/devel/dev_guide/migrating_roles.html)[migrating roles to collections] for details. 
+See the Ansible documentation on https://docs.ansible.com/ansible/devel/dev_guide/migrating_roles.html[migrating roles to collections] for details. ==== === Check Mode @@ -210,6 +210,8 @@ Rationale:: Additional automation or other integrations, such as with external t ==== === Supporting multiple distributions and versions +[%collapsible] +==== Use Cases:: * The role developer needs to be able to set role variables to different values depending on the OS platform and version. For example, if the name of a service is different between EL8 and EL9, or a config file location is different. * The role developer needs to handle the case where the user specifies `gather_facts: false` in the playbook. @@ -219,6 +221,7 @@ NOTE: The recommended solution below requires at least some `ansible_facts` to b If you just want to ensure the user always uses `gather_facts: true`, and do not want to handle this in the role, then the role documentation should state that `gather_facts: true` or `setup:` is required in order to use the role, and the role should use `fail:` with a descriptive error message if the necessary facts are not defined. If it is desirable to use roles that require facts, but fact gathering is expensive, consider using a cache plugin https://docs.ansible.com/ansible/latest/collections/index_cache.html[List of Cache Plugins], and also consider running a periodic job on the controller to refresh the cache. +==== === Platform specific variables [%collapsible] @@ -522,15 +525,57 @@ Examples:: In a role with one `tasks/main.yml` task file, including `tasks/sub.y .A prefixed task in a sub-tasks file [source,yaml] ---- -- name: sub | some task description +- name: sub | Some task description mytask: [...] ---- + -The log output will then look something like `TASK [myrole : sub | some task description] ****`, which makes it very clear where the task is coming from. 
+The log output will then look something like `TASK [myrole : sub | Some task description] ****`, which makes it very clear where the task is coming from. + TIP: with a verbosity of 2 or more, ansible-playbook will show the full path to the task file, but this generally means that you need to restart the play in a higher verbosity to get the information you could have had readily available. ==== +=== Argument Validation +[%collapsible] +==== +Explanation:: Starting from ansible version 2.11, an option is available to activate argument validation for roles by utilizing an argument specification. +When this specification is established, a task is introduced at the onset of role execution to validate the parameters provided for the role according to the defined specification. +If the parameters do not pass the validation, the role execution will terminate. + +Rationale:: Argument validation significantly contributes to the stability and reliability of the automation. +It also makes the playbook using the role fail fast instead of failing later when an incorrect variable is utilized. +By ensuring roles receive accurate input data and mitigating common issues, we can enhance the effectiveness of the Ansible playbooks using the roles. + +Examples:: The specification is defined in the meta/argument_specs.yml. For more details on how to write the specification, refer to https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_reuse_roles.html#specification-format. ++ +.Argument Specification file that validates the arguments provided to the role. +[source,yaml] +---- +argument_specs: + main: + short_description: Role description. + options: + string_arg1: + description: string argument description. + type: "str" + default: "x" + choices: ["x", "y"] + dict_arg1: + description: dict argument description. + type: dict + required: True + options: + key1: + description: key1 description. + type: int + key2: + description: key2 description. 
+ type: str + key3: + description: key3 description. + type: dict +---- +==== + == References [%collapsible] ==== @@ -539,7 +584,7 @@ inspiration or contrast to the standards described above. * https://github.com/debops/debops/blob/v0.7.2/docs/debops-policy/code-standards-policy.rst). For inspiration, as the DebOps project has some specific guidance that we do not necessarily want to follow. -* https://docs.adfinis-sygroup.ch/public/ansible-guide/overview.html +* https://adfinis.github.io/ansible-guide/styling_guide.html * https://docs.openstack.org/openstack-ansible/latest/contributor/code-rules.html ==== diff --git a/structures/README.adoc b/structures/README.adoc index b00b953..186732a 100644 --- a/structures/README.adoc +++ b/structures/README.adoc @@ -10,10 +10,10 @@ Explanations:: define for which use case to use roles, playbooks, potentially workflows (in Ansible Controller/Tower/AWX), and how to split the code you write. Rationale:: -especially when writing automation in a team, it is important to have a certain level of consistence and make sure everybody has the same understanding. +especially when writing automation in a team, it is important to have a certain level of consistency and make sure everybody has the same understanding. By lack of doing so, your automation becomes unreadable and difficult to grasp for new members or even for existing members. + -This structure will also help you to have a consistent level of modelization so that re-usability becomes easier. +Following a consistent structure will increase re-usability. If one team member uses roles where another one uses playbooks, they will both struggle to reuse the code of each other. Metaphorically speaking, only if stones have been cut at roughly the same size, can they be properly used to build a house.