From 8730f4b12cc3324c2f87763ac32a38b58ed1904f Mon Sep 17 00:00:00 2001 From: Charlie Voiselle <464492+angrycub@users.noreply.github.com> Date: Thu, 10 Sep 2020 20:15:09 -0400 Subject: [PATCH 1/2] Sunset local copy of Deployment Guide and Reference Architecture --- website/_redirects | 11 +- website/data/docs-navigation.js | 104 ++++---- .../install/production/deployment-guide.mdx | 229 ------------------ .../pages/docs/install/production/index.mdx | 10 +- .../production/reference-architecture.mdx | 134 ---------- 5 files changed, 72 insertions(+), 416 deletions(-) delete mode 100644 website/pages/docs/install/production/deployment-guide.mdx delete mode 100644 website/pages/docs/install/production/reference-architecture.mdx diff --git a/website/_redirects b/website/_redirects index 4c45c8841a0..867cde32d69 100644 --- a/website/_redirects +++ b/website/_redirects @@ -432,11 +432,7 @@ /guides/security/sentinel-policy https://learn.hashicorp.com/nomad/governance-and-policy/sentinel 301! /guides/operations/install/index.html /docs/install 301! /guides/operations/install/index /docs/install 301! -/guides/operations/deployment-guide.html /docs/install/production/deployment-guide 301! -/guides/operations/deployment-guide /docs/install/production/deployment-guide 301! /guides/operations/agent/index.html /docs/install/production/nomad-agent 301! -/guides/operations/reference-architecture.html /docs/install/production/reference-architecture 301! -/guides/operations/reference-architecture /docs/install/production/reference-architecture 301! /guides/operations/requirements.html /docs/install/production/requirements 301! /guides/operations/requirements /docs/install/production/requirements 301! /guides/operations/consul-integration/index.html /docs/integrations/consul-integration 301! @@ -455,7 +451,12 @@ /guides/upgrade/upgrade-specific.html /docs/upgrade/upgrade-specific 301! /guides/upgrade/upgrade-specific /docs/upgrade/upgrade-specific 301! 
- +/guides/operations/deployment-guide.html https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul 301! +/guides/operations/deployment-guide https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul 301! +/guides/operations/reference-architecture.html https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul 301! +/guides/operations/reference-architecture https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul 301! +/docs/install/production/deployment-guide https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul 301! +/docs/install/production/reference-architecture https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul 301! # Enterprise diff --git a/website/data/docs-navigation.js b/website/data/docs-navigation.js index faf3adf8983..884630e892d 100644 --- a/website/data/docs-navigation.js +++ b/website/data/docs-navigation.js @@ -15,32 +15,43 @@ export default [ content: [ 'requirements', 'nomad-agent', - 'reference-architecture', - 'deployment-guide' - ] + { + title: 'Reference Architecture', + href: + 'https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul', + }, + { + title: 'Deployment Guide', + href: + 'https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul', + }, + ], }, - 'windows-service' - ] + 'windows-service', + ], }, { category: 'upgrade', content: ['upgrade-specific'] }, { category: 'integrations', - content: ['consul-integration', 'consul-connect', 'vault-integration'] + content: ['consul-integration', 'consul-connect', 'vault-integration'], }, '-----------', { category: 'internals', content: [ 'architecture', - { category: 'plugins', content: ['base', 'task-drivers', 'devices', 'csi'] }, + { + category: 'plugins', + content: ['base', 'task-drivers', 'devices', 'csi'], + }, { 
category: 'scheduling', - content: ['scheduling', 'preemption'] + content: ['scheduling', 'preemption'], }, 'consensus', 'gossip', - 'security' - ] + 'security', + ], }, { category: 'configuration', @@ -56,8 +67,8 @@ export default [ 'server_join', 'telemetry', 'tls', - 'vault' - ] + 'vault', + ], }, { category: 'commands', @@ -75,18 +86,26 @@ export default [ 'token-info', 'token-list', 'token-self', - 'token-update' - ] + 'token-update', + ], }, 'agent', 'agent-info', { category: 'alloc', - content: ['exec', 'fs', 'logs', 'restart', 'signal', 'status', 'stop'] + content: ['exec', 'fs', 'logs', 'restart', 'signal', 'status', 'stop'], }, { category: 'deployment', - content: ['fail', 'list', 'pause', 'promote', 'resume', 'status', 'unblock'] + content: [ + 'fail', + 'list', + 'pause', + 'promote', + 'resume', + 'status', + 'unblock', + ], }, 'eval-status', { @@ -105,21 +124,21 @@ export default [ 'run', 'status', 'stop', - 'validate' - ] + 'validate', + ], }, { category: 'license', - content: ['get', 'put'] + content: ['get', 'put'], }, 'monitor', { category: 'namespace', - content: ['apply', 'delete', 'inspect', 'list', 'status'] + content: ['apply', 'delete', 'inspect', 'list', 'status'], }, { category: 'node', - content: ['config', 'drain', 'eligibility', 'status'] + content: ['config', 'drain', 'eligibility', 'status'], }, { category: 'operator', @@ -134,13 +153,13 @@ export default [ 'snapshot-agent', 'snapshot-inspect', 'snapshot-restore', - 'snapshot-save' - ] + 'snapshot-save', + ], }, { category: 'plugin', content: ['status'] }, { category: 'quota', - content: ['apply', 'delete', 'init', 'inspect', 'list', 'status'] + content: ['apply', 'delete', 'init', 'inspect', 'list', 'status'], }, { category: 'sentinel', content: ['apply', 'delete', 'list', 'read'] }, { category: 'server', content: ['force-leave', 'join', 'members'] }, @@ -148,8 +167,11 @@ export default [ { category: 'system', content: ['gc', 'reconcile-summaries'] }, 'ui', 'version', - { category: 
'volume', content: ['deregister', 'detach', 'status', 'register'] } - ] + { + category: 'volume', + content: ['deregister', 'detach', 'status', 'register'], + }, + ], }, '----------', { @@ -192,8 +214,8 @@ export default [ 'upstreams', 'vault', 'volume', - 'volume_mount' - ] + 'volume_mount', + ], }, { category: 'drivers', @@ -215,14 +237,14 @@ export default [ 'rkt', 'singularity', 'nspawn', - 'iis' - ] - } - ] + 'iis', + ], + }, + ], }, { category: 'devices', - content: ['nvidia', 'community'] + content: ['nvidia', 'community'], }, 'schedulers', { category: 'runtime', content: ['environment', 'interpolation'] }, @@ -236,22 +258,16 @@ export default [ 'telemetry', { category: 'plugins', - content: [ - 'apm', - 'strategy', - 'target' - ] + content: ['apm', 'strategy', 'target'], }, { category: 'internals', - content: [ - 'checks' - ] - } - ] + content: ['checks'], + }, + ], }, { category: 'telemetry', content: ['metrics'] }, '------------', { category: 'enterprise' }, - 'faq' + 'faq', ] diff --git a/website/pages/docs/install/production/deployment-guide.mdx b/website/pages/docs/install/production/deployment-guide.mdx deleted file mode 100644 index 99bd34a87b4..00000000000 --- a/website/pages/docs/install/production/deployment-guide.mdx +++ /dev/null @@ -1,229 +0,0 @@ ---- -layout: docs -page_title: Deployment Guide -sidebar_title: Reference Install Guide -description: |- - This deployment guide covers the steps required to install and - configure a single HashiCorp Nomad cluster as defined in the - Nomad Reference Architecture -ea_version: 0.9 ---- - -# Nomad Reference Install Guide - -This deployment guide covers the steps required to install and configure a single HashiCorp Nomad cluster as defined in the [Nomad Reference Architecture](/docs/install/production/reference-architecture). - -These instructions are for installing and configuring Nomad on Linux hosts running the systemd system and service manager. 
- -## Reference Material - -This deployment guide is designed to work in combination with the [Nomad Reference Architecture](/docs/install/production/reference-architecture) and [Consul Deployment Guide](https://www.consul.io/docs/guides/deployment-guide.html). Although it is not a strict requirement to follow the Nomad Reference Architecture, please ensure you are familiar with the overall architecture design. For example, installing Nomad server agents on multiple physical or virtual (with correct anti-affinity) hosts for high-availability. - -## Overview - -To provide a highly-available single cluster architecture, we recommend Nomad server agents be deployed to more than one host, as shown in the [Nomad Reference Architecture](/docs/install/production/reference-architecture). - -![Reference diagram](/img/nomad_reference_diagram.png) - -These setup steps should be completed on all Nomad hosts: - -- [Download Nomad](#download-nomad) -- [Install Nomad](#install-nomad) -- [Configure systemd](#configure-systemd) -- [Configure Nomad](#configure-nomad) -- [Start Nomad](#start-nomad) - -## Download Nomad - -Precompiled Nomad binaries are available for download at [https://releases.hashicorp.com/nomad/](https://releases.hashicorp.com/nomad/) and Nomad Enterprise binaries are available for download by following the instructions made available to HashiCorp Enterprise customers. - -```text -export NOMAD_VERSION="0.9.0" -curl --silent --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip -``` - -You may perform checksum verification of the zip packages using the SHA256SUMS and SHA256SUMS.sig files available for the specific release version. HashiCorp provides [a guide on checksum verification](https://www.hashicorp.com/security) for precompiled binaries. - -## Install Nomad - -Unzip the downloaded package and move the `nomad` binary to `/usr/local/bin/`. Check `nomad` is available on the system path. 
- -```text -unzip nomad_${NOMAD_VERSION}_linux_amd64.zip -sudo chown root:root nomad -sudo mv nomad /usr/local/bin/ -nomad version -``` - -The `nomad` command features opt-in autocompletion for flags, subcommands, and arguments (where supported). Enable autocompletion. - -```text -nomad -autocomplete-install -complete -C /usr/local/bin/nomad nomad -``` - -Create a data directory for Nomad. - -```text -sudo mkdir --parents /opt/nomad -``` - -## Configure systemd - -Systemd uses [documented sane defaults](https://www.freedesktop.org/software/systemd/man/systemd.directives.html) so only non-default values must be set in the configuration file. - -Create a Nomad service file at `/etc/systemd/system/nomad.service`. - -```text -sudo touch /etc/systemd/system/nomad.service -``` - -Add this configuration to the Nomad service file: - -```text -[Unit] -Description=Nomad -Documentation=https://nomadproject.io/docs/ -Wants=network-online.target -After=network-online.target - -[Service] -ExecReload=/bin/kill -HUP $MAINPID -ExecStart=/usr/local/bin/nomad agent -config /etc/nomad.d -KillMode=process -KillSignal=SIGINT -LimitNOFILE=infinity -LimitNPROC=infinity -Restart=on-failure -RestartSec=2 -StartLimitBurst=3 -StartLimitIntervalSec=10 -TasksMax=infinity - -[Install] -WantedBy=multi-user.target -``` - -The following parameters are set for the `[Unit]` stanza: - -- [`Description`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Description=) - Free-form string describing the nomad service -- [`Documentation`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Documentation=) - Link to the nomad documentation -- [`Wants`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#Wants=) - Configure a dependency on the network service -- [`After`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#After=) - Configure an ordering dependency on the network service being started before the nomad service - -The following 
parameters are set for the `[Service]` stanza: - -- [`ExecReload`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecReload=) - Send Nomad a `SIGHUP` signal to trigger a configuration reload -- [`ExecStart`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecStart=) - Start Nomad with the `agent` argument and path to a directory of configuration files -- [`KillMode`](https://www.freedesktop.org/software/systemd/man/systemd.kill.html#KillMode=) - Treat nomad as a single process -- [`LimitNOFILE`, `LimitNPROC`](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Process%20Properties) - Disable limits for file descriptors and processes -- [`RestartSec`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#RestartSec=) - Restart nomad after 2 seconds of it being considered 'failed' -- [`Restart`](https://www.freedesktop.org/software/systemd/man/systemd.service.html#Restart=) - Restart nomad unless it returned a clean exit code -- [`StartLimitBurst`, `StartLimitIntervalSec`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#StartLimitIntervalSec=interval) - Configure unit start rate limiting -- [`TasksMax`](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#TasksMax=N) - Disable task limits (only available in systemd >= 226) - -The following parameters are set for the `[Install]` stanza: - -- [`WantedBy`](https://www.freedesktop.org/software/systemd/man/systemd.unit.html#WantedBy=) - Creates a weak dependency on nomad being started by the multi-user run level - -## Configure Nomad - -Nomad uses [documented sane defaults](/docs/configuration) so only non-default values must be set in the configuration file. Configuration can be read from multiple files and is loaded in lexical order. See the [full description](/docs/configuration) for more information about configuration loading and merge semantics. 
- -Some configuration settings are common to both server and client Nomad agents, while some configuration settings must only exist on one or the other. Follow the [common configuration](#common-configuration) guidance on all hosts and then the specific guidance depending on whether you are configuring a Nomad [server](#server-configuration) or [client](#client-configuration). - -- [Common Nomad configuration](#common-configuration) -- [Configure a Nomad server](#server-configuration) -- [Configure a Nomad client](#client-configuration) - -### Common configuration - -Create a configuration file at `/etc/nomad.d/nomad.hcl`: - -```text -sudo mkdir --parents /etc/nomad.d -sudo chmod 700 /etc/nomad.d -sudo touch /etc/nomad.d/nomad.hcl -``` - -Add this configuration to the `nomad.hcl` configuration file: - -~> **Note:** Replace the `datacenter` parameter value with the identifier you will use for the datacenter this Nomad cluster is deployed in. - -```hcl -datacenter = "dc1" -data_dir = "/opt/nomad" -``` - -- [`datacenter`](/docs/configuration#datacenter) - The datacenter in which the agent is running. -- [`data_dir`](/docs/configuration#data_dir) - The data directory for the agent to store state. - -### Server configuration - -Create a configuration file at `/etc/nomad.d/server.hcl`: - -```text -sudo touch /etc/nomad.d/server.hcl -``` - -Add this configuration to the `server.hcl` configuration file: - -~> **NOTE** Replace the `bootstrap_expect` value with the number of Nomad servers you will use; three or five [is recommended](/docs/internals/consensus#deployment-table). - -```hcl -server { - enabled = true - bootstrap_expect = 3 -} -``` - -- [`server`](/docs/configuration/server#enabled) - Specifies if this agent should run in server mode. All other server options depend on this value being set. -- [`bootstrap_expect`](/docs/configuration/server#bootstrap_expect) - The number of expected servers in the cluster. 
Either this value should not be provided or the value must agree with other servers in the cluster. - -### Client configuration - -Create a configuration file at `/etc/nomad.d/client.hcl`: - -```text -sudo touch /etc/nomad.d/client.hcl -``` - -Add this configuration to the `client.hcl` configuration file: - -```hcl -client { - enabled = true -} -``` - -- [`client`](/docs/configuration/client#enabled) - Specifies if this agent should run in client mode. All other client options depend on this value being set. - -~> **NOTE** The [`options`](/docs/configuration/client#options-parameters) parameter can be used to enable or disable specific configurations on Nomad clients, unique to your use case requirements. - -### ACL configuration - -The [Access Control](https://learn.hashicorp.com/collections/nomad/access-control) guide provides instructions on configuring and enabling ACLs. - -### TLS configuration - -Securing Nomad's cluster communication with mutual TLS (mTLS) is recommended for production deployments and can even ease operations by preventing mistakes and misconfigurations. Nomad clients and servers should not be publicly accessible without mTLS enabled. - -The [Securing Nomad with TLS](https://learn.hashicorp.com/nomad/transport-security/enable-tls) guide provides instructions on configuring and enabling TLS. - -## Start Nomad - -Enable and start Nomad using the systemctl command responsible for controlling systemd managed services. Check the status of the nomad service using systemctl. - -```text -sudo systemctl enable nomad -sudo systemctl start nomad -sudo systemctl status nomad -``` - -## Next Steps - -- Read [Outage Recovery](https://learn.hashicorp.com/nomad/operating-nomad/outage) to learn - the steps required to recover from a Nomad cluster outage. -- Read [Autopilot](https://learn.hashicorp.com/nomad/operating-nomad/autopilot) to learn about - features in Nomad 0.8 to allow for automatic operator-friendly - management of Nomad servers. 
diff --git a/website/pages/docs/install/production/index.mdx b/website/pages/docs/install/production/index.mdx index 3912ba9d6ae..96e3e18ba8b 100644 --- a/website/pages/docs/install/production/index.mdx +++ b/website/pages/docs/install/production/index.mdx @@ -24,17 +24,19 @@ This page details the recommended machine resources (instances), port requiremen Please refer to [Hardware Requirements](/docs/install/production/requirements) sub-section. ## Setting Nodes with Nomad Agent + These pages explain the Nomad agent process and how to set the server and client nodes in the cluster. Please refer to [Set Server & Client Nodes](/docs/install/production/nomad-agent) and [Nomad Agent documentation](/docs/commands/agent) pages. ## Reference Architecture -This document provides recommended practices and a reference architecture for HashiCorp Nomad production deployments. This reference architecture conveys a general architecture that should be adapted to accommodate the specific needs of each implementation. +This HashiCorp Learn tutorial provides recommended practices and a reference architecture for HashiCorp Nomad production deployments. This reference architecture conveys a general architecture that should be adapted to accommodate the specific needs of each implementation. -Please refer to [Reference Architecture](/docs/install/production/reference-architecture) sub-section. +Please refer to [Reference Architecture](https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul) sub-section. ## Install Guide Based on Reference Architecture -This guide provides an end-to-end walkthrough of the steps required to install a single production-ready Nomad cluster as defined in the Reference Architecture section. -Please refer to [Reference Install Guide](/docs/install/production/deployment-guide) sub-section. 
+This HashiCorp Learn tutorial provides an end-to-end walkthrough of the steps required to install a single production-ready Nomad cluster as defined in the Reference Architecture section. + +Please refer to [Reference Install Guide](https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul) sub-section. diff --git a/website/pages/docs/install/production/reference-architecture.mdx b/website/pages/docs/install/production/reference-architecture.mdx deleted file mode 100644 index ea7d1f7f37f..00000000000 --- a/website/pages/docs/install/production/reference-architecture.mdx +++ /dev/null @@ -1,134 +0,0 @@ ---- -layout: docs -page_title: Nomad Reference Architecture -sidebar_title: Reference Architecture -description: |- - This document provides recommended practices and a reference - architecture for HashiCorp Nomad production deployments. -ea_version: 0.9 ---- - -# Nomad Reference Architecture - -This document provides recommended practices and a reference architecture for HashiCorp Nomad production deployments. This reference architecture conveys a general architecture that should be adapted to accommodate the specific needs of each implementation. - -The following topics are addressed: - -- [Reference Architecture](#ra) -- [Deployment Topology within a Single Region](#one-region) -- [Deployment Topology across Multiple Regions](#multi-region) -- [Network Connectivity Details](#net) -- [Deployment System Requirements](#system-reqs) -- [High Availability](#high-availability) -- [Failure Scenarios](#failure-scenarios) - -This document describes deploying a Nomad cluster in combination with, or with access to, a [Consul cluster](/docs/integrations/consul-integration). We recommend the use of Consul with Nomad to provide automatic clustering, service discovery, health checking and dynamic configuration. 
- -## Reference Architecture ((#ra)) - -A Nomad cluster typically comprises three or five servers (but no more than seven) and a number of client agents. Nomad differs slightly from Consul in that it divides infrastructure into regions which are served by one Nomad server cluster, but can manage multiple datacenters or availability zones. For example, a _US Region_ can include datacenters _us-east-1_ and _us-west-2_. - -In a Nomad multi-region architecture, communication happens via [WAN gossip](/docs/internals/gossip). Additionally, Nomad can integrate easily with Consul to provide features such as automatic clustering, service discovery, and dynamic configurations. Thus we recommend you use Consul in your Nomad deployment to simplify the deployment. - -In cloud environments, a single cluster may be deployed across multiple availability zones. For example, in AWS each Nomad server can be deployed to an associated EC2 instance, and those EC2 instances distributed across multiple AZs. Similarly, Nomad server clusters can be deployed to multiple cloud regions to allow for region level HA scenarios. - -For more information on Nomad server cluster design, see the [cluster requirements documentation](/docs/install/production/requirements). - -The design shared in this document is the recommended architecture for production environments, as it provides flexibility and resilience. Nomad utilizes an existing Consul server cluster; however, the deployment design of the Consul server cluster is outside the scope of this document. - -Nomad to Consul connectivity is over HTTP and should be secured with TLS as well as a Consul token to provide encryption of all traffic. This is done using Nomad's [Automatic Clustering with Consul](https://learn.hashicorp.com/nomad/operating-nomad/clustering). - -### Deployment Topology within a Single Region ((#one-region)) - -A single Nomad cluster is recommended for applications deployed in the same region. 
- -Each cluster is expected to have either three or five servers. This strikes a balance between availability in the case of failure and performance, as [Raft](https://raft.github.io/) consensus gets progressively slower as more servers are added. - -The time taken by a new server to join an existing large cluster may increase as the size of the cluster increases. - -#### Reference Diagram - -![Reference diagram](/img/nomad_reference_diagram.png) - -### Deployment Topology across Multiple Regions ((#multi-region)) - -By deploying Nomad server clusters in multiple regions, the user is able to interact with the Nomad servers by targeting any region from any Nomad server even if that server resides in a separate region. However, most data is not replicated between regions as they are fully independent clusters. The exceptions are [ACL tokens and policies][acl], as well as [Sentinel policies in Nomad Enterprise][sentinel], which _are_ replicated between regions. - -Nomad server clusters in different datacenters can be federated using WAN links. The server clusters can be joined to communicate over the WAN on port `4648`. This same port is used for single datacenter deployments over LAN as well. - -Additional documentation is available to learn more about [Nomad server federation](https://learn.hashicorp.com/nomad/operating-nomad/federation). - -## Network Connectivity Details ((#net)) - -![Nomad network diagram](/img/nomad_network_arch.png) - -Nomad servers are expected to be able to communicate in high bandwidth, low latency network environments and have below 10 millisecond latencies between cluster members. Nomad servers can be spread across cloud regions or datacenters if they satisfy these latency requirements. - -Nomad client clusters require the ability to receive traffic as noted above in the Network Connectivity Details; however, clients can be separated into any type of infrastructure (multi-cloud, on-prem, virtual, bare metal, etc.) 
as long as they are reachable and can receive job requests from the Nomad servers. - -Additional documentation is available to learn more about [Nomad networking](/docs/install/production/requirements#network-topology). - -## Deployment System Requirements ((#system-reqs)) - -Nomad server agents are responsible for maintaining the cluster state, responding to RPC queries (read operations), and for processing all write operations. Given that Nomad server agents do most of the heavy lifting, server sizing is critical for the overall performance efficiency and health of the Nomad cluster. - -### Nomad Servers - -| Size | CPU | Memory | Disk | Typical Cloud Instance Types | -| ----- | -------- | ------------ | ------ | ----------------------------------------- | -| Small | 2 core | 8-16 GB RAM | 50 GB | **AWS:** m5.large, m5.xlarge | -| | | | | **Azure:** Standard_D2_v3, Standard_D4_v3 | -| | | | | **GCE:** n1-standard-8, n1-standard-16 | -| Large | 4-8 core | 32-64 GB RAM | 100 GB | **AWS:** m5.2xlarge, m5.2xlarge | -| | | | | **Azure:** Standard_D4_v3, Standard_D8_v3 | -| | | | | **GCE:** n1-standard-16, n1-standard-32 | - -#### Hardware Sizing Considerations - -- The small size would be appropriate for most initial production - deployments, or for development/testing environments. - -- The large size is for production environments where there is a - consistently high workload. - -~> **NOTE** For large workloads, ensure that the disks support a high number of IOPS to keep up with the rapid Raft log update rate. - -Nomad clients can be setup with specialized workloads as well. For example, if workloads require GPU processing, a Nomad datacenter can be created to serve those GPU specific jobs and joined to a Nomad server cluster. For more information on specialized workloads, see the documentation on [job constraints](/docs/job-specification/constraint) to target specific client nodes. 
- -## High Availability - -A Nomad server cluster is the highly-available unit of deployment within a single datacenter. A recommended approach is to deploy a three or five node Nomad server cluster. With this configuration, during a Nomad server outage, failover is handled immediately without human intervention. - -When setting up high availability across regions, multiple Nomad server clusters are deployed and connected via WAN gossip. Nomad clusters in regions are fully independent from each other and do not share jobs, clients, or state. Data residing in a single region-specific cluster is not replicated to other clusters in other regions. - -## Failure Scenarios - -Typical distribution in a cloud environment is to spread Nomad server nodes into separate Availability Zones (AZs) within a high bandwidth, low latency network, such as an AWS Region. The diagram below shows Nomad servers deployed in multiple AZs promoting a single voting member per AZ and providing both AZ-level and node-level failure protection. - -![Nomad fault tolerance](/img/nomad_fault_tolerance.png) - -Additional documentation is available to learn more about [cluster sizing and failure tolerances](/docs/internals/consensus#deployment-table) as well as [outage recovery](https://learn.hashicorp.com/nomad/operating-nomad/outage). - -### Availability Zone Failure - -In the event of a single AZ failure, only a single Nomad server will be affected which would not impact job scheduling as long as there is still a Raft quorum (i.e. 2 available servers in a 3 server cluster, 3 available servers in a 5 server cluster, etc.). There are two scenarios that could occur should an AZ fail in a multiple AZ setup: leader loss or follower loss. - -#### Leader Server Loss - -If the AZ containing the Nomad leader server fails, the remaining quorum members would elect a new leader. The new leader then begins to accept new log entries and replicates these entries to the remaining followers. 
- -#### Follower Server Loss - -If the AZ containing a Nomad follower server fails, there is no immediate impact to the Nomad leader server or cluster operations. However, there still must be a Raft quorum in order to properly manage a future failure of the Nomad leader server. - -### Region Failure - -In the event of a region-level failure (which would contain an entire Nomad server cluster), clients will still be able to submit jobs to another region that is properly federated. However, there will likely be data loss as Nomad server clusters do not replicate their data to other region clusters. See [Multi-region Federation](https://learn.hashicorp.com/nomad/operating-nomad/federation) for more setup information. - -## Next Steps - -- Read [Deployment Guide](/docs/install/production/deployment-guide) to learn - the steps required to install and configure a single HashiCorp Nomad cluster. - -[acl]: https://learn.hashicorp.com/nomad?track=acls#operations-and-development -[sentinel]: https://learn.hashicorp.com/nomad/governance-and-policy/sentinel From d6f1e4173693a6f76abd17bd3e335c2e352f02d6 Mon Sep 17 00:00:00 2001 From: Charlie Voiselle <464492+angrycub@users.noreply.github.com> Date: Thu, 10 Sep 2020 21:08:52 -0400 Subject: [PATCH 2/2] Update copy on Production overview page --- .../pages/docs/install/production/index.mdx | 49 ++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/website/pages/docs/install/production/index.mdx b/website/pages/docs/install/production/index.mdx index 96e3e18ba8b..b7cc39e5791 100644 --- a/website/pages/docs/install/production/index.mdx +++ b/website/pages/docs/install/production/index.mdx @@ -7,36 +7,41 @@ description: Learn how to install Nomad for Production. # Installing Nomad for Production -This section covers how to install Nomad for production. 
+While HashiCorp Nomad provides a low-friction practitioner experience out of +the box, there are a few critical steps to take for a successful production +Nomad deployment. -There are multiple steps to cover for a successful Nomad deployment: +## Explore the Reference Architecture and Installation Guide -## Installing Nomad +Learn more about recommended practices and explore a reference architecture for +deploying HashiCorp Nomad in production. -This page lists the two primary methods to installing Nomad and how to verify a successful installation. +- [Nomad Reference Architecture][] - Learn recommended practices and a reference + architecture for Nomad production deployments. This reference architecture + conveys a general architecture. Adapt it to accommodate the specific needs + of your implementation. -Please refer to [Installing Nomad](/docs/install) sub-section. +- [Nomad Deployment Guide][] - Follow along with an end-to-end outline of the + steps required to install a single production-ready Nomad cluster as defined + in the Reference Architecture section. -## Hardware Requirements +## Verify Hardware Requirements -This page details the recommended machine resources (instances), port requirements, and network topology for Nomad. +Review the recommended machine resources (instances), port requirements, and +network topology for Nomad in the [Hardware Requirements](/docs/install/production/requirements) page. -Please refer to [Hardware Requirements](/docs/install/production/requirements) sub-section. +## Install Nomad -## Setting Nodes with Nomad Agent +Visit the [Installing Nomad](/docs/install) page to learn the options +available for installing Nomad and how to verify a successful +installation. -These pages explain the Nomad agent process and how to set the server and client nodes in the cluster.
+## Configure your Nomad Servers and Clients -Please refer to [Set Server & Client Nodes](/docs/install/production/nomad-agent) and [Nomad Agent documentation](/docs/commands/agent) pages. +Refer to the [Set Server & Client Nodes](/docs/install/production/nomad-agent) +and [Nomad Agent documentation](/docs/commands/agent) pages to learn about the +Nomad agent process and how to configure the server and client nodes in your +cluster. -## Reference Architecture - -This HashiCorp Learn tutorial provides recommended practices and a reference architecture for HashiCorp Nomad production deployments. This reference architecture conveys a general architecture that should be adapted to accommodate the specific needs of each implementation. - -Please refer to [Reference Architecture](https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul) sub-section. - -## Install Guide Based on Reference Architecture - -This HashiCorp Learn tutorial provides an end-to-end walkthrough of the steps required to install a single production-ready Nomad cluster as defined in the Reference Architecture section. - -Please refer to [Reference Install Guide](https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul) sub-section. +[Nomad Reference Architecture]: https://learn.hashicorp.com/tutorials/nomad/production-reference-architecture-vm-with-consul +[Nomad Deployment Guide]: https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul \ No newline at end of file