Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emr #53

Open
wants to merge 5 commits into
base: emr
Choose a base branch
from
Open

Emr #53

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Here are the services that you can deploy using this repository:
- [AWS EKS](./tf/eks.tf)
- [AWS VPC](./tf/vpc.tf)
- [AWS Lambda](./tf/lambda.tf)
- [AWS EMR](./tf/emr.tf)

## Author

Expand Down
2 changes: 2 additions & 0 deletions envs/qa.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ eks_cluster_name = "boilerplate"
// -- ElastiCache
redis_name = "boilerplate"
// -- ElasticSearch
// -- EMR
emr_name = "boilerplate"
// -- GuardDuty
// -- Lambda
lambda_source = "./lambda_handlers/lambda.py"
Expand Down
118 changes: 118 additions & 0 deletions tf/2-variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,125 @@ variable "redis_use_existing_security_groups" {
// ElasticSearch variables
*/

/*
// Amazon EMR variables
*/
# Required (no default): name handed to the emr_cluster module as `name`.
variable "emr_name" {
  description = "Name of the Amazon EMR App"
  type        = string
}

# Naming prefix handed to the emr_cluster module as `namespace`.
variable "emr_namespace" {
  description = "Namespace, which could be your organization name or abbreviation, e.g. 'eg' or 'cp'"
  type        = string
  default     = "company"
}

# Root EBS volume size (GiB) applied to every EC2 instance of the cluster.
variable "emr_ebs_root_volume_size" {
  description = "Size in GiB of the EBS root device volume of the Linux AMI that is used for each EC2 instance. Available in Amazon EMR version 4.x and later"
  type        = number
  default     = 10
}

# Job-flow visibility toggle; enabled by default.
variable "emr_visible_to_all_users" {
  description = "Whether the job flow is visible to all IAM users of the AWS account associated with the job flow"
  type        = bool
  default     = true
}

# EMR release to launch. The default below is a 6.x release, so the reference
# points at the 6.x release list (the 5.x list is kept for older labels):
#   https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-release-6x.html
#   https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-release-5x.html
variable "emr_release_label" {
  description = "The release label for the Amazon EMR release"
  type        = string
  default     = "emr-6.2.0"
}

# Applications installed on the cluster; only Hadoop by default.
# NOTE(review): the list in the description is pinned to EMR 5.25.0, while the
# default release label in this file is emr-6.2.0 - confirm the set still applies.
variable "emr_applications" {
  description = "A list of applications for the cluster. Valid values are: Flink, Ganglia, Hadoop, HBase, HCatalog, Hive, Hue, JupyterHub, Livy, Mahout, MXNet, Oozie, Phoenix, Pig, Presto, Spark, Sqoop, TensorFlow, Tez, Zeppelin, and ZooKeeper (as of EMR 5.25.0). Case insensitive"
  type        = list(string)
  default     = ["Hadoop"]
}

# Optional application configuration overrides, supplied as a JSON string.
# Defaults to an empty string.
# Format reference: https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html
variable "emr_configurations_json" {
  description = "A JSON string for supplying list of configurations for the EMR cluster."
  type        = string
  default     = ""
}

# Core instance group: EC2 instance type.
variable "emr_core_instance_group_instance_type" {
  description = "EC2 instance type for all instances in the Core instance group"
  type        = string
  default     = "m4.large"
}

# Core instance group: number of instances (single node by default).
variable "emr_core_instance_group_instance_count" {
  description = "Target number of instances for the Core instance group. Must be at least 1"
  type        = number
  default     = 1
}

# Core instance group: size of each attached EBS volume, in GiB.
variable "emr_core_instance_group_ebs_size" {
  description = "Core instances volume size, in gibibytes (GiB)"
  type        = number
  default     = 64
}

# Core instance group: EBS volume type.
variable "emr_core_instance_group_ebs_type" {
  description = "Core instances volume type. Valid options are gp2, io1, standard and st1"
  type        = string
  default     = "standard"
}

# Core instance group: how many EBS volumes each instance gets.
variable "emr_core_instance_group_ebs_volumes_per_instance" {
  description = "The number of EBS volumes with this configuration to attach to each EC2 instance in the Core instance group"
  type        = number
  default     = 1
}

# Master instance group: EC2 instance type.
variable "emr_master_instance_group_instance_type" {
  description = "EC2 instance type for all instances in the Master instance group"
  type        = string
  default     = "m4.large"
}

# Master instance group: number of master nodes.
# The aws_emr_cluster master instance group accepts only 1 (single master) or
# 3 (multi-master); the previous wording ("at least 1") suggested that other
# counts such as 2 were valid.
variable "emr_master_instance_group_instance_count" {
  description = "Target number of instances for the Master instance group. Must be 1 or 3 (3 requires EMR 5.23.0 or later)"
  type        = number
  default     = 1
}

# Master instance group: size of each attached EBS volume, in GiB.
variable "emr_master_instance_group_ebs_size" {
  description = "Master instances volume size, in gibibytes (GiB)"
  type        = number
  default     = 10
}

# Master instance group: EBS volume type.
variable "emr_master_instance_group_ebs_type" {
  description = "Master instances volume type. Valid options are gp2, io1, standard and st1"
  type        = string
  default     = "standard"
}

# Master instance group: how many EBS volumes each instance gets.
variable "emr_master_instance_group_ebs_volumes_per_instance" {
  description = "The number of EBS volumes with this configuration to attach to each EC2 instance in the Master instance group"
  type        = number
  default     = 1
}

# Switch for the optional Task node instance group; off by default.
# Background on node types:
#   https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-master-core-task-nodes.html
# Terraform resource reference:
#   https://www.terraform.io/docs/providers/aws/r/emr_instance_group.html
variable "emr_create_task_instance_group" {
  description = "Whether to create an instance group for Task nodes."
  type        = bool
  default     = false
}

# Optional EC2 key pair; defaults to null, i.e. no key pair is passed on.
variable "emr_aws_key_pair_name" {
  description = "Amazon EC2 key pair that can be used to ssh to the master node as the user called hadoop"
  type        = string
  default     = null
}

/*
// GuardDuty variables
Expand Down
45 changes: 45 additions & 0 deletions tf/emr.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# NOTE(review): the whole emr_cluster module below is wrapped in a block comment,
# so no EMR resources are created until the surrounding /* ... */ is removed.
# Inside it, log_uri is additionally disabled with `//` and refers to module.s3.
/*
module "emr_cluster" {
source = "git::https://github.com/cloudposse/terraform-aws-emr-cluster.git?ref=tags/0.15.0"

name = var.emr_name
namespace = var.emr_namespace
stage = var.env
region = var.region

master_allowed_security_groups = [module.vpc.default_security_group_id]
slave_allowed_security_groups = [module.vpc.default_security_group_id]

vpc_id = module.vpc.vpc_id
subnet_id = module.vpc.database_subnets[0]
route_table_id = module.vpc.database_route_table_ids[0]
subnet_type = "private"

ebs_root_volume_size = var.emr_ebs_root_volume_size
visible_to_all_users = var.emr_visible_to_all_users
release_label = var.emr_release_label
applications = var.emr_applications
configurations_json = var.emr_configurations_json

# Core Instance
core_instance_group_instance_type = var.emr_core_instance_group_instance_type
core_instance_group_instance_count = var.emr_core_instance_group_instance_count
core_instance_group_ebs_size = var.emr_core_instance_group_ebs_size
core_instance_group_ebs_type = var.emr_core_instance_group_ebs_type
core_instance_group_ebs_volumes_per_instance = var.emr_core_instance_group_ebs_volumes_per_instance

# Master Instance
master_instance_group_instance_type = var.emr_master_instance_group_instance_type
master_instance_group_instance_count = var.emr_master_instance_group_instance_count
master_instance_group_ebs_size = var.emr_master_instance_group_ebs_size
master_instance_group_ebs_type = var.emr_master_instance_group_ebs_type
master_instance_group_ebs_volumes_per_instance = var.emr_master_instance_group_ebs_volumes_per_instance


create_task_instance_group = var.emr_create_task_instance_group
//log_uri = format("s3n://%s/", module.s3.this_s3_bucket_id)
key_name = var.emr_aws_key_pair_name

tags = local.tags
}
*/