Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat lambda export tool #6163

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,17 @@ venv_aws/
# Backend coverage generated files
htmlcov/
.coverage

# Local .terraform directories
**/.terraform/*

# .tfstate files
*.tfstate
*.tfstate.*

# Crash log files
crash.log
crash.*.log

# Generated by Terraform at build time
*cron_raw_data.zip*
44 changes: 44 additions & 0 deletions scripts/aws/terraform/export-tool-lambda-cron/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions scripts/aws/terraform/export-tool-lambda-cron/backend.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
## Initializes the Terraform backend to use S3. Create the S3 bucket and DynamoDB lock table first, then uncomment the block below.

# terraform {
# backend "s3" {
# bucket = "hotosm-tm-terraform-statefiles"
# dynamodb_table = "hotosm-tm-terraform-locks"
# key = "terraform.state"
# region = "ap-south-1"
# }
# }
93 changes: 93 additions & 0 deletions scripts/aws/terraform/export-tool-lambda-cron/lambda_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import json
import requests
import os

# Mapping-type id -> [tag key, export name]. generate_payload uses the first
# item as the tag-filter key and the second as the export file-name suffix.
mapping_types_reverse = {
    1: ["highway", "roads"],
    2: ["building", "buildings"],
    3: ["waterway", "waterways"],
    4: ["landuse", "landUse"],
    5: ["other", "other"],
}

# One export request is issued per output format listed here.
output_types = ["geojson", "shp", "kml"]

# Endpoints and credentials are read from the environment, with fallbacks.
raw_data_api = os.environ.get(
    "RAW_DATA_API",
    "https://api-prod.raw-data.hotosm.org/v1/snapshot/",
)
rawdata_api_auth_token = os.environ.get("RAWDATA_API_AUTH_TOKEN", "")
active_projects_api_base_url = os.environ.get("ACTIVE_PROJECTS_API_BASE_URL", "")

# Headers sent with every Raw Data API request.
headers = {
    "Content-Type": "application/json",
    "Access-Token": rawdata_api_auth_token,
}

def generate_payload(project_id: int, mapping_type: list, output_type: str, bbox_geometry: dict) -> dict:
    """
    Build the Raw Data API request body for one project, mapping type and output format.

    Args:
        project_id (int): The ID of the project; embedded in the export file name.
        mapping_type (list): Two-item ``[tag_key, export_name]`` pair. ``tag_key``
            becomes the key of the tag filter and ``export_name`` the suffix of
            the export file name.
        output_type (str): Export format, passed through as ``outputType``.
        bbox_geometry (dict): Geometry to export, passed through unchanged as
            ``geometry``.

    Returns:
        dict: The payload data dictionary.
    """
    tag_key = mapping_type[0]
    export_name = mapping_type[1]

    return {
        # Boolean-like options are sent as the strings "true" / "false".
        "bindZip": "true",
        "centroid": "false",
        "fileName": f"hotosm_project_{project_id}_{export_name}",
        "outputType": output_type,
        "uuid": "false",
        "useStWithin": "true",
        "filters": {
            "tags": {
                "all_geometry": {
                    "join_or": {
                        tag_key: [],
                    }
                }
            },
            "attributes": {
                # NOTE(review): the empty attribute name is kept as-is; confirm it is intentional.
                "all_geometry": [
                    "name",
                    "",
                ]
            },
        },
        "geometry": bbox_geometry,
    }

def lambda_handler(event, context):
    """
    Fetch the active Tasking Manager projects and request Raw Data API exports for them.

    For every feature returned by the active-projects endpoint, one export
    request is posted per known mapping type and per entry in ``output_types``.

    Args:
        event: Lambda invocation event (not used).
        context: Lambda runtime context (not used).

    Raises:
        requests.RequestException: If the active-projects request cannot be completed.
        RuntimeError: If the active-projects endpoint answers with a status other
            than 200, or if any export request could not be sent.
    """
    time_interval = 24
    # Bound every HTTP call so a stalled connection cannot hang the invocation.
    request_timeout = 10  # seconds

    active_projects_api = f"{active_projects_api_base_url}/api/v2/projects/queries/active/?interval={time_interval}"
    active_projects_api_response = requests.get(active_projects_api, timeout=request_timeout)

    if active_projects_api_response.status_code != 200:
        # Fail loudly: returning silently would be recorded as a successful run.
        raise RuntimeError(
            f"Active projects API returned HTTP {active_projects_api_response.status_code}"
        )

    active_projects = active_projects_api_response.json()
    failed_exports = 0

    for feature in active_projects['features']:
        geometry = feature['geometry']
        project_id = feature['properties'].get('project_id')
        mapping_types = feature['properties'].get('mapping_types')

        if not mapping_types:
            continue

        for mapping_type in mapping_types:
            # The lookup does not depend on the output type, so do it once per mapping type.
            mapping_type_value = mapping_types_reverse.get(mapping_type)
            if mapping_type_value is None:
                continue

            for output_type in output_types:
                payload = generate_payload(project_id, mapping_type_value, output_type, geometry)
                payload_json = json.dumps(payload)
                try:
                    response = requests.post(
                        raw_data_api,
                        headers=headers,
                        data=payload_json,
                        timeout=request_timeout,
                    )
                except requests.RequestException as err:
                    # Keep going so one failed export does not block the others.
                    failed_exports += 1
                    print(f"Export request failed for project {project_id} ({output_type}): {err}")
                    continue

                try:
                    print(response.json())
                except ValueError:
                    # Body is not JSON: log it raw instead of aborting.
                    print(response.text)

    if failed_exports:
        # Surface partial failures so the invocation is reported as an error.
        raise RuntimeError(f"{failed_exports} export request(s) could not be sent")

# CLI entry point; this guard is only true when the file is run as a script.
if __name__ == "__main__":
    # The handler ignores both arguments, so pass placeholders instead of the
    # undefined names `event` / `context`.
    lambda_handler({}, None)
Binary file not shown.
144 changes: 144 additions & 0 deletions scripts/aws/terraform/export-tool-lambda-cron/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@

# Trust policy document: allows the Lambda service to assume a role via STS.
data "aws_iam_policy_document" "assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["lambda.amazonaws.com"]
    }
  }
}

# IAM role used by the Lambda function; its trust policy is the assume_role document.
resource "aws_iam_role" "iam_for_lambda_tm" {
  name               = "iam_for_lambda_tm"
  assume_role_policy = data.aws_iam_policy_document.assume_role.json
}

# Packages lambda_function.py into cron_raw_data.zip (the Lambda deployment package).
# Both paths are relative, so they resolve against the directory Terraform runs in.
data "archive_file" "lambda" {
  type        = "zip"
  source_file = "lambda_function.py"
  output_path = "cron_raw_data.zip"
}

# Lambda layer published from a pre-built zip (the zip is not generated by this configuration).
resource "aws_lambda_layer_version" "lambda_layer" {
  filename   = "lambda_raw_data_cron_layer.zip"
  layer_name = "lambda_raw_data_cron_layer"

  # The function that attaches this layer runs on python3.9, so declare it here too.
  compatible_runtimes = ["python3.8", "python3.9"]
}
# Log group for the Lambda function, with 14-day retention.
# The name interpolates the function's function_name, so Terraform creates this
# resource after aws_lambda_function.lambda_raw_data.
# NOTE(review): if the function runs and creates its own log group before this
# resource is applied, creation here may conflict — confirm the ordering is acceptable.
resource "aws_cloudwatch_log_group" "lambda_raw_data_cron" {
  name              = "/aws/lambda/${aws_lambda_function.lambda_raw_data.function_name}"
  retention_in_days = 14
  lifecycle {
    prevent_destroy = false
  }
  tags = {
    Application = "lambda"
  }
}

# Lambda function that runs lambda_function.lambda_handler from the zipped source.
resource "aws_lambda_function" "lambda_raw_data" {
  function_name = "lambda_raw_data_cron"
  role          = aws_iam_role.iam_for_lambda_tm.arn
  handler       = "lambda_function.lambda_handler"
  runtime       = "python3.9"
  memory_size   = 128
  timeout       = 20 # To be increased if there are more active projects.

  # Deployment package built by data.archive_file.lambda; the hash changes
  # whenever the packaged source changes, which triggers a redeploy.
  filename         = data.archive_file.lambda.output_path
  source_code_hash = data.archive_file.lambda.output_base64sha256

  layers = [aws_lambda_layer_version.lambda_layer.arn]

  depends_on = [
    aws_lambda_layer_version.lambda_layer,
    aws_iam_role.iam_for_lambda_tm,
    aws_iam_role_policy_attachment.lambda_logs,
  ]

  # Values are supplied through the environment variables
  # TF_VAR_rawdata_api_auth_token and TF_VAR_active_projects_api_base_url.
  environment {
    variables = {
      ACTIVE_PROJECTS_API_BASE_URL = var.active_projects_api_base_url
      RAWDATA_API_AUTH_TOKEN       = var.rawdata_api_auth_token
    }
  }

  tracing_config {
    mode = "PassThrough"
  }

  lifecycle {
    ignore_changes = [layers] # Ignore changes to layers for now
  }
}

# IAM policy allowing creation of log groups/streams and writing of log events.
# The resource pattern "arn:aws:logs:*:*:*" covers every log resource.
resource "aws_iam_policy" "lambda_logging" {
  name        = "lambda_raw_data_logging"
  path        = "/"
  description = "IAM policy for logging from this lambda function"

  policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Action": [
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": "arn:aws:logs:*:*:*",
"Effect": "Allow"
}
]
}
EOF
}

# Attaches the logging policy to the Lambda role.
resource "aws_iam_role_policy_attachment" "lambda_logs" {
  role       = aws_iam_role.iam_for_lambda_tm.name
  policy_arn = aws_iam_policy.lambda_logging.arn
}

# Schedule rule used to trigger the Lambda function.
resource "aws_cloudwatch_event_rule" "cron_raw_data_lambda_schedule" {
  name                = "cron_raw_data_lambda_schedule"
  description         = "Schedule rule to trigger Lambda"
  schedule_expression = "cron(0 0 * * ? *)" # Runs every day at 00:00 UTC.
}

# Allows events.amazonaws.com to invoke the function, restricted to the schedule rule's ARN.
resource "aws_lambda_permission" "eventbridge_invoke_permission" {
  statement_id  = "AllowExecutionFromEventBridge"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.lambda_raw_data.function_name
  principal     = "events.amazonaws.com"
  source_arn    = aws_cloudwatch_event_rule.cron_raw_data_lambda_schedule.arn
}

# Makes the Lambda function the target of the schedule rule.
resource "aws_cloudwatch_event_target" "cron_raw_data_lambda_target" {
  rule = aws_cloudwatch_event_rule.cron_raw_data_lambda_schedule.name
  arn  = aws_lambda_function.lambda_raw_data.arn
}


# Error alarm for the Lambda function. Actions are disabled, so the alarm only
# changes state in CloudWatch and sends no notifications.
resource "aws_cloudwatch_metric_alarm" "lambda_error_alarm" {
  alarm_name        = "lambda_raw_data_error_alarm"
  alarm_description = "Alarm triggered when the Lambda function reports 2 or more errors per day for 5 consecutive days."

  namespace   = "AWS/Lambda"
  metric_name = "Errors"
  dimensions = {
    FunctionName = aws_lambda_function.lambda_raw_data.function_name
  }

  # Errors is a count metric: sum it over the period instead of taking the maximum.
  statistic           = "Sum"
  period              = 86400 # 1 day, in seconds
  evaluation_periods  = 5
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 2 # Number of errors per period that counts as breaching.

  # Periods without invocations have no datapoints; do not count them as errors.
  treat_missing_data = "notBreaching"

  actions_enabled = false # Disable actions, meaning no notification actions will be taken
}


24 changes: 24 additions & 0 deletions scripts/aws/terraform/export-tool-lambda-cron/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Terraform provider

# Pins the AWS provider to the 4.67.x patch series.
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.67.0"
    }
  }
}

# AWS provider configuration; default_tags are applied to every taggable resource.
provider "aws" {
  # Use the declared variable (default "ap-south-1") instead of a second hard-coded copy.
  region = var.aws_region

  default_tags {
    tags = {
      Environment = "Production"
      Application = "tasking-manager"
      Team        = "HOTOSM"
      Creator     = "Terraform"
      Owner       = "HOTOSM"
    }
  }
}
22 changes: 22 additions & 0 deletions scripts/aws/terraform/export-tool-lambda-cron/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# ====================== GLOBALS ==================== #
# AWS region setting; defaults to ap-south-1.
variable "aws_region" {
  type        = string
  default     = "ap-south-1"
  description = "AWS region for all resources."
}

# Resource name prefix; defaults to "hotosm".
# NOTE(review): not referenced by the other Terraform files visible in this change — confirm it is still needed.
variable "project_name" {
  type        = string
  default     = "hotosm"
  description = "prefix for all resources."
}

# Exported from the environment as TF_VAR_active_projects_api_base_url (e.g. by CircleCI or GitHub Actions).
variable "active_projects_api_base_url" {
  type        = string
  description = "Base URL (without trailing slash) of the Tasking Manager API used to list active projects."
}

# Exported from the environment as TF_VAR_rawdata_api_auth_token (e.g. by CircleCI or GitHub Actions).
variable "rawdata_api_auth_token" {
  type        = string
  description = "Access token sent to the Raw Data API."
  sensitive   = true # Keep the secret out of plan/apply output.
}
Loading