diff --git a/Vagrantfile b/Vagrantfile index 6274fc18..7c173709 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -20,8 +20,8 @@ Vagrant.configure("2") do |config| vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ] - vb.memory = 4096 - vb.cpus = 1 + vb.memory = 8192 + vb.cpus = 2 end config.vm.provision :shell do |sh| diff --git a/integration/integration_test.sh b/integration/integration_test.sh index 9c411282..d1d3a21b 100755 --- a/integration/integration_test.sh +++ b/integration/integration_test.sh @@ -1,14 +1,5 @@ #!/bin/bash -sudo service elasticsearch start -sudo service iglu_server_0.3.0 start -sudo service snowplow_stream_collector start -sudo service snowplow_stream_enrich start -sudo service snowplow_elasticsearch_loader_good start -sudo service snowplow_elasticsearch_loader_bad start -sudo service kibana4_init start -sleep 15 - # Send good and bad events COUNTER=0 while [ $COUNTER -lt 10 ]; do diff --git a/provisioning/resources/configs/control-plane-api.toml b/provisioning/resources/configs/control-plane-api.toml index 897138be..c9746a45 100644 --- a/provisioning/resources/configs/control-plane-api.toml +++ b/provisioning/resources/configs/control-plane-api.toml @@ -25,4 +25,4 @@ caddy = "caddy_init" user = "snowplow" password = "snowplow" database = "iglu" -adddress = "127.0.0.1:5432" +adddress = "127.0.0.1:5433" diff --git a/provisioning/resources/configs/iglu-resolver.json b/provisioning/resources/configs/iglu-resolver.json index 9506f8e0..e0ddb352 100644 --- a/provisioning/resources/configs/iglu-resolver.json +++ b/provisioning/resources/configs/iglu-resolver.json @@ -24,7 +24,7 @@ ], "connection": { "http": { - "uri": "http://localhost:8081/api", + "uri": "iglu-server:8081/api", "apikey": "PLACEHOLDER" } } diff --git a/provisioning/resources/configs/iglu-server.conf 
b/provisioning/resources/configs/iglu-server.conf index aa3a49c1..99ed11a9 100644 --- a/provisioning/resources/configs/iglu-server.conf +++ b/provisioning/resources/configs/iglu-server.conf @@ -27,7 +27,7 @@ repo-server { # 'postgres' contains configuration options for the postgre instance the server # is using postgres { - host = "localhost" + host = "postgres" port = 5432 dbname = "iglu" username = "snowplow" diff --git a/provisioning/resources/configs/snowplow-es-loader-bad.hocon b/provisioning/resources/configs/snowplow-es-loader-bad.hocon index 31372ae0..e1772d3e 100644 --- a/provisioning/resources/configs/snowplow-es-loader-bad.hocon +++ b/provisioning/resources/configs/snowplow-es-loader-bad.hocon @@ -14,93 +14,37 @@ # This file (config.hocon.sample) contains a template with # configuration options for the Elasticsearch Loader. -# Sources currently supported are: -# "kinesis" for reading records from a Kinesis stream -# "stdin" for reading unencoded tab-separated events from stdin -# If set to "stdin", JSON documents will not be sent to Elasticsearch -# but will be written to stdout. -# "nsq" for reading unencoded tab-separated events from NSQ source = nsq -# Where to write good and bad records sink { - # Sinks currently supported are: - # "elasticsearch" for writing good records to Elasticsearch - # "stdout" for writing good records to stdout good = elasticsearch - - # Sinks currently supported are: - # "kinesis" for writing bad records to Kinesis - # "stderr" for writing bad records to stderr - # "nsq" for writing bad records to NSQ - # "none" for ignoring bad records bad = none } -# "good" for a stream of successfully enriched events -# "bad" for a stream of bad events -# "plain-json" for writing plain json enabled = bad -# The following are used to authenticate for the Amazon Kinesis sink. 
-# -# If both are set to "default", the default provider chain is used -# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) -# -# If both are set to "iam", use AWS IAM Roles to provision credentials. -# -# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { accessKey: "" secretKey: "" } -# config for NSQ nsq { - # Channel name for NSQ source channelName = ESLoaderChannelBad - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqlookupd port = 4150 - - # HTTP port for nsqlookupd lookupPort = 4161 } kinesis { - # "LATEST": most recent data. - # "TRIM_HORIZON": oldest available data. - # "AT_TIMESTAMP": Start from the record at or after the specified timestamp - # Note: This only affects the first run of this application on a stream. initialPosition= TRIM_HORIZON - - # Maximum number of records to get from Kinesis per call to GetRecords maxRecords = 1000 - - # Region where the Kinesis stream is located region = "" - - # "appName" is used for a DynamoDB table to maintain stream state. - # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" appName = "" } -# Common configuration section for all stream sources streams { inStreamName = BadEnrichedEvents - - # Stream for enriched events which are rejected by Elasticsearch outStreamName = BadElasticsearchEvents - - # Events are accumulated in a buffer before being sent to Elasticsearch. 
- # Note: Buffering is not supported by NSQ; will be ignored - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byteLimit or - # - the number of stored records exceeds recordLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit buffer { byteLimit = 5242880 recordLimit = 1 @@ -110,31 +54,18 @@ streams { elasticsearch { - # Events are indexed using an Elasticsearch Client - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - # - ssl: if using the http client, whether to use ssl or not client { - endpoint = "localhost" + endpoint = elasticsearch port = 9200 maxTimeout = 10000 ssl = false } - # When using the AWS ES service - # - signing: if using the http client and the AWS ES service you can sign your requests - # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html - # - region where the AWS ES service is located aws { signing = false region = "" } - # index: the Elasticsearch index name - # type: the Elasticsearch index type cluster { name = elasticsearch index = bad diff --git a/provisioning/resources/configs/snowplow-es-loader-good.hocon b/provisioning/resources/configs/snowplow-es-loader-good.hocon index 4b4726bc..68c2bfde 100644 --- a/provisioning/resources/configs/snowplow-es-loader-good.hocon +++ b/provisioning/resources/configs/snowplow-es-loader-good.hocon @@ -14,93 +14,37 @@ # This file (config.hocon.sample) contains a template with # configuration options for the Elasticsearch Loader. -# Sources currently supported are: -# "kinesis" for reading records from a Kinesis stream -# "stdin" for reading unencoded tab-separated events from stdin -# If set to "stdin", JSON documents will not be sent to Elasticsearch -# but will be written to stdout. 
-# "nsq" for reading unencoded tab-separated events from NSQ source = nsq -# Where to write good and bad records sink { - # Sinks currently supported are: - # "elasticsearch" for writing good records to Elasticsearch - # "stdout" for writing good records to stdout good = elasticsearch - - # Sinks currently supported are: - # "kinesis" for writing bad records to Kinesis - # "stderr" for writing bad records to stderr - # "nsq" for writing bad records to NSQ - # "none" for ignoring bad records bad = nsq } -# "good" for a stream of successfully enriched events -# "bad" for a stream of bad events -# "plain-json" for writing plain json enabled = good -# The following are used to authenticate for the Amazon Kinesis sink. -# -# If both are set to "default", the default provider chain is used -# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) -# -# If both are set to "iam", use AWS IAM Roles to provision credentials. -# -# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { accessKey = "" secretKey = "" } -# config for NSQ nsq { - # Channel name for NSQ source channelName = ESLoaderChannelGood - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqlookupd port = 4150 - - # HTTP port for nsqlookupd lookupPort = 4161 } kinesis { - # "LATEST": most recent data. - # "TRIM_HORIZON": oldest available data. - # "AT_TIMESTAMP": Start from the record at or after the specified timestamp - # Note: This only affects the first run of this application on a stream. initialPosition = TRIM_HORIZON - - # Maximum number of records to get from Kinesis per call to GetRecords maxRecords = 1000 - - # Region where the Kinesis stream is located region = "" - - # "appName" is used for a DynamoDB table to maintain stream state. 
- # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" appName = "" } -# Common configuration section for all stream sources streams { inStreamName = EnrichedEvents - - # Stream for enriched events which are rejected by Elasticsearch outStreamName = BadElasticsearchEvents - - # Events are accumulated in a buffer before being sent to Elasticsearch. - # Note: Buffering is not supported by NSQ; will be ignored - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byteLimit or - # - the number of stored records exceeds recordLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit buffer { byteLimit: 5242880 recordLimit: 1 @@ -110,31 +54,18 @@ streams { elasticsearch { - # Events are indexed using an Elasticsearch Client - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - # - ssl: if using the http client, whether to use ssl or not client { - endpoint = "localhost" + endpoint = elasticsearch port = 9200 maxTimeout = 10000 ssl = false } - # When using the AWS ES service - # - signing: if using the http client and the AWS ES service you can sign your requests - # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html - # - region where the AWS ES service is located aws { signing = false region = "" } - # index: the Elasticsearch index name - # type: the Elasticsearch index type cluster { name = "elasticsearch" index = "good" diff --git a/provisioning/resources/configs/snowplow-stream-collector.hocon b/provisioning/resources/configs/snowplow-stream-collector.hocon index fa5e3f0a..814af9ac 100644 --- a/provisioning/resources/configs/snowplow-stream-collector.hocon +++ b/provisioning/resources/configs/snowplow-stream-collector.hocon @@ -18,69 +18,49 @@ # 'collector' 
contains configuration options for the main Scala collector. collector { - # The collector runs as a web service specified on the following - # interface and port. interface = "0.0.0.0" port = 8080 - # Configure the P3P policy header. p3p { policyRef = "/w3c/p3p.xml" CP = "NOI DSP COR NID PSA OUR IND COM NAV STA" } - # The collector returns a cookie to clients for user identification - # with the following domain and expiration. + crossDomain { + enabled = false + domain = "*" + secure = true + } + cookie { enabled = true expiration = "365 days" # e.g. "365 days" - # Network cookie name name = sp - # The domain is optional and will make the cookie accessible to other - # applications on the domain. Comment out this line to tie cookies to - # the collector's full domain domain = "" } - # When enabled and the cookie specified above is missing, performs a redirect to itself to check - # if third-party cookies are blocked using the specified name. If they are indeed blocked, - # fallbackNetworkId is used instead of generating a new random one. cookieBounce { enabled = false - # The name of the request parameter which will be used on redirects checking that third-party - # cookies work. name = "n3pc" - # Network user id to fallback to when third-party cookies are blocked. 
fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000" } + redirectMacro { + enabled = false + placeholder = "[TOKEN]" + } + streams { - # Events which have successfully been collected will be stored in the good stream/topic good = RawEvents - - # Events that are too big (w.r.t Kinesis 1MB limit) will be stored in the bad stream/topic bad = BadRawEvents - - # Whether to use the incoming event's ip as the partition key for the good stream/topic useIpAddressAsPartitionKey = false - # config for NSQ sink sink { enabled = nsq - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqd port = 4150 } - # Incoming events are stored in a buffer before being sent to Kinesis/Kafka. - # Note: Buffering is not supported by NSQ. - # The buffer is emptied whenever: - # - the number of stored records reaches record-limit or - # - the combined size of the stored records reaches byte-limit or - # - the time in milliseconds since the buffer was last emptied reaches time-limit buffer { byteLimit = 4000000 recordLimit = 500 # Not supported by Kafka; will be ignored @@ -89,23 +69,13 @@ collector { } } -# Akka has a variety of possible configuration options defined at -# http://doc.akka.io/docs/akka/current/scala/general/configuration.html akka { loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging. loggers = ["akka.event.slf4j.Slf4jLogger"] - # akka-http is the server the Stream collector uses and has configurable options defined at - # http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html http.server { - # To obtain the hostname in the collector, the 'remote-address' header - # should be set. By default, this is disabled, and enabling it - # adds the 'Remote-Address' header to every request automatically. 
remote-address-header = on - raw-request-uri-header = on - - # Define the maximum request length (the default is 2048) parsing { max-uri-length = 32768 uri-parsing-mode = relaxed diff --git a/provisioning/resources/configs/snowplow-stream-enrich.hocon b/provisioning/resources/configs/snowplow-stream-enrich.hocon index 353095bb..b653c604 100644 --- a/provisioning/resources/configs/snowplow-stream-enrich.hocon +++ b/provisioning/resources/configs/snowplow-stream-enrich.hocon @@ -19,38 +19,21 @@ enrich { streams { in { - # Stream/topic where the raw events to be enriched are located raw = RawEvents } out { - # Stream/topic where the events that were successfully enriched will end up enriched = EnrichedEvents - # Stream/topic where the event that failed enrichment will be stored bad = BadEnrichedEvents - - # How the output stream/topic will be partitioned. - # Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid, - # user_ipaddress, domain_sessionid, user_fingerprint. - # Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the - # possible partition keys correspond to. - # Otherwise, the partition key will be a random UUID. - # Note: Nsq does not make use of partition key. 
partitionKey = "" } sourceSink { enabled = nsq - - # Channel name for nsq source rawChannel = StreamEnrichChannel - # Host name for nsqd - host = "127.0.0.1" - # TCP port for nsqd, 4150 by default + host = nsqd port = 4150 - # Host name for lookupd - lookupHost = "127.0.0.1" - # HTTP port for nsqlookupd, 4161 by default + lookupHost = nsqlookupd lookupPort = 4161 } @@ -60,6 +43,6 @@ enrich { timeLimit = 5000 } - appName = "" + appName = "snowplow-stream-enrich" } } diff --git a/provisioning/resources/elasticsearch/config/elasticsearch.yml b/provisioning/resources/elasticsearch/config/elasticsearch.yml new file mode 100644 index 00000000..581d3697 --- /dev/null +++ b/provisioning/resources/elasticsearch/config/elasticsearch.yml @@ -0,0 +1,33 @@ +# ======================== Elasticsearch Configuration ========================= +# +# NOTE: Elasticsearch comes with reasonable defaults for most settings. +# Before you set out to tweak and tune the configuration, make sure you +# understand what are you trying to accomplish and the consequences. +# +# The primary way of configuring a node is via this file. This template lists +# the most important settings you may want to configure for a production cluster. 
+# +# Please consult the documentation for further information on configuration options: +# https://www.elastic.co/guide/en/elasticsearch/reference/index.html +# +# ---------------------------------- Cluster ----------------------------------- +# +# Use a descriptive name for your cluster: +# +cluster.name: "sp-mini-es-cluster" +# +# ------------------------------------ Node ------------------------------------ +# +# Use a descriptive name for the node: +# +node.name: "sp-mini-es-node" +# ---------------------------------- Network ----------------------------------- +# +# Set the bind address to a specific IP (IPv4 or IPv6): +# +network.host: 0.0.0.0 +# --------------------------------- Discovery ---------------------------------- +# +# Prevent the "split brain" by configuring the majority of nodes (total number of master-eligible nodes / 2 + 1): +# +discovery.zen.minimum_master_nodes: 1 \ No newline at end of file diff --git a/provisioning/resources/elasticsearch/config/log4j2.properties b/provisioning/resources/elasticsearch/config/log4j2.properties new file mode 100644 index 00000000..8c5cae8b --- /dev/null +++ b/provisioning/resources/elasticsearch/config/log4j2.properties @@ -0,0 +1,28 @@ +status = error + +appender.console.type = Console +appender.console.name = console +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] %marker%m%n + +appender.rolling.type = RollingFile +appender.rolling.name = rolling +appender.rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.log +appender.rolling.layout.type = PatternLayout +appender.rolling.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %.10000m%n +appender.rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}.log.zip +appender.rolling.policies.type = Policies +appender.rolling.policies.time.type = TimeBasedTriggeringPolicy +appender.rolling.policies.time.interval = 1 
+appender.rolling.policies.time.modulate = true + +appender.rolling.strategy.type = DefaultRolloverStrategy +appender.rolling.strategy.action.type = Delete +appender.rolling.strategy.action.basepath = ${sys:es.logs.base_path} +appender.rolling.strategy.action.condition.type = IfLastModified +appender.rolling.strategy.action.condition.age = 7D +appender.rolling.strategy.action.PathConditions.type = IfFileName +appender.rolling.strategy.action.PathConditions.glob = ${sys:es.logs.cluster_name}-* + +rootLogger.level = info +rootLogger.appenderRef.console.ref = console diff --git a/provisioning/resources/elasticsearch/bad-mapping.json b/provisioning/resources/elasticsearch/mapping/bad-mapping.json similarity index 72% rename from provisioning/resources/elasticsearch/bad-mapping.json rename to provisioning/resources/elasticsearch/mapping/bad-mapping.json index ee8740d0..7b96de74 100644 --- a/provisioning/resources/elasticsearch/bad-mapping.json +++ b/provisioning/resources/elasticsearch/mapping/bad-mapping.json @@ -14,23 +14,15 @@ }, "mappings": { "bad": { - "_timestamp" : { - "enabled" : "yes", - "path" : "failure_tstamp" - }, - "_ttl": { - "enabled": true, - "default": "604800000" - }, "properties": { "errors": { "properties": { "message" : { - "type": "string", + "type": "text", "analyzer": "standard" }, "level" : { - "type": "string", + "type": "text", "analyzer": "standard" } } @@ -40,7 +32,7 @@ "format": "dateOptionalTime" }, "line": { - "type": "string", + "type": "text", "analyzer": "standard" } } diff --git a/provisioning/resources/elasticsearch/good-mapping.json b/provisioning/resources/elasticsearch/mapping/good-mapping.json similarity index 57% rename from provisioning/resources/elasticsearch/good-mapping.json rename to provisioning/resources/elasticsearch/mapping/good-mapping.json index 1102d531..4437a784 100644 --- a/provisioning/resources/elasticsearch/good-mapping.json +++ b/provisioning/resources/elasticsearch/mapping/good-mapping.json @@ -14,29 +14,21 
@@ }, "mappings": { "good": { - "_timestamp" : { - "enabled" : "yes", - "path" : "collector_tstamp" - }, - "_ttl": { - "enabled": true, - "default": "604800000" - }, "properties": { "app_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_colordepth": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_cookies": { "type": "boolean" }, "br_family": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_features_director": { "type": "boolean" @@ -66,24 +58,24 @@ "type": "boolean" }, "br_lang": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_name": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_renderengine": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_type": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_version": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_viewheight": { "type": "long" @@ -96,8 +88,8 @@ "format": "dateOptionalTime" }, "doc_charset": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "doc_height": { "type": "long" @@ -106,15 +98,15 @@ "type": "long" }, "domain_sessionid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "domain_sessionidx": { "type": "long" }, "domain_userid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "dvce_ismobile": { "type": "boolean" @@ -134,106 +126,106 @@ "format": "dateOptionalTime" }, "dvce_type": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "etl_tstamp": { "type": "date", "format": "dateOptionalTime" }, "event": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "event_id": { - 
"type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "geo_location": { "type": "geo_point" }, "mkt_campaign": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_content": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_medium": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_source": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_term": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "name_tracker": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "network_userid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_family": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_manufacturer": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_name": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_timezone": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_referrer": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_title": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_url": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlfragment": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlhost": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlpath": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlport": { "type": "long" }, "page_urlquery": { - "type": "string", - "index": "not_analyzed" + "type": 
"keyword", + "index": true }, "page_urlscheme": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "platform": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "pp_xoffset_max": { "type": "long" @@ -248,79 +240,79 @@ "type": "long" }, "refr_medium": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_source": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_term": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlfragment": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlhost": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlpath": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlport": { "type": "long" }, "refr_urlquery": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlscheme": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_action": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_category": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_label": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_fingerprint": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_ipaddress": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "useragent": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_collector": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true 
}, "v_etl": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_tracker": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true } } } diff --git a/provisioning/resources/init/create.sh b/provisioning/resources/init/create.sh new file mode 100755 index 00000000..004ca75c --- /dev/null +++ b/provisioning/resources/init/create.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Creates Elasticsearch indexes with their mappings +# Followed by Kibana index patterns +# Followed by NSQ topics + +curl -X PUT localhost:9200/good -H 'application/json' -d @/home/ubuntu/snowplow/elasticsearch/mapping/good-mapping.json && \ +curl -X PUT localhost:9200/bad -H 'application/json' -d @/home/ubuntu/snowplow/elasticsearch/mapping/bad-mapping.json && \ +curl -X PUT localhost:9200/.kibana/index-pattern/good -d '{"title":"good", "timeFieldName":"collector_tstamp"}' && \ +curl -X PUT localhost:9200/.kibana/index-pattern/bad -d '{"title":"bad", "timeFieldName":"failure_tstamp"}' && \ +curl -X PUT localhost:9200/.kibana/config/5.6.10 -d '{"defaultIndex":"good"}' && \ +curl -X POST localhost:4151/topic/create?topic=RawEvents && \ +curl -X POST localhost:4151/topic/create?topic=BadEvents && \ +curl -X POST localhost:4151/topic/create?topic=EnrichedEvents && \ +curl -X POST localhost:4151/topic/create?topic=BadEnrichedEvents \ No newline at end of file diff --git a/provisioning/resources/init/iglu-server-init.sql b/provisioning/resources/init/iglu-server-init.sql new file mode 100644 index 00000000..7e572822 --- /dev/null +++ b/provisioning/resources/init/iglu-server-init.sql @@ -0,0 +1,2 @@ +CREATE USER snowplow WITH PASSWORD 'snowplow'; +CREATE DATABASE iglu OWNER snowplow; diff --git a/provisioning/resources/init/wait-for-elasticsearch.sh b/provisioning/resources/init/wait-for-elasticsearch.sh new file mode 100755 index 00000000..2f63c0af --- /dev/null +++ b/provisioning/resources/init/wait-for-elasticsearch.sh @@ -0,0 +1,16 @@ 
+#!/bin/sh +# wait-for-elasticsearch.sh + +set -e + +host="$1" +shift +# "$@" now holds the command (if any) to exec once Elasticsearch answers + +until curl "$host"; do + >&2 echo "Elasticsearch is unavailable - sleeping" + sleep 5 +done + +>&2 echo "Elasticsearch is up - executing command(s)" +exec "$@" \ No newline at end of file diff --git a/provisioning/resources/init/wait-for-postgres.sh b/provisioning/resources/init/wait-for-postgres.sh new file mode 100755 index 00000000..479a5d64 --- /dev/null +++ b/provisioning/resources/init/wait-for-postgres.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# wait-for-postgres.sh + +set -e + +host="$1" +shift +# "$@" now holds the arguments passed on to docker-entrypoint.sh + +until PGPASSWORD=snowplow psql -h "$host" -d "iglu" -U "snowplow" -c '\q'; do + >&2 echo "Postgres is unavailable - sleeping" + sleep 2 +done + +>&2 echo "Postgres is up - executing command" +exec docker-entrypoint.sh "$@" \ No newline at end of file diff --git a/provisioning/roles/docker/files/docker-compose.yml b/provisioning/roles/docker/files/docker-compose.yml new file mode 100644 index 00000000..71a7c04c --- /dev/null +++ b/provisioning/roles/docker/files/docker-compose.yml @@ -0,0 +1,171 @@ +version: "3" + +services: + elasticsearch: + image: elasticsearch:5.6.10 + container_name: elasticsearch + restart: always + environment: + - "bootstrap.memory_lock=true" + - "ES_JAVA_OPTS=-Xms4g -Xmx4g" + volumes: + - /home/ubuntu/snowplow/elasticsearch/data:/usr/share/elasticsearch/data + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + logging: + options: + max-size: "50M" + max-file: "10" + ports: + - "9200:9200" + + kibana: + image: kibana:5.6.10 + container_name: kibana + restart: always + ports: + - "5601:5601" + depends_on: + - elasticsearch + + elasticsearch-loader-good: + image: snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.1 + container_name: elasticsearch-loader-good + command: [ "--config", "/snowplow/config/snowplow-es-loader-good.hocon" ] + restart: always + depends_on: + - elasticsearch + volumes: + - 
/home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xms512m -Xmx512m" + + elasticsearch-loader-bad: + image: snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.1 + container_name: elasticsearch-loader-bad + command: [ "--config", "/snowplow/config/snowplow-es-loader-bad.hocon" ] + restart: always + depends_on: + - elasticsearch + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xms512m -Xmx512m" + + nsqlookupd: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqlookupd + command: /nsqlookupd + restart: always + logging: + options: + max-size: "1M" + max-file: "10" + ports: + - "4160:4160" + - "4161:4161" + + nsqd: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqd + command: /nsqd --lookupd-tcp-address=nsqlookupd:4160 --data-path=/home/ubuntu/snowplow/nsq-data + restart: always + volumes: + - /home/ubuntu/snowplow/nsq-data:/home/ubuntu/snowplow/nsq-data + depends_on: + - nsqlookupd + ports: + - "4150:4150" + - "4151:4151" + + nsqadmin: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqadmin + command: /nsqadmin --lookupd-http-address=nsqlookupd:4161 + restart: always + depends_on: + - nsqlookupd + ports: + - "4171:4171" + + scala-stream-collector: + image: snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector-nsq:0.13.0 + container_name: scala-stream-collector-nsq + command: [ "--config", "/snowplow/config/snowplow-stream-collector.hocon" ] + restart: always + depends_on: + - nsqd + ports: + - "8080:8080" + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xms512m -Xmx512m" + + stream-enrich: + image: snowplow-docker-registry.bintray.io/snowplow/stream-enrich-nsq:0.16.0 + container_name: stream-enrich-nsq + command: [ + "--config", 
"/snowplow/config/snowplow-stream-enrich.hocon", + "--resolver", "file:/snowplow/config/iglu-resolver.json", + "--enrichments", "file:/snowplow/config/enrichments", + "--force-ip-lookups-download" + ] + restart: always + depends_on: + - scala-stream-collector + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xms512m -Xmx512m" + + postgres: + container_name: postgres + image: postgres:9.5 + restart: always + volumes: + - /home/ubuntu/snowplow/init/iglu-server-init.sql:/docker-entrypoint-initdb.d/init.sql + ports: + - "5433:5432" + logging: + options: + max-size: "1M" + max-file: "10" + + iglu-server: + container_name: iglu-server + image: snowplow-docker-registry.bintray.io/snowplow/iglu-server:0.3.0 + entrypoint: /snowplow/bin/wait-for-postgres.sh postgres --config /snowplow/config/iglu-server.conf + restart: always + depends_on: + - postgres + ports: + - "8081:8081" + volumes: + - /home/ubuntu/snowplow/init/wait-for-postgres.sh:/snowplow/bin/wait-for-postgres.sh + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" diff --git a/provisioning/roles/docker/tasks/main.yml b/provisioning/roles/docker/tasks/main.yml new file mode 100644 index 00000000..51459111 --- /dev/null +++ b/provisioning/roles/docker/tasks/main.yml @@ -0,0 +1,50 @@ +--- +- include_vars: ../../common_vars.yml + +- name: Setup the docker repository and install docker + become: yes + shell: | + apt-get update + apt-get install apt-transport-https ca-certificates curl software-properties-common --yes + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + apt-get update && apt-get install docker-ce --yes + +- name: Download docker-compose + become: yes + shell: curl -fL 
https://github.com/docker/compose/releases/download/1.21.2/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose + +- name: Apply executable permissions to the docker-compose binary + become: yes + shell: chmod +x /usr/local/bin/docker-compose + +- name: Copy docker-compose.yml + copy: src={{ item.src }} dest={{ item.dest }} owner=ubuntu group=ubuntu mode=0644 + with_items: + - { src: '../files/docker-compose.yml', dest: '/home/ubuntu/snowplow/' } + +- name: Ensure file permissions + become: yes + shell: chown ubuntu:ubuntu -R /home/ubuntu/snowplow && chmod 755 -R /home/ubuntu/snowplow + +- name: Deploy snowplow mini + become: yes + shell: cd /home/ubuntu/snowplow/ && docker-compose up -d && sleep 20 + +- name: Wait for Elasticsearch to get ready + become: yes + shell: sh {{init_dir}}/wait-for-elasticsearch.sh localhost:9200 + +# Kibana 5.x has issues updating index patterns +# rebooting kibana and retrying works out +- name: Likely to fail attempt to create ES indexes & Kibana index patterns & NSQ topics + become: yes + shell: sh {{init_dir}}/create.sh + +- name: restart kibana for known index-creating issues + become: yes + shell: docker-compose -f {{main_dir}}/docker-compose.yml restart kibana + +- name: Create ES indexes & Kibana index patterns & NSQ topics + become: yes + shell: sh {{init_dir}}/create.sh diff --git a/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml b/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml index e8c81c4b..75454316 100644 --- a/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml +++ b/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml @@ -10,26 +10,8 @@ shell: 'adduser ubuntu --disabled-password --gecos "" ; passwd -d ubuntu' -- name: Insert logrotate configuration for Snowplow Services - become: yes - copy: - dest: "/etc/logrotate.d/snowplow-apps" - content: | - /var/log/snowplow*.log /var/log/snowplow*.err /var/log/nsq*.log /var/log/nsq*.err /var/log/iglu_server*.log /var/log/iglu_server*.err { 
- hourly - rotate 3 - missingok - notifempty - copytruncate - } - mode: 0644 - -- name: Change logrotate cron to hourly - become: yes - shell: 'mv /etc/cron.daily/logrotate /etc/cron.hourly && service cron restart' - - name: creating directories - file: path={{item}} state=directory + file: path={{item}} state=directory mode=0755 with_items: - "{{configs_dir}}" - "{{staging_dir}}" @@ -48,13 +30,16 @@ src: "{{playbook_dir}}/resources/elasticsearch" dest: "{{main_dir}}" recursive: yes + archive: no - synchronize: src: "{{playbook_dir}}/resources/configs" dest: "{{main_dir}}" recursive: yes + archive: no - synchronize: src: "{{playbook_dir}}/resources/init" dest: "{{main_dir}}" recursive: yes + archive: no diff --git a/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml b/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml deleted file mode 100644 index ad7c423b..00000000 --- a/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml +++ /dev/null @@ -1,53 +0,0 @@ ---- -- include_vars: ../../common_vars.yml - -- name: Adding APT repository key - become: yes - apt_key: - id: ACCC4CF8 - url: https://www.postgresql.org/media/keys/ACCC4CF8.asc - tags: - - postgresql - - db - - repo - -- name: Add PostgreSQL official APT repository - become: yes - apt_repository: - repo: "deb http://apt.postgresql.org/pub/repos/apt/ {{ansible_distribution_release}}-pgdg main" - tags: - - postgresql - - db - - repo - -- name: Install acl for creating Postgresql user - become: yes - apt: - name: "acl" - state: present - update_cache: yes - cache_valid_time: 3600 - -- name: Install PostgreSQL - become: yes - apt: - name: "postgresql-9.5" - state: present - update_cache: yes - cache_valid_time: 3600 - tags: - - postgresql - - db - - deps - -- name: Install dependencies for the Ansible module - become: yes - apt: - name: "{{item}}" - state: latest - with_items: - - python-psycopg2 - tags: - - postgresql - - db - - deps diff --git 
a/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml b/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml index e8fe313b..6553abc5 100644 --- a/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml +++ b/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml @@ -3,43 +3,8 @@ - name: Set variables set_fact: - stream_collector_package: 'snowplow_scala_stream_collector_0.11.0.zip' - stream_enrich_package: 'snowplow_stream_enrich_nsq_0.16.1.zip' - es_loader_package: 'snowplow_elasticsearch_loader_http_0.10.1.zip' - iglu_server_package: 'iglu_server_0.3.0.zip' - kibana_v: '4.0.1' - nsq_package: 'nsq-1.0.0-compat.linux-amd64.go1.8.tar.gz' - nsq_bin_dir: 'nsq-1.0.0-compat.linux-amd64.go1.8/bin' control_plane_dir: '{{playbook_dir}}/resources/control-plane' -- name: Install unzip - become: yes - apt: - name: "unzip" - state: present - update_cache: yes - cache_valid_time: 3600 - -- name: Add Java 8 repository - become: yes - apt_repository: - repo: 'ppa:webupd8team/java' - state: present - -- name: Signed Oracle License - become: yes - shell: "echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | sudo /usr/bin/debconf-set-selections" - register: oracle_license_signed - -- name: Install Java 8 - become: yes - apt: - name: oracle-java8-installer - state: present - update_cache: yes - cache_valid_time: 3600 - when: oracle_license_signed|changed - - name: Copy Control API to executables dir become: yes synchronize: @@ -52,134 +17,6 @@ src: "{{playbook_dir}}/../VERSION" dest: "{{main_dir}}" -- name: Check Stream Collector - stat: - path: "{{staging_dir}}/{{stream_collector_package}}" - register: check_stream_collector_result - -- name: Download Stream Collector - get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{stream_collector_package}}" - dest: "{{staging_dir}}" - when: check_stream_collector_result.stat.exists == False - register: stream_collector_downloaded - -- name: Unzip downloaded Stream Collector - shell: "unzip 
{{staging_dir}}/{{stream_collector_package}} -d {{executables_dir}}" - when: stream_collector_downloaded|changed - -- name: Check Stream Enrich - stat: - path: "{{staging_dir}}/{{stream_enrich_package}}" - register: check_stream_enrich_result - -- name: Download Stream Enrich - get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{stream_enrich_package}}" - dest: "{{staging_dir}}" - when: check_stream_enrich_result.stat.exists == False - register: stream_enrich_downloaded - -- name: Unzip downloaded Stream Enrich - shell: "unzip {{staging_dir}}/{{stream_enrich_package}} -d {{executables_dir}}" - when: stream_enrich_downloaded|changed - -- name: Check Elasticsearch Loader - stat: - path: "{{staging_dir}}/{{es_loader_package}}" - register: check_es_loader_result - -- name: Download Elasticsearch Loader - get_url: - url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{es_loader_package}}" - dest: "{{staging_dir}}" - when: check_es_loader_result.stat.exists == False - register: es_loader_downloaded - -- name: Unzip downloaded Elasticsearch Loader - shell: "unzip {{staging_dir}}/{{es_loader_package}} -d {{executables_dir}}" - when: es_loader_downloaded|changed - -- name: Check Iglu Server - stat: - path: "{{staging_dir}}/{{iglu_server_package}}" - register: check_iglu_server_result - -- name: Download Iglu Server - get_url: - url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{iglu_server_package}}" - dest: "{{staging_dir}}" - when: check_iglu_server_result.stat.exists == False - register: iglu_server_downloaded - -- name: Unzip downloaded Iglu Server - shell: "unzip {{staging_dir}}/{{iglu_server_package}} -d {{executables_dir}}" - when: iglu_server_downloaded|changed - register: iglu_server_extracted - -- name: Download NSQ - get_url: - url: "https://s3.amazonaws.com/bitly-downloads/nsq/{{nsq_package}}" - dest: "{{staging_dir}}" - -- name: Unzip downloaded NSQ - shell: "tar xvfz {{staging_dir}}/{{nsq_package}} 
--directory {{staging_dir}}" - -- name: Copy NSQ binaries to executables_dir - shell: "cp {{staging_dir}}/{{nsq_bin_dir}}/nsqd {{staging_dir}}/{{nsq_bin_dir}}/nsqlookupd {{staging_dir}}/{{nsq_bin_dir}}/nsqadmin {{executables_dir}}" - -- name: Create snowplow user on Postgresql - become: true - become_user: postgres - postgresql_user: - name: snowplow - password: snowplow - -- name: Create iglu db on Postgresql - become: true - become_user: postgres - postgresql_db: - name: iglu - owner: snowplow - -- name: Download Elasticsearch - become: yes - shell: "wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.5.deb -P {{staging_dir}}" - -- name: Install Elasticsearch - become: yes - shell: "dpkg -i {{staging_dir}}/elasticsearch-1.7.5.deb" - -- name: Install Elasticsearch Head Plugin - become: yes - shell: "/usr/share/elasticsearch/bin/plugin --install mobz/elasticsearch-head" - -- name: Check Kibana - stat: - path: "{{staging_dir}}/kibana-{{kibana_v}}-linux-x64.zip" - register: check_kibana_result - -- name: Download Kibana - get_url: - url: "https://download.elasticsearch.org/kibana/kibana/kibana-{{kibana_v}}-linux-x64.zip" - dest: "{{staging_dir}}" - when: check_kibana_result.stat.exists == False - register: kibana_downloaded - -- name: Unzip downloaded Kibana package - become: yes - shell: "unzip {{staging_dir}}/kibana-{{kibana_v}}-linux-x64.zip -d /opt/" - when: kibana_downloaded|changed - register: kibana_unzipped - -- name: Symlink for kibana - become: yes - file: - src: "/opt/kibana-{{kibana_v}}-linux-x64" - dest: "/opt/kibana" - state: link - when: kibana_unzipped|changed - - name: Copy Caddy executable to executables dir become: yes environment: diff --git a/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml b/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml index 1197549e..369622f0 100644 --- a/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml +++ b/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml @@ -9,31 +9,16 
@@ dest: "/etc/init.d" mode: 0755 with_items: - - kibana4_init - snowplow_mini_control_plane_api - - snowplow_stream_collector - - snowplow_stream_enrich - - snowplow_elasticsearch_loader_good - - snowplow_elasticsearch_loader_bad - - iglu_server_0.3.0 - caddy_init - - nsqd_init - - nsqlookupd_init - - nsqadmin_init - name: Configure for inits for calling at boot time become: yes shell: "update-rc.d {{item}} defaults" with_items: - - kibana4_init - snowplow_mini_control_plane_api - - snowplow_stream_collector - - snowplow_stream_enrich - - snowplow_elasticsearch_loader_good - - snowplow_elasticsearch_loader_bad - - iglu_server_0.3.0 - - elasticsearch - caddy_init - - nsqd_init - - nsqlookupd_init - - nsqadmin_init + +- name: Increase mmap count to recommended 262144 for Elasticsearch + become: yes + shell: (grep -qxF "vm.max_map_count=262144" /etc/sysctl.conf || echo "vm.max_map_count=262144" >> /etc/sysctl.conf) && service procps start diff --git a/provisioning/roles/sp_mini_8_configure/tasks/main.yml b/provisioning/roles/sp_mini_8_configure/tasks/main.yml index 0cc4c509..41faa379 100644 --- a/provisioning/roles/sp_mini_8_configure/tasks/main.yml +++ b/provisioning/roles/sp_mini_8_configure/tasks/main.yml @@ -1,80 +1,16 @@ --- -- include_vars: ../../common_vars.yml -- name: Starting Elasticsearch - become: yes - service: - name: elasticsearch - state: started - register: ElasticsearchStarted +- cron: + name: "Add cronjob to restart containers at system reboot" + special_time: reboot + job: "/usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml restart && sleep 30" -- name: Wait for Elasticsearch port 9200 to become open on the host, don't start checking for 10 seconds - wait_for: - port: 9200 - delay: 10 +- cron: + name: "Add cronjob to crop ES good index's docs older than a week" + special_time: weekly + job: curl -X POST localhost:9200/good/_delete_by_query -H 'Content-Type:application/json' -d' { "query" :{ "range" :{ "collector_tstamp" :{ "lt" :"now-1w/d" } } } }' >> /var/log/crop_good.log 2>&1 
-- name: curl put good-mapping.json - shell: "curl -XPUT 'http://localhost:9200/good' -d @{{es_dir}}/good-mapping.json" - -- name: curl put bad-mapping.json - shell: "curl -XPUT 'http://localhost:9200/bad' -d @{{es_dir}}/bad-mapping.json" - -- name: Starting nsqd - become: yes - service: - name: nsqd_init - state: started - register: NsqdStarted - -- name: Starting nsqlookupd - become: yes - service: - name: nsqlookupd_init - state: started - register: NsqlookupdStarted - -- name: Starting nsqadmin - become: yes - service: - name: nsqadmin_init - state: started - register: NsqadminStarted - -- name: Wait for the NSQ services to start - wait_for: - port: "{{item}}" - delay: 1 - with_items: - - 4151 - - 4161 - - 4171 - -- name: Starting Kibana - become: yes - service: - name: kibana4_init - state: started - -- name: add "good" index pattern to Kibana - shell: > - curl -XPUT http://localhost:9200/.kibana/index-pattern/good -d '{"title" : "good", "timeFieldName" : "collector_tstamp"}' - -- name: add "bad" index pattern to Kibana - shell: > - curl -XPUT http://localhost:9200/.kibana/index-pattern/bad -d '{"title" : "bad", "timeFieldName" : "failure_tstamp"}' - -- name: make "good" index pattern default - shell: > - curl -XPUT http://localhost:9200/.kibana/config/4.0.1 -d '{"defaultIndex" : "good"}' - -- name: Create new topic for RawEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=RawEvents" - -- name: Create new topic for BadEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEvents" - -- name: Create new topic for EnrichedEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=EnrichedEvents" - -- name: Create new topic for BadEnrichedEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEnrichedEvents" +- cron: + name: "Add cronjob to crop ES bad index's docs older than a week" + special_time: weekly + job: curl -X POST localhost:9200/bad/_delete_by_query -H 
'Content-Type:application/json' -d' { "query" :{ "range" :{ "failure_tstamp" :{ "lt" :"now-1w/d" } } } }' >> /var/log/crop_bad.log 2>&1 diff --git a/provisioning/with_building_ui_and_go_projects.yml b/provisioning/with_building_ui_and_go_projects.yml index 8bf08842..a55ff0d2 100644 --- a/provisioning/with_building_ui_and_go_projects.yml +++ b/provisioning/with_building_ui_and_go_projects.yml @@ -12,10 +12,10 @@ - typescript - packer - sp_mini_1_create_dirs - - sp_mini_2_install_postgresl - sp_mini_3_build_go_projects - sp_mini_4_setup_apps - sp_mini_5_build_ui - sp_mini_6_copy_ui_folders - sp_mini_7_setup_init + - docker - sp_mini_8_configure diff --git a/provisioning/without_building_ui_and_go_projects.yml b/provisioning/without_building_ui_and_go_projects.yml index b7483aca..d6e2ee8d 100644 --- a/provisioning/without_building_ui_and_go_projects.yml +++ b/provisioning/without_building_ui_and_go_projects.yml @@ -7,8 +7,8 @@ roles: - sp_mini_1_create_dirs - - sp_mini_2_install_postgresl - sp_mini_4_setup_apps - sp_mini_6_copy_ui_folders - sp_mini_7_setup_init + - docker - sp_mini_8_configure