From 371cdd6558176794569918946eafaa4fce5d7247 Mon Sep 17 00:00:00 2001 From: Alistair Brown Date: Sun, 14 Oct 2018 15:46:42 +0100 Subject: [PATCH] Implement Eventbrite transformer --- lambdas/README.md | 40 ++- lambdas/eventbrite/handlers/producer.js | 23 +- .../handlers/schemas/event-schema.json | 260 ++++++++++++++++++ lambdas/eventbrite/handlers/transformer.js | 134 +++++++++ lambdas/eventbrite/package-lock.json | 5 + lambdas/eventbrite/package.json | 3 +- lambdas/eventbrite/serverless.yml | 31 +++ .../handlers/schemas/event-schema.json | 1 - lambdas/package.json | 4 + 9 files changed, 486 insertions(+), 15 deletions(-) create mode 100644 lambdas/eventbrite/handlers/schemas/event-schema.json create mode 100644 lambdas/eventbrite/handlers/transformer.js diff --git a/lambdas/README.md b/lambdas/README.md index 2d72639f..05d2fbbe 100644 --- a/lambdas/README.md +++ b/lambdas/README.md @@ -124,6 +124,44 @@ will receive an "Access Denied" error. Instead you may want to comment out the Pulls the logs from cloudwatch of the last lambda run. Useful for debugging. + +### Transformer + +This lambda takes the Eventbrite JSON data which has been saved to S3, +transforms it into a standardised format and saves it back to S3. + +This lambda is triggered by the creation of the source file by the producer +lambda. + +#### `eventbrite:transformer:update` + +Update just the handler functionality. Do this whenever you change the +functionality of the lambda. + +#### `eventbrite:transformer:invoke` + +Invoke the lambda on AWS. As this lambda is triggered by the creation of files +in the producer bucket, this may not run correctly. It can be invoked by calling +the producer invoke command instead. + +#### `eventbrite:transformer:invoke-local` + +Invoke the lambda locally. Use this for development. + +The local lambda will need to be provided with an event object which contains +mock values for the newly created file. 
Samples of the AWS event objects can be +found at: +https://docs.aws.amazon.com/lambda/latest/dg/eventsources.html#eventsources-s3-put + +The local lambda will not have permissions to read / write files to / from S3 +and you will receive an "Access Denied" error. Instead you may want to comment +out the `getFromS3` and `uploadTo` calls whilst in development. + +#### `eventbrite:transformer:logs` + +Pulls the logs from cloudwatch of the last lambda run. Useful for debugging. + + --- ## Farset Labs Calendar @@ -204,7 +242,7 @@ functionality of the lambda. #### `farsetlabs:transform:invoke` Invoke the lambda on AWS. As this lambda is triggered by the creation of files -in the events bucket, this may not run correctly. It can be invoked by calling +in the producer bucket, this may not run correctly. It can be invoked by calling the producer invoke command instead. #### `farsetlabs:transform:invoke-local` diff --git a/lambdas/eventbrite/handlers/producer.js b/lambdas/eventbrite/handlers/producer.js index 26a6b965..d644aa28 100644 --- a/lambdas/eventbrite/handlers/producer.js +++ b/lambdas/eventbrite/handlers/producer.js @@ -26,15 +26,13 @@ const getFromApi = async function() { return Promise.all([initialResponse].concat(requests)); }; -const uploadData = function(bucketName, eventsPages) { - return eventsPages.map(function(eventsPage, index) { - return uploadTo( - bucketName, - (today, hash) => - `eventbrite-events-page-${index + 1}__${today.valueOf()}__${hash}.json`, - eventsPage - ); - }); +const uploadData = function(bucketName, eventsPage, { index }) { + return uploadTo( + bucketName, + (today, hash) => + `eventbrite-events-page-${index + 1}__${today.valueOf()}__${hash}.json`, + eventsPage + ); }; module.exports.produce = async (event, context, callback) => { @@ -50,10 +48,11 @@ module.exports.produce = async (event, context, callback) => { // Write captured data to S3 const { producerBucket } = buckets(); - const uploads = uploadData(producerBucket, eventsPages); - 
const message = (await Promise.all(uploads)).map(({ key }) => key); + const filePaths = await Promise.all(await eventsPages.map(async function (eventsPage, index) { + return (await uploadData(producerBucket, eventsPage, { index })).key; + })); - callback(null, { message }); + callback(null, { message: filePaths }); } catch (err) { callback(err, null); } diff --git a/lambdas/eventbrite/handlers/schemas/event-schema.json b/lambdas/eventbrite/handlers/schemas/event-schema.json new file mode 100644 index 00000000..fd3e6ebb --- /dev/null +++ b/lambdas/eventbrite/handlers/schemas/event-schema.json @@ -0,0 +1,260 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "time": { + "type": "object", + "required": [ + "utc", + "timezone" + ], + "properties": { + "utc": { + "type": "string", + "format": "date-time", + "title": "ISO 8601 to second precision in UTC", + "pattern": "(:?\\d{4})-(:?\\d{2})-(:?\\d{2})T(:?\\d{2})\\:(:?\\d{2})\\:(:?\\d{2}).(:?\\d{3})Z", + "examples": ["2018-10-20T18:00:00.000Z"] + }, + "timezone": { + "type": "string", + "title": "Timezone in TZ value", + "pattern": "\\w+\\/[\\w\\/]+", + "examples": ["Europe/Belfast"] + } + } + }, + "image-set": { + "type": "object", + "required": [ + "regular" + ], + "properties": { + "high": { + "type": "string", + "format": "uri", + "title": "URL to high resolution image" + }, + "regular": { + "type": "string", + "format": "uri", + "title": "URL to regular resolution image" + }, + "thumnail": { + "type": "string", + "format": "uri", + "title": "URL to thumbnail image" + } + } + } + }, + "type": "object", + "required": [ + "name", + "times", + "venue", + "created_at", + "last_updated", + "source_data" + ], + "properties": { + "name": { + "type": "string", + "title": "Name of event", + "examples": ["Code Co-Op Challenge"] + }, + "description": { + "type": "string", + "title": "Description of event", + "examples": ["Try your hand at this month's coding challenge and learn how your peers 
tackle the same task."] + }, + "url": { + "type": "string", + "title": "URL to source webpage for event", + "examples": ["https://www.meetup.com/CodeCoop-NI/events/ggxkhqyxpbcb/"] + }, + "times": { + "type": "object", + "title": "Timing information for the event", + "required": [ + "start", + "end", + "duration" + ], + "properties": { + "start": { + "$ref": "#/definitions/time" + }, + "end": { + "$ref": "#/definitions/time" + }, + "duration": { + "type": "integer", + "title": "Number of milliseconds between start and end times", + "examples": [14400000] + } + } + }, + "logo": { + "$ref": "#/definitions/image-set" + }, + "topics": { + "type": "array", + "title": "Topics describing the event", + "items": { + "type": "string", + "examples": ["Science & Technology"] + } + }, + "venue": { + "type": "object", + "required": [ + "name", + "address", + "country", + "latitude", + "longitude" + ], + "properties": { + "name": { + "type": "string", + "title": "Name of venue", + "examples": ["Farset Labs"] + }, + "address": { + "type": "string", + "title": "Address of venue", + "examples": ["Weavers Court, Linfield Road, BT12 5GH"] + }, + "city": { + "type": "string", + "title": "City the venue is in", + "examples": ["Belfast"] + }, + "country": { + "type": "string", + "title": "Country the venue is in, must be ISO 3166-1 alpha-2 upper cased two-letter country code", + "pattern": "[A-Z]{2}", + "examples": ["GB"] + }, + "latitude": { + "type": "string", + "title": "Latitude of venue location, no precision requirement", + "examples": ["54.592826"] + }, + "longitude": { + "type": "string", + "title": "Longitude of venue location, no precision requirement", + "examples": ["-5.940666"] + } + } + }, + "organiser": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string", + "title": "Name of organiser or organising group", + "examples": ["Code Co-Op"] + }, + "logo": { + "$ref": "#/definitions/image-set" + } + } + }, + "attendee_numbers": { + 
"type": "object", + "required": [ + "capacity", + "responses", + "waitlist" + ], + "properties": { + "capacity": { + "type": "integer", + "title": "The maximum number of attendees", + "examples": [50] + }, + "responses": { + "type": "integer", + "title": "The number of people who are going / have indicated they are going", + "examples": [38] + }, + "waitlist": { + "type": "integer", + "title": "The number of people who are on the wait list (when at capacity)", + "examples": [0] + } + } + }, + "charge": { + "type": "object", + "required": [ + "is_free" + ], + "properties": { + "is_free": { + "type": "boolean", + "title": "Whether the event is free to attend", + "examples": [false] + }, + "cost": { + "type": "object", + "required": [ + "currency", + "value" + ], + "properties": { + "currency": { + "type": "string", + "title": "Currency of the cost, must be ISO 4217 three-letter currency code", + "pattern": "[A-Z]{2}", + "examples": ["GBP"] + }, + "value": { + "type": "integer", + "title": "Number representing the value of the cost in the lowest denomination (eg. pence if in GBP)", + "examples": [1200] + } + } + } + } + }, + "created_at": { + "type": "string", + "format": "date-time", + "title": "ISO 8601 to second precision in UTC", + "pattern": "(:?\\d{4})-(:?\\d{2})-(:?\\d{2})T(:?\\d{2})\\:(:?\\d{2})\\:(:?\\d{2}).(:?\\d{3})Z", + "examples": ["2018-06-20T09:26:59Z"] + }, + "last_updated": { + "type": "string", + "format": "date-time", + "title": "ISO 8601 to second precision in UTC", + "pattern": "(:?\\d{4})-(:?\\d{2})-(:?\\d{2})T(:?\\d{2})\\:(:?\\d{2})\\:(:?\\d{2}).(:?\\d{3})Z", + "examples": ["2018-10-07T14:51:21Z"] + }, + "source_data": { + "type": "object", + "required": [ + "name", + "id" + ], + "properties": { + "name": { + "type": "string", + "title": "Name of the source, eg. 
farsetlabs-calendar, meetupcom, eventbrite, etc.", + "enum": ["meetupcom", "eventbrite", "farsetlabs-calendar"], + "examples": ["farsetlabs-calendar"] + }, + "id": { + "type": "string", + "title": "ID of the event from the source", + "examples": ["316kag8mfr56qt58k3guo4ejsr@google.com"] + } + } + } + } +} diff --git a/lambdas/eventbrite/handlers/transformer.js b/lambdas/eventbrite/handlers/transformer.js new file mode 100644 index 00000000..33454b9b --- /dev/null +++ b/lambdas/eventbrite/handlers/transformer.js @@ -0,0 +1,134 @@ +"use strict"; + +const { getFromS3 } = require("aws-lambda-data-utils"); +const { validate } = require("jsonschema"); +const eventSchema = require("./schemas/event-schema"); +const { buckets } = require("../config"); +const { uploadTo } = require("../utils"); + +const getLogoFrom = function ({ logo }) { + if (!logo) return undefined; + + return { + high: logo.original ? logo.original.url.trim() : undefined, + regular: logo.url.trim() + } +}; + +const getTopicsFrom = function ({ category, subcategory }) { + return [category, subcategory].reduce( + (topics, source) => !source ? 
topics : topics.concat(source.name.trim()), + [] + ); +}; + +const getChargeFrom = function ({ is_free: isFree, ticket_availability: tickets }) { + if (isFree) return { is_free: true }; + + return { + is_free: false, + cost: { + currency: tickets.minimum_ticket_price.currency.trim(), + value: tickets.minimum_ticket_price.value + } + } +}; + +const transformEvent = function (defaults, event) { + const { + id, + name, + description, + url, + start, + end, + venue, + organizer, + created, + changed, + } = event; + const startDate = new Date(start.utc); + const endDate = new Date(end.utc); + + return { + name: name.text.trim(), + description: description.text.trim(), + url, + times: { + start: { + utc: startDate.toISOString(), + timezone: start.timezone.trim() + }, + end: { + utc: endDate.toISOString(), + timezone: end.timezone.trim() + }, + duration: endDate - startDate + }, + logo: getLogoFrom(event), + topics: getTopicsFrom(event), + venue: { + name: (venue.name || venue.address.localized_address_display).trim(), + address: venue.address.localized_address_display.trim(), + city: venue.address.city || undefined, + country: (venue.address.country || defaults.country).trim(), + latitude: venue.latitude.trim(), + longitude: venue.longitude.trim() + }, + organiser: { + name: organizer.name.trim() + }, + charge: getChargeFrom(event), + created_at: new Date(created).toISOString(), + last_updated: new Date(changed).toISOString(), + source_data: { + name: "eventbrite", + id + } + } +}; + +const isValidEvent = (event) => validate(event, eventSchema).errors.length === 0; + +const uploadData = function(bucketName, eventsPage) { + return uploadTo( + bucketName, + (today, hash) => + `eventbrite-events__${today.valueOf()}__${hash}.json`, + eventsPage + ); +}; + +module.exports.transform = async (event, context, callback) => { + try { + const records = event.Records; + + const transformedFiles = await Promise.all(await records.map(async function ({ + s3: { bucket, object: file } + 
}) { + const data = await getFromS3(bucket.name, file.key); + const response = JSON.parse(data.Body.toString()); + + const transformedEvents = response.events.map((event) => ( + transformEvent({ country: "GB" }, event) + )); + + const validEvents = transformedEvents.filter(isValidEvent) + + if (validEvents.length !== transformedEvents.length) { + console.log('WARNING: some events generated were not valid!') + } + + return validEvents; + })); + + const { eventsBucket } = buckets(); + const filePaths = await Promise.all(await transformedFiles.map(async function (transformedFile) { + return (await uploadData(eventsBucket, transformedFile)).key; + })); + + callback(null, { message: filePaths }); + } catch (err) { + callback(err, null); + } +}; diff --git a/lambdas/eventbrite/package-lock.json b/lambdas/eventbrite/package-lock.json index cbf9c8a4..2c3c62ab 100644 --- a/lambdas/eventbrite/package-lock.json +++ b/lambdas/eventbrite/package-lock.json @@ -8,6 +8,11 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/aws-lambda-data-utils/-/aws-lambda-data-utils-1.0.0.tgz", "integrity": "sha512-jfzEJ7RII1SXAXoKevCS3C9bB0BGU1c8qTVBxqMoOaENVC3cpOSyzdSNdftW7WYHCKlWDt9IBmEcg57LnPL1WA==" + }, + "jsonschema": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/jsonschema/-/jsonschema-1.2.4.tgz", + "integrity": "sha512-lz1nOH69GbsVHeVgEdvyavc/33oymY1AZwtePMiMj4HZPMbP5OIKK3zT9INMWjwua/V4Z4yq7wSlBbSG+g4AEw==" } } } diff --git a/lambdas/eventbrite/package.json b/lambdas/eventbrite/package.json index cb12d8a7..c077bb80 100644 --- a/lambdas/eventbrite/package.json +++ b/lambdas/eventbrite/package.json @@ -3,6 +3,7 @@ "version": "1.0.0", "description": "Pull eventbrite.com events data", "dependencies": { - "aws-lambda-data-utils": "^1.0.0" + "aws-lambda-data-utils": "^1.0.0", + "jsonschema": "^1.2.4" } } diff --git a/lambdas/eventbrite/serverless.yml b/lambdas/eventbrite/serverless.yml index dfcc8d16..523ac83d 100644 --- a/lambdas/eventbrite/serverless.yml +++ 
b/lambdas/eventbrite/serverless.yml @@ -15,6 +15,16 @@ provider: - - "arn:aws:s3:::" - ${self:custom.producerBucket} - "/*" + - Effect: Allow + Action: + - "s3:PutObject" + Resource: + Fn::Join: + - "" + - - "arn:aws:s3:::" + - ${self:custom.eventsBucket} + - "/*" + functions: produce: handler: handlers/producer.produce @@ -23,10 +33,31 @@ functions: environment: TZ: Europe/Belfast EVENTBRITE_API_TOKEN: ${ssm:eventbriteApiToken~true} + transform: + handler: handlers/transformer.transform + environment: + TZ: Europe/Belfast resources: Resources: S3BucketMuxerProducedEventsEventbrite: + DependsOn: + - TransformLambdaPermissionS3BucketMuxerProducedEventsEventbriteS3 Type: AWS::S3::Bucket Properties: BucketName: ${self:custom.producerBucket} + NotificationConfiguration: + LambdaConfigurations: + - Event: "s3:ObjectCreated:*" + Function: + "Fn::GetAtt": [ TransformLambdaFunction, Arn ] + TransformLambdaPermissionS3BucketMuxerProducedEventsEventbriteS3: + DependsOn: + - TransformLambdaFunction + Type: AWS::Lambda::Permission + Properties: + FunctionName: + "Fn::GetAtt": [ TransformLambdaFunction, Arn ] + Action: "lambda:InvokeFunction" + Principal: "s3.amazonaws.com" + SourceArn: "arn:aws:s3:::${self:custom.producerBucket}" diff --git a/lambdas/farsetlabs/handlers/schemas/event-schema.json b/lambdas/farsetlabs/handlers/schemas/event-schema.json index db8e92da..fd3e6ebb 100644 --- a/lambdas/farsetlabs/handlers/schemas/event-schema.json +++ b/lambdas/farsetlabs/handlers/schemas/event-schema.json @@ -110,7 +110,6 @@ "required": [ "name", "address", - "city", "country", "latitude", "longitude" diff --git a/lambdas/package.json b/lambdas/package.json index 342d617e..23c4beca 100644 --- a/lambdas/package.json +++ b/lambdas/package.json @@ -14,6 +14,10 @@ "eventbrite:producer:invoke": "cd eventbrite && serverless invoke -f produce -l --aws-profile eventbriteEvents", "eventbrite:producer:invoke-local": "cd eventbrite && serverless invoke local -f produce", 
"eventbrite:producer:logs": "cd eventbrite && serverless logs -f produce -l --aws-profile eventbriteEvents", + "eventbrite:transformer:update": "cd eventbrite && serverless deploy function -f transform --aws-profile eventbriteEvents", + "eventbrite:transformer:invoke": "cd eventbrite && serverless invoke -f transform -l --aws-profile eventbriteEvents", + "eventbrite:transformer:invoke-local": "cd eventbrite && serverless invoke local -f transform", + "eventbrite:transformer:logs": "cd eventbrite && serverless logs -f transform -l --aws-profile eventbriteEvents", "farsetlabs:deploy": "cd farsetlabs && serverless deploy -v --aws-profile farsetlabsEvents", "farsetlabs:producer:update": "cd farsetlabs && serverless deploy function -f produce --aws-profile farsetlabsEvents", "farsetlabs:producer:invoke": "cd farsetlabs && serverless invoke -f produce -l --aws-profile farsetlabsEvents",