From ca4aae97332694adbd07dea40a0bee02a2c02fcc Mon Sep 17 00:00:00 2001 From: Jamie Tanna Date: Tue, 16 Nov 2021 08:29:32 +0000 Subject: [PATCH 1/4] Refactor context handler usage To make this more scalable, as we add more handlers for retrieving context, we can use a `getHandler` method that can provide the relevant handler. This then allows us to pass around that handler, not worrying what it is, just that it's doing its job. --- src/events/fetch-context/index.js | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/events/fetch-context/index.js b/src/events/fetch-context/index.js index bd28ff2..7f918a4 100644 --- a/src/events/fetch-context/index.js +++ b/src/events/fetch-context/index.js @@ -5,21 +5,19 @@ const granary = require('./granary') const meetup = require('./meetup') const openGraph = require('./open-graph') -async function getContext (url) { - // for specific sites, use custom parsing +async function getHandler (url) { if (meetup.isMeetupUrl(url)) { - const properties = await meetup.fetchContext(url) - if (properties) { - return properties - } + return meetup } else if (eventbrite.isEventbriteUrl(url)) { - const properties = await eventbrite.fetchContext(url) - if (properties) { - return properties - } + return eventbrite + } else { + return granary } - // otherwise fallback to Granary, and then OpenGraph - const properties = await granary.fetchContext(url) +} + +async function getContext (handler, url) { + // if our fetching fails, fallback to OpenGraph + const properties = await handler.fetchContext(url) if (properties) { return properties } @@ -30,7 +28,8 @@ async function getContext (url) { exports.handler = async function subscribe (event) { const data = await arc.tables() const { url } = JSON.parse(event.Records[0].Sns.Message) - const properties = await getContext(url) + const handler = await getHandler(url) + const properties = await getContext(handler, url) await data.contexts.put({ url, properties From 719ef68cfd56ce2dfb4f98b464b059b6f3aea556 Mon Sep 17 00:00:00 2001 From: Jamie Tanna Date: Tue, 16 Nov 2021 08:56:49 +0000 Subject: [PATCH 2/4] Log which handler has been used for context retrieval To better improve visibility of which context retrieval handler has been used, we can add a `name` to each handler that can be logged when retrieving. In the case of our fallback to OpenGraph, we can log a slightly more appropriate message. --- src/events/fetch-context/eventbrite.js | 5 +++++ src/events/fetch-context/granary.js | 6 +++++- src/events/fetch-context/index.js | 3 ++- src/events/fetch-context/meetup.js | 5 +++++ src/events/fetch-context/open-graph.js | 6 +++++- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/events/fetch-context/eventbrite.js b/src/events/fetch-context/eventbrite.js index 4227fdf..52b922b 100644 --- a/src/events/fetch-context/eventbrite.js +++ b/src/events/fetch-context/eventbrite.js @@ -1,6 +1,10 @@ const fetch = require('node-fetch') const logger = require('@architect/shared/logger') +function name () { + return 'Eventbrite' +} + function isEventbriteUrl (url) { return ((url.indexOf('https://eventbrite.com') > -1) || (url.indexOf('https://www.eventbrite.com') > -1) || @@ -29,6 +33,7 @@ async function fetchContext (url) { } module.exports = { + name, isEventbriteUrl, fetchContext } diff --git a/src/events/fetch-context/granary.js b/src/events/fetch-context/granary.js index 3b9a92b..a1f2257 100644 --- a/src/events/fetch-context/granary.js +++ b/src/events/fetch-context/granary.js @@ -1,6 +1,10 @@ const fetch = require('node-fetch') const logger = require('@architect/shared/logger') +function name () { + return 'Granary' +} + function getGranaryUrl (url) { const granaryBaseUrl = 'https://granary.io/' const safeUrl = encodeURIComponent(url) @@ -26,4 +30,4 @@ async function fetchContext (url) { return mf2.items[0].properties } -module.exports = { fetchContext } +module.exports = { name, fetchContext } diff --git a/src/events/fetch-context/index.js b/src/events/fetch-context/index.js index 7f918a4..9933b3e 100644 --- a/src/events/fetch-context/index.js +++ b/src/events/fetch-context/index.js @@ -19,9 +19,11 @@ async function getContext (handler, url) { // if our fetching fails, fallback to OpenGraph const properties = await handler.fetchContext(url) if (properties) { + logger.info(`Context fetched ${url} using ${handler.name()}`, JSON.stringify(properties)) return properties } + logger.info(`Context fetching ${url} using fallback ${openGraph.name()}`, JSON.stringify(properties)) return await openGraph.fetchContext(url) } @@ -34,5 +36,4 @@ exports.handler = async function subscribe (event) { url, properties }) - logger.info(`Context fetched ${url}`, JSON.stringify(properties)) } diff --git a/src/events/fetch-context/meetup.js b/src/events/fetch-context/meetup.js index b914fe3..4e7dcf9 100644 --- a/src/events/fetch-context/meetup.js +++ b/src/events/fetch-context/meetup.js @@ -1,6 +1,10 @@ const fetch = require('node-fetch') const logger = require('@architect/shared/logger') +function name () { + return 'Meetup' +} + function isMeetupUrl (url) { return ((url.indexOf('https://meetup.com') > -1) || (url.indexOf('https://www.meetup.com') > -1)) @@ -27,6 +31,7 @@ async function fetchContext (url) { } module.exports = { + name, isMeetupUrl, fetchContext } diff --git a/src/events/fetch-context/open-graph.js b/src/events/fetch-context/open-graph.js index 011541f..e72fd82 100644 --- a/src/events/fetch-context/open-graph.js +++ b/src/events/fetch-context/open-graph.js @@ -1,6 +1,10 @@ const ogs = require('open-graph-scraper') const logger = require('@architect/shared/logger') +function name () { + return 'OpenGraph' +} + function setName (result, properties) { if (result.ogTitle) { properties.name = [result.ogTitle] @@ -61,4 +65,4 @@ async function fetchContext (url) { return properties } -module.exports = { fetchContext } +module.exports = { name, fetchContext } From 4c0e8cebb2f7a5242c768b8e4e5d706c117cf37d Mon Sep 17 00:00:00 2001 From: Jamie Tanna Date: Tue, 16 Nov 2021 09:11:16 +0000 Subject: [PATCH 3/4] Add support for reads To allow tracking of books that are being read by folks, we can introduce the `read` post type. This also includes sample data for three common types of reads: - an `h-cite` from data from books-mf2.herokuapp.com - a URL cite to books-mf2.herokuapp.com - an `h-cite` from https://indiebookclub.biz To allow for better visualisation in editors, we can also hint which expected properties are available, and required. --- scripts/posts.json | 86 ++++++++++++++++++++++ src/http/get-micropub/config/post-types.js | 13 ++++ src/http/get-micropub/contexts.js | 2 +- src/shared/utils.js | 6 +- 4 files changed, 105 insertions(+), 2 deletions(-) diff --git a/scripts/posts.json b/scripts/posts.json index 5802aeb..6b1635a 100644 --- a/scripts/posts.json +++ b/scripts/posts.json @@ -396,6 +396,92 @@ "note" ] } + }, + { + "url": "2021/11/ghatp", + "type": [ + "h-entry" + ], + "properties": { + "entry-type": [ + "read" + ], + "published": [ + "2021-11-13T07:51:00+0000" + ], + "read-status": [ + "finished" + ], + "read-of": [ + { + "type": [ + "h-cite" + ], + "properties": { + "url": [ + "https://openlibrary.org/books/OL26318312M" + ], + "uid": [ + "isbn:9780316217651" + ], + "name": [ + "Gods of risk" + ], + "author": [ + "James S. A. Corey" + ], + "photo": { + "value": "https://covers.openlibrary.org/b/id/7992542.jpg", + "alt": "Cover picture of Gods of risk" + } + } + } + ] + } + }, + { + "url": "2021/11/ghatp-url", + "type": [ + "h-entry" + ], + "properties": { + "entry-type": [ + "read" + ], + "published": [ + "2021-11-13T07:51:00+0000" + ], + "read-status": [ + "finished" + ], + "read-of": [ + "https://books-mf2.herokuapp.com/isbn/9780316332897" + ] + } + }, + { + "url": "2021/11/99ib8", + "type": [ + "h-entry" + ], + "properties" : { + "entry-type": [ + "read" + ], + "summary" : [ "Finished reading: Cibola Burn by James S. A. Corey, ISBN: 9780316217620" ], + "read-status" : [ "finished" ], + "read-of" : [ + { + "type" : [ "h-cite" ], + "properties" : { + "name" : [ "Cibola Burn" ], + "author" : [ "James S. A. Corey" ], + "uid" : [ "isbn:9780316217620" ] + } + } + ], + "published" : [ "2020-10-09T23:08:24.411Z" ] + } } ] } \ No newline at end of file diff --git a/src/http/get-micropub/config/post-types.js b/src/http/get-micropub/config/post-types.js index 7cb1473..5f073b7 100644 --- a/src/http/get-micropub/config/post-types.js +++ b/src/http/get-micropub/config/post-types.js @@ -60,5 +60,18 @@ module.exports = [ { type: 'listen', name: 'Listen' + }, + { + type: 'read', + name: 'Read', + properties: [ + 'content', + 'read-of', + 'read-status' + ], + 'required-properties': [ + 'read-of', + 'read-status' + ] } ] diff --git a/src/http/get-micropub/contexts.js b/src/http/get-micropub/contexts.js index d004769..4ef45e7 100644 --- a/src/http/get-micropub/contexts.js +++ b/src/http/get-micropub/contexts.js @@ -3,7 +3,7 @@ const { isValidURL } = require('@architect/shared/utils') async function setContexts (post) { const data = await arc.tables() - const urlProps = ['in-reply-to', 'repost-of', 'like-of', 'bookmark-of', 'listen-of'] + const urlProps = ['in-reply-to', 'repost-of', 'like-of', 'bookmark-of', 'listen-of', 'read-of'] for (const prop of urlProps) { if ((prop in post.properties) && Array.isArray(post.properties[prop])) { diff --git a/src/shared/utils.js b/src/shared/utils.js index 2673388..4931d3a 100644 --- a/src/shared/utils.js +++ b/src/shared/utils.js @@ -5,6 +5,7 @@ const reservedUrls = ` bookmarks photos checkins + reads reposts likes replies @@ -51,6 +52,9 @@ function derivePostType (post) { } else if (('listen-of' in post.properties) && isValidURL(post.properties['listen-of'][0])) { return 'listen' + } else if (('read-of' in post.properties) && post.properties['read-status'] && + ['to-read', 'reading', 'finished'].includes(post.properties['read-status'][0])) { + return 'read' } else { return 'note' } @@ -67,7 +71,7 @@ function isValidURL (string) { function findContexts (post) { const urls = [] - for (const prop of ['in-reply-to', 'repost-of', 'like-of', 'bookmark-of', 'listen-of']) { + for (const prop of ['in-reply-to', 'repost-of', 'like-of', 'bookmark-of', 'listen-of', 'read-of']) { if ((prop in post.properties) && Array.isArray(post.properties[prop])) { for (const i in post.properties[prop]) { const url = post.properties[prop][i] From 2748f2b35c95b1747e4feff969811863ae6b8fd1 Mon Sep 17 00:00:00 2001 From: Jamie Tanna Date: Tue, 16 Nov 2021 09:39:24 +0000 Subject: [PATCH 4/4] Add context retrieval for reads via books-mf2 Using the service books-mf2, we can retrieve the context for read data. As it's a Heroku-based app, there's the risk that this can timeout, so we need to increase the timeouts available for context retrieval events. --- src/events/fetch-context/books-mf2.js | 31 +++++++++++++++++++++++++++ src/events/fetch-context/config.arc | 2 ++ src/events/fetch-context/index.js | 3 +++ 3 files changed, 36 insertions(+) create mode 100644 src/events/fetch-context/books-mf2.js create mode 100644 src/events/fetch-context/config.arc diff --git a/src/events/fetch-context/books-mf2.js b/src/events/fetch-context/books-mf2.js new file mode 100644 index 0000000..32e2f8c --- /dev/null +++ b/src/events/fetch-context/books-mf2.js @@ -0,0 +1,31 @@ +const fetch = require('node-fetch') +const logger = require('@architect/shared/logger') + +function name () { + return 'Books-MF2' +} + +function isBooksMf2Url (url) { + return (url.indexOf('https://books-mf2.herokuapp.com/') > -1) +} + +async function fetchContext (url) { + if (!isBooksMf2Url(url)) { + return + } + const response = await fetch(url) + if (!response.ok) { + const text = await response.text() + logger.warn('Failed to fetch context from Books-MF2', `${url}\n${text}`) + return + } + const mf2 = await response.json() + if (!('items' in mf2) || !mf2.items.length) return + return mf2.items[0].properties +} + +module.exports = { + name, + isBooksMf2Url, + fetchContext +} diff --git a/src/events/fetch-context/config.arc b/src/events/fetch-context/config.arc new file mode 100644 index 0000000..e2c787a --- /dev/null +++ b/src/events/fetch-context/config.arc @@ -0,0 +1,2 @@ +@aws +timeout 15 diff --git a/src/events/fetch-context/index.js b/src/events/fetch-context/index.js index 9933b3e..0845f45 100644 --- a/src/events/fetch-context/index.js +++ b/src/events/fetch-context/index.js @@ -1,5 +1,6 @@ const arc = require('@architect/functions') const logger = require('@architect/shared/logger') +const booksMf2 = require('./books-mf2') const eventbrite = require('./eventbrite') const granary = require('./granary') const meetup = require('./meetup') @@ -10,6 +11,8 @@ async function getHandler (url) { return meetup } else if (eventbrite.isEventbriteUrl(url)) { return eventbrite + } else if (booksMf2.isBooksMf2Url(url)) { + return booksMf2 } else { return granary }