From 224c6659e5d0db1a33e2a29d22e2fb6d52fceb0c Mon Sep 17 00:00:00 2001 From: Lars Trieloff Date: Fri, 26 Apr 2019 13:41:15 +0000 Subject: [PATCH] feat(html): enable HAST parsing and serialization of HTML responses HTML response bodies will now be parsed and then serialized before the final response is created. This creates the opportunity of changing the HTML AST for HTML-post-processing Fixes #285 --- docs/response.schema.json | 4 + docs/response.schema.md | 25 +++++++ package.json | 1 - src/defaults/html.pipe.js | 4 + src/html/html-to-hast.js | 25 +++++++ src/html/static-asset-links.js | 24 ++---- src/html/stringify-hast.js | 32 ++++++++ src/schemas/response.schema.json | 4 + test/testEmbedHandler.js | 2 +- test/testRewriteStatic.js | 19 ++++- test/testStringifyHast.js | 120 +++++++++++++++++++++++++++++ test/testToHast.js | 125 +++++++++++++++++++++++++++++++ 12 files changed, 361 insertions(+), 24 deletions(-) create mode 100644 src/html/html-to-hast.js create mode 100644 src/html/stringify-hast.js create mode 100644 test/testStringifyHast.js create mode 100644 test/testToHast.js diff --git a/docs/response.schema.json b/docs/response.schema.json index 491766aae..2a09dabaa 100644 --- a/docs/response.schema.json +++ b/docs/response.schema.json @@ -34,6 +34,10 @@ } ] }, + "hast": { + "type": "object", + "description": "The Hypertext AST of the response body" + }, "headers": { "description": "The HTTP headers of the response", "additionalProperties": { diff --git a/docs/response.schema.md b/docs/response.schema.md index a4a386812..3d13543d0 100644 --- a/docs/response.schema.md +++ b/docs/response.schema.md @@ -16,6 +16,7 @@ The HTTP response object | Property | Type | Required | Nullable | Defined by | |----------|------|----------|----------|------------| | [body](#body) | complex | Optional | No | Response (this schema) | +| [hast](#hast) | `object` | Optional | No | Response (this schema) | | [headers](#headers) | complex | Optional | No | Response (this schema) 
| | [status](#status) | `integer` | Optional | No | Response (this schema) | @@ -53,6 +54,30 @@ The HTTP response object +## hast + +The Hypertext AST of the response body + +`hast` + +* is optional +* type: `object` +* defined in this schema + +### hast Type + + +`object` with following properties: + + +| Property | Type | Required | +|----------|------|----------| + + + + + + ## headers The HTTP headers of the response diff --git a/package.json b/package.json index ff53b8e8e..5db05b4c2 100644 --- a/package.json +++ b/package.json @@ -66,7 +66,6 @@ "micromatch": "^4.0.0", "object-hash": "^1.3.1", "rehype-parse": "^6.0.0", - "rehype-stringify": "^5.0.0", "remark-parse": "^6.0.0", "remark-rehype": "^4.0.0", "request": "^2.87.0", diff --git a/src/defaults/html.pipe.js b/src/defaults/html.pipe.js index edcbf26c0..101cf6063 100644 --- a/src/defaults/html.pipe.js +++ b/src/defaults/html.pipe.js @@ -32,6 +32,8 @@ const { cache, uncached } = require('../html/shared-cache'); const embeds = require('../html/find-embeds'); const parseFrontmatter = require('../html/parse-frontmatter'); const rewriteLinks = require('../html/static-asset-links'); +const tohast = require('../html/html-to-hast'); +const tohtml = require('../html/stringify-hast'); /* eslint no-param-reassign: off */ /* eslint newline-per-chained-call: off */ @@ -63,7 +65,9 @@ const htmlpipe = (cont, payload, action) => { .after(cache).when(uncached) .after(key) .after(debug) + .after(tohast) // start HTML post-processing .after(rewriteLinks).when(production) + .after(tohtml) // end HTML post-processing .after(flag).expose('esi').when(esi) // flag ESI when there is ESI in the response .error(selectStatus(production())); diff --git a/src/html/html-to-hast.js b/src/html/html-to-hast.js new file mode 100644 index 000000000..b15e90ac3 --- /dev/null +++ b/src/html/html-to-hast.js @@ -0,0 +1,25 @@ +/* + * Copyright 2019 Adobe. All rights reserved. 
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +const unified = require('unified'); +const parse = require('rehype-parse'); + +function tohast({ response: { body } }) { + const fragment = !body.match(/Hello World Here comes an embed.

-

Easy!

`); +

Easy!

`); }); }); diff --git a/test/testRewriteStatic.js b/test/testRewriteStatic.js index 26fbf98ad..fddf632a9 100644 --- a/test/testRewriteStatic.js +++ b/test/testRewriteStatic.js @@ -13,23 +13,36 @@ const assert = require('assert'); const { Logger } = require('@adobe/helix-shared'); const rewrite = require('../src/html/static-asset-links'); +const tohast = require('../src/html/html-to-hast'); +const stringify = require('../src/html/stringify-hast'); const { pipe } = require('../src/defaults/html.pipe.js'); - const logger = Logger.getTestLogger({ // tune this for debugging level: 'info', }); function rw(content) { - return rewrite({ + const hastcontext = tohast({ response: { body: content, headers: { 'Content-Type': 'text/html', }, }, - }).response.body; + }); + + const rewritecontext = rewrite({ + response: { + body: content, + hast: hastcontext.response.hast, + headers: { + 'Content-Type': 'text/html', + }, + }, + }); + + return stringify(rewritecontext).response.body; } describe('Integration Test Static Asset Rewriting', () => { diff --git a/test/testStringifyHast.js b/test/testStringifyHast.js new file mode 100644 index 000000000..f8502bdde --- /dev/null +++ b/test/testStringifyHast.js @@ -0,0 +1,120 @@ +/* + * Copyright 2018 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. 
+ */ +/* eslint-env mocha */ +const assert = require('assert'); +const stringify = require('../src/html/stringify-hast'); + +describe('Testing stringify pipeline step', () => { + it('Simple HTML can be transformed', () => { + assert.deepEqual( + stringify({ + response: { + hast: { + type: 'root', + children: [ + { + type: 'element', + tagName: 'html', + properties: {}, + children: [ + { + type: 'element', + tagName: 'head', + properties: {}, + children: [ + { + type: 'text', + value: '\n ', + position: { + start: { line: 2, column: 9, offset: 15 }, + end: { line: 3, column: 5, offset: 20 }, + }, + }, + { + type: 'element', + tagName: 'title', + properties: {}, + children: [ + { + type: 'text', + value: 'Foo', + position: { + start: { line: 3, column: 12, offset: 27 }, + end: { line: 3, column: 15, offset: 30 }, + }, + }, + ], + position: { + start: { line: 3, column: 5, offset: 20 }, + end: { line: 3, column: 23, offset: 38 }, + }, + }, + { + type: 'text', + value: '\n ', + position: { + start: { line: 3, column: 23, offset: 38 }, + end: { line: 4, column: 3, offset: 41 }, + }, + }, + ], + position: { + start: { line: 2, column: 3, offset: 9 }, + end: { line: 4, column: 10, offset: 48 }, + }, + }, + { + type: 'text', + value: '\n ', + position: { + start: { line: 4, column: 10, offset: 48 }, + end: { line: 5, column: 3, offset: 51 }, + }, + }, + { + type: 'element', + tagName: 'body', + properties: {}, + children: [ + { + type: 'text', + value: 'bar\n', + position: { + start: { line: 5, column: 10, offset: 58 }, + end: { line: 6, column: 1, offset: 69 }, + }, + }, + ], + }, + ], + position: { + start: { line: 1, column: 1, offset: 0 }, + end: { line: 6, column: 8, offset: 76 }, + }, + }, + ], + data: { quirksMode: true }, + position: { + start: { line: 1, column: 1, offset: 0 }, + end: { line: 6, column: 8, offset: 76 }, + }, + }, + }, + }).response.body, + ` + Foo + + bar +`, + ); + }); +}); diff --git a/test/testToHast.js b/test/testToHast.js new file mode 100644 
index 000000000..a15092b31 --- /dev/null +++ b/test/testToHast.js @@ -0,0 +1,125 @@ +/* + * Copyright 2018 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ +/* eslint-env mocha */ +const assert = require('assert'); +const tohast = require('../src/html/html-to-hast'); + +describe('Testing tohast pipeline step', () => { + it('Simple HTML can be transformed', () => { + assert.deepEqual( + tohast({ + response: { + body: ` + + Foo + + bar +`, + }, + }), + { + response: { + hast: { + type: 'root', + children: [ + { + type: 'element', + tagName: 'html', + properties: {}, + children: [ + { + type: 'element', + tagName: 'head', + properties: {}, + children: [ + { + type: 'text', + value: '\n ', + position: { + start: { line: 2, column: 9, offset: 15 }, + end: { line: 3, column: 5, offset: 20 }, + }, + }, + { + type: 'element', + tagName: 'title', + properties: {}, + children: [ + { + type: 'text', + value: 'Foo', + position: { + start: { line: 3, column: 12, offset: 27 }, + end: { line: 3, column: 15, offset: 30 }, + }, + }, + ], + position: { + start: { line: 3, column: 5, offset: 20 }, + end: { line: 3, column: 23, offset: 38 }, + }, + }, + { + type: 'text', + value: '\n ', + position: { + start: { line: 3, column: 23, offset: 38 }, + end: { line: 4, column: 3, offset: 41 }, + }, + }, + ], + position: { + start: { line: 2, column: 3, offset: 9 }, + end: { line: 4, column: 10, offset: 48 }, + }, + }, + { + type: 'text', + 
value: '\n ', + position: { + start: { line: 4, column: 10, offset: 48 }, + end: { line: 5, column: 3, offset: 51 }, + }, + }, + { + type: 'element', + tagName: 'body', + properties: {}, + children: [ + { + type: 'text', + value: 'bar\n', + position: { + start: { line: 5, column: 10, offset: 58 }, + end: { line: 6, column: 1, offset: 69 }, + }, + }, + ], + }, + ], + position: { + start: { line: 1, column: 1, offset: 0 }, + end: { line: 6, column: 8, offset: 76 }, + }, + }, + ], + data: { quirksMode: true }, + position: { + start: { line: 1, column: 1, offset: 0 }, + end: { line: 6, column: 8, offset: 76 }, + }, + }, + }, + }, + ); + }); +});