Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: secutils-dev/secutils-web-scraper
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v1.0.0-alpha.2
Choose a base ref
...
head repository: secutils-dev/secutils-web-scraper
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: v1.0.0-alpha.3
Choose a head ref
  • 12 commits
  • 10 files changed
  • 1 contributor

Commits on Jul 23, 2023

  1. Copy the full SHA
    c63cf10 View commit details

Commits on Aug 31, 2023

  1. Copy the full SHA
    15168fc View commit details
  2. [Misc] Update dependencies.

    azasypkin committed Aug 31, 2023
    Copy the full SHA
    08c7eb0 View commit details
  3. [Misc] Log Chromium args.

    azasypkin committed Aug 31, 2023
    Copy the full SHA
    2a70989 View commit details
  4. Copy the full SHA
    4717f74 View commit details

Commits on Sep 5, 2023

  1. Copy the full SHA
    ba5406b View commit details

Commits on Sep 6, 2023

  1. Copy the full SHA
    4d14956 View commit details
  2. Copy the full SHA
    07d6c6e View commit details
  3. Copy the full SHA
    15ee136 View commit details
  4. Copy the full SHA
    19d50df View commit details

Commits on Sep 23, 2023

  1. [Misc] Update dependencies.

    azasypkin committed Sep 23, 2023
    Copy the full SHA
    31c8710 View commit details

Commits on Oct 2, 2023

  1. Copy the full SHA
    2abad71 View commit details
Showing with 726 additions and 463 deletions.
  1. +0 −2 .github/workflows/ci.yml
  2. +1 −2 Dockerfile
  3. +13 −13 package.json
  4. +1 −0 src/api/resources/index.ts
  5. +127 −4 src/api/resources/list.test.ts
  6. +200 −113 src/api/resources/list.ts
  7. +5 −6 src/index.ts
  8. +9 −6 src/mocks.ts
  9. +5 −1 tools/api/resources/list.http
  10. +365 −316 yarn.lock
2 changes: 0 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -7,8 +7,6 @@ on:
- 'Dockerfile'
- 'LICENSE'
- '*.md'
env:
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 1

jobs:
ci:
3 changes: 1 addition & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# syntax=docker/dockerfile:1

FROM --platform=$BUILDPLATFORM node:20-alpine3.18 as BUILDER
ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
WORKDIR /app
COPY ["./*.json", "./yarn.lock", "./"]
RUN set -x && yarn install --frozen-lockfile
@@ -11,12 +10,12 @@ RUN set -x && yarn build

FROM node:20-alpine3.18
ENV NODE_ENV=production \
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 \
SECUTILS_WEB_SCRAPER_BROWSER_EXECUTABLE_PATH="/usr/bin/chromium-browser"
WORKDIR /app
RUN set -x && apk update --no-cache && \
apk upgrade --no-cache && \
apk add --no-cache dumb-init nss freetype harfbuzz ca-certificates ttf-freefont chromium
COPY --from=BUILDER ["/app/dist", "/app/package.json", "/app/yarn.lock", "./"]
RUN set -x && yarn install --production --frozen-lockfile && yarn cache clean
USER node
CMD [ "node", "src/index.js" ]
26 changes: 13 additions & 13 deletions package.json
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
"name": "Secutils.dev",
"email": "dev@secutils.dev"
},
"version": "1.0.0-alpha.2",
"version": "1.0.0-alpha.3",
"engines": {
"node": "20.x"
},
@@ -21,23 +21,23 @@
"dependencies": {
"@fastify/compress": "^6.4.0",
"dotenv": "^16.3.1",
"fastify": "^4.20.0",
"fastify": "^4.22.2",
"node-cache": "^5.1.2",
"playwright": "1.34.3"
"playwright": "1.38.1"
},
"devDependencies": {
"@typescript-eslint/parser": "^6.1.0",
"@typescript-eslint/eslint-plugin": "^6.1.0",
"@types/node": "^20.4.4",
"eslint": "^8.45.0",
"@eslint/eslintrc": "^2.1.0",
"eslint-config-prettier": "^8.8.0",
"eslint-import-resolver-typescript": "^3.5.5",
"eslint-plugin-import": "^2.27.5",
"@typescript-eslint/parser": "^6.7.4",
"@typescript-eslint/eslint-plugin": "^6.7.4",
"@types/node": "^20.8.2",
"eslint": "^8.50.0",
"@eslint/eslintrc": "^2.1.2",
"eslint-config-prettier": "^9.0.0",
"eslint-import-resolver-typescript": "^3.6.1",
"eslint-plugin-import": "^2.28.1",
"eslint-plugin-prettier": "^5.0.0",
"nodemon": "^3.0.1",
"prettier": "^3.0.0",
"typescript": "^5.1.6",
"prettier": "^3.0.3",
"typescript": "^5.2.2",
"ts-node": "^10.9.1"
}
}
1 change: 1 addition & 0 deletions src/api/resources/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { registerResourcesListRoutes } from './list.js';
import type { APIRouteParams } from '../api_route_params.js';
export type { SecutilsWindow } from './list.js';

export function registerRoutes(params: APIRouteParams) {
registerResourcesListRoutes(params);
131 changes: 127 additions & 4 deletions src/api/resources/list.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import * as assert from 'node:assert';
import { Blob } from 'node:buffer';
import { test } from 'node:test';
import { mock, test } from 'node:test';

import type { ResourceWithRawData } from './list.js';
import { registerResourcesListRoutes } from './list.js';
import { createBrowserMock, createPageMock, createResponseMock, createWindowMock } from '../../mocks.js';
import { createMock } from '../api_route_params.mocks.js';
@@ -62,17 +63,17 @@ await test('[/api/resources] can parse resources', async (t) => {
createResponseMock({
url: 'https://secutils.dev/script.js',
body: 'window.document.body.innerHTML = "Hello Secutils.dev and world!";',
resourceType: 'script',
type: 'script',
}),
createResponseMock({
url: 'https://secutils.dev/weird-script.js',
body: `window.document.body.innerHTML = "Hello Secutils.dev and world!";`,
resourceType: 'script',
type: 'script',
}),
createResponseMock({
url: 'https://secutils.dev/fonts.css',
body: '* { color: blue-ish-not-valid; font-size: 100500; }',
resourceType: 'stylesheet',
type: 'stylesheet',
}),
],
});
@@ -199,3 +200,125 @@ await test('[/api/resources] can parse resources', async (t) => {
// Make sure we didn't wait for a selector since it wasn't specified.
assert.strictEqual(pageMock.waitForSelector.mock.callCount(), 0);
});

/**
 * Checks that a `resourceFilterMap` hook exposed through `window.__secutils` is invoked for
 * every discovered resource (external and inline scripts, external and inline styles), and
 * that resources for which the hook returns `null` are left out of the API response.
 */
await test('[/api/resources] can inject resource filters', async (t) => {
  // Pin the clock so the `timestamp` field of the response is predictable.
  t.mock.method(Date, 'now', () => 123000);

  // The hook drops any resource whose raw content mentions `alert` and keeps the rest untouched.
  const includeResourceMock = mock.fn((resource: ResourceWithRawData) => {
    if (resource.data.includes('alert')) {
      return null;
    }

    return resource;
  });

  const windowMock = createWindowMock({ __secutils: { resourceFilterMap: includeResourceMock } });
  windowMock.document.querySelectorAll.mock.mockImplementation((selector: string) => {
    switch (selector) {
      case 'script':
        return [
          { src: 'https://secutils.dev/script.js', innerHTML: '' },
          { src: '', innerHTML: 'alert(1)'.repeat(10) },
        ];
      case 'link[rel=stylesheet]':
        return [{ href: 'https://secutils.dev/fonts.css' }];
      case 'style':
        return [{ innerHTML: '* { color: black; background-color: white; font-size: 100; }' }];
      default:
        return [];
    }
  });

  // Network responses backing the two external resources referenced by the page.
  const scriptResponse = createResponseMock({
    url: 'https://secutils.dev/script.js',
    body: 'window.document.body.innerHTML = "Hello Secutils.dev and world!";',
    type: 'script',
  });
  const stylesheetResponse = createResponseMock({
    url: 'https://secutils.dev/fonts.css',
    body: '* { color: blue-ish-not-valid; font-size: 100500; }',
    type: 'stylesheet',
  });
  const pageMock = createPageMock({ window: windowMock, responses: [scriptResponse, stylesheetResponse] });

  const server = registerResourcesListRoutes(createMock({ browser: createBrowserMock(pageMock) }));
  const response = await server.inject({
    method: 'POST',
    url: '/api/resources',
    payload: { url: 'https://secutils.dev', delay: 0 },
  });

  assert.strictEqual(response.statusCode, 200);

  // The inline `alert(1)…` script must be absent: the hook returned `null` for it.
  const expectedBody = JSON.stringify({
    timestamp: 123,
    scripts: [
      {
        url: 'https://secutils.dev/script.js',
        content: {
          data: { type: 'tlsh', value: 'T156A002B39256197413252E602EA57AC67D66540474113459D79DB004B1608C7C8EEEDD' },
          size: 65,
        },
      },
    ],
    styles: [
      {
        url: 'https://secutils.dev/fonts.css',
        content: {
          data: { type: 'tlsh', value: 'T19590220E23308028C000888020033280308C008300000328208008C0808CCE02200B00' },
          size: 51,
        },
      },
      {
        content: {
          data: { type: 'tlsh', value: 'T13DA0021ADB65454A32DF5A68356397A0526D548889104B7C3D5EB894D74C0617112791' },
          size: 60,
        },
      },
    ],
  });
  assert.strictEqual(response.body, expectedBody);

  // The requested page must have been opened exactly once with the default navigation options.
  assert.strictEqual(pageMock.goto.mock.callCount(), 1);
  assert.deepEqual(pageMock.goto.mock.calls[0].arguments, [
    'https://secutils.dev',
    { waitUntil: 'domcontentloaded', timeout: 5000 },
  ]);

  // No selector was supplied in the payload, so nothing should have been awaited.
  assert.strictEqual(pageMock.waitForSelector.mock.callCount(), 0);

  // The hook must have seen every resource, in discovery order: scripts first, then styles.
  const expectedHookInputs = [
    {
      data: 'window.document.body.innerHTML = "Hello Secutils.dev and world!";',
      type: 'script',
      url: 'https://secutils.dev/script.js',
    },
    {
      data: 'alert(1)alert(1)alert(1)alert(1)alert(1)alert(1)alert(1)alert(1)alert(1)alert(1)',
      type: 'script',
    },
    {
      data: '* { color: blue-ish-not-valid; font-size: 100500; }',
      type: 'stylesheet',
      url: 'https://secutils.dev/fonts.css',
    },
    {
      data: '* { color: black; background-color: white; font-size: 100; }',
      type: 'stylesheet',
    },
  ];
  assert.strictEqual(includeResourceMock.mock.callCount(), 4);
  for (const [callIndex, expectedResource] of expectedHookInputs.entries()) {
    assert.deepEqual(includeResourceMock.mock.calls[callIndex].arguments, [expectedResource]);
  }
});
Loading