Skip to content

Commit

Permalink
Switch to TypeScript (#31)
Browse files Browse the repository at this point in the history
Co-authored-by: Vlad Frangu <[email protected]>
  • Loading branch information
szmarczak and vladfrangu authored Aug 17, 2021
1 parent ee3e7c3 commit 7e828a8
Show file tree
Hide file tree
Showing 36 changed files with 541 additions and 429 deletions.
3 changes: 0 additions & 3 deletions .eslintrc

This file was deleted.

8 changes: 8 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"extends": [
"@apify/ts"
],
"parserOptions": {
"project": "tsconfig.eslint.json"
}
}
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file tells Git which files shouldn't be added to source control

dist
.idea
node_modules
coverage
package-lock.json
package-lock.json
1 change: 1 addition & 0 deletions .npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
legacy-peer-deps=true
13 changes: 12 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
3.0.0 / TBD
====================
- Switch to TypeScript
- Enable insecure parser by default
- Use `header-generator` to order headers
- Migrate to `hpagent`
- Remove `default` export in favor of `import { gotScraping }`

2.1.2 / 2021/08/06
====================
- Mimic `got` interface

2.1.1 / 2021/08/06
====================
- Use `header-generator` v1.0.0


2.1.0 / 2021/08/06
====================
- Add `TransformHeadersAgent`
Expand Down
10 changes: 0 additions & 10 deletions jest.config.js

This file was deleted.

21 changes: 21 additions & 0 deletions jest.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import type { Config } from '@jest/types';

/**
 * Jest configuration, exported as an async factory that resolves to the
 * options object below.
 */
export default async (): Promise<Config.InitialOptions> => ({
verbose: true,
// Use the `ts-jest` preset so TypeScript files are handled by ts-jest.
preset: 'ts-jest',
testEnvironment: 'node',
testRunner: 'jest-circus/runner',
// Per-test timeout; Jest interprets this value in milliseconds (20 seconds).
testTimeout: 20_000,
collectCoverage: true,
// Coverage is gathered from `.ts` and `.js` files under any `src` directory,
// with anything inside `node_modules` excluded by the negated glob.
collectCoverageFrom: [
'**/src/**/*.ts',
'**/src/**/*.js',
'!**/node_modules/**',
],
maxWorkers: 3,
globals: {
// ts-jest is pointed at the tsconfig that lives in the test directory.
'ts-jest': {
tsconfig: '<rootDir>/test/tsconfig.json',
},
},
});
30 changes: 22 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"name": "got-scraping",
"version": "2.1.3",
"version": "3.0.0",
"description": "HTTP client made for scraping based on got.",
"main": "src/index.js",
"main": "dist/index.js",
"engines": {
"node": ">=15.10.0"
},
"files": [
"src"
"dist"
],
"dependencies": {
"got-cjs": "12.0.0-beta.4",
Expand All @@ -18,22 +18,36 @@
"quick-lru": "^5.1.1"
},
"devDependencies": {
"@apify/eslint-config": "^0.1.3",
"@apify/eslint-config-ts": "^0.1.4",
"@apify/tsconfig": "^0.1.0",
"@types/body-parser": "^1.19.1",
"@types/express": "^4.17.13",
"@types/jest": "^27.0.0",
"@types/node": "^16.4.13",
"@typescript-eslint/eslint-plugin": "^4.29.1",
"@typescript-eslint/parser": "^4.29.1",
"body-parser": "^1.19.0",
"eslint": "^7.0.0",
"express": "^4.17.1",
"fs-extra": "^9.1.0",
"get-stream": "^5.2.0",
"jest": "^26.6.3",
"jest": "^27.0.6",
"jest-circus": "^27.0.6",
"jest-extended": "^0.11.5",
"jsdoc-to-markdown": "^7.0.0",
"markdown-toc": "^1.2.0"
"markdown-toc": "^1.2.0",
"rimraf": "^3.0.2",
"ts-jest": "^27.0.4",
"ts-node": "^10.2.0",
"typescript": "^4.3.5"
},
"scripts": {
"build": "rimraf dist && tsc",
"prepublishOnly": "npm run build",
"build-docs": "npm run build-toc",
"build-toc": "markdown-toc ./README.md -i",
"lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx",
"lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix",
"lint": "eslint src test",
"lint:fix": "eslint src test --fix",
"test": "jest --maxWorkers=3 --collect-coverage"
},
"author": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
/* eslint-disable no-underscore-dangle */
const HeaderGenerator = require('header-generator');
const http = require('http');
const WrappedAgent = require('./wrapped-agent');
// @ts-expect-error TODO: Type `header-generator`
import HeaderGenerator from 'header-generator';
import { OutgoingMessage, Agent, ClientRequest, ClientRequestArgs } from 'http';
import { WrappedAgent } from './wrapped-agent';

const { _storeHeader } = http.OutgoingMessage.prototype;
// @ts-expect-error Private property
const { _storeHeader } = OutgoingMessage.prototype;

const generator = new HeaderGenerator();

/**
* @description Transforms the casing of the headers to Pascal-Case.
* Transforms the casing of the headers to Pascal-Case.
*/
class TransformHeadersAgent extends WrappedAgent {
export class TransformHeadersAgent<T extends Agent> extends WrappedAgent<T> {
// Rewritten from https://github.com/nodejs/node/blob/533cafcf7e3ab72e98a2478bc69aedfdf06d3a5e/lib/_http_outgoing.js#L442-L479
/**
* @description Transforms the request via header normalization.
* @see {TransformHeadersAgent.toPascalCase}
* @param {http.ClientRequest} request
* @param {boolean} sortHeaders - if the headers should be sorted or not
* Transforms the request via header normalization.
*/
transformRequest(request, sortHeaders) {
const headers = {};
transformRequest(request: ClientRequest, { sortHeaders }: {sortHeaders: boolean}): void {
const headers: Record<string, string | number | string[]> = {};
const hasConnection = request.hasHeader('connection');
const hasContentLength = request.hasHeader('content-length');
const hasTransferEncoding = request.hasHeader('transfer-encoding');
Expand All @@ -28,9 +27,9 @@ class TransformHeadersAgent extends WrappedAgent {

for (const key of keys) {
if (key.toLowerCase().startsWith('x-')) {
headers[key] = request.getHeader(key);
headers[key] = request.getHeader(key)!;
} else {
headers[this.toPascalCase(key)] = request.getHeader(key);
headers[this.toPascalCase(key)] = request.getHeader(key)!;
}

if (sortHeaders) {
Expand All @@ -39,8 +38,15 @@ class TransformHeadersAgent extends WrappedAgent {
}
}

const typedRequest = request as ClientRequest & {
_removedContLen: boolean;
_contentLength: number;
_removedTE: boolean;
agent?: Agent;
};

if (!hasConnection) {
const shouldSendKeepAlive = request.shouldKeepAlive && (hasContentLength || request.useChunkedEncodingByDefault || request.agent);
const shouldSendKeepAlive = request.shouldKeepAlive && (hasContentLength || request.useChunkedEncodingByDefault || typedRequest.agent);
if (shouldSendKeepAlive) {
headers.Connection = 'keep-alive';
} else {
Expand All @@ -54,48 +60,45 @@ class TransformHeadersAgent extends WrappedAgent {
//
// Note: This uses private `_removedTE` property.
// This property tells us whether the transfer-encoding was explicitly removed or not.
if (!hasTrailer && !request._removedContLen && typeof request._contentLength === 'number') {
headers['Content-Length'] = request._contentLength;
} else if (!request._removedTE) {
if (!hasTrailer && !typedRequest._removedContLen && typeof typedRequest._contentLength === 'number') {
headers['Content-Length'] = typedRequest._contentLength;
} else if (!typedRequest._removedTE) {
headers['Transfer-Encoding'] = 'chunked';
}
}

const transformedHeaders = sortHeaders ? generator.orderHeaders(headers) : headers;
const transformedHeaders: Record<string, string | number | string[]> = sortHeaders ? generator.orderHeaders(headers) : headers;

// eslint-disable-next-line no-restricted-syntax, guard-for-in
for (const key in transformedHeaders) {
request.setHeader(key, transformedHeaders[key]);
for (const [key, value] of Object.entries(transformedHeaders)) {
request.setHeader(key, value);
}
}

addRequest(request, options) {
override addRequest(request: ClientRequest, options: ClientRequestArgs): void {
const typedRequest = request as ClientRequest & {
_storeHeader: (firstLine: string, headers: Record<string, string>) => void;
};

// See https://github.com/nodejs/node/blob/533cafcf7e3ab72e98a2478bc69aedfdf06d3a5e/lib/_http_outgoing.js#L373
// Note: This overrides the private `_storeHeader`.
// This is required, because the function copies
// the `connection`, `content-length` and `transfer-encoding` headers
// directly to the underlying buffer.
request._storeHeader = (...args) => {
this.transformRequest(request, true);
typedRequest._storeHeader = (...args) => {
this.transformRequest(request, { sortHeaders: true });

return _storeHeader.call(request, ...args);
};

// `agent-base` isn't able to detect the protocol correctly
options.secureEndpoint = options.protocol === 'https:';
(options as any).secureEndpoint = options.protocol === 'https:';

return super.addRequest(request, options);
}

/**
* @param {string} header - header with unknown casing
* @returns {string} - header in Pascal-Case
*/
toPascalCase(header) {
toPascalCase(header: string): string {
return header.split('-').map((part) => {
return part[0].toUpperCase() + part.slice(1).toLowerCase();
return part[0]!.toUpperCase() + part.slice(1).toLowerCase();
}).join('-');
}
}

module.exports = TransformHeadersAgent;
42 changes: 0 additions & 42 deletions src/agent/wrapped-agent.js

This file was deleted.

71 changes: 71 additions & 0 deletions src/agent/wrapped-agent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { Agent as HttpAgent, AgentOptions, ClientRequest, ClientRequestArgs } from 'http';

/**
 * Proxy object that holds an existing agent instance and forwards agent
 * members to it, so there is no need to replace `agent.addRequest` on the
 * original instance.
 *
 * The class also declares the remaining `HttpAgent` members so an instance can
 * be used wherever an `HttpAgent` is expected without casting.
 *
 * @see https://github.com/nodejs/node/blob/533cafcf7e3ab72e98a2478bc69aedfdf06d3a5e/lib/_http_client.js#L129-L162
 * @see https://github.com/nodejs/node/blob/533cafcf7e3ab72e98a2478bc69aedfdf06d3a5e/lib/_http_client.js#L234-L246
 * @see https://github.com/nodejs/node/blob/533cafcf7e3ab72e98a2478bc69aedfdf06d3a5e/lib/_http_client.js#L304-L305
 */
export class WrappedAgent<T extends HttpAgent> implements HttpAgent {
    /**
     * @param agent - the agent instance every member below delegates to
     */
    constructor(public agent: T) {}

    // ---- Forwarded methods ----

    /** Passes the request and its options straight to the wrapped agent. */
    addRequest(request: ClientRequest, options: ClientRequestArgs): void {
        // @ts-expect-error Not declared correctly in @types/node
        this.agent.addRequest(request, options);
    }

    /** Destroys the wrapped agent. */
    destroy(): void {
        this.agent.destroy();
    }

    // ---- Forwarded read-only properties that need a typings escape hatch ----

    get keepAlive(): boolean {
        // @ts-expect-error Not declared correctly in @types/node
        return this.agent.keepAlive;
    }

    get options(): AgentOptions {
        // @ts-expect-error Not declared correctly in @types/node
        return this.agent.options;
    }

    get defaultPort(): number {
        // @ts-expect-error Not declared correctly in @types/node
        return this.agent.defaultPort;
    }

    get protocol(): string {
        // @ts-expect-error Not declared correctly in @types/node
        return this.agent.protocol;
    }

    // ---- Forwarded read-only properties covered by the `HttpAgent` typings ----
    // Declaring these lets the class satisfy `implements HttpAgent`, which
    // avoids having to write `WrappedAgent as unknown as HttpAgent`.

    get maxSockets(): HttpAgent['maxSockets'] {
        return this.agent.maxSockets;
    }

    get maxFreeSockets(): HttpAgent['maxFreeSockets'] {
        return this.agent.maxFreeSockets;
    }

    get maxTotalSockets(): HttpAgent['maxTotalSockets'] {
        return this.agent.maxTotalSockets;
    }

    get sockets(): HttpAgent['sockets'] {
        return this.agent.sockets;
    }

    get freeSockets(): HttpAgent['freeSockets'] {
        return this.agent.freeSockets;
    }

    get requests(): HttpAgent['requests'] {
        return this.agent.requests;
    }
}
13 changes: 13 additions & 0 deletions src/context.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { OptionsInit as GotOptionsInit } from 'got-cjs';

export { GotOptionsInit };

/**
 * Additional option fields declared by this package.
 *
 * Because the interface extends `Record<string, unknown>`, any other string
 * key is also accepted, with its value typed as `unknown`.
 */
export interface Context extends Record<string, unknown> {
/** Proxy URL as a string. NOTE(review): presumably the proxy that requests are routed through — confirm against the consumer. */
proxyUrl?: string;
/** Free-form options bag. NOTE(review): presumably forwarded to the header generator — confirm against the consumer. */
headerGeneratorOptions?: Record<string, unknown>;
/** Boolean switch. NOTE(review): presumably enables or disables header generation — confirm against the consumer. */
useHeaderGenerator?: boolean;
/** Object exposing `getHeaders(options)`, which returns a string-to-string header map. */
headerGenerator?: { getHeaders: (options: Record<string, unknown>) => Record<string, string> };
/** Boolean switch. NOTE(review): presumably toggles Node's insecure HTTP parser — confirm against the consumer. */
insecureHTTPParser?: boolean;
}

/** Options shape that combines every `Context` field with got's own `OptionsInit` (imported here as `GotOptionsInit`). */
export interface OptionsInit extends Context, GotOptionsInit {}
Loading

0 comments on commit 7e828a8

Please sign in to comment.