Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: improve parsing form-data #2944

Merged
merged 4 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion lib/core/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,8 @@ function addAbortListener (signal, listener) {
return () => signal.removeListener('abort', listener)
}

const hasToWellFormed = !!String.prototype.toWellFormed
const hasToWellFormed = typeof String.prototype.toWellFormed === 'function'
const hasIsWellFormed = typeof String.prototype.isWellFormed === 'function'

/**
* @param {string} val
Expand All @@ -449,6 +450,14 @@ function toUSVString (val) {
return hasToWellFormed ? `${val}`.toWellFormed() : nodeUtil.toUSVString(val)
}

/**
* @param {string} val
*/
// TODO: move this to webidl
function isUSVString (val) {
tsctx marked this conversation as resolved.
Show resolved Hide resolved
return hasIsWellFormed ? `${val}`.isWellFormed() : toUSVString(val) === `${val}`
}

/**
* @see https://tools.ietf.org/html/rfc7230#section-3.2.6
* @param {number} c
Expand Down Expand Up @@ -538,6 +547,7 @@ module.exports = {
isErrored,
isReadable,
toUSVString,
isUSVString,
isReadableAborted,
isBlobLike,
parseOrigin,
Expand Down
42 changes: 29 additions & 13 deletions lib/web/fetch/formdata-parser.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
'use strict'

const { webidl } = require('./webidl')
const { toUSVString, isUSVString, bufferToLowerCasedHeaderName } = require('../../core/util')
const { utf8DecodeBytes } = require('./util')
const { HTTP_TOKEN_CODEPOINTS, isomorphicDecode } = require('./data-url')
const { isFileLike, File: UndiciFile } = require('./file')
const { makeEntry } = require('./formdata')
const assert = require('node:assert')
const { isAscii, File: NodeFile } = require('node:buffer')
const { File: NodeFile } = require('node:buffer')

const File = globalThis.File ?? NodeFile ?? UndiciFile

Expand All @@ -15,6 +15,18 @@ const filenameBuffer = Buffer.from('; filename')
const dd = Buffer.from('--')
const ddcrlf = Buffer.from('--\r\n')

/**
* @param {string} chars
*/
function isAsciiString (chars) {
for (let i = 0; i < chars.length; ++i) {
if ((chars.charCodeAt(i) & ~0x7F) !== 0) {
tsctx marked this conversation as resolved.
Show resolved Hide resolved
return false
}
}
return true
}

/**
* @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-boundary
* @param {string} boundary
Expand All @@ -30,7 +42,7 @@ function validateBoundary (boundary) {
// - it is composed by bytes in the ranges 0x30 to 0x39, 0x41 to 0x5A, or
// 0x61 to 0x7A, inclusive (ASCII alphanumeric), or which are 0x27 ('),
// 0x2D (-) or 0x5F (_).
for (let i = 0; i < boundary.length; i++) {
for (let i = 0; i < length; ++i) {
const cp = boundary.charCodeAt(i)

if (!(
Expand Down Expand Up @@ -58,12 +70,12 @@ function escapeFormDataName (name, encoding = 'utf-8', isFilename = false) {
// 1. If isFilename is true:
if (isFilename) {
// 1.1. Set name to the result of converting name into a scalar value string.
name = webidl.converters.USVString(name)
name = toUSVString(name)
} else {
// 2. Otherwise:

// 2.1. Assert: name is a scalar value string.
assert(name === webidl.converters.USVString(name))
assert(isUSVString(name))

// 2.2. Replace every occurrence of U+000D (CR) not followed by U+000A (LF),
// and every occurrence of U+000A (LF) not preceded by U+000D (CR), in
Expand Down Expand Up @@ -94,14 +106,16 @@ function multipartFormDataParser (input, mimeType) {
// 1. Assert: mimeType’s essence is "multipart/form-data".
assert(mimeType !== 'failure' && mimeType.essence === 'multipart/form-data')

const boundaryString = mimeType.parameters.get('boundary')

// 2. If mimeType’s parameters["boundary"] does not exist, return failure.
// Otherwise, let boundary be the result of UTF-8 decoding mimeType’s
// parameters["boundary"].
if (!mimeType.parameters.has('boundary')) {
if (boundaryString === undefined) {
return 'failure'
}

const boundary = Buffer.from(`--${mimeType.parameters.get('boundary')}`, 'utf8')
const boundary = Buffer.from(`--${boundaryString}`, 'utf8')

// 3. Let entry list be an empty entry list.
const entryList = []
Expand Down Expand Up @@ -200,7 +214,10 @@ function multipartFormDataParser (input, mimeType) {
contentType ??= 'text/plain'

// 5.10.2. If contentType is not an ASCII string, set contentType to the empty string.
if (!isAscii(Buffer.from(contentType))) {

// Note: `buffer.isAscii` can be used at zero-cost, but converting a string to a buffer is a high overhead.
// Content-Type is a relatively small string, so it is faster to use `String#charCodeAt`.
if (!isAsciiString(contentType)) {
contentType = ''
}

Expand All @@ -214,8 +231,8 @@ function multipartFormDataParser (input, mimeType) {
}

// 5.12. Assert: name is a scalar value string and value is either a scalar value string or a File object.
assert(name === webidl.converters.USVString(name))
assert((typeof value === 'string' && value === webidl.converters.USVString(value)) || isFileLike(value))
assert(isUSVString(name))
assert((typeof value === 'string' && isUSVString(value)) || isFileLike(value))

// 5.13. Create an entry with name and value, and append it to entry list.
entryList.push(makeEntry(name, value, filename))
Expand Down Expand Up @@ -280,7 +297,7 @@ function parseMultipartFormDataHeaders (input, position) {
)

// 2.8. Byte-lowercase header name and switch on the result:
switch (new TextDecoder().decode(headerName).toLowerCase()) {
switch (bufferToLowerCasedHeaderName(headerName)) {
case 'content-disposition': {
// 1. Set name and filename to null.
name = filename = null
Expand Down Expand Up @@ -428,10 +445,9 @@ function parseMultipartFormDataName (input, position) {
*/
function collectASequenceOfBytes (condition, input, position) {
const result = []
let index = 0

while (position.position < input.length && condition(input[position.position])) {
result[index++] = input[position.position]
result.push(input[position.position])

position.position++
}
Expand Down
Loading