Skip to content

Commit

Permalink
urlencoded: Support iso-8859-1, utf8 sentinel, and numeric entities (#…
Browse files Browse the repository at this point in the history
…326)

* urlencoded, in extended mode: Support iso-8859-1 encoded requests, and also accept iso-8859-1 as a default encoding.

* urlencoded: Support a utf8 sentinel to detect the charset.

* Pass the interpretNumericEntities option through to qs.

* Fix lint

* Support charsets, sentinels etc. via custom decoders

Works in both extended and simple mode.

* Simplify

* Fix empty parameter issue with utf8Sentinel in simple mode

* Run all the charset/sentinel tests in both extended and simple modes

* utf8Sentinel => charsetSentinel

ljharb/qs#268 (comment)

* Update qs to 6.9.1

* Always use the qs module, even in simple mode

#326 (comment)

* Create the simple and extended parser with the same function, reducing duplication

* Don't mention the querystring module in the README

* Fix lint

* Update qs to 6.9.4

* Consistently call it "utf8 sentinel"

* Simplify by relying on the qs module's support for detecting the charset
#326 (comment)

* Simplify further

* Put back debug option

* Document that defaultCharset defaults to utf-8
#326 (comment)
  • Loading branch information
papandreou authored Jul 30, 2024
1 parent 35b50b5 commit 6cea6bd
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 8 deletions.
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,23 @@ The `verify` option, if supplied, is called as `verify(req, res, buf, encoding)`
where `buf` is a `Buffer` of the raw request body and `encoding` is the
encoding of the request. The parsing can be aborted by throwing an error.

##### defaultCharset

The default charset to parse as, if not specified in content-type. Must be
either `utf-8` or `iso-8859-1`. Defaults to `utf-8`.

##### charsetSentinel

Whether to let the value of the `utf8` parameter take precedence as the charset
selector. It requires the form to contain a parameter named `utf8` with a value
of `✓`. Defaults to `false`.

##### interpretNumericEntities

Whether to decode numeric entities such as `&#9786;` when parsing an iso-8859-1
form. Defaults to `false`.


## Errors

The middlewares provided by this module create errors using the
Expand Down
2 changes: 1 addition & 1 deletion lib/read.js
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ function read (req, res, next, parse, debug, options) {
str = typeof body !== 'string' && encoding !== null
? iconv.decode(body, encoding)
: body
req.body = parse(str)
req.body = parse(str, encoding)
} catch (err) {
next(createError(400, err, {
body: str,
Expand Down
28 changes: 21 additions & 7 deletions lib/types/urlencoded.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,18 @@ function urlencoded (options) {
: opts.limit
var type = opts.type || 'application/x-www-form-urlencoded'
var verify = opts.verify || false
var charsetSentinel = opts.charsetSentinel
var interpretNumericEntities = opts.interpretNumericEntities

if (verify !== false && typeof verify !== 'function') {
throw new TypeError('option verify must be function')
}

var defaultCharset = opts.defaultCharset || 'utf-8'
if (defaultCharset !== 'utf-8' && defaultCharset !== 'iso-8859-1') {
throw new TypeError('option defaultCharset must be either utf-8 or iso-8859-1')
}

// create the appropriate query parser
var queryparse = createQueryParser(opts, extended)

Expand All @@ -58,9 +65,9 @@ function urlencoded (options) {
? typeChecker(type)
: type

function parse (body) {
function parse (body, encoding) {
return body.length
? queryparse(body)
? queryparse(body, encoding)
: {}
}

Expand Down Expand Up @@ -92,8 +99,8 @@ function urlencoded (options) {
}

// assert charset
var charset = getCharset(req) || 'utf-8'
if (charset !== 'utf-8') {
var charset = getCharset(req) || defaultCharset
if (charset !== 'utf-8' && charset !== 'iso-8859-1') {
debug('invalid charset')
next(createError(415, 'unsupported charset "' + charset.toUpperCase() + '"', {
charset: charset,
Expand All @@ -108,7 +115,9 @@ function urlencoded (options) {
encoding: charset,
inflate: inflate,
limit: limit,
verify: verify
verify: verify,
charsetSentinel: charsetSentinel,
interpretNumericEntities: interpretNumericEntities
})
}
}
Expand All @@ -123,6 +132,8 @@ function createQueryParser (options, extended) {
var parameterLimit = options.parameterLimit !== undefined
? options.parameterLimit
: 1000
var charsetSentinel = options.charsetSentinel
var interpretNumericEntities = options.interpretNumericEntities

if (isNaN(parameterLimit) || parameterLimit < 1) {
throw new TypeError('option parameterLimit must be a positive number')
Expand All @@ -134,7 +145,7 @@ function createQueryParser (options, extended) {

var depth = extended ? Infinity : 0

return function queryparse (body) {
return function queryparse (body, encoding) {
var paramCount = parameterCount(body, parameterLimit)

if (paramCount === undefined) {
Expand All @@ -152,7 +163,10 @@ function createQueryParser (options, extended) {
allowPrototypes: true,
arrayLimit: arrayLimit,
depth: depth,
parameterLimit: parameterLimit
parameterLimit: parameterLimit,
charsetSentinel: charsetSentinel,
interpretNumericEntities: interpretNumericEntities,
charset: encoding
})
}
}
Expand Down
68 changes: 68 additions & 0 deletions test/urlencoded.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,74 @@ describe('bodyParser.urlencoded()', function () {
.expect(200, '{}', done)
})

// Charset handling must behave the same whether nested objects are enabled
// (extended) or not (simple), so every case below runs once per mode.
var extendedValues = [true, false]
extendedValues.forEach(function (extended) {
  describe('in ' + (extended ? 'extended' : 'simple') + ' mode', function () {
    it('should parse x-www-form-urlencoded with an explicit iso-8859-1 encoding', function (done) {
      var server = createServer({ extended: extended })
      request(server)
        .post('/')
        .set('Content-Type', 'application/x-www-form-urlencoded; charset=iso-8859-1')
        .send('%A2=%BD')
        .expect(200, '{"¢":"½"}', done)
    })

    it('should parse x-www-form-urlencoded with unspecified iso-8859-1 encoding when the defaultCharset is set to iso-8859-1', function (done) {
      var server = createServer({ defaultCharset: 'iso-8859-1', extended: extended })
      request(server)
        .post('/')
        .set('Content-Type', 'application/x-www-form-urlencoded')
        .send('%A2=%BD')
        .expect(200, '{"¢":"½"}', done)
    })

    it('should parse x-www-form-urlencoded with an unspecified iso-8859-1 encoding when the utf8 sentinel has a value of %26%2310003%3B', function (done) {
      var server = createServer({ charsetSentinel: true, extended: extended })
      request(server)
        .post('/')
        .set('Content-Type', 'application/x-www-form-urlencoded')
        .send('utf8=%26%2310003%3B&user=%C3%B8')
        // The sentinel "&#10003;" marks the body as iso-8859-1, so the octets
        // C3 B8 decode to two separate characters rather than to a single "ø".
        .expect(200, '{"user":"Ã¸"}', done)
    })

    it('should parse x-www-form-urlencoded with an unspecified utf-8 encoding when the utf8 sentinel has a value of %E2%9C%93 and the defaultCharset is iso-8859-1', function (done) {
      // defaultCharset is set explicitly so that the test proves the utf-8
      // sentinel takes precedence over a non-utf-8 default.
      var server = createServer({ charsetSentinel: true, defaultCharset: 'iso-8859-1', extended: extended })
      request(server)
        .post('/')
        .set('Content-Type', 'application/x-www-form-urlencoded')
        .send('utf8=%E2%9C%93&user=%C3%B8')
        .expect(200, '{"user":"ø"}', done)
    })

    it('should not leave an empty string parameter when removing the utf8 sentinel from the start of the string', function (done) {
      var server = createServer({ charsetSentinel: true, extended: extended })
      request(server)
        .post('/')
        .set('Content-Type', 'application/x-www-form-urlencoded')
        .send('utf8=%E2%9C%93&foo=bar')
        .expect(200, '{"foo":"bar"}', done)
    })

    it('should not leave an empty string parameter when removing the utf8 sentinel from the middle of the string', function (done) {
      var server = createServer({ charsetSentinel: true, extended: extended })
      request(server)
        .post('/')
        .set('Content-Type', 'application/x-www-form-urlencoded')
        .send('foo=bar&utf8=%E2%9C%93&baz=quux')
        .expect(200, '{"foo":"bar","baz":"quux"}', done)
    })

    it('should not leave an empty string parameter when removing the utf8 sentinel from the end of the string', function (done) {
      var server = createServer({ charsetSentinel: true, extended: extended })
      request(server)
        .post('/')
        .set('Content-Type', 'application/x-www-form-urlencoded')
        .send('foo=bar&baz=quux&utf8=%E2%9C%93')
        .expect(200, '{"foo":"bar","baz":"quux"}', done)
    })
  })
})

it('should handle empty message-body', function (done) {
request(createServer({ limit: '1kb' }))
.post('/')
Expand Down

0 comments on commit 6cea6bd

Please sign in to comment.