Skip to content

Commit

Permalink
Make stream() sample size configurable
Browse files Browse the repository at this point in the history
Improve document stream() detection limitation.

Related: #426, #452
  • Loading branch information
Borewit committed Jul 20, 2021
1 parent 0219e87 commit c635507
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 5 deletions.
15 changes: 13 additions & 2 deletions core.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,21 @@ declare namespace core {
*/
const mimeTypes: Set<core.MimeType>;

/**
* Options accepted by `stream()`.
*/
interface IStreamOptions {
/**
* Number of bytes read from the start of the stream and used as the sample for file type detection.
* When omitted, `stream()` falls back to its built-in minimum sample size (documented as 4100 bytes).
* A smaller sample may be too short for the file type to be detected.
*/
readonly sampleSize?: number
}

/**
Detect the file type of a readable stream.
@param readableStream - A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) containing a file to examine.
@param options - Optional settings; use `sampleSize` to set how many bytes of the stream are sampled for detection.
@returns A `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `FileType.fromFile()`.
@example
Expand All @@ -370,7 +381,7 @@ declare namespace core {
(async () => {
const read = fs.createReadStream('encrypted.enc');
const decipher = crypto.createDecipheriv(alg, key, iv);
const stream = await fileType.stream(read.pipe(decipher));
const stream = await fileType.stream(read.pipe(decipher), {sampleSize: 1024});
console.log(stream.fileType);
//=> {ext: 'mov', mime: 'video/quicktime'}
Expand All @@ -380,7 +391,7 @@ declare namespace core {
})();
```
*/
function stream(readableStream: ReadableStream): Promise<core.ReadableStreamWithFileType>
function stream(readableStream: ReadableStream, options?: IStreamOptions): Promise<core.ReadableStreamWithFileType>
}

export = core;
9 changes: 7 additions & 2 deletions core.js
Original file line number Diff line number Diff line change
Expand Up @@ -1414,10 +1414,15 @@ async function _fromTokenizer(tokenizer) {
}
}

const stream = readableStream => new Promise((resolve, reject) => {
const stream = (readableStream, options) => new Promise((resolve, reject) => {
// Using `eval` to work around issues when bundling with Webpack
const stream = eval('require')('stream'); // eslint-disable-line no-eval

options = {
sampleSize: minimumBytes,
...options
};

readableStream.on('error', reject);
readableStream.once('readable', async () => {
// Set up output stream
Expand All @@ -1431,7 +1436,7 @@ const stream = readableStream => new Promise((resolve, reject) => {
}

// Read the input stream and detect the filetype
const chunk = readableStream.read(minimumBytes) || readableStream.read() || Buffer.alloc(0);
const chunk = readableStream.read(options.sampleSize) || readableStream.read() || Buffer.alloc(0);
try {
const fileType = await fromBuffer(chunk);
pass.fileType = fileType;
Expand Down
8 changes: 7 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,12 +278,18 @@ Type: [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer)

A file source implementing the [tokenizer interface](https://github.com/Borewit/strtok3#tokenizer).

### FileType.stream(readableStream)
### FileType.stream(readableStream, options?)

Detect the file type of a readable stream.

If `options.sampleSize` is not provided, a backward-compatible sample size of 4100 bytes is used.

Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `FileType.fromFile()`.

This method can be handy to put in between a stream, but it comes with a price.
Internally `stream()` builds up a buffer of `sampleSize` bytes, used as a sample, to determine the file type.
The sample size impacts the file detection resolution. A smaller sample size lowers the probability of detecting the correct file type.

*Note:* This method is only available using Node.js.

#### readableStream
Expand Down
16 changes: 16 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,12 @@ test('.stream() method - short stream', async t => {
t.deepEqual(bufferA, bufferB);
});

// A sample that is too small to identify the fixture must resolve with an
// undefined `fileType` rather than reject.
test('.stream() method - no End-Of-Stream errors', async t => {
	const fixturePath = path.join(__dirname, 'fixture', 'fixture.ogm');
	const input = fs.createReadStream(fixturePath);
	const {fileType} = await FileType.stream(input, {sampleSize: 30});
	t.is(fileType, undefined);
});

test('.stream() method - error event', async t => {
const errorMessage = 'Fixture';

Expand All @@ -351,6 +357,16 @@ test('.stream() method - error event', async t => {
await t.throwsAsync(FileType.stream(readableStream), errorMessage);
});

// The same fixture is sampled twice: 30 bytes is not enough to detect the
// type, while 4100 bytes yields a detection result.
test('.stream() method - sampleSize', async t => {
	const fixturePath = path.join(__dirname, 'fixture', 'fixture.ogm');
	// Opens a fresh read stream for every run so each detection starts at byte 0.
	const detect = sampleSize => FileType.stream(fs.createReadStream(fixturePath), {sampleSize});

	const tooSmall = await detect(30);
	t.is(typeof (tooSmall.fileType), 'undefined', 'file-type cannot be determined with a sampleSize of 30');

	const largeEnough = await detect(4100);
	t.is(typeof (largeEnough.fileType), 'object', 'file-type can be determined with a sampleSize of 4100');
	t.is(largeEnough.fileType.mime, 'video/ogg');
});

test('FileType.extensions.has', t => {
t.true(FileType.extensions.has('jpg'));
t.false(FileType.extensions.has('blah'));
Expand Down

0 comments on commit c635507

Please sign in to comment.