Skip to content

Commit

Permalink
Reduce the usage of lodash (round 2) (#145)
Browse files Browse the repository at this point in the history
* Removed lodash from index.js

* Removed lodash from stats.js

* Removed lodash from stream.js

The only function I could not rewrite was defaults-deep.
I moved that function to a utils folder so we can remove it in the
near future.

* Removed lodash from tests

* fixed linting

* Updated lodash 🚀

I could not completely remove lodash; I need help with 2 function calls:

    - stream.js
        --> _.get
        --> _.defaultsDeep

* Attempt to fix the failing Travis CI build

* Updated mocha - istanbul to modern API

* Moving travis "back" to use npm ci

Maybe this can solve the CI issue?

* Fix CI error by using "normal" install

* removed and re-installed node_modules

* upgraded mocha

* updating outdated dependencies

* Removed zuul

It was deprecated and is no longer maintained:
https://github.com/defunctzombie/zuul#readme

* Fix pre-commit to match setup in compass

This should help align with the style used in:
https://github.com/mongodb-js/compass

* updated github workflow to look like compass

* updated mongodb-js-precommit to latest

* very small change from ssh --> https

* fixed failing tests from upgrading BSON
  • Loading branch information
alexbjorlig authored Mar 9, 2021
1 parent 33e0f51 commit 88b20c2
Show file tree
Hide file tree
Showing 26 changed files with 6,352 additions and 17,630 deletions.
4 changes: 2 additions & 2 deletions .eslintrc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"parserOptions": {
"ecmaVersion": 2017
"ecmaVersion": 2019
},
"env": {
"mocha": true,
Expand All @@ -18,6 +18,6 @@
},
"extends": [
"mongodb-js/node",
"mongodb-js/browser",
"mongodb-js/browser"
]
}
3 changes: 2 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
uses: actions/setup-node@v2
with:
node-version: ${{ matrix.node-version }}
- uses: bahmutov/npm-install@v1
- run: npm ci
- run: npm run check
- run: npm test
- run: npm run coverage
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ dist/
# Test output
.nyc_output
# Mac OS files on folder create/delete
.DS_Store
.DS_Store
12 changes: 0 additions & 12 deletions .zuul.yml

This file was deleted.

2 changes: 1 addition & 1 deletion bin/mongodb-schema
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ var bar = new ProgressBar('analyzing [:bar] :percent :etas ', {
clear: true
});

mongodb.connect(uri, function(err, conn) {
mongodb.connect(uri, {useUnifiedTopology: true}, function(err, conn) {
if (err) {
console.error('Failed to connect to MongoDB: ', err);
process.exit(1);
Expand Down
5 changes: 2 additions & 3 deletions lib/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
var stream = require('./stream');
var es = require('event-stream');
var _ = require('lodash');

// var debug = require('debug')('mongodb-schema:wrapper');

Expand All @@ -20,7 +19,7 @@ var _ = require('lodash');
module.exports = function(docs, options, callback) {
const promise = new Promise((resolve, reject) => {
// shift parameters if no options are specified
if (_.isUndefined(options) || (_.isFunction(options) && _.isUndefined(callback))) {
if (typeof options === 'undefined' || (typeof options === 'function' && typeof callback === 'undefined')) {
callback = options;
options = {};
}
Expand All @@ -33,7 +32,7 @@ module.exports = function(docs, options, callback) {
} else if (docs.pipe && typeof docs.pipe === 'function') {
src = docs;
// Arrays
} else if (_.isArray(docs)) {
} else if (Array.isArray(docs)) {
src = es.readArray(docs);
} else {
reject(new Error(
Expand Down
44 changes: 24 additions & 20 deletions lib/stats.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
var _ = require('lodash');
// var debug = require('debug')('mongodb-schema:stats');

var widthRecursive = function(schema) {
Expand All @@ -8,17 +7,21 @@ var widthRecursive = function(schema) {
}
if (schema.fields !== undefined) {
width += schema.fields.length;
width += _.sum(schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');

width += schema.fields.map(field => {
var doc = field.types.find(v => v.name === 'Document');
return widthRecursive(doc);
}));
width += _.sum(schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
}).reduce((p, c) => p + c || 0, 0);


width += schema.fields.map(field => {
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return widthRecursive(doc);
}
}));
})
.reduce((p, c) => p + c || 0, 0);
}
return width;
};
Expand All @@ -30,18 +33,19 @@ var depthRecursive = function(schema) {
var maxChildDepth = 0;
if (schema.fields !== undefined && schema.fields.length > 0) {
maxChildDepth = 1 + Math.max(
_.max(schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');
Math.max(...schema.fields.map(field => {
var doc = field.types.find(v => v.name === 'Document');
return depthRecursive(doc);
})),
_.max(schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
Math.max(...schema.fields.map(field => {
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return depthRecursive(doc);
}
return 0;
})));
}))
);
}
return maxChildDepth;
};
Expand All @@ -55,21 +59,21 @@ var branchingFactors = function(schema) {
if (schema.fields !== undefined && schema.fields.length > 0) {
branchArray.push(schema.fields.length);
res = schema.fields.map(function(field) {
var doc = _.find(field.types, 'name', 'Document');
var doc = field.types.find(v => v.name === 'Document');
return branchingFactors(doc);
});
branchArray.push.apply(branchArray, _.flatten(res, true));
branchArray.push(...res.flat(Infinity));
res = schema.fields.map(function(field) {
var arr = _.find(field.types, 'name', 'Array');
var arr = field.types.find(v => v.name === 'Array');
if (arr) {
var doc = _.find(arr.types, 'name', 'Document');
var doc = arr.types.find(v => v.name === 'Document');
return branchingFactors(doc);
}
return [];
});
branchArray.push.apply(branchArray, _.flatten(res, true));
branchArray.push(...res.flat(Infinity));
}
return _.sortBy(branchArray).reverse();
return branchArray.sort().reverse();
};

module.exports = {
Expand Down
103 changes: 50 additions & 53 deletions lib/stream.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
var es = require('event-stream');
var _ = require('lodash');
var Reservoir = require('reservoir');
var _ = require('lodash');

// var debug = require('debug')('mongodb-schema:stream');

Expand All @@ -13,14 +13,14 @@ var Reservoir = require('reservoir');
*/
var extractStringValueFromBSON = function(value) {
if (value && value._bsontype) {
if (_.includes([ 'Decimal128', 'Long' ], value._bsontype)) {
if (['Decimal128', 'Long'].includes(value._bsontype)) {
return value.toString();
}
if (_.includes([ 'Double', 'Int32' ], value._bsontype)) {
if ([ 'Double', 'Int32' ].includes(value._bsontype)) {
return String(value.value);
}
}
if (_.isString(value)) {
if (typeof value === 'string') {
return value;
}
return String(value);
Expand Down Expand Up @@ -68,7 +68,7 @@ var finalizeSchema = function(schema, parent, tag) {
finalizeSchema(schema.fields, schema, 'fields');
}
if (tag === 'fields') {
_.each(schema, function(field) {
Object.values(schema).forEach((field) => {
// create `Undefined` pseudo-type
var missing = parent.count - field.count;
if (missing > 0) {
Expand All @@ -79,25 +79,27 @@ var finalizeSchema = function(schema, parent, tag) {
count: missing
};
}
field.total_count = _.sum(field.types, 'count');
field.total_count = Object.values(field.types)
.map(v => v.count)
.reduce((p, c) => p + c, 0);

// recursively finalize types
finalizeSchema(field.types, field, 'types');
field.type = _.pluck(field.types, 'name');
field.type = field.types.map(v => v.name);
if (field.type.length === 1) {
field.type = field.type[0];
}
// a field has duplicates when any of its types have duplicates
field.has_duplicates = _.any(field.types, 'has_duplicates');
field.has_duplicates = !!field.types.find(v => v.has_duplicates);
// compute probability
field.probability = field.count / parent.count;
});
// turn object into array
parent.fields = _.values(parent.fields).sort(fieldComparator);
parent.fields = Object.values(parent.fields).sort(fieldComparator);
}
if (tag === 'types') {
_.each(schema, function(type) {
type.total_count = _.sum(type.lengths);
Object.values(schema).forEach(type => {
type.total_count = (type.lengths || []).reduce((p, c) => p + c || 0, 0);
// debug('recursively calling schema.fields');
finalizeSchema(type.fields, type, 'fields');
// debug('recursively calling schema.types');
Expand All @@ -110,7 +112,7 @@ var finalizeSchema = function(schema, parent, tag) {
type.unique = type.count === 0 ? 0 : 1;
type.has_duplicates = type.count > 1;
} else if (type.values) {
type.unique = _.uniq(type.values, false, extractStringValueFromBSON).length;
type.unique = new Set(type.values.map(extractStringValueFromBSON)).size;
type.has_duplicates = type.unique !== type.values.length;
}
// compute `average_length` for array types
Expand All @@ -119,7 +121,7 @@ var finalizeSchema = function(schema, parent, tag) {
}
// recursively finalize fields and types
});
parent.types = _.sortByOrder(_.values(parent.types), 'probability', 'desc');
parent.types = Object.values(parent.types).sort((a, b) => b.probability - a.probability);
}
return schema;
};
Expand All @@ -146,31 +148,25 @@ module.exports = function parse(options) {
/* eslint no-sync: 0 */

// set default options
options = _.defaults({}, options, {
semanticTypes: false,
storeValues: true
});
options = { semanticTypes: false, storeValues: true, ...options};

var semanticTypes = require('./semantic-types');

if (_.isObject(options.semanticTypes)) {
if (typeof options.semanticTypes === 'object') {
// enable existing types that evaluate to true
var enabledTypes = _(options.semanticTypes)
.pick(function(val) {
return _.isBoolean(val) && val;
})
.keys()
.map(function(val) {
return val.toLowerCase();
})
.value();
semanticTypes = _.pick(semanticTypes, function(val, key) {
return _.includes(enabledTypes, key.toLowerCase());
});
// merge with custom types that are functions
semanticTypes = _.assign(semanticTypes,
_.pick(options.semanticTypes, _.isFunction)
);
var enabledTypes = Object.entries(options.semanticTypes)
.filter(([, v]) => typeof v === 'boolean' && v)
.map(([k]) => k.toLowerCase());

semanticTypes = {...
Object.entries(semanticTypes)
.filter(([k]) => enabledTypes.includes(k.toLowerCase()))
.reduce((p, [k, v]) => ({...p, [k]: v}), {}),
};

Object.entries(options.semanticTypes)
.filter(([, v]) => typeof v === 'function')
.forEach(([k, v]) => {semanticTypes[k] = v;});
}

var rootSchema = {
Expand Down Expand Up @@ -205,9 +201,13 @@ module.exports = function parse(options) {

var getSemanticType = function(value, path) {
// pass value to semantic type detectors, return first match or undefined
return _.findKey(semanticTypes, function(fn) {
return fn(value, path);
});

const returnValue = Object.entries(semanticTypes)
.filter(([, v]) => {
return v(value, path);
})
.map(([k]) => k)[0];
return returnValue;
};

/**
Expand Down Expand Up @@ -236,13 +236,13 @@ module.exports = function parse(options) {
* @param {Object} schema the updated schema object
*/


var addToType = function(path, value, schema) {
var bsonType = getBSONType(value);
// if semantic type detection is enabled, the type is the semantic type
// or the original bson type if no semantic type was detected. If disabled,
// it is always the bson type.
var typeName = (options.semanticTypes) ?
getSemanticType(value, path) || bsonType : bsonType;
var typeName = (options.semanticTypes) ? getSemanticType(value, path) || bsonType : bsonType;
var type = schema[typeName] = _.get(schema, typeName, {
name: typeName,
bsonType: bsonType,
Expand All @@ -252,24 +252,22 @@ module.exports = function parse(options) {
type.count++;
// recurse into arrays by calling `addToType` for each element
if (typeName === 'Array') {
type.types = _.get(type, 'types', {});
type.lengths = _.get(type, 'lengths', []);
type.types = type.types || {};
type.lengths = type.lengths || [];
type.lengths.push(value.length);
_.each(value, function(v) {
addToType(path, v, type.types);
});
value.forEach(v => addToType(path, v, type.types));

// recurse into nested documents by calling `addToField` for all sub-fields
} else if (typeName === 'Document') {
type.fields = _.get(type, 'fields', {});
_.forOwn(value, function(v, k) {
addToField(path + '.' + k, v, type.fields);
});
Object.entries(value).forEach(([k, v]) => addToField(path + '.' + k, v, type.fields));

// if the `storeValues` option is enabled, store some example values
} else if (options.storeValues) {
type.values = _.get(type, 'values', bsonType === 'String' ?
new Reservoir(100) : new Reservoir(10000));
var defaultValue = bsonType === 'String' ?
new Reservoir(100) : new Reservoir(10000);
type.values = type.values || defaultValue;

addToValue(type, value);
}
};
Expand All @@ -284,8 +282,9 @@ module.exports = function parse(options) {
addToField = function(path, value, schema) {
var defaults = {};

var pathSplitOnDot = path.split('.');
defaults[path] = {
name: _.last(path.split('.')),
name: pathSplitOnDot[pathSplitOnDot.length - 1],
path: path,
count: 0,
types: {}
Expand All @@ -306,9 +305,7 @@ module.exports = function parse(options) {
}

var parser = es.through(function write(obj) {
_.each(_.keys(obj), function(key) {
addToField(key, obj[key], rootSchema.fields);
});
Object.keys(obj).forEach(key => addToField(key, obj[key], rootSchema.fields));
rootSchema.count += 1;
this.emit('progress', obj);
}, function end() {
Expand Down
Loading

0 comments on commit 88b20c2

Please sign in to comment.