Elastic-Harvest is a Node.js implementation of the JSON API Search Profile.
This library ties together harvester.js and elasticsearch to offer the required linked resource filtering and aggregation features.
Apart from that it also provides a number of helper functions to synchronize harvester.js/mongoDB resources with an elasticsearch backend.
Find useful elastic-search tools as well as their documentation in /non-functionals.
Find documentation for querying elasticharvester-powered endpoints here
- Aggregations : stats, extended_stats, top_hits, terms
- Primary and Linked resource filtering interop
- Top_hits aggregation interop with JSON API features, inclusion and sparse fieldsets #6
- More aggregations : min, max, sum, avg, percentiles, percentile_ranks, cardinality, geo_bounds, significant_terms, range, date_range, filter, filters, missing, histogram, date_histogram, geo_distance
- Reliable harvester.js/mongoDB - Elasticsearch data synchronisation ( oplog based )
- Support adaptive queries: use the ES mapping file to figure out whether to use parent/child or nested queries / aggregations
- Use Harvest associations + ES mapping file to discover which MongoDB collections have to be synced, rather than having to register them explicitly
- Bootstrap elasticsearch with existing data from Harvest resources through REST endpoint
- Bootstrap elasticsearch mapping file through REST endpoint
Elasticsearch v1.4.0+
// Elasticsearch connection settings: the cluster URL (overridable through the
// BONSAI_URL environment variable), the index name and the document type.
var Elastic_Search_URL = process.env.BONSAI_URL || "http://127.0.0.1:9200";
var Elastic_Search_Index = "dealer-api";
var type = "dealers";

var harvestApp = harvest(options);
var peopleSearch;
var peopleSearchRoute;

//This circumvents a dependency issue between harvest and elastic-harvest:
//the search route is registered first and forwards every call to the
//ElasticHarvest route handler that is assigned further down.
harvestApp.router.get('/people/search', function () {
    peopleSearchRoute.apply(peopleSearch, arguments);
});

harvestApp
    .resource('person', {
        name: String
    });

// Pass the harvest app created above (`harvestApp`) to ElasticHarvest.
peopleSearch = new ElasticHarvest(harvestApp, Elastic_Search_URL, Elastic_Search_Index, type);
peopleSearchRoute = peopleSearch.route;
peopleSearch.setHarvestRoute(harvestApp.route('person'));
peopleSearch.enableAutoSync("person");
##### Note - only one "after" callback is allowed per endpoint, so if you enable autosync, you are handing that callback over to elastic-harvest.
dealerSearch.enableAutoSync("dealer");
An alternative way to create an :after callback and sync Elasticsearch. This approach lets you do more inside the after callback.
// Hand-written "after" hook for the dealer resource: POST requests, and PUT
// requests whose entity carries an id, are passed to expandAndSync; every
// other request returns the entity untouched.
this.harvest_app.after("dealer", function (req, res, next) {
    var isCreate = req.method === 'POST';
    var isUpdateWithId = req.method === 'PUT' && this.id;
    if (!(isCreate || isUpdateWithId)) {
        return this;
    }
    return dealerSearch.expandAndSync(this);
});
dealerSearch.expandEntity(dealer);
dealerSearch.expandAndSync(dealer);
dealerSearch.sync(dealer);
dealerSearch.delete(dealer.id);
Create an :after callback & keep your elastic search index up to date with PUTs and POSTs on linked documents. (added in 0.0.5)
##### Note - only one "after" callback is allowed per endpoint, so if you enable indexUpdateOnModelUpdate, you are handing that callback over to elastic-harvest.
dealerSearch.enableAutoIndexUpdateOnModelUpdate("subdocumentsHarvestEndpoint","links.path.to.object.id");
e.g. dealerSearch.enableAutoIndexUpdateOnModelUpdate("brand","links.current_contracts.brand.id");
entity = this;
dealerSearch.updateIndexForLinkedDocument("links.path.to.object.id",entity);
dealerSearch.deleteIndex().
dealerSearch.initializeIndex().
dealerSearch.initializeMapping(mappingObject).
v0.0.9 update provides automatic handling of missing-index errors.
The Mapping object can be loaded from a js file that looks like:
module.exports= {
"trackingPoints": {
"properties": {
"data": {
"type": "nested"
},
"loc" : {
"type" : "nested",
"properties": {
"location" : {
"type" : "geo_point"
}
}
},
"time" : {
"type" : "date"
},
"links": {
"type": "nested",
"properties": {
"equipment": {
"type": "nested",
"properties": {
"model": {
"type": "nested",
"properties": {
"brand":{
"type": "nested",
"properties": {
"name":{
"type": "string",
"index": "not_analyzed"
}
}
},
"equipmentType":{
"type": "nested",
"properties": {
"value":{
"type": "string",
"index": "not_analyzed"
}
}
},
"name":{
"type": "string",
"index": "not_analyzed"
}
}
}
}
},
"duty": {
"type": "nested",
"properties": {
"status":{
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
There is a sampler
script that you can run when you want a subset of the results you would normally get. To run it, scripting has to be enabled in the Elasticsearch config:
script.disable_dynamic: sandbox
script.default_lang: expression
script.groovy.sandbox.enabled: false
Then place the following script as a sampler.groovy
file in the scripts directory of the ES instance.
count=count+1;if(count % skip_rate == 0){ return 1 }; return 0;
The sampler script can be executed in conjunction with any other ES query and aggregations. Just add the following to your query:
script=sampler&script.maxSamples=15
maxSamples
is the number of results you want to get. The script takes a sample from the normal result set. For the same query results you will get the same sample data.
An example:
/people/search?aggregations=n&n.property=links.pet.name&n.aggregations=mostpopular&mostpopular.type=top_hits&mostpopular.sort=-appearances&mostpopular.limit=1&mostpopular.include=pets&script=sampler&script.maxSamples=100