Skip to content

Commit

Permalink
perf: Switch most of the ES query to filter context
Browse files Browse the repository at this point in the history
  • Loading branch information
jj618 committed Oct 26, 2022
1 parent 06f628c commit f13a57c
Show file tree
Hide file tree
Showing 7 changed files with 205 additions and 21 deletions.
10 changes: 10 additions & 0 deletions helpers/deduplicates/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ module.exports = {
buildDuplicatesFromEsHits,
buildSources,
unwrapEsHits,
unwrapResult,
getHits,
};

/*
Expand Down Expand Up @@ -187,10 +189,18 @@ function buildDuplicateFromDocObject (docObject, sessionName, rules) {
);
}

/**
 * Extracts the `_source` documents from a search result in one step.
 *
 * Reads the hit list out of `result` via `getHits`, then hands that list to
 * `unwrapEsHits`.
 *
 * @param {object} result - Search result; must expose `body.hits.hits`
 *   (presumably an Elasticsearch client response — confirm against callers).
 * @returns {Array} What `unwrapEsHits` returns for the extracted hit list.
 */
function unwrapResult (result) {
  const hits = getHits(result);

  return unwrapEsHits(hits);
}

/**
 * Maps a list of hits to the list of their `_source` payloads.
 *
 * @param {Array<object>} hits - Hits to unwrap; each one is passed to `unwrapEsHit`.
 * @returns {Array} A new array holding one unwrapped value per hit, in the same order.
 */
function unwrapEsHits (hits) {
  // Wrap the helper in an arrow so only the hit itself is forwarded to it.
  const sources = hits.map((hit) => unwrapEsHit(hit));

  return sources;
}

/**
 * Returns the `_source` property of a single hit.
 *
 * @param {object} hit - Hit object carrying a `_source` property.
 * @returns {*} The value stored under `hit._source` (`undefined` when absent).
 */
function unwrapEsHit (hit) {
  const { _source: source } = hit;

  return source;
}

/**
 * Reads the hit list nested at `result.body.hits.hits`.
 *
 * No defaulting is applied: a missing `body` or a missing outer `hits`
 * object makes the property access throw a TypeError.
 *
 * @param {object} result - Search result exposing `body.hits.hits`
 *   (presumably an Elasticsearch client response — confirm against callers).
 * @returns {*} The value stored at `result.body.hits.hits`.
 */
function getHits (result) {
  const { hits: hitsEnvelope } = result.body;

  return hitsEnvelope.hits;
}
2 changes: 1 addition & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ function deduplicate (docObject) {
const ignoredFields = await searchIgnoredBySourceUid(docObject.sourceUid);
const request = buildQuery(docObject, ignoredFields);

if (request.query.bool.should.length === 0) {
if (request.query.bool.filter.bool.should.length === 0) {
business.emit('info', `Not deduplicable {docObject}, internalId: ${docObject.technical.internalId}`);
docObject.business.isDeduplicable = false;
return updateDuplicatesGraph(docObject, currentSessionName);
Expand Down
157 changes: 155 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"colors": "^1.4.0",
"corhal-config": "github:istex/corhal-config",
"debug": "^4.1.1",
"elastic-builder": "^2.16.0",
"fs-extra": "^7.0.1",
"lodash": "^4.17.21",
"nanoid": "^2.0.1",
Expand Down
14 changes: 11 additions & 3 deletions src/deduplicateQueryBuilder.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,22 @@ function buildQuery (docObject, ignoredFields = []) {
if (_.includes(scenario[docObject.business.duplicateGenre], rule.rule) &&
validateRequiredAndForbiddenParameters(docObject, rule, ignoredFields)
) {
request.query.bool.should.push(buildQueryFromRule(docObject, rule));
request.bool.should.push(buildQueryFromRule(docObject, rule));
}
});
}

request.query.bool.must_not = { term: { _id: docObject.technical.internalId } };
request.bool.must_not = { term: { _id: docObject.technical.internalId } };

return request;
return {
query: {
bool: {
filter:
request,

},
},
};
}

function validateRequiredAndForbiddenParameters (docObject, rule, ignoredFields = []) {
Expand Down
Loading

0 comments on commit f13a57c

Please sign in to comment.