Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed suggestions document language issues #4476

Merged
merged 14 commits into from
Apr 4, 2022
Merged
17 changes: 12 additions & 5 deletions app/api/services/informationextraction/InformationExtraction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,19 @@ class InformationExtraction {
if (!entity) {
return Promise.resolve();
}

const [segmentation] = await SegmentationModel.get({
xmlname: rawSuggestion.xml_file_name,
});

if (!segmentation) {
return Promise.resolve();
}

const [currentSuggestion] = await IXSuggestionsModel.get({
entityId: entity.sharedId,
propertyName: rawSuggestion.property_name,
fileId: segmentation.fileID,
});

let status: 'ready' | 'failed' = 'ready';
Expand All @@ -207,9 +217,6 @@ class InformationExtraction {

const suggestion: IXSuggestionType = {
...currentSuggestion,
entityId: entity.sharedId!,
language: entity.language!,
propertyName: rawSuggestion.property_name,
suggestedValue,
segment: rawSuggestion.segment_text,
status,
Expand All @@ -228,13 +235,13 @@ class InformationExtraction {
const [existingSuggestions] = await IXSuggestionsModel.get({
entityId: entity.sharedId,
propertyName,
language: entity.language,
fileId: file._id,
});
const suggestion: IXSuggestionType = {
...existingSuggestions,
entityId: entity.sharedId!,
fileId: file._id,
language: entity.language!,
language: languages.get(file.language, 'ISO639_1') || 'other',
propertyName,
status: 'processing',
date: new Date().getTime(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { testingEnvironment } from 'api/utils/testingEnvironment';
import { testingTenants } from 'api/utils/testingTenants';
import { IXSuggestionsModel } from 'api/suggestions/IXSuggestionsModel';
import { fs } from 'api/files';
import { fixtures } from './fixtures';
import { factory, fixtures } from './fixtures';
import { InformationExtraction } from '../InformationExtraction';
import { ExternalDummyService } from '../../tasksmanager/specs/ExternalDummyService';

Expand Down Expand Up @@ -43,6 +43,24 @@ describe('InformationExtraction', () => {
await testingEnvironment.tearDown();
});

/**
 * Test helper that calls `informationExtraction.saveSuggestionProcess` with a
 * minimal file-like object, so each spec can set up a file in one line.
 *
 * @param id - key passed to `factory.id` to build the file `_id`.
 * @param entity - value placed in the file's `entity` field.
 * @param language - value placed in the file's `language` field.
 * @param property - property name forwarded as the second argument.
 */
const saveSuggestionProcess = async (
  id: string,
  entity: string,
  language: string,
  property: string
) => {
  const fileId = factory.id(id);

  // Segmentation and extracted metadata are intentionally left empty.
  await informationExtraction.saveSuggestionProcess(
    { _id: fileId, entity, language, segmentation: {}, extractedMetadata: [] },
    property
  );
};

describe('trainModel', () => {
it('should send xmls', async () => {
await informationExtraction.trainModel('property1');
Expand Down Expand Up @@ -203,6 +221,8 @@ describe('InformationExtraction', () => {
},
]);

await saveSuggestionProcess('F3', 'A3', 'eng', 'property1');
await saveSuggestionProcess('F1', 'A1', 'eng', 'property1');
await informationExtraction.processResults({
params: { property_name: 'property1' },
tenant: 'tenant1',
Expand All @@ -229,6 +249,61 @@ describe('InformationExtraction', () => {
);
});

// One entity (A1) owns two files with different languages; each file's result
// must end up in its own 'ready' suggestion instead of overwriting the other.
it('should save different language suggestions for the same entity', async () => {
  const propertyName = 'property1';

  // The external service answers with one result per XML file.
  IXExternalService.setResults([
    {
      tenant: 'tenant1',
      property_name: propertyName,
      xml_file_name: 'documentA.xml',
      text: 'text_in_other_language',
      segment_text: 'segmented_text_in_other_language',
    },
    {
      tenant: 'tenant1',
      property_name: propertyName,
      xml_file_name: 'documentD.xml',
      text: 'text_in_eng_language',
      segment_text: 'segmented_text_in_eng_language',
    },
  ]);

  // Register both files against the same entity before results arrive.
  await saveSuggestionProcess('F1', 'A1', 'other', propertyName);
  await saveSuggestionProcess('F4', 'A1', 'eng', propertyName);

  await informationExtraction.processResults({
    params: { property_name: propertyName },
    tenant: 'tenant1',
    task: 'suggestions',
    success: true,
    data_url: 'http://localhost:1234/suggestions_results',
  });

  const readySuggestions = await IXSuggestionsModel.get({ status: 'ready', propertyName });

  expect(readySuggestions.length).toBe(2);

  // Stored language -> value expected for that language's suggestion.
  // The file saved as 'eng' is expected back under 'en'.
  const expectedValueByLanguage: Array<[string, string]> = [
    ['other', 'text_in_other_language'],
    ['en', 'text_in_eng_language'],
  ];

  expectedValueByLanguage.forEach(([language, suggestedValue]) => {
    expect(readySuggestions.find(s => s.language === language)).toEqual(
      expect.objectContaining({ language, propertyName, status: 'ready', suggestedValue })
    );
  });
});

it('should store failed suggestions', async () => {
IXExternalService.setResults([
{
Expand Down Expand Up @@ -319,6 +394,8 @@ describe('InformationExtraction', () => {
},
]);

await saveSuggestionProcess('F1', 'A1', 'eng', 'property4');

await informationExtraction.processResults({
params: { property_name: 'property1' },
tenant: 'tenant1',
Expand Down Expand Up @@ -359,6 +436,8 @@ describe('InformationExtraction', () => {
},
]);

await saveSuggestionProcess('F5', 'A5', 'eng', 'property1');

await informationExtraction.processResults({
params: { property_name: 'property1' },
tenant: 'tenant1',
Expand Down
3 changes: 2 additions & 1 deletion app/api/services/informationextraction/specs/fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ const fixtures: DBFixture = {
},
},
]),
factory.file('F4', 'A4', 'document', fixturesPdfNameD, 'eng', '', [
factory.file('F4', 'A1', 'document', fixturesPdfNameD, 'eng', '', [
{
name: 'property2',
selection: {
Expand Down Expand Up @@ -162,6 +162,7 @@ const fixtures: DBFixture = {
],
ixsuggestions: [
{
fileId: factory.id('F1'),
entityId: 'A1',
language: 'en',
propertyName: 'property1',
Expand Down
3 changes: 2 additions & 1 deletion app/api/suggestions/IXSuggestionsModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ const mongoSchema = new mongoose.Schema(props, {
strict: false,
});

mongoSchema.index({ propertyName: 'text' });
// Text index on `propertyName`. `language_override` names the document field
// that a MongoDB text index reads a per-document language from; it is pointed
// at `_text` here, presumably so the suggestion's own `language` field (which
// can hold values such as 'other') is not treated as a text-search language.
// NOTE(review): the suppression below suggests the schema typings in use do
// not declare `language_override` — confirm before removing it.
// @ts-ignore
mongoSchema.index({ propertyName: 'text' }, { language_override: '_text' });

// Model for suggestions, created from the schema above under the name 'ixsuggestions'.
const IXSuggestionsModel = instanceModel<IXSuggestionType>('ixsuggestions', mongoSchema);

Expand Down