Skip to content

Commit

Permalink
version 1.40.0
Browse files Browse the repository at this point in the history
  • Loading branch information
daneryl committed Oct 11, 2021
2 parents 0718318 + 977b48b commit 2167c9b
Show file tree
Hide file tree
Showing 33 changed files with 884 additions and 226 deletions.
142 changes: 142 additions & 0 deletions app/api/csv/arrangeThesauri.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import { ImportFile } from 'api/csv/importFile';
import { WithId } from 'api/odm';
import thesauri from 'api/thesauri';
import { PropertySchema } from 'shared/types/commonTypes';
import { TemplateSchema } from 'shared/types/templateType';
import { ThesaurusSchema } from 'shared/types/thesaurusType';

import csv, { CSVRow } from './csv';
import { splitMultiselectLabels } from './typeParsers/multiselect';
import { normalizeThesaurusLabel } from './typeParsers/select';

/**
 * Returns a shallow copy of `input` restricted to the given `keys`.
 *
 * Only keys that are own properties of `input` are copied; keys that are
 * missing (or only inherited through the prototype chain) are skipped.
 *
 * @param input - object to pick properties from.
 * @param keys - candidate property names, copied in the order given.
 * @returns a new plain object holding the matching own properties.
 */
const filterJSObject = (input: { [k: string]: any }, keys: string[]): { [k: string]: any } => {
  const result: { [k: string]: any } = {};
  keys.forEach(k => {
    // Go through Object.prototype instead of `input.hasOwnProperty`: the direct
    // call throws on null-prototype objects and is fooled by an own property
    // that happens to be named "hasOwnProperty".
    if (Object.prototype.hasOwnProperty.call(input, k)) {
      result[k] = input[k];
    }
  });
  return result;
};

/**
 * Error raised while scanning a CSV for thesauri values.
 *
 * Carries the original error (whose message it reuses) together with the
 * CSV row and the row index that were being processed when it happened.
 */
class ArrangeThesauriError extends Error {
  constructor(
    public source: Error,
    public row: CSVRow,
    public index: number
  ) {
    super(source.message);
  }
}

/**
 * Builds a lookup from CSV column name to thesaurus id.
 *
 * Every property that has both a `content` and a `type` contributes its own
 * name plus one `<name>__<language>` entry per given language, all pointing to
 * the stringified `content` of the property.
 *
 * @param thesauriRelatedProperties - properties to index; `undefined` yields an empty map.
 * @param languages - optional suffixes used to build the `<name>__<language>` entries.
 * @returns an object mapping column names to thesaurus ids.
 */
const createNameToIdMap = (
  thesauriRelatedProperties: PropertySchema[] | undefined,
  languages?: string[]
): { [k: string]: string } => {
  const idsByColumnName: { [k: string]: string } = {};

  (thesauriRelatedProperties || []).forEach(property => {
    if (!property.content || !property.type) {
      return;
    }

    const thesaurusId = property.content.toString();
    const translatedColumns = (languages || []).map(language => `${property.name}__${language}`);

    // Plain name first, then the per-language variants.
    [property.name].concat(translatedColumns).forEach(columnName => {
      idsByColumnName[columnName] = thesaurusId;
    });
  });

  return idsByColumnName;
};

/**
 * Per-thesaurus bookkeeping used while scanning a CSV for thesaurus values.
 * All three maps are keyed by the stringified thesaurus id.
 */
type ThesauriValueData = {
// Normalized labels of the values each thesaurus already contains.
thesauriIdToExistingValues: Map<string, Set<string>>;
// Labels found in the CSV that are not in the thesaurus yet, as originally written.
thesauriIdToNewValues: Map<string, Set<string>>;
// Normalized form of the new labels, used to avoid adding the same label twice.
thesauriIdToNormalizedNewValues: Map<string, Set<string>>;
};

/**
 * Creates the bookkeeping maps for the given thesauri.
 *
 * For each thesaurus (keyed by its stringified `_id`) the existing values are
 * stored as a set of normalized labels, and two empty sets are prepared to
 * collect the new labels and their normalized counterparts.
 *
 * @param allRelatedThesauri - thesauri to prepare the maps for.
 * @returns the three id-keyed maps.
 */
const setupIdValueMaps = (allRelatedThesauri: WithId<ThesaurusSchema>[]): ThesauriValueData => {
  const valueData: ThesauriValueData = {
    thesauriIdToExistingValues: new Map(),
    thesauriIdToNewValues: new Map(),
    thesauriIdToNormalizedNewValues: new Map(),
  };

  allRelatedThesauri.forEach(thesaurus => {
    const thesaurusId = thesaurus._id.toString();
    const normalizedLabels = (thesaurus.values || []).map(value =>
      normalizeThesaurusLabel(value.label)
    );

    valueData.thesauriIdToExistingValues.set(thesaurusId, new Set(normalizedLabels));
    valueData.thesauriIdToNewValues.set(thesaurusId, new Set());
    valueData.thesauriIdToNormalizedNewValues.set(thesaurusId, new Set());
  });

  return valueData;
};

/**
 * Saves, one at a time, every thesaurus that gained new values.
 *
 * Thesauri without pending new values are left untouched. For the others the
 * new labels are appended (as `{ label }` entries) after the existing values
 * and the result is passed to `thesauri.save`.
 *
 * @param allRelatedThesauri - candidate thesauri.
 * @param thesauriIdToNewValues - new labels to add, keyed by stringified thesaurus id.
 */
const syncSaveThesauri = async (
  allRelatedThesauri: WithId<ThesaurusSchema>[],
  thesauriIdToNewValues: Map<string, Set<string>>
) => {
  const thesauriWithNewValues = allRelatedThesauri.filter(
    t => (thesauriIdToNewValues.get(t._id.toString()) || new Set()).size > 0
  );
  for (let i = 0; i < thesauriWithNewValues.length; i += 1) {
    // Index the filtered list: indexing `allRelatedThesauri` here would save the
    // wrong thesauri (and skip the right ones) whenever a thesaurus without new
    // values comes before one that has them.
    const thesaurus = thesauriWithNewValues[i];
    const newValues = Array.from(
      thesauriIdToNewValues.get(thesaurus._id.toString()) || []
    ).map(tval => ({ label: tval }));
    const thesaurusValues = thesaurus.values || [];
    // Saves are awaited one by one on purpose, so they never run concurrently.
    // eslint-disable-next-line no-await-in-loop
    await thesauri.save({
      ...thesaurus,
      values: thesaurusValues.concat(newValues),
    });
  }
};

/**
 * Scans a CSV import file and adds to the related thesauri every label that
 * the `select` / `multiselect` columns use but the thesauri do not contain yet.
 *
 * Labels are compared in their normalized form, so a label is only added once
 * per thesaurus; the first spelling found in the file is the one that is saved.
 *
 * @param file - import file providing the CSV stream.
 * @param template - template whose `select` / `multiselect` properties are inspected.
 * @param languages - optional language keys; `<property>__<language>` columns are scanned too.
 * @param stopOnError - forwarded to the CSV reader.
 * @throws ArrangeThesauriError from the CSV reader's error handler, wrapping the
 * original error with the row and index it was reported for.
 */
const arrangeThesauri = async (
  file: ImportFile,
  template: TemplateSchema,
  languages?: string[],
  stopOnError: boolean = true
) => {
  const thesauriRelatedProperties = template.properties?.filter(
    p => p.type === 'select' || p.type === 'multiselect'
  );

  const nameToThesauriId = createNameToIdMap(thesauriRelatedProperties, languages);

  const relatedThesauriIds = Array.from(
    new Set(thesauriRelatedProperties?.map(p => p.content?.toString()).filter(t => t))
  );
  const allRelatedThesauri = await thesauri.get({ $in: relatedThesauriIds });

  const {
    thesauriIdToExistingValues,
    thesauriIdToNewValues,
    thesauriIdToNormalizedNewValues,
  } = setupIdValueMaps(allRelatedThesauri);

  // Records every label of the row that is neither in its thesaurus nor already queued.
  const collectNewLabels = async (row: CSVRow) => {
    const thesauriColumns = filterJSObject(nameToThesauriId, Object.keys(row));

    Object.keys(thesauriColumns).forEach(name => {
      const id = thesauriColumns[name];
      const labels = splitMultiselectLabels(row[name]);

      Object.entries(labels).forEach(([normalizedLabel, originalLabel]) => {
        const alreadyKnown =
          thesauriIdToExistingValues.get(id)?.has(normalizedLabel) ||
          thesauriIdToNormalizedNewValues.get(id)?.has(normalizedLabel);

        if (!alreadyKnown) {
          thesauriIdToNewValues.get(id)?.add(originalLabel);
          thesauriIdToNormalizedNewValues.get(id)?.add(normalizedLabel);
        }
      });
    });
  };

  const rethrowWithRowContext = async (e: Error, row: CSVRow, index: number) => {
    throw new ArrangeThesauriError(e, row, index);
  };

  const reader = csv(await file.readStream(), stopOnError)
    .onRow(collectNewLabels)
    .onError(rethrowWithRowContext);
  await reader.read();

  await syncSaveThesauri(allRelatedThesauri, thesauriIdToNewValues);
};

export { arrangeThesauri, ArrangeThesauriError };
3 changes: 2 additions & 1 deletion app/api/csv/csvLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { ThesaurusSchema } from 'shared/types/thesaurusType';

import { ensure } from 'shared/tsUtils';
import { ObjectId } from 'mongodb';
import { arrangeThesauri } from './arrangeThesauri';
import csv, { CSVRow } from './csv';
import importFile from './importFile';
import { importEntity, translateEntity } from './importEntity';
Expand Down Expand Up @@ -55,6 +56,7 @@ export class CSVLoader extends EventEmitter {
(await settings.get()).languages
).map((l: LanguageSchema) => l.key);
const { newNameGeneration = false } = await settings.get();
await arrangeThesauri(file, template, availableLanguages);

await csv(await file.readStream(), this.stopOnError)
.onRow(async (row: CSVRow) => {
Expand All @@ -64,7 +66,6 @@ export class CSVLoader extends EventEmitter {
options.language,
newNameGeneration
);

if (rawEntity) {
const entity = await importEntity(rawEntity, template, file, options);
await translateEntity(entity, rawTranslations, template, file);
Expand Down
4 changes: 2 additions & 2 deletions app/api/csv/importEntity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ import { processDocument } from 'api/files/processDocument';
import { RawEntity } from 'api/csv/entityRow';
import { TemplateSchema } from 'shared/types/templateType';
import { MetadataSchema, PropertySchema } from 'shared/types/commonTypes';
import { propertyTypes } from 'shared/propertyTypes';
import { ImportFile } from 'api/csv/importFile';
import { EntitySchema } from 'shared/types/entityType';
import { ensure } from 'shared/tsUtils';

import { attachmentsPath, files } from 'api/files';
import { propertyTypes } from 'shared/propertyTypes';
import { generateID } from 'shared/IDGenerator';

import typeParsers from './typeParsers';

const parse = async (toImportEntity: RawEntity, prop: PropertySchema) =>
Expand Down
10 changes: 3 additions & 7 deletions app/api/csv/importFile.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import fs from 'fs';
import path from 'path';
import { Readable } from 'stream';

import { generateFileName, fileFromReadStream, uploadsPath } from 'api/files/filesystem';
import { createError } from 'api/utils';
Expand All @@ -17,16 +16,13 @@ const extractFromZip = async (zipPath: string, fileName: string) => {
};

export class ImportFile {
filePath: string | Readable;
filePath: string;

constructor(filePath: string | Readable) {
constructor(filePath: string) {
this.filePath = filePath;
}

async readStream(fileName = 'import.csv') {
if (this.filePath instanceof Readable) {
return this.filePath;
}
if (path.extname(this.filePath) === '.zip') {
return extractFromZip(this.filePath, fileName);
}
Expand All @@ -46,6 +42,6 @@ export class ImportFile {
}
}

const importFile = (filePath: string | Readable) => new ImportFile(filePath);
const importFile = (filePath: string) => new ImportFile(filePath);

export default importFile;
8 changes: 4 additions & 4 deletions app/api/csv/specs/__snapshots__/importFile.spec.ts.snap
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`importFile readStream should return a readable stream for the csv file 1`] = `
"Title , text label , numeric label, non configured, select label, not defined type, geolocation_geolocation,auto id, additional tag(s)
"Title , text label , numeric label, non configured, select_label, not defined type, geolocation_geolocation,auto id, additional tag(s), multi_select_label
title1, text value 1, 1977, ______________, thesauri1 , notType1 , 1|1,,tag1
title2, text value 2, 2019, ______________, thesauri2 , notType2 , ,,tag2
title3, text value 3, 2020, ______________, thesauri2 , notType3 , 0|0,,tag3
title1, text value 1, 1977, ______________, thesauri1 , notType1 , 1|1,,tag1, multivalue1
title2, text value 2, 2019, ______________, thesauri2 , notType2 , ,,tag2, multivalue2
title3, text value 3, 2020, ______________, thesauri2 , notType3 , 0|0,,tag3, multivalue1|multivalue3
"
`;

Expand Down
17 changes: 17 additions & 0 deletions app/api/csv/specs/arrangeThesauriTest.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
title,unrelated_property,select_property__en, select_property__es,multiselect_property__en, multiselect_property__es
select_1,unrelated_text,B,Bes,A,Aes
select_2,unrelated_text,C,Ces,A,Aes
select_3,unrelated_text,b,bes,A,Aes
select_4,unrelated_text,B,Bes,A,Aes
select_5,unrelated_text,d,des,A,Aes
select_6,unrelated_text,D,Des,A,Aes
select_7,unrelated_text, b,bes,A,Aes
select_8,unrelated_text, , ,A,Aes
select_8,unrelated_text, , ,A,Aes
multiselect_1,unrelated_text,A,Aes,B,Bes
multiselect_2,unrelated_text,A,Aes,c,ces
multiselect_3,unrelated_text,A,Aes,A|b,Aes|bes
multiselect_4,unrelated_text,A,Aes,a|B|C,aes|Bes|Ces
multiselect_5,unrelated_text,A,Aes, a| b | , aes| bes |
multiselect_6,unrelated_text,A,Aes, | | , | |
multiselect_7, unrelated_text,A,Aes,A|B|C|D| |E| e| g ,Aes|Bes|Ces|Des| |Ees| ees| ges
Loading

0 comments on commit 2167c9b

Please sign in to comment.