Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search improvements #256

Merged
merged 15 commits into from
Feb 28, 2022
Merged
2 changes: 1 addition & 1 deletion app/configurator/components/select-dataset-step.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export const SelectDatasetStepContent = () => {
const { search, order, includeDrafts, filters, dataset } = browseState;

const [configState] = useConfiguratorState();
const [debouncedQuery] = useDebounce(search, 150, {
const [debouncedQuery] = useDebounce(search, 500, {
leading: true,
});
const router = useRouter();
Expand Down
177 changes: 97 additions & 80 deletions app/graphql/resolvers.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { ascending, descending } from "d3";
import DataLoader from "dataloader";
import fuzzaldrin from "fuzzaldrin-plus";
import { GraphQLJSONObject } from "graphql-type-json";
import { topology } from "topojson-server";
import { parse as parseWKT } from "wellknown";
Expand All @@ -11,15 +10,17 @@ import {
GeoProperties,
GeoShapes,
} from "../domain/data";
import { parseLocaleString } from "../locales/locales";

import { keyBy } from "lodash";
import { defaultLocale, parseLocaleString } from "../locales/locales";
import { Loaders } from "../pages/api/graphql";
import {
createCubeDimensionValuesLoader,
createSource,
getCube,
getCubeDimensions,
getCubeObservations,
getCubes,
getCubes as rawGetCubes,
getSparqlEditorUrl,
} from "../rdf/queries";
import {
Expand All @@ -40,7 +41,63 @@ import {
QueryResolvers,
Resolvers,
} from "./resolver-types";
import { ResolvedDimension } from "./shared-types";
import { ResolvedDataCube, ResolvedDimension } from "./shared-types";
import cachedWithTTL from "../utils/cached-with-ttl";
import {
makeCubeIndex as makeCubeIndexRaw,
searchCubes,
} from "../utils/search";

const CUBES_CACHE_TTL = 60 * 1000;

/**
 * Cached variant of `rawGetCubes`.
 *
 * Results are kept for `CUBES_CACHE_TTL` and are keyed by the JSON
 * serialization of the `filters`, `includeDrafts` and `locale` arguments, so
 * calls with the same combination within the TTL share one result.
 */
const getCubes = cachedWithTTL(
  rawGetCubes,
  (params) => {
    const { filters, includeDrafts, locale } = params;
    return JSON.stringify({ filters, includeDrafts, locale });
  },
  CUBES_CACHE_TTL
);

/**
 * Builds the search index over the data cubes and caches it for
 * `CUBES_CACHE_TTL`.
 *
 * The cache key is the JSON serialization of `filters`, `includeDrafts` and
 * `locale`; any other property on the argument object is ignored.
 *
 * Before indexing, each cube's `creator` and `themes` entries are enriched
 * with the `label` found in the loaded organizations / themes (matched by
 * `iri`). A creator without a matching organization gets an empty label.
 *
 * @returns `index` — the value produced by `makeCubeIndexRaw` for the enriched
 *   cubes; `cubesByIri` — the cubes returned by `getCubes`, keyed by
 *   `data.iri`.
 */
const makeCubeIndex = cachedWithTTL(
  async ({ filters, includeDrafts, locale }) => {
    const cubes = await getCubes({
      locale: parseLocaleString(locale),
      includeDrafts: includeDrafts ? true : false,
      filters: filters ? filters : undefined,
    });
    const cubesByIri = keyBy(cubes, (c) => c.data.iri);

    // Theme and organization lookups are independent of each other, so they
    // are issued together instead of one after the other.
    const labelLocale = locale || defaultLocale;
    const [loadedThemes, loadedOrganizations] = await Promise.all([
      loadThemes({ locale: labelLocale }),
      loadOrganizations({ locale: labelLocale }),
    ]);
    const themes = loadedThemes.filter(truthy);
    const organizations = loadedOrganizations.filter(truthy);

    const themeIndex = keyBy(themes, (t) => t.iri);
    const organizationIndex = keyBy(organizations, (o) => o.iri);

    const dataCubeCandidates = cubes.map(({ data }) => data);
    const fullCubes = dataCubeCandidates.map((c) => ({
      ...c,
      // Only creators that carry an iri can be matched to an organization.
      creator: c.creator?.iri
        ? {
            ...c.creator,
            label: organizationIndex[c.creator.iri]?.label || "",
          }
        : c.creator,
      themes: c.themes?.map((t) => ({
        ...t,
        label: themeIndex[t.iri]?.label,
      })),
    }));

    return {
      index: makeCubeIndexRaw(fullCubes),
      cubesByIri,
    };
  },
  ({ filters, includeDrafts, locale }) =>
    JSON.stringify({ filters, includeDrafts, locale }),
  CUBES_CACHE_TTL
);

export const Query: QueryResolvers = {
possibleFilters: async (_, { iri, filters }) => {
Expand Down Expand Up @@ -82,91 +139,51 @@ export const Query: QueryResolvers = {
}
return [];
},
dataCubes: async (_, { locale, query, order, includeDrafts, filters }) => {
const cubes = await getCubes({
locale: parseLocaleString(locale),
includeDrafts: includeDrafts ? true : false,
filters: filters ? filters : undefined,
});

const dataCubeCandidates = cubes.map(({ data }) => data);

if (query) {
/**
* This uses https://github.com/jeancroy/fuzz-aldrin-plus which is a re-implementation of the Atom editor file picker algorithm
*
* Alternatives:
* - https://github.com/kentcdodds/match-sorter looks nice, but does not support highlighting results.
* - https://fusejs.io/ tried out but result matching is a bit too random (order of letters seems to be ignored). Whole-word matches don't support highlighting and scoring for some reason.
*/

const titleResults = fuzzaldrin.filter(dataCubeCandidates, `${query}`, {
key: "title",
});

const descriptionResults = fuzzaldrin.filter(
dataCubeCandidates,
`${query}`,
{ key: "description" }
);

const results = Array.from(
new Set([...titleResults, ...descriptionResults])
);

if (order == null || order === DataCubeResultOrder.Score) {
results.sort((a, b) => {
return (
fuzzaldrin.score(b.title, query) +
fuzzaldrin.score(b.description, query) * 0.5 -
(fuzzaldrin.score(a.title, query) +
fuzzaldrin.score(a.description, query) * 0.5)
);
});
} else if (order === DataCubeResultOrder.TitleAsc) {
results.sort((a, b) =>
a.title.localeCompare(b.title, locale ?? undefined)
dataCubes: async (_, { locale, query, order, includeDrafts, filters }) => {
const sortResults = <T extends unknown[]>(
results: T,
getter: (d: T[number]) => ResolvedDataCube["data"]
) => {
if (order === DataCubeResultOrder.TitleAsc) {
results.sort((a: any, b: any) =>
getter(a).title.localeCompare(getter(b).title, locale ?? undefined)
);
} else if (order === DataCubeResultOrder.CreatedDesc) {
results.sort((a, b) => descending(a.datePublished, b.datePublished));
results.sort((a: any, b: any) =>
descending(getter(a).datePublished, getter(b).datePublished)
);
}
};

return results.map((result) => {
const cube = cubes.find((c) => c.data.iri === result.iri)!;
return {
dataCube: cube,
highlightedTitle: result.title
? fuzzaldrin.wrap(result.title, query, {
wrap: { tagOpen: "<strong>", tagClose: "</strong>" },
})
: "",
highlightedDescription: result.description
? fuzzaldrin.wrap(result.description, query, {
wrap: { tagOpen: "<strong>", tagClose: "</strong>" },
})
: "",
score:
fuzzaldrin.score(result.title, query) +
fuzzaldrin.score(result.description, query) * 0.5,
};
if (query) {
const { index: cubesIndex, cubesByIri } = await makeCubeIndex({
locale,
query,
order,
includeDrafts,
filters,
});
const candidates = searchCubes(cubesIndex, query, cubesByIri);
sortResults(candidates, (x) => x.dataCube.data);
return candidates;
} else {
const cubes = await getCubes({
locale: parseLocaleString(locale),
includeDrafts: includeDrafts ? true : false,
filters: filters ? filters : undefined,
});
}

if (order === DataCubeResultOrder.TitleAsc) {
dataCubeCandidates.sort((a, b) =>
a.title.localeCompare(b.title, locale ?? undefined)
);
} else if (order === DataCubeResultOrder.CreatedDesc) {
dataCubeCandidates.sort((a, b) =>
descending(a.datePublished, b.datePublished)
);
const dataCubeCandidates = cubes.map(({ data }) => data);
const cubesByIri = keyBy(cubes, (c) => c.data.iri);
sortResults(dataCubeCandidates, (x) => x);
return dataCubeCandidates.map(({ iri }) => {
const cube = cubesByIri[iri];
return { dataCube: cube };
});
}

return dataCubeCandidates.map(({ iri }) => {
const cube = cubes.find((c) => c.data.iri === iri)!;
return { dataCube: cube };
});
},

dataCubeByIri: async (_, { iri, locale, latest }) => {
return getCube({
iri,
Expand Down
2 changes: 2 additions & 0 deletions app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"@tpluscode/rdf-ns-builders": "^1.0.0",
"@tpluscode/rdf-string": "^0.2.24",
"@tpluscode/sparql-builder": "^0.3.17",
"@types/lunr": "^2.3.4",
"@types/react-inspector": "^4.0.2",
"@types/topojson-client": "^3.0.0",
"@urql/devtools": "^2.0.3",
Expand All @@ -59,6 +60,7 @@
"io-ts": "^2.2.10",
"isomorphic-unfetch": "^3.0.0",
"lodash": "^4.17.20",
"lunr": "^2.3.9",
"maplibre-gl": "^2.1.6",
"micro-cors": "^0.1.1",
"nanoid": "^3.1.12",
Expand Down
4 changes: 1 addition & 3 deletions app/rdf/queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import {
shouldValuesBeLoadedForResolvedDimension,
} from "../domain/data";
import { SPARQL_EDITOR, SPARQL_ENDPOINT } from "../domain/env";
import { DataCubeSearchFilter, DataCubeTheme } from "../graphql/query-hooks";
import { DataCubeSearchFilter } from "../graphql/query-hooks";
import { ResolvedDataCube, ResolvedDimension } from "../graphql/shared-types";
import isAttrEqual from "../utils/is-attr-equal";
import truthy from "../utils/truthy";
Expand Down Expand Up @@ -131,12 +131,10 @@ export const getCubes = async ({
includeDrafts,
locale,
filters,
themesIndex,
}: {
includeDrafts: boolean;
locale: string;
filters?: DataCubeSearchFilter[];
themesIndex?: Record<string, DataCubeTheme>;
}): Promise<ResolvedDataCube[]> => {
const source = createSource();

Expand Down
27 changes: 27 additions & 0 deletions app/utils/cached-with-ttl.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
 * Wraps `fn` in a time-limited cache.
 *
 * @param fn - Function whose (awaited) result should be cached.
 * @param keyer - Maps the call arguments to the cache key; calls producing the
 *   same key share one cached result.
 * @param ttl - Maximum age of a cached result, in the unit of `Date.now()`
 *   (milliseconds). The age is measured from the moment `fn` resolved.
 * @returns An async function with the same parameters as `fn`.
 *
 * Behavior notes:
 * - Entries live in a `Map`, so keys such as `"constructor"` or `"__proto__"`
 *   are ordinary keys and never collide with `Object.prototype` members.
 * - Concurrent calls with the same key while `fn` is still running share that
 *   single invocation instead of each starting their own.
 * - A rejected call is not cached; the next call with that key retries.
 */
const cachedWithTTL = <T extends (...args: any[]) => any>(
  fn: T,
  keyer: (...args: Parameters<T>) => string,
  ttl: number
) => {
  type Entry = { date: number; result: ReturnType<T> };
  const cache = new Map<string, Entry>();
  const inFlight = new Map<string, Promise<ReturnType<T>>>();

  return async function (...args: Parameters<T>) {
    const key = keyer(...args);

    // Delete old results (removing entries while iterating a Map is safe).
    const cutoff = Date.now() - ttl;
    for (const [k, entry] of cache) {
      if (entry.date < cutoff) {
        cache.delete(k);
      }
    }

    const hit = cache.get(key);
    if (hit) {
      return hit.result;
    }

    const pending = inFlight.get(key);
    if (pending) {
      return pending;
    }

    const task: Promise<ReturnType<T>> = (async () => {
      const result = await fn(...args);
      cache.set(key, { date: Date.now(), result });
      return result;
    })();
    inFlight.set(key, task);

    // Registered after `set` so that a synchronous throw inside `fn` cannot
    // leave a stale rejected promise behind. Handling both outcomes here also
    // keeps this bookkeeping branch from raising an unhandled rejection; the
    // caller still observes the rejection through `task`.
    const release = () => {
      inFlight.delete(key);
    };
    task.then(release, release);

    return task;
  };
};

export default cachedWithTTL;
19 changes: 19 additions & 0 deletions app/utils/search.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { wrap } from "./search";

describe("wrap ", () => {
  it("should be able to highlight given part of a string, given indices and open/close tags", () => {
    const sentence =
      "The science of operations, as derived from mathematics more especially, is a science of itself, and has its own abstract truth and value.";
    const tags = { tagOpen: "<b>", tagClose: "</b>" };
    // Inclusive [start, end] character offsets: "science" and "derived".
    const ranges: readonly [number, number][] = [
      [4, 10],
      [30, 36],
    ];
    const expected =
      "The <b>science</b> of operations, as <b>derived</b> from mathematics more especially, is a science of itself, and has its own abstract truth and value.";

    expect(wrap(sentence, ranges, tags)).toEqual(expected);
  });
});
Loading