Skip to content

Commit

Permalink
feat(institutions): add support for some institution types
Browse files Browse the repository at this point in the history
  • Loading branch information
ClaudiuCeia committed Jul 21, 2022
1 parent d6b0323 commit f70d6ad
Show file tree
Hide file tree
Showing 3 changed files with 291 additions and 0 deletions.
3 changes: 3 additions & 0 deletions mod.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { Time, TimeEntity } from "./src/Time.ts";
import { Location } from "./src/Location.ts";
import { URL } from "./src/URL.ts";
import { Email } from "./src/Email.ts";
import { Institution } from "./src/Institution.ts";

export type AnyEntity =
| Entity<unknown, unknown>
Expand All @@ -41,6 +42,7 @@ export const Duckling = (
Location.parser,
URL.parser,
Email.parser,
Institution.parser,
]
) =>
createLanguage<DucklingLanguage>({
Expand Down Expand Up @@ -72,3 +74,4 @@ export * from "./src/Time.ts";
export * from "./src/Location.ts";
export * from "./src/URL.ts";
export * from "./src/Email.ts";
export * from "./src/Institution.ts";
132 changes: 132 additions & 0 deletions src/Institution.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import {
any,
Context,
createLanguage,
map,
Parser,
seq,
str,
letter,
many,
regex,
optional,
many1,
manyTill,
peek,
} from "https://deno.land/x/combine@v0.0.8/mod.ts";
import { EntityLanguage, __, dot } from "./common.ts";
import { ent, Entity } from "./Entity.ts";
import { fuzzyCase } from "./parsers.ts";

/**
 * Entity describing an institution found in the input text.
 *
 * - `name` is the text associated with the match.
 * - `type` is one of the institution keywords supported by this module.
 */
export type InstitutionEntity = Entity<
  "institution",
  {
    name: string;
    type:
      | "university"
      | "college"
      | "school"
      | "city hall"
      | "town hall";
  }
>;

/**
 * Creates an {@link InstitutionEntity} by delegating to `ent` with the
 * `"institution"` kind.
 *
 * @param value Entity payload (`name` and `type`).
 * @param before Parser context passed to `ent` as its "before" argument.
 * @param after Parser context passed to `ent` as its "after" argument.
 * @returns Whatever `ent` builds for the given payload and contexts.
 */
export const institution = (
  value: InstitutionEntity["value"],
  before: Context,
  after: Context
): InstitutionEntity => ent(value, "institution", before, after);

/**
 * Grammar shape for the institution language: three string-producing
 * building blocks plus two rules that produce full entities.
 */
type InstitutionEntityLanguage = EntityLanguage<
  {
    // Building blocks yielding the matched text.
    Capitalized: Parser<string>;
    Educational: Parser<string>;
    Administrative: Parser<string>;

    // Complete rules yielding an institution entity.
    EducationalFull: Parser<InstitutionEntity>;
    AdministrativeFull: Parser<InstitutionEntity>;
  },
  InstitutionEntity
>;

/**
 * Converts a matched institution keyword plus the surrounding parser
 * contexts into an institution entity.
 *
 * The name is the raw slice of the input between the two context indices.
 * NOTE(review): the slice presumably includes any trailing whitespace the
 * sub-parsers consumed — confirm whether that is intended.
 *
 * @param keyword Matched keyword text; lower-cased to obtain the entity type.
 * @param before Context at the start of the match.
 * @param after Context at the end of the match.
 */
const entityFromMatch = (
  keyword: string,
  before: Context,
  after: Context
): InstitutionEntity =>
  institution(
    {
      name: before.text.substring(before.index, after.index),
      type: keyword.toLowerCase() as InstitutionEntity["value"]["type"],
    },
    before,
    after
  );

/**
 * Parser language recognising educational institutions (university,
 * college, school) and administrative ones (city hall, town hall).
 */
export const Institution = createLanguage<InstitutionEntityLanguage>({
  // A word starting with an upper-case ASCII letter followed by letters.
  Capitalized: () => {
    const word = seq(regex(/[A-Z]/, "capital-letter"), __(many(letter())));
    return map(word, ([first, tail]) => first + tail.join(""));
  },

  // Keyword for an educational institution.
  Educational: () =>
    __(any(fuzzyCase("university"), fuzzyCase("college"), fuzzyCase("school"))),

  // Keyword for an administrative institution.
  Administrative: () => __(any(fuzzyCase("city hall"), fuzzyCase("town hall"))),

  EducationalFull: (lang) =>
    any(
      // "<keyword> [of] [the] <Capitalized>+"
      map(
        seq(
          lang.Educational,
          optional(__(str("of"))),
          optional(__(str("the"))),
          many1(lang.Capitalized)
        ),
        ([keyword], before, after) => entityFromMatch(keyword, before, after)
      ),
      // "<Capitalized>+ <keyword> [of] [the] <Capitalized>*"
      // NOTE(review): unlike AdministrativeFull, the leading words are not
      // bounded with manyTill/peek; presumably a capitalised keyword is
      // consumed as a name word before Educational runs — confirm intended.
      map(
        seq(
          many1(lang.Capitalized),
          lang.Educational,
          optional(__(str("of"))),
          optional(__(str("the"))),
          many(lang.Capitalized)
        ),
        ([, keyword], before, after) => entityFromMatch(keyword, before, after)
      )
    ),

  AdministrativeFull: (lang) =>
    any(
      // "<Capitalized> <Capitalized>* <keyword>"; peek keeps the keyword
      // unconsumed so the following Administrative parser can match it.
      map(
        seq(
          lang.Capitalized,
          manyTill(lang.Capitalized, peek(lang.Administrative)),
          lang.Administrative
        ),
        ([, , keyword], before, after) => entityFromMatch(keyword, before, after)
      ),
      // "<keyword> [of] [the] <Capitalized>+"
      map(
        seq(
          lang.Administrative,
          optional(__(str("of"))),
          optional(__(str("the"))),
          many1(lang.Capitalized)
        ),
        ([keyword], before, after) => entityFromMatch(keyword, before, after)
      )
    ),

  // Entry point: either kind of institution, wrapped in `dot`.
  parser: (lang) => dot(any(lang.EducationalFull, lang.AdministrativeFull)),
});
156 changes: 156 additions & 0 deletions tests/Institution.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import { assertEquals } from "https://deno.land/std@0.120.0/testing/asserts.ts";
import { Duckling } from "../mod.ts";

// A "<keyword> of <Name>" university is extracted together with the
// trailing year quantity.
Deno.test("Educational", () => {
  const sentence = `The term was coined by the Italian University of Bologna, which is considered to be the first university with a traditional founding date of 1088.`;
  const outcome = Duckling().extract({ text: sentence, index: 0 });

  assertEquals(outcome.success, true);
  if (!outcome.success) {
    // Unreachable after the assertion above; kept for type narrowing.
    return;
  }

  assertEquals(outcome.value, [
    {
      kind: "institution",
      text: "University of Bologna",
      start: 35,
      end: 56,
      value: {
        name: "University of Bologna",
        type: "university",
      },
    },
    {
      kind: "quantity",
      text: "1088.",
      start: 141,
      end: 146,
      value: {
        amount: 1088,
      },
    },
  ]);
});

// A "<Name> City Hall" phrase at the start of the text is extracted as a
// city hall, alongside the location and the year.
Deno.test("New York City Hall", () => {
  const sentence = `New York City Hall, the oldest continuous seat of local government in the United States, completed in 1812`;
  const outcome = Duckling().extract({ text: sentence, index: 0 });

  assertEquals(outcome.success, true);
  if (!outcome.success) {
    // Unreachable after the assertion above; kept for type narrowing.
    return;
  }

  assertEquals(outcome.value, [
    {
      kind: "institution",
      text: "New York City Hall",
      start: 0,
      end: 18,
      value: {
        name: "New York City Hall",
        type: "city hall",
      },
    },
    {
      kind: "location",
      text: "United States",
      start: 74,
      end: 87,
      value: {
        place: "United States",
        type: "country",
      },
    },
    {
      kind: "quantity",
      text: "1812",
      start: 102,
      end: 106,
      value: {
        amount: 1812,
      },
    },
  ]);
});

// A "<Name> Town Hall" phrase in the middle of a sentence is extracted as a
// town hall. The expected institution text currently carries the trailing
// space that follows the phrase in the input.
Deno.test("Fordwich Town Hall", () => {
  const sentence = `16th-century Fordwich Town Hall in Kent, England, closely resembling a market hall in its design`;
  const outcome = Duckling().extract({ text: sentence, index: 0 });

  assertEquals(outcome.success, true);
  if (!outcome.success) {
    // Unreachable after the assertion above; kept for type narrowing.
    return;
  }

  assertEquals(outcome.value, [
    {
      kind: "quantity",
      text: "16",
      start: 0,
      end: 2,
      value: {
        amount: 16,
      },
    },
    {
      kind: "time",
      text: "century",
      start: 5,
      end: 12,
      value: {
        era: "CE",
        grain: "century",
        when: "century",
      },
    },
    {
      kind: "institution",
      text: "Fordwich Town Hall ",
      start: 13,
      end: 32,
      value: {
        name: "Fordwich Town Hall ",
        type: "town hall",
      },
    },
  ]);
});

// A "<keyword> of <Name>" town hall is extracted, followed by the country.
Deno.test("Town hall of Recife, Brazil", () => {
  const sentence = `Town hall of Recife, Brazil`;
  const outcome = Duckling().extract({ text: sentence, index: 0 });

  assertEquals(outcome.success, true);
  if (!outcome.success) {
    // Unreachable after the assertion above; kept for type narrowing.
    return;
  }

  assertEquals(outcome.value, [
    {
      kind: "institution",
      text: "Town hall of Recife",
      start: 0,
      end: 19,
      value: {
        name: "Town hall of Recife",
        type: "town hall",
      },
    },
    {
      kind: "location",
      text: "Brazil",
      start: 21,
      end: 27,
      value: {
        place: "Brazil",
        type: "country",
      },
    },
  ]);
});

0 comments on commit f70d6ad

Please sign in to comment.