-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(institutions): add support for some institution types
- Loading branch information
1 parent
d6b0323
commit f70d6ad
Showing
3 changed files
with
291 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
import { | ||
any, | ||
Context, | ||
createLanguage, | ||
map, | ||
Parser, | ||
seq, | ||
str, | ||
letter, | ||
many, | ||
regex, | ||
optional, | ||
many1, | ||
manyTill, | ||
peek, | ||
} from "https://deno.land/x/combine@v0.0.8/mod.ts"; | ||
import { EntityLanguage, __, dot } from "./common.ts"; | ||
import { ent, Entity } from "./Entity.ts"; | ||
import { fuzzyCase } from "./parsers.ts"; | ||
|
||
/**
 * Entity produced when an institution is recognised in the input.
 *
 * The entity kind is the literal `"institution"`; the payload carries the
 * captured name together with one of the supported institution categories.
 */
export type InstitutionEntity = Entity<
  "institution",
  {
    /** Name of the institution as captured from the input text. */
    name: string;
    /** Category of the institution (educational or administrative). */
    type:
      | "university"
      | "college"
      | "school"
      | "city hall"
      | "town hall";
  }
>;
|
||
/**
 * Creates an {@link InstitutionEntity} by delegating to `ent` with the
 * `"institution"` kind.
 *
 * @param value - Payload of the entity (name and institution type).
 * @param before - Parse context forwarded to `ent` as the "before" position.
 * @param after - Parse context forwarded to `ent` as the "after" position.
 * @returns Whatever `ent` builds for the given payload and contexts.
 */
export const institution = (
  value: InstitutionEntity["value"],
  before: Context,
  after: Context
): InstitutionEntity => ent(value, "institution", before, after);
|
||
/**
 * Shape of the institution grammar: the named rules available to
 * `createLanguage`, plus the entity type the language produces.
 */
type InstitutionEntityLanguage = EntityLanguage<
  {
    // Building-block rules that yield plain strings.
    Capitalized: Parser<string>;
    Educational: Parser<string>;
    Administrative: Parser<string>;

    // Composite rules that yield complete institution entities.
    EducationalFull: Parser<InstitutionEntity>;
    AdministrativeFull: Parser<InstitutionEntity>;
  },
  InstitutionEntity
>;
|
||
/**
 * Grammar for institution mentions such as "University of Bologna" or
 * "New York City Hall".
 *
 * Each composite rule reports the raw input slice between the parse start and
 * end contexts as the entity name, and the lower-cased keyword as its type.
 *
 * NOTE(review): `__`, `dot` and `fuzzyCase` are defined in other modules;
 * presumably `__` tolerates surrounding whitespace and `fuzzyCase` matches
 * case-insensitively — confirm against their definitions.
 * NOTE(review): because the name is taken as a raw slice of the input, it may
 * include trailing whitespace consumed by the last sub-parser — confirm
 * whether that is intended.
 */
export const Institution = createLanguage<InstitutionEntityLanguage>({
  // A single upper-case letter followed by any number of letters, glued back
  // together into one string.
  Capitalized: () => {
    const leadingCapital = regex(/[A-Z]/, "capital-letter");
    const remainingLetters = __(many(letter()));
    return map(
      seq(leadingCapital, remainingLetters),
      ([head, tail]) => head + tail.join("")
    );
  },

  // Keyword naming an educational institution.
  Educational: () => {
    const keyword = any(
      fuzzyCase("university"),
      fuzzyCase("college"),
      fuzzyCase("school")
    );
    return __(keyword);
  },

  // Keyword naming an administrative institution.
  Administrative: () => {
    const keyword = any(fuzzyCase("city hall"), fuzzyCase("town hall"));
    return __(keyword);
  },

  EducationalFull: (lang) => {
    // Form 1: <keyword> [of] [the] <Capitalized>+
    const keywordFirst = map(
      seq(
        lang.Educational,
        optional(__(str("of"))),
        optional(__(str("the"))),
        many1(lang.Capitalized)
      ),
      (parts, start, end) => {
        const [keyword] = parts;
        return institution(
          {
            name: start.text.substring(start.index, end.index),
            type: keyword.toLowerCase() as InstitutionEntity["value"]["type"],
          },
          start,
          end
        );
      }
    );

    // Form 2: <Capitalized>+ <keyword> [of] [the] <Capitalized>*
    const nameFirst = map(
      seq(
        many1(lang.Capitalized),
        lang.Educational,
        optional(__(str("of"))),
        optional(__(str("the"))),
        many(lang.Capitalized)
      ),
      (parts, start, end) => {
        const [, keyword] = parts;
        return institution(
          {
            name: start.text.substring(start.index, end.index),
            type: keyword.toLowerCase() as InstitutionEntity["value"]["type"],
          },
          start,
          end
        );
      }
    );

    // Alternatives are tried in this order.
    return any(keywordFirst, nameFirst);
  },

  AdministrativeFull: (lang) => {
    // Form 1: <Capitalized> <Capitalized>* <keyword>, where the repetition
    // stops once the administrative keyword is seen ahead.
    const nameFirst = map(
      seq(
        lang.Capitalized,
        manyTill(lang.Capitalized, peek(lang.Administrative)),
        lang.Administrative
      ),
      (parts, start, end) => {
        const [, , keyword] = parts;
        return institution(
          {
            name: start.text.substring(start.index, end.index),
            type: keyword.toLowerCase() as InstitutionEntity["value"]["type"],
          },
          start,
          end
        );
      }
    );

    // Form 2: <keyword> [of] [the] <Capitalized>+
    const keywordFirst = map(
      seq(
        lang.Administrative,
        optional(__(str("of"))),
        optional(__(str("the"))),
        many1(lang.Capitalized)
      ),
      (parts, start, end) => {
        const [keyword] = parts;
        return institution(
          {
            name: start.text.substring(start.index, end.index),
            type: keyword.toLowerCase() as InstitutionEntity["value"]["type"],
          },
          start,
          end
        );
      }
    );

    // Alternatives are tried in this order.
    return any(nameFirst, keywordFirst);
  },

  // Entry rule: educational forms are attempted before administrative ones.
  parser: (lang) => {
    const anyInstitution = any(lang.EducationalFull, lang.AdministrativeFull);
    return dot(anyInstitution);
  },
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
import { assertEquals } from "https://deno.land/std@0.120.0/testing/asserts.ts"; | ||
import { Duckling } from "../mod.ts"; | ||
|
||
// Expects "University of Bologna" to be extracted as a university, followed
// by the trailing "1088." quantity.
Deno.test("Educational", () => {
  const input = {
    text: `The term was coined by the Italian University of Bologna, which is considered to be the first university with a traditional founding date of 1088.`,
    index: 0,
  };

  const expected = [
    {
      kind: "institution",
      start: 35,
      end: 56,
      text: "University of Bologna",
      value: {
        name: "University of Bologna",
        type: "university",
      },
    },
    {
      kind: "quantity",
      start: 141,
      end: 146,
      text: "1088.",
      value: {
        amount: 1088,
      },
    },
  ];

  const result = Duckling().extract(input);

  assertEquals(result.success, true);

  // The assertion above already fails on an unsuccessful parse; this guard
  // only narrows the result type so that `value` is accessible.
  if (!result.success) {
    return;
  }

  assertEquals(result.value, expected);
});
|
||
// Expects a leading "New York City Hall" institution, then the
// "United States" location and the "1812" quantity.
Deno.test("New York City Hall", () => {
  const input = {
    text: `New York City Hall, the oldest continuous seat of local government in the United States, completed in 1812`,
    index: 0,
  };

  const expected = [
    {
      kind: "institution",
      start: 0,
      end: 18,
      text: "New York City Hall",
      value: {
        name: "New York City Hall",
        type: "city hall",
      },
    },
    {
      kind: "location",
      start: 74,
      end: 87,
      text: "United States",
      value: {
        place: "United States",
        type: "country",
      },
    },
    {
      kind: "quantity",
      start: 102,
      end: 106,
      text: "1812",
      value: {
        amount: 1812,
      },
    },
  ];

  const result = Duckling().extract(input);

  assertEquals(result.success, true);

  // The assertion above already fails on an unsuccessful parse; this guard
  // only narrows the result type so that `value` is accessible.
  if (!result.success) {
    return;
  }

  assertEquals(result.value, expected);
});
|
||
// Expects the "16" quantity, the "century" time grain, and the town hall.
// Note that the expected institution text and name currently include the
// trailing space after "Hall" (start 13, end 32).
Deno.test("Fordwich Town Hall", () => {
  const input = {
    text: `16th-century Fordwich Town Hall in Kent, England, closely resembling a market hall in its design`,
    index: 0,
  };

  const expected = [
    {
      kind: "quantity",
      start: 0,
      end: 2,
      text: "16",
      value: {
        amount: 16,
      },
    },
    {
      kind: "time",
      start: 5,
      end: 12,
      text: "century",
      value: {
        era: "CE",
        grain: "century",
        when: "century",
      },
    },
    {
      kind: "institution",
      start: 13,
      end: 32,
      text: "Fordwich Town Hall ",
      value: {
        name: "Fordwich Town Hall ",
        type: "town hall",
      },
    },
  ];

  const result = Duckling().extract(input);

  assertEquals(result.success, true);

  // The assertion above already fails on an unsuccessful parse; this guard
  // only narrows the result type so that `value` is accessible.
  if (!result.success) {
    return;
  }

  assertEquals(result.value, expected);
});
|
||
// Expects the keyword-first form ("Town hall of Recife") to be extracted as a
// town hall, followed by the "Brazil" location.
Deno.test("Town hall of Recife, Brazil", () => {
  const input = {
    text: `Town hall of Recife, Brazil`,
    index: 0,
  };

  const expected = [
    {
      kind: "institution",
      start: 0,
      end: 19,
      text: "Town hall of Recife",
      value: {
        name: "Town hall of Recife",
        type: "town hall",
      },
    },
    {
      kind: "location",
      start: 21,
      end: 27,
      text: "Brazil",
      value: {
        place: "Brazil",
        type: "country",
      },
    },
  ];

  const result = Duckling().extract(input);

  assertEquals(result.success, true);

  // The assertion above already fails on an unsuccessful parse; this guard
  // only narrows the result type so that `value` is accessible.
  if (!result.success) {
    return;
  }

  assertEquals(result.value, expected);
});