Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DE number parser #18

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 26 additions & 11 deletions Duckling/Numeral/DE/Corpus.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ allExamples = concat
, examples (NumeralValue 1)
[ "1"
, "eins"
, "Eine"
, "einen"
]
, examples (NumeralValue 2)
[ "2"
, "Zwei"
, "Zwo"
]
, examples (NumeralValue 3)
[ "3"
Expand All @@ -46,11 +53,12 @@ allExamples = concat
]
, examples (NumeralValue 30)
[ "30"
, "dreißig"
, "dreissig"
]
, examples (NumeralValue 33)
[ "33"
, "drei Und dreissig"
, "dreiunddreißig"
, "dreiunddreissig"
, "0033"
]
Expand All @@ -76,11 +84,11 @@ allExamples = concat
]
, examples (NumeralValue 200)
[ "200"
, "zwei hundert"
, "zweihundert"
]
, examples (NumeralValue 102)
[ "102"
, "Hundert zwei"
, "Hundertzwei"
]
, examples (NumeralValue 1.1)
[ "1,1"
Expand All @@ -97,12 +105,15 @@ allExamples = concat
, "100000"
, "100K"
, "100k"
, "einhunderttausend"
, "hunderttausend"
]
, examples (NumeralValue 3000000)
[ "3M"
, "3000K"
, "3000000"
, "3.000.000"
, "drei Millionen"
]
, examples (NumeralValue 1200000)
[ "1.200.000"
Expand All @@ -120,30 +131,34 @@ allExamples = concat
, "-1200K"
, "-,0012G"
]
, examples (NumeralValue 1852)
[ "eintausendachthundertzweiundfünfzig"
, "tausendachthundertzweiundfünfzig"
, "achtzehnhundertzweiundfünfzig"]
, examples (NumeralValue 5000)
[ "5 tausend"
, "fünf tausend"
, "fünftausend"
]
, examples (NumeralValue 200000)
[ "zwei hundert tausend"
[ "zweihunderttausend"
]
, examples (NumeralValue 721012)
[ "sieben hundert einundzwanzig tausend zwölf"
[ "siebenhunderteinundzwanzigtausendzwölf"
, "siebenhunderteinundzwanzigtausendundzwölf"
]
, examples (NumeralValue 31256721)
[ "ein und dreissig millionen zwei hundert sechs und fünfzig tausend sieben hundert ein und zwanzig"
[ "einunddreissig millionen zweihundertsechsundfünfzigtausendsiebenhunderteinundzwanzig"
, "einunddreißig Millionen zweihundertsechsundfünfzigtausendundsiebenhunderteinundzwanzig"
]
, examples (NumeralValue 1416.15)
[ "1416,15"
]
, examples (NumeralValue 1416.15)
[ "1.416,15"
, "tausendvierhundertsechzehn Komma fünfzehn"
]
, examples (NumeralValue 1000000.0)
[ "1.000.000,00",
"eine million"
]
, examples (NumeralValue 2771090092000000.0)
[ "zwei billiarden sieben hundert ein und siebzig billionen neunzig milliarden zwei und neunzig millionen"
[ "zwei billiarden siebenhunderteinundsiebzig billionen neunzig milliarden zweiundneunzig millionen"
]
]
195 changes: 195 additions & 0 deletions Duckling/Numeral/DE/NumParser.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE DeriveFunctor #-}

module Duckling.Numeral.DE.NumParser (parseNumeral) where

import Control.Applicative
import Data.Char
import Data.Foldable
import Data.List
import Data.String
import Prelude

newtype Parser a = Parser {runParser :: String -> Maybe (a, String)}
deriving (Functor)

char :: Char -> Parser Char
char c = Parser p
where
p [] = Nothing
p (x : xs)
| x == c = Just (x, xs)
| otherwise = Nothing

instance Applicative Parser where
pure a = Parser (\s -> Just (a, s))
(Parser fp) <*> xp = Parser $ \s ->
case fp s of
Nothing -> Nothing
Just (f, s') -> runParser (f <$> xp) s'

instance Alternative Parser where
empty = Parser (const Nothing)
Parser p1 <|> Parser p2 = Parser $ liftA2 (<|>) p1 p2

type NumParser = Parser Integer

(.+.) :: NumParser -> NumParser -> NumParser
p .+. p' = (+) <$> p <*> p'

(.*.) :: NumParser -> NumParser -> NumParser
p .*. p' = (*) <$> p <*> p'

infixl 6 .+.

infixl 7 .*.

opt :: NumParser -> NumParser
opt p = p <|> Parser p'
where
p' s = Just (0, s)

data NumItem = NumItem
{ base :: NumParser
, plus10 :: NumParser
, times10 :: [NumParser]
}

defaultNumItem :: Integer -> String -> NumItem
defaultNumItem value form =
NumItem
{ base = p
, plus10 = p .+. ten
, times10 = [p .*. ty]
}
where
p = assign value form

type Assignment = Integer -> String -> NumParser

assign :: Assignment
assign value = foldr (\c p -> (1 <$ char c) .*. p) (pure value)

ten :: NumParser
ten = assign 10 "zehn"

ty :: NumParser
ty = assign 10 "zig"

hundred :: NumParser
hundred = assign 100 "hundert"

thousand :: NumParser
thousand = assign 1000 "tausend"

und :: NumParser
und = assign 0 "und"

one :: NumItem
one =
(defaultNumItem 1 "ein")
{ plus10 = assign 11 "elf"
, times10 = [ten]
}

two :: NumItem
two =
(defaultNumItem 2 "zwei")
{ plus10 = assign 12 "zwölf"
, times10 = [assign 20 "zwanzig"]
}

two_alternative :: NumItem
two_alternative = defaultNumItem 2 "zwo"


three :: NumItem
three =
(defaultNumItem 3 "drei")
{ times10 =
[ assign 30 "dreißig"
, assign 30 "dreissig"
]
}

four :: NumItem
four = defaultNumItem 4 "vier"

five :: NumItem
five = defaultNumItem 5 "fünf"

six :: NumItem
six =
(defaultNumItem 6 "sechs")
{ plus10 = assign 16 "sechzehn"
, times10 = [assign 60 "sechzig"]
}

seven :: NumItem
seven =
(defaultNumItem 7 "sieben")
{ plus10 = assign 17 "siebzehn"
, times10 = [assign 70 "siebzig"]
}

eight :: NumItem
eight = defaultNumItem 8 "acht"

nine :: NumItem
nine = defaultNumItem 9 "neun"

digitLexicon :: [NumItem]
digitLexicon = [one, two_alternative, two, three, four, five, six, seven, eight, nine]

from1to9 :: NumParser
from1to9 = foldr ((<|>) . base) empty digitLexicon

tensFrom20 :: NumParser
tensFrom20 = asum (concatMap times10 (tail digitLexicon))

from1to99 :: NumParser
from1to99 =
opt (from1to9 .+. und) .+. tensFrom20
<|> foldr ((<|>) . plus10) empty digitLexicon
<|> ten
<|> from1to9

from1to999 :: NumParser
from1to999 = opt (from1to9 .*. hundred .+. opt und) .+. opt from1to99

from1to999999 :: NumParser
from1to999999 = opt (from1to999 .*. thousand .+. opt und) .+. opt from1to999

from1to999999' :: NumParser
from1to999999' = Parser p
where
p s
| isPrefixOf "hundert" s || isPrefixOf "tausend" s =
runParser from1to999999 ("ein" ++ s)
| otherwise =
runParser from1to999999 s

fromYear1100to1999 :: NumParser
fromYear1100to1999 =
asum ((\n -> plus10 n .*. hundred) <$> digitLexicon)
.+. opt (opt und .+. from1to99)

allNumerals :: NumParser
allNumerals =
fromYear1100to1999
<|> from1to999999'

removeInflection :: (Integer, String) -> Maybe Integer
removeInflection (n, suffix)
| n `mod` 10 == 1 && suffix `elem` inflection = Just n
where
inflection = ["s", "e", "em", "en", "er", "es"]
removeInflection (n, "") = Just n
removeInflection _ = Nothing

parseNumeral :: String -> Maybe Integer
parseNumeral s = removeInflection =<< runParser allNumerals s
Loading
Loading