-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #33 from hmlendea/japanese
New transliterator for `Japanese`
- Loading branch information
Showing 3 changed files with 237 additions and 0 deletions.
There are no files selected for viewing
62 changes: 62 additions & 0 deletions
62
TransliterationAPI.UnitTests/Service/Transliterators/JapaneseTransliteratorTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
using NUnit.Framework; | ||
using TransliterationAPI.Service.Transliterators; | ||
|
||
namespace TransliterationAPI.UnitTests.Service.Transliterators | ||
{ | ||
/// <summary>
/// NUnit tests checking that <see cref="JapaneseTransliterator"/> turns Japanese
/// place names written in kanji into their expected Latin-script spelling.
/// </summary>
public class JapaneseTransliteratorTests
{
    // System under test; assigned in SetUp.
    private IJapaneseTransliterator sut;

    /// <summary>
    /// NUnit set-up hook: assigns a new transliterator instance to the field used by the tests.
    /// </summary>
    [SetUp]
    public void SetUp() => sut = new JapaneseTransliterator();

    /// <summary>
    /// Transliterates <paramref name="japaneseText"/> and compares the result with
    /// <paramref name="expectedTransliteratedText"/> for every listed case.
    /// </summary>
    /// <param name="japaneseText">The input written in Japanese script.</param>
    /// <param name="expectedTransliteratedText">The Latin-script text the transliterator must return.</param>
    [Test]
    [TestCase("京都", "Kyōto")]
    [TestCase("仙台", "Sendai")]
    [TestCase("北海道", "Hokkaidō")]
    [TestCase("名古屋", "Nagoya")]
    [TestCase("和歌山", "Wakayama")]
    [TestCase("大阪", "Ōsaka")]
    [TestCase("奈良", "Nara")]
    [TestCase("宮崎", "Miyazaki")]
    [TestCase("富士山", "Fujisan")]
    [TestCase("山口", "Yamaguchi")]
    [TestCase("山形", "Yamagata")]
    [TestCase("岐阜", "Gifu")]
    [TestCase("岡山", "Okayama")]
    [TestCase("島根", "Shimane")]
    [TestCase("広島", "Hiroshima")]
    [TestCase("愛媛", "Ehime")]
    [TestCase("新潟", "Niigata")]
    [TestCase("札幌", "Sapporo")]
    [TestCase("東京", "Tōkyō")]
    [TestCase("横浜", "Yokohama")]
    [TestCase("横須賀", "Yokosuka")]
    [TestCase("沖縄", "Okinawa")]
    [TestCase("滋賀", "Shiga")]
    [TestCase("熊本", "Kumamoto")]
    [TestCase("石川", "Ishikawa")]
    [TestCase("福井", "Fukui")]
    [TestCase("福岡", "Fukuoka")]
    [TestCase("福島", "Fukushima")]
    [TestCase("群馬", "Gunma")]
    [TestCase("茨城", "Ibaraki")]
    [TestCase("金沢", "Kanazawa")]
    [TestCase("鎌倉", "Kamakura")]
    [TestCase("長崎", "Nagasaki")]
    [TestCase("長野", "Nagano")]
    [TestCase("青森", "Aomori")]
    [TestCase("静岡", "Shizuoka")]
    [TestCase("高松", "Takamatsu")]
    [TestCase("高知", "Kōchi")]
    [TestCase("鳥取", "Tottori")]
    [TestCase("鹿児島", "Kagoshima")]
    public void GivenATextInJapaneseScript_WhenTransliteratingIntoLatin_ThenTheCorrectTextIsReturned(
        string japaneseText,
        string expectedTransliteratedText)
    {
        // Act
        string actualTransliteratedText = sut.Transliterate(japaneseText);

        // Assert
        Assert.That(actualTransliteratedText, Is.EqualTo(expectedTransliteratedText));
    }
}
} |
7 changes: 7 additions & 0 deletions
7
TransliterationAPI/Service/Transliterators/IJapaneseTransliterator.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
namespace TransliterationAPI.Service.Transliterators | ||
{ | ||
/// <summary>
/// Contract for a service that transliterates text written in Japanese script.
/// </summary>
public interface IJapaneseTransliterator
{
    /// <summary>
    /// Transliterates the supplied text.
    /// </summary>
    /// <param name="text">The text to transliterate.</param>
    /// <returns>The transliterated text.</returns>
    string Transliterate(string text);
}
} |
168 changes: 168 additions & 0 deletions
168
TransliterationAPI/Service/Transliterators/JapaneseTransliterator.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
using System.Collections.Generic; | ||
using System.Text.RegularExpressions; | ||
|
||
using NuciExtensions; | ||
|
||
namespace TransliterationAPI.Service.Transliterators | ||
{ | ||
/// <summary>
/// Transliterates Japanese text into Latin script one character at a time, using a
/// fixed lookup table followed by a short list of textual corrections.
/// </summary>
/// <remarks>
/// Every kanji in the table has exactly one reading, chosen for the place names the
/// table was built from; the reading is not context-sensitive. Characters that are
/// missing from the table are copied to the output unchanged.
/// </remarks>
public class JapaneseTransliterator : IJapaneseTransliterator
{
    // Shared, read-only lookup table. It is never modified after initialisation, so
    // it is built once for the type instead of once per instance.
    private static readonly Dictionary<char, string> TransliterationMap = new Dictionary<char, string>()
    {
        // Basic Hiragana
        {'あ', "a"}, {'い', "i"}, {'う', "u"}, {'え', "e"}, {'お', "o"},
        {'か', "ka"}, {'き', "ki"}, {'く', "ku"}, {'け', "ke"}, {'こ', "ko"},
        {'さ', "sa"}, {'し', "shi"}, {'す', "su"}, {'せ', "se"}, {'そ', "so"},
        {'た', "ta"}, {'ち', "chi"}, {'つ', "tsu"}, {'て', "te"}, {'と', "to"},
        {'な', "na"}, {'に', "ni"}, {'ぬ', "nu"}, {'ね', "ne"}, {'の', "no"},
        {'は', "ha"}, {'ひ', "hi"}, {'ふ', "fu"}, {'へ', "he"}, {'ほ', "ho"},
        {'ま', "ma"}, {'み', "mi"}, {'む', "mu"}, {'め', "me"}, {'も', "mo"},
        {'や', "ya"}, {'ゆ', "yu"}, {'よ', "yo"},
        {'ら', "ra"}, {'り', "ri"}, {'る', "ru"}, {'れ', "re"}, {'ろ', "ro"},
        {'わ', "wa"}, {'を', "wo"}, {'ん', "n"},

        // Basic Katakana
        {'ア', "a"}, {'イ', "i"}, {'ウ', "u"}, {'エ', "e"}, {'オ', "o"},
        {'カ', "ka"}, {'キ', "ki"}, {'ク', "ku"}, {'ケ', "ke"}, {'コ', "ko"},
        {'サ', "sa"}, {'シ', "shi"}, {'ス', "su"}, {'セ', "se"}, {'ソ', "so"},
        {'タ', "ta"}, {'チ', "chi"}, {'ツ', "tsu"}, {'テ', "te"}, {'ト', "to"},
        {'ナ', "na"}, {'ニ', "ni"}, {'ヌ', "nu"}, {'ネ', "ne"}, {'ノ', "no"},
        {'ハ', "ha"}, {'ヒ', "hi"}, {'フ', "fu"}, {'ヘ', "he"}, {'ホ', "ho"},
        {'マ', "ma"}, {'ミ', "mi"}, {'ム', "mu"}, {'メ', "me"}, {'モ', "mo"},
        {'ヤ', "ya"}, {'ユ', "yu"}, {'ヨ', "yo"},
        {'ラ', "ra"}, {'リ', "ri"}, {'ル', "ru"}, {'レ', "re"}, {'ロ', "ro"},
        {'ワ', "wa"}, {'ヲ', "wo"}, {'ン', "n"},
        {'ィ', "i"}, {'デ', "de"}, {'プ', "pu"}, {'グ', "gu"},
        {'・', " "},

        // Special Characters
        // Small kana are mapped to the same romaji as their full-size forms; only
        // the combinations covered by ApplyFixes are corrected afterwards.
        {'ゃ', "ya"}, {'ゅ', "yu"}, {'ょ', "yo"}, // small ya, yu, yo
        {'ャ', "ya"}, {'ュ', "yu"}, {'ョ', "yo"}, // small YA, YU, YO
        {'っ', "tsu"}, // small tsu
        {'ッ', "tsu"}, // small TSU

        // Kanji for toponyms
        { '東', "tō" }, // East, as in 東京 (Tōkyō)
        { '京', "kyō" }, // Capital, as in 京都 (Kyōto)
        { '大', "ō" }, // Big, as in 大阪 (Ōsaka)
        { '阪', "saka" }, // Hill, slope, as in 大阪 (Ōsaka)
        { '北', "hoku" }, // North, as in 北海道 (Hokkaidō)
        { '海', "kai" }, // Sea, as in 北海道 (Hokkaidō)
        { '道', "dō" }, // Road, path, as in 北海道 (Hokkaidō)
        { '名', "na" }, // Name, as in 名古屋 (Nagoya)
        { '古', "ko" }, // Old, as in 名古屋 (Nagoya)
        { '屋', "ya" }, // Shop, house, as in 名古屋 (Nagoya)
        // The two readings below were previously "kami" and "ko", which produced
        // "Kamiko" for 神戸 instead of the documented "Kōbe".
        { '神', "kō" }, // God, as in 神戸 (Kōbe)
        { '戸', "be" }, // Door, gate, as in 神戸 (Kōbe)
        { '横', "yoko" }, // Horizontal, as in 横浜 (Yokohama)
        { '浜', "hama" }, // Beach, as in 横浜 (Yokohama)
        { '仙', "sen" }, // Hermit, wizard, as in 仙台 (Sendai)
        { '台', "dai" }, // Stand, support, as in 仙台 (Sendai)

        { '井', "i" },
        { '倉', "kura" },
        { '児', "go" },
        { '取', "tori" },
        { '口', "guchi" },
        { '和', "wa" },
        { '城', "baraki" },
        { '士', "ji" },
        { '奈', "na" },
        { '媛', "hime" },
        { '宮', "miya" },
        { '富', "fu" },
        { '山', "yama" },
        { '岐', "gi" },
        { '岡', "oka" },
        { '島', "shima" },
        { '崎', "saki" },
        { '川', "kawa" },
        { '幌', "poro" },
        { '広', "hiro" },
        { '形', "gata" },
        { '愛', "e" },
        { '新', "nii" },
        { '本', "moto" },
        { '札', "satsu" },
        { '松', "matsu" },
        { '根', "ne" },
        { '森', "mori" },
        { '歌', "ka" },
        { '殿', "dono" },
        { '沖', "oki" },
        { '沢', "zawa" },
        { '滋', "shi" },
        { '潟', "gata" },
        { '熊', "kuma" },
        { '知', "chi" },
        { '石', "ishi" },
        { '福', "fuku" },
        { '縄', "nawa" },
        { '群', "gun" },
        { '良', "ra" },
        { '茨', "i" },
        { '賀', "ka" },
        { '都', "to" },
        { '野', "no" },
        { '金', "kana" },
        { '鎌', "kama" },
        { '長', "naga" },
        { '阜', "fu" },
        { '青', "ao" },
        { '静', "shizu" },
        { '須', "su" },
        { '馬', "ma" },
        { '高', "taka" },
        { '鳥', "tori" },
        { '鹿', "ka" },
    };

    /// <summary>
    /// Transliterates <paramref name="text"/> into Latin script.
    /// </summary>
    /// <param name="text">The text to transliterate. Must not be <c>null</c>.</param>
    /// <returns>
    /// The transliterated text after title-casing and corrections; an empty string
    /// when <paramref name="text"/> is empty.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// Thrown when <paramref name="text"/> is <c>null</c>.
    /// </exception>
    public string Transliterate(string text)
    {
        if (text == null)
        {
            throw new System.ArgumentNullException(nameof(text));
        }

        // Nothing to map; also avoids handing an empty string to the casing helper.
        if (text.Length == 0)
        {
            return string.Empty;
        }

        // Most entries expand one character into two or more Latin letters.
        System.Text.StringBuilder builder = new System.Text.StringBuilder(text.Length * 2);
        string romanised;

        foreach (char character in text)
        {
            if (TransliterationMap.TryGetValue(character, out romanised))
            {
                builder.Append(romanised);
            }
            else
            {
                // Unknown characters are passed through untouched.
                builder.Append(character);
            }
        }

        return ApplyFixes(builder.ToString());
    }

    /// <summary>
    /// Title-cases the raw per-character output and then applies a fixed, ordered
    /// list of replacements that repair known wrong results of the per-character
    /// mapping.
    /// </summary>
    /// <param name="text">The raw concatenation of the mapped characters.</param>
    /// <returns>The corrected text.</returns>
    private static string ApplyFixes(string text)
    {
        // ToTitleCase is not a BCL string method; presumably the NuciExtensions
        // extension. The capitalised patterns below rely on it having run first.
        string fixedText = text.ToTitleCase();

        // Whole-word corrections, e.g. "Toritori" -> "Tottori", "Takachi" -> "Kōchi".
        fixedText = Regex.Replace(fixedText, "([Tt])orit", "$1ott");
        fixedText = Regex.Replace(fixedText, "Takac", "Kōc");

        // Doubled "o" is written as a long vowel.
        fixedText = Regex.Replace(fixedText, "Oo", "Ō");
        fixedText = Regex.Replace(fixedText, "oo", "ō");

        // Sound changes inside compounds, e.g. "Nakoya" -> "Nagoya",
        // "Hokukaidō" -> "Hokkaidō", "Satsuporo" -> "Sapporo".
        fixedText = Regex.Replace(fixedText, "akoy", "agoy");
        fixedText = Regex.Replace(fixedText, "ika ", "iga ");
        fixedText = Regex.Replace(fixedText, "ika$", "iga");
        fixedText = Regex.Replace(fixedText, "iyama", "isan");
        fixedText = Regex.Replace(fixedText, "kuk", "kk");
        fixedText = Regex.Replace(fixedText, "tsup", "pp");
        fixedText = Regex.Replace(fixedText, "yasa", "yaza");

        return fixedText;
    }
}
} |