Skip to content

Commit

Permalink
Merge pull request #33 from hmlendea/japanese
Browse files Browse the repository at this point in the history
New transliterator for `Japanese`
  • Loading branch information
hmlendea authored May 26, 2023
2 parents 195ab78 + 274ed91 commit a2c0bf4
Show file tree
Hide file tree
Showing 3 changed files with 237 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
using NUnit.Framework;
using TransliterationAPI.Service.Transliterators;

namespace TransliterationAPI.UnitTests.Service.Transliterators
{
    /// <summary>
    /// Unit tests for <see cref="JapaneseTransliterator"/>, checking that Japanese
    /// toponyms are transliterated into their expected Latin-script form.
    /// </summary>
    public class JapaneseTransliteratorTests
    {
        // Fresh instance per test, created in SetUp.
        private IJapaneseTransliterator japaneseTransliterator;

        /// <summary>
        /// Creates the transliterator under test before each test case runs.
        /// </summary>
        [SetUp]
        public void SetUp() => japaneseTransliterator = new JapaneseTransliterator();

        /// <summary>
        /// Transliterates each Japanese input and compares the result with the
        /// expected Latin-script text.
        /// </summary>
        /// <param name="japaneseText">The input written in Japanese script.</param>
        /// <param name="expectedTransliteratedText">The expected Latin-script output.</param>
        [Test]
        [TestCase("京都", "Kyōto")]
        [TestCase("仙台", "Sendai")]
        [TestCase("北海道", "Hokkaidō")]
        [TestCase("名古屋", "Nagoya")]
        [TestCase("和歌山", "Wakayama")]
        [TestCase("大阪", "Ōsaka")]
        [TestCase("奈良", "Nara")]
        [TestCase("宮崎", "Miyazaki")]
        [TestCase("富士山", "Fujisan")]
        [TestCase("山口", "Yamaguchi")]
        [TestCase("山形", "Yamagata")]
        [TestCase("岐阜", "Gifu")]
        [TestCase("岡山", "Okayama")]
        [TestCase("島根", "Shimane")]
        [TestCase("広島", "Hiroshima")]
        [TestCase("愛媛", "Ehime")]
        [TestCase("新潟", "Niigata")]
        [TestCase("札幌", "Sapporo")]
        [TestCase("東京", "Tōkyō")]
        [TestCase("横浜", "Yokohama")]
        [TestCase("横須賀", "Yokosuka")]
        [TestCase("沖縄", "Okinawa")]
        [TestCase("滋賀", "Shiga")]
        [TestCase("熊本", "Kumamoto")]
        [TestCase("石川", "Ishikawa")]
        [TestCase("福井", "Fukui")]
        [TestCase("福岡", "Fukuoka")]
        [TestCase("福島", "Fukushima")]
        [TestCase("群馬", "Gunma")]
        [TestCase("茨城", "Ibaraki")]
        [TestCase("金沢", "Kanazawa")]
        [TestCase("鎌倉", "Kamakura")]
        [TestCase("長崎", "Nagasaki")]
        [TestCase("長野", "Nagano")]
        [TestCase("青森", "Aomori")]
        [TestCase("静岡", "Shizuoka")]
        [TestCase("高松", "Takamatsu")]
        [TestCase("高知", "Kōchi")]
        [TestCase("鳥取", "Tottori")]
        [TestCase("鹿児島", "Kagoshima")]
        public void GivenATextInJapaneseScript_WhenTransliteratingIntoLatin_ThenTheCorrectTextIsReturned(
            string japaneseText,
            string expectedTransliteratedText)
        {
            // Act
            string actualTransliteratedText = japaneseTransliterator.Transliterate(japaneseText);

            // Assert
            Assert.That(actualTransliteratedText, Is.EqualTo(expectedTransliteratedText));
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace TransliterationAPI.Service.Transliterators
{
/// <summary>
/// Contract for a service that transliterates Japanese text.
/// </summary>
public interface IJapaneseTransliterator
{
/// <summary>
/// Transliterates the given text.
/// </summary>
/// <param name="text">The text to transliterate.</param>
/// <returns>The transliterated text.</returns>
string Transliterate(string text);
}
}
168 changes: 168 additions & 0 deletions TransliterationAPI/Service/Transliterators/JapaneseTransliterator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

using NuciExtensions;

namespace TransliterationAPI.Service.Transliterators
{
    /// <summary>
    /// Transliterates Japanese text into Latin script using a fixed
    /// character-by-character lookup table followed by a handful of
    /// regex-based corrections.
    /// </summary>
    /// <remarks>
    /// Each kanji has exactly one reading in the table, so the output is only
    /// reliable for the toponyms the table and the fix-up rules were written for.
    /// Small kana (ゃ, ゅ, ょ, っ and their katakana forms) are mapped to the same
    /// strings as their full-size counterparts; only the "tsup" => "pp" rule
    /// turns a small tsu into a doubled consonant.
    /// </remarks>
    public class JapaneseTransliterator : IJapaneseTransliterator
    {
        // Built once and never mutated, so it is shared by all instances.
        private static readonly Dictionary<char, string> TransliterationMap = new Dictionary<char, string>()
        {
            // Basic Hiragana
            {'あ', "a"}, {'い', "i"}, {'う', "u"}, {'え', "e"}, {'お', "o"},
            {'か', "ka"}, {'き', "ki"}, {'く', "ku"}, {'け', "ke"}, {'こ', "ko"},
            {'さ', "sa"}, {'し', "shi"}, {'す', "su"}, {'せ', "se"}, {'そ', "so"},
            {'た', "ta"}, {'ち', "chi"}, {'つ', "tsu"}, {'て', "te"}, {'と', "to"},
            {'な', "na"}, {'に', "ni"}, {'ぬ', "nu"}, {'ね', "ne"}, {'の', "no"},
            {'は', "ha"}, {'ひ', "hi"}, {'ふ', "fu"}, {'へ', "he"}, {'ほ', "ho"},
            {'ま', "ma"}, {'み', "mi"}, {'む', "mu"}, {'め', "me"}, {'も', "mo"},
            {'や', "ya"}, {'ゆ', "yu"}, {'よ', "yo"},
            {'ら', "ra"}, {'り', "ri"}, {'る', "ru"}, {'れ', "re"}, {'ろ', "ro"},
            {'わ', "wa"}, {'を', "wo"}, {'ん', "n"},

            // Basic Katakana
            {'ア', "a"}, {'イ', "i"}, {'ウ', "u"}, {'エ', "e"}, {'オ', "o"},
            {'カ', "ka"}, {'キ', "ki"}, {'ク', "ku"}, {'ケ', "ke"}, {'コ', "ko"},
            {'サ', "sa"}, {'シ', "shi"}, {'ス', "su"}, {'セ', "se"}, {'ソ', "so"},
            {'タ', "ta"}, {'チ', "chi"}, {'ツ', "tsu"}, {'テ', "te"}, {'ト', "to"},
            {'ナ', "na"}, {'ニ', "ni"}, {'ヌ', "nu"}, {'ネ', "ne"}, {'ノ', "no"},
            {'ハ', "ha"}, {'ヒ', "hi"}, {'フ', "fu"}, {'ヘ', "he"}, {'ホ', "ho"},
            {'マ', "ma"}, {'ミ', "mi"}, {'ム', "mu"}, {'メ', "me"}, {'モ', "mo"},
            {'ヤ', "ya"}, {'ユ', "yu"}, {'ヨ', "yo"},
            {'ラ', "ra"}, {'リ', "ri"}, {'ル', "ru"}, {'レ', "re"}, {'ロ', "ro"},
            {'ワ', "wa"}, {'ヲ', "wo"}, {'ン', "n"},
            {'ィ', "i"}, {'デ', "de"}, {'プ', "pu"}, {'グ', "gu"},
            {'・', " "}, // katakana middle dot becomes a plain space

            // Special Characters
            {'ゃ', "ya"}, {'ゅ', "yu"}, {'ょ', "yo"}, // small ya, yu, yo
            {'ャ', "ya"}, {'ュ', "yu"}, {'ョ', "yo"}, // small YA, YU, YO
            {'っ', "tsu"}, // small tsu
            {'ッ', "tsu"}, // small TSU

            // Kanji for toponyms
            { '東', "tō" }, // East, as in 東京 (Tōkyō)
            { '京', "kyō" }, // Capital, as in 京都 (Kyōto)
            { '大', "ō" }, // Big, as in 大阪 (Ōsaka)
            { '阪', "saka" }, // Hill, slope, as in 大阪 (Ōsaka)
            { '北', "hoku" }, // North, as in 北海道 (Hokkaidō)
            { '海', "kai" }, // Sea, as in 北海道 (Hokkaidō)
            { '道', "dō" }, // Road, path, as in 北海道 (Hokkaidō)
            { '名', "na" }, // Name, as in 名古屋 (Nagoya)
            { '古', "ko" }, // Old, as in 名古屋 (Nagoya)
            { '屋', "ya" }, // Shop, house, as in 名古屋 (Nagoya)
            { '神', "kami" }, // God; NOTE: with 戸 => "ko", 神戸 romanises as "kamiko" rather than "Kōbe"
            { '戸', "ko" }, // Door, gate; see the note on 神 above
            { '横', "yoko" }, // Horizontal, as in 横浜 (Yokohama)
            { '浜', "hama" }, // Beach, as in 横浜 (Yokohama)
            { '仙', "sen" }, // Hermit, wizard, as in 仙台 (Sendai)
            { '台', "dai" }, // Stand, support, as in 仙台 (Sendai)

            { '井', "i" },
            { '倉', "kura" },
            { '児', "go" },
            { '取', "tori" },
            { '口', "guchi" },
            { '和', "wa" },
            { '城', "baraki" },
            { '士', "ji" },
            { '奈', "na" },
            { '媛', "hime" },
            { '宮', "miya" },
            { '富', "fu" },
            { '山', "yama" },
            { '岐', "gi" },
            { '岡', "oka" },
            { '島', "shima" },
            { '崎', "saki" },
            { '川', "kawa" },
            { '幌', "poro" },
            { '広', "hiro" },
            { '形', "gata" },
            { '愛', "e" },
            { '新', "nii" },
            { '本', "moto" },
            { '札', "satsu" },
            { '松', "matsu" },
            { '根', "ne" },
            { '森', "mori" },
            { '歌', "ka" },
            { '殿', "dono" },
            { '沖', "oki" },
            { '沢', "zawa" },
            { '滋', "shi" },
            { '潟', "gata" },
            { '熊', "kuma" },
            { '知', "chi" },
            { '石', "ishi" },
            { '福', "fuku" },
            { '縄', "nawa" },
            { '群', "gun" },
            { '良', "ra" },
            { '茨', "i" },
            { '賀', "ka" },
            { '都', "to" },
            { '野', "no" },
            { '金', "kana" },
            { '鎌', "kama" },
            { '長', "naga" },
            { '阜', "fu" },
            { '青', "ao" },
            { '静', "shizu" },
            { '須', "su" },
            { '馬', "ma" },
            { '高', "taka" },
            { '鳥', "tori" },
            { '鹿', "ka" },
        };

        /// <summary>
        /// Transliterates the given Japanese text into Latin script.
        /// </summary>
        /// <param name="text">The text to transliterate.</param>
        /// <returns>
        /// The text with every mapped character replaced by its Latin equivalent,
        /// title-cased and passed through the fix-up rules. Characters that are
        /// not in the lookup table are copied through unchanged.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="text"/> is <c>null</c>.
        /// </exception>
        public string Transliterate(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            StringBuilder builder = new StringBuilder(text.Length);

            foreach (char character in text)
            {
                string replacement;

                // Single lookup instead of ContainsKey followed by the indexer.
                if (TransliterationMap.TryGetValue(character, out replacement))
                {
                    builder.Append(replacement);
                }
                else
                {
                    builder.Append(character);
                }
            }

            return ApplyFixes(builder.ToString());
        }

        /// <summary>
        /// Title-cases the raw transliteration and applies ordered regex
        /// corrections for cases the one-reading-per-character table gets wrong.
        /// </summary>
        /// <param name="text">The raw, character-by-character transliteration.</param>
        /// <returns>The corrected text.</returns>
        /// <remarks>
        /// The rules are case-sensitive and order-dependent. The example outputs
        /// in the comments assume that NuciExtensions' <c>ToTitleCase</c>
        /// capitalises the first letter of each word - TODO confirm.
        /// </remarks>
        private static string ApplyFixes(string text)
        {
            string fixedText = text.ToTitleCase();

            // Word-specific corrections.
            fixedText = Regex.Replace(fixedText, "([Tt])orit", "$1ott"); // Toritori => Tottori (鳥取)
            fixedText = Regex.Replace(fixedText, "Takac", "Kōc"); // Takachi => Kōchi (高知)

            // Doubled "o" is written as a long vowel.
            fixedText = Regex.Replace(fixedText, "Oo", "Ō");
            fixedText = Regex.Replace(fixedText, "oo", "ō");

            // Consonant changes that appear when readings are joined.
            fixedText = Regex.Replace(fixedText, "akoy", "agoy"); // Nakoya => Nagoya (名古屋)
            fixedText = Regex.Replace(fixedText, "ika ", "iga "); // Shika => Shiga (滋賀), before a space
            fixedText = Regex.Replace(fixedText, "ika$", "iga"); // Shika => Shiga (滋賀), at the end of the text
            fixedText = Regex.Replace(fixedText, "iyama", "isan"); // Fujiyama => Fujisan (富士山)
            fixedText = Regex.Replace(fixedText, "kuk", "kk"); // Hokukaidō => Hokkaidō (北海道)
            fixedText = Regex.Replace(fixedText, "tsup", "pp"); // Satsuporo => Sapporo (札幌)
            fixedText = Regex.Replace(fixedText, "yasa", "yaza"); // Miyasaki => Miyazaki (宮崎)

            return fixedText;
        }
    }
}

0 comments on commit a2c0bf4

Please sign in to comment.