Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New transliterator for Belarussian #55

Merged
merged 2 commits into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,123 @@ public void SetUp()
this.transliterator = new CyrillicTransliterator();
}

/// <summary>
/// Verifies the Belarusian transliteration: each case pairs a Belarusian
/// Cyrillic string with the Latin text the transliterator must return for it.
/// </summary>
/// <param name="belarussianText">Input text in Belarusian Cyrillic script.</param>
/// <param name="expectedTransliteratedText">Latin text expected for that input.</param>
[Test]
[TestCase("Асіповічы", "Asipovičy")]
[TestCase("Астравец", "Astraviec")]
[TestCase("Ашмяны", "Ašmiany")]
[TestCase("Бабруйск", "Babrujsk")]
[TestCase("Баранавічы", "Baranavičy")]
[TestCase("Барань", "Barań")]
[TestCase("Барысаў", "Barysaŭ")]
[TestCase("Белаазёрск", "Bielaaziorsk")]
[TestCase("Беразіно", "Bierazino")]
[TestCase("Браслаў", "Braslaŭ")]
[TestCase("Брэст", "Brest")]
[TestCase("Буда-Кашалёва", "Buda-Kašaliova")]
[TestCase("Быхаў", "Bychaŭ")]
[TestCase("Бялынічы", "Bialyničy")]
[TestCase("Бяроза", "Biaroza")]
[TestCase("Бярозаўка", "Biarozaŭka")]
[TestCase("Валожын", "Valožyn")]
[TestCase("Ваўкавыск", "Vaŭkavysk")]
[TestCase("Верхнядзвінск", "Vierchniadzvinsk")]
[TestCase("Ветка", "Vietka")]
[TestCase("Вілейка", "Viliejka")]
[TestCase("Віцебск", "Viciebsk")]
[TestCase("Высокае", "Vysokaje")]
[TestCase("Ганцавічы", "Hancavičy")]
[TestCase("Гарадок", "Haradok")]
[TestCase("Глыбокае", "Hlybokaje")]
[TestCase("Гомель", "Homieĺ")]
[TestCase("Горкі", "Horki")]
[TestCase("Гродна", "Hrodna")]
[TestCase("Давыд-Гарадок", "Davyd-Haradok")]
[TestCase("Дзяржынск", "Dziaržynsk")]
[TestCase("Дзятлава", "Dziatlava")]
[TestCase("Добруш", "Dobruš")]
[TestCase("Докшыцы", "Dokšycy")]
[TestCase("Драгічын", "Drahičyn")]
[TestCase("Дуброўна", "Dubroŭna")]
[TestCase("Ельск", "Jeĺsk")]
[TestCase("Жабінка", "Žabinka")]
[TestCase("Жлобін", "Žlobin")]
[TestCase("Жодзіна", "Žodzina")]
[TestCase("Жыткавічы", "Žytkavičy")]
[TestCase("Заслаўе", "Zaslaŭje")]
[TestCase("Іванава", "Ivanava")]
[TestCase("Івацэвічы", "Ivacevičy")]
[TestCase("Іўе", "Iŭje")]
[TestCase("Калінкавічы", "Kalinkavičy")]
[TestCase("Камянец", "Kamianiec")]
[TestCase("Капыль", "Kapyĺ")]
[TestCase("Касцюковічы", "Kasciukovičy")]
[TestCase("Кіраўск", "Kiraŭsk")]
[TestCase("Клецк", "Klieck")]
[TestCase("Клімавічы", "Klimavičy")]
[TestCase("Клічаў", "Kličaŭ")]
[TestCase("Кобрын", "Kobryn")]
[TestCase("Крупкі", "Krupki")]
[TestCase("Крычаў", "Kryčaŭ")]
[TestCase("Лагойск", "Lahojsk")]
[TestCase("Лепель", "Liepieĺ")]
[TestCase("Ліда", "Lida")]
[TestCase("Лунінец", "Luniniec")]
[TestCase("Любань", "Liubań")]
[TestCase("Ляхавічы", "Liachavičy")]
[TestCase("Магілёў", "Mahilioŭ")]
[TestCase("Мазыр", "Mazyr")]
[TestCase("Маладзечна", "Maladziečna")]
[TestCase("Маларыта", "Malaryta")]
[TestCase("Мар'іна Горка", "Marjina Horka")]
[TestCase("Масты", "Masty")]
[TestCase("Міёры", "Mijory")]
[TestCase("Мікашэвічы", "Mikaševičy")]
[TestCase("Мінск", "Minsk")]
[TestCase("Мсціслаў", "Mscislaŭ")]
[TestCase("Мядзел", "Miadziel")]
[TestCase("Навагрудак", "Navahrudak")]
[TestCase("Наваполацк", "Navapolack")]
[TestCase("Нароўля", "Naroŭlia")]
[TestCase("Новалукомль", "Novalukomĺ")]
[TestCase("Нясвіж", "Niasviž")]
[TestCase("Орша", "Orša")]
[TestCase("Паставы", "Pastavy")]
[TestCase("Петрыкаў", "Pietrykaŭ")]
[TestCase("Пінск", "Pinsk")]
[TestCase("Полацк", "Polack")]
[TestCase("Пружаны", "Pružany")]
[TestCase("Рагачоў", "Rahačoŭ")]
[TestCase("Рэчыца", "Rečyca")]
[TestCase("Салігорск", "Salihorsk")]
[TestCase("Светлагорск", "Svietlahorsk")]
[TestCase("Свіслач", "Svislač")]
[TestCase("Скідзель", "Skidzieĺ")]
[TestCase("Слаўгарад", "Slaŭharad")]
[TestCase("Слонім", "Slonim")]
[TestCase("Слуцк", "Sluck")]
[TestCase("Смалявічы", "Smaliavičy")]
[TestCase("Смаргонь", "Smarhoń")]
[TestCase("Старыя Дарогі", "Staryja Darohi")]
[TestCase("Столін", "Stolin")]
[TestCase("Стоўбцы", "Stoŭbcy")]
[TestCase("Сянно", "Sianno")]
[TestCase("Талачын", "Talačyn")]
[TestCase("Узда", "Uzda")]
[TestCase("Фаніпаль", "Fanipaĺ")]
[TestCase("Хойнікі", "Chojniki")]
[TestCase("Чавусы", "Čavusy")]
[TestCase("Чачэрск", "Čačersk")]
[TestCase("Чашнікі", "Čašniki")]
[TestCase("Чэрвень", "Červień")]
[TestCase("Чэрыкаў", "Čerykaŭ")]
[TestCase("Шаркаўшчына", "Šarkaŭščyna")]
[TestCase("Шклоў", "Škloŭ")]
[TestCase("Шчучын", "Ščučyn")]
public void GivvenATextInBelarussianCyrillicScript_WhenTransliteratingIntoLatin_ThenTheCorrectTextIsReturned(
    string belarussianText,
    string expectedTransliteratedText)
{
    // Act
    string actualTransliteratedText = transliterator.Transliterate(belarussianText, Language.Belarussian);

    // Assert
    Assert.That(actualTransliteratedText, Is.EqualTo(expectedTransliteratedText));
}

[Test]
[TestCase("Айтос", "Aytos")]
[TestCase("Асеновград", "Asenovgrad")]
Expand Down
2 changes: 1 addition & 1 deletion TransliterationAPI/Service/Entities/Language.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public sealed class Language : IEquatable<Language>
public static Language Arabic => new Language("ar", "Arabic", nameof(ArabicTransliterator));
public static Language Armenian => new Language("hy", "Armenian", nameof(TranslitterationDotComTransliterator));
public static Language Bashkir => new Language("ba", "Bashkir", nameof(TranslitterationDotComTransliterator));
public static Language Belarussian => new Language("be", "Belarussian", nameof(TranslitterationDotComTransliterator));
public static Language Belarussian => new Language("be", "Belarussian", nameof(CyrillicTransliterator));
public static Language Bengali => new Language("bn", "Bengali", nameof(UshuaiaTransliterator));
public static Language Bulgarian => new Language("bg", "Bulgarian", nameof(CyrillicTransliterator));
public static Language Chinese => new Language("zh", "Chinese", nameof(PinyinTransliterator));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ public class CyrillicTransliterator : ITransliterator
{
Dictionary<string, string> bgnPcgnTransliterationTable;

Dictionary<string, string> belarussianTransliterationTable;
Dictionary<string, string> bulgarianTransliterationTable;
Dictionary<string, string> kazakhTransliterationTable;
Dictionary<string, string> russianTransliterationTable;
Expand Down Expand Up @@ -87,6 +88,59 @@ public CyrillicTransliterator()
{ "я", "ya" }
};

// Belarusian-specific transliteration rules. Keys are used as regular
// expression patterns (hence the \b word-boundary anchors); values are the
// replacement text. Keys missing from this table are filled in from the
// generic BGN/PCGN table afterwards.
// NOTE(review): the context-sensitive "exception" entries are listed before the
// plain single-letter entries, presumably because the rules are applied in
// insertion order - confirm against Transliterate before reordering anything.
belarussianTransliterationTable = new Dictionary<string, string>
{
    // Uppercase exceptions
    { "Ль", "Ĺ" },
    { "Нь", "Ń" },
    { @"\bЕ", "Je" }, // Iotated vowels at the start of a word take "J…"
    { @"\bЁ", "Jo" },
    { @"\bЯ", "Ja" },
    { @"\bЮ", "Ju" },

    // Lowercase exceptions
    // Some values deliberately keep a Cyrillic letter ("і", "л"); presumably a
    // later single-letter rule converts it - verify against the lookup loop.
    { "іё", "іjo" },
    { "ль", "ĺ" },
    { "ля", "лia" },
    { "нь", "ń" },
    { @"е\b", "je" }, // Iotated vowels at the end of a word take "j…"
    { @"ё\b", "jo" },
    { @"ю\b", "ju" },
    { @"я\b", "ja" },

    // Uppercase characters
    { "Г", "H" },
    { "Д", "D" },
    { "Е", "Ie" }, // Also Je
    // Fallback for "Ё" that is not word-initial. It now uses the "I…" form like
    // its siblings (Е, Ю, Я) and lowercase "ё"; the word-initial "Jo" form is
    // already produced by the \bЁ exception above.
    { "Ё", "Io" }, // Also Jo
    { "Ж", "Ž" },
    { "І", "I" },
    { "Й", "J" },
    { "Ў", "Ŭ" },
    { "Х", "Ch" },
    { "Ц", "C" },
    { "Ч", "Č" },
    { "Ш", "Š" },
    { "Ю", "Iu" }, // Also Ju
    { "Я", "Ia" }, // Also Ja

    // Lowercase characters
    { "г", "h" },
    { "д", "d" },
    { "е", "ie" },
    { "ё", "io" },
    { "ж", "ž" },
    { "і", "i" },
    { "й", "j" },
    { "ў", "ŭ" },
    { "х", "ch" },
    { "ц", "c" },
    { "ч", "č" },
    { "ш", "š" },
    { "ю", "iu" },
    { "я", "ia" },
};

bulgarianTransliterationTable = new Dictionary<string, string>
{
{ @"ия\b", "ia" },
Expand Down Expand Up @@ -176,6 +230,11 @@ public CyrillicTransliterator()

foreach (var characterTransliteration in bgnPcgnTransliterationTable)
{
if (!belarussianTransliterationTable.ContainsKey(characterTransliteration.Key))
{
belarussianTransliterationTable.Add(characterTransliteration.Key, characterTransliteration.Value);
}

if (!bulgarianTransliterationTable.ContainsKey(characterTransliteration.Key))
{
bulgarianTransliterationTable.Add(characterTransliteration.Key, characterTransliteration.Value);
Expand All @@ -202,7 +261,11 @@ public string Transliterate(string text, Language language)
{
IDictionary<string, string> transliterationTable;

if (language.Equals(Language.Bulgarian))
if (language.Equals(Language.Belarussian))
{
transliterationTable = belarussianTransliterationTable;
}
else if (language.Equals(Language.Bulgarian))
{
transliterationTable = bulgarianTransliterationTable;
}
Expand Down Expand Up @@ -230,14 +293,27 @@ public string Transliterate(string text, Language language)
transliteratedText = Regex.Replace(transliteratedText, character, transliterationTable[character]);
}

if (language.Equals(Language.Russian))
if (language.Equals(Language.Belarussian))
{
transliteratedText = ApplyBelarussianFixes(transliteratedText);
}
else if (language.Equals(Language.Russian))
{
transliteratedText = ApplyRussianFixes(transliteratedText);
}

return transliteratedText;
}

/// <summary>
/// Post-processes transliterated Belarusian text by rewriting every
/// apostrophe that is immediately followed by <c>i</c> as <c>ji</c>.
/// </summary>
/// <param name="text">The text to post-process.</param>
/// <returns>The text with each <c>'i</c> sequence replaced by <c>ji</c>.</returns>
string ApplyBelarussianFixes(string text)
    => Regex.Replace(text, "'i", "ji");

string ApplyRussianFixes(string text)
{
string fixedText = text;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

using NuciExtensions;
Expand Down Expand Up @@ -29,16 +28,7 @@ private string ApplyFixes(string text, Language language)
{
string fixedText = text;

if (language.Equals(Language.Belarussian))
{
fixedText = Regex.Replace(fixedText, "([a-zA-Z])H", "$1h");
fixedText = Regex.Replace(fixedText, "([a-zA-Z])S", "$1s");
fixedText = Regex.Replace(fixedText, "([a-zA-Z])T", "$1t");
fixedText = Regex.Replace(fixedText, "([a-zA-Z])U", "$1u");
fixedText = Regex.Replace(fixedText, "([a-zA-Z])Z", "$1z");
fixedText = Regex.Replace(fixedText, "([a-zA-Z])Ž", "$1ž");
}
else if (language.Equals(Language.Chuvash))
if (language.Equals(Language.Chuvash))
{
fixedText = fixedText.Replace("i͡", "y");
}
Expand Down Expand Up @@ -90,11 +80,6 @@ private async Task<string> SendTransliterationRequest(string text, string langua
formData["tlang"] = "bak";
formData["scheme"] = "iso-9";
}
else if (languageCode.Equals(Language.Belarussian))
{
formData["tlang"] = "bel";
formData["scheme"] = "national";
}
else if (languageCode.Equals(Language.Chuvash))
{
formData["tlang"] = "chv";
Expand Down