diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96b9ed3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.idea +build +vendor +composer.lock + diff --git a/README.md b/README.md index de3ff97..ceeb032 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,24 @@ Currently it assume conversion of Cyrillic, Georgian, Armenian and Greek scripts into Roman (Latin). Cyrillic has sub-groups for Russian, Ukrainian, Belarusian, Bulgarian and Kazakh languages with custom rules. +## Installation + +This package can be installed through Composer: + +```sh +$ composer require ashtokalo/php-translit +``` + +Make sure to use Composer's autoload: + +```php +require __DIR__.'/../vendor/autoload.php'; +``` + +## Usage + Main class Translit could be used as singleton or object itself. Built-in -translitaration tables could be refered by following language codes: +transliteration tables could be referred by following language codes: * ru - Russian cyrillic chars, * uk - Ukrainian cyrillic chars, * mk - Macedonian cyrillic chars, @@ -23,12 +39,25 @@ translitaration tables could be refered by following language codes: Language codes could be combined by comma to handle more cases, e.g. - echo Translit::object()->convert('Беларусь', 'be') . ' vs ' . - Translit::object()->convert('Беларусь', 'be,latin'); +```php +echo \ashtokalo\translit\Translit::object()->convert('Беларусь', 'be') . ' vs ' . + \ashtokalo\translit\Translit::object()->convert('Беларусь', 'be,latin'); +``` produce output: - Bielaruś vs Bielarus +``` +Bielaruś vs Bielarus +``` + +You can also add alternative transliteration tables through the `classes` property, +which maps language codes (keys) to class names (values): + +```php +$translit = new \ashtokalo\translit\Translit; +$translit->classes['tlh'] = \startrek\TranslitKlingon::class; +echo $translit->convert(' ', 'tlh'); +``` By default wrong language codes ignored. But this behavior could be changed by using strict mode. 
For all language codes that prepended with exclamation mark @@ -41,11 +70,42 @@ For example: // but next code fires Exception, because strict mode have used echo Translit::object()->convert('Привет', '!ru_ru') . PHP_EOL; -There are many sources of these tables which could be used - ISO and BGN/PCGN -standards, a lot of native standards and informal standard used by people. -For this library I assume next order of sources - native, ISO, BGN/PCGN, -informal. +## Tests + +The package contains integration tests. You can run them using PHPUnit. + +```sh +$ vendor/bin/phpunit +``` + +## Credits All transliteration tables were created from information found at Wikipedia. The links to these pages posted into header of each file. Please update me if any mistakes found or you have new transliteration tables to add here. + +There are many sources of these tables which could be used - ISO and BGN/PCGN +standards, a lot of native standards and informal standards used by people. +For this library I assume the following order of sources - native, ISO, BGN/PCGN, informal. + +## Contributing + +Contributions are very welcome. + +Only contributions via Pull Requests on [GitHub](https://github.com/ashtokalo/php-translit) are accepted: + +- **[PSR-2 Coding Standard](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-2-coding-style-guide.md)** + +- **Document any change in behaviour** - Make sure the `README.md` and any other relevant +documentation are kept up-to-date. + +- **Create feature branches** - Don't ask me to pull from your master branch. + +- **One pull request per feature** - If you want to do more than one thing, send multiple pull requests. + +- **Send coherent history** - Make sure each individual commit in your pull request is meaningful. +If you had to make multiple intermediate commits while developing, please [squash them](http://www.git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Changing-Multiple-Commit-Messages) before submitting. 
+ +## License + +The MIT License (MIT). Refer to the [License](LICENSE) for more information. diff --git a/composer.json b/composer.json index dae9410..e362d90 100644 --- a/composer.json +++ b/composer.json @@ -4,6 +4,7 @@ "keywords": ["transliteration", "romanization", "latinization", "translit"], "description": "PHP library to convert text from one script to another.", "license": "MIT", + "version": "0.2.0", "author": { "name": "Alexey Shtokalo", @@ -13,6 +14,15 @@ }, "require": { - "php": ">=5.2.0" + "php": ">=7.0" + }, + "require-dev": + { + "phpunit/phpunit": "~7.0" + }, + "autoload": { + "psr-4": { + "ashtokalo\\translit\\": "src/" + } } } diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..486e00a --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,28 @@ + + + + + ./tests + + + + + ./src + + + + + + + + + diff --git a/Translit.php b/src/Translit.php similarity index 74% rename from Translit.php rename to src/Translit.php index a5f1cd8..1b99c2f 100644 --- a/Translit.php +++ b/src/Translit.php @@ -1,5 +1,9 @@ TranslitBe::class, + 'ka' => TranslitKa::class, + 'uk' => TranslitUk::class, + 'ascii' => TranslitAscii::class, + ]; /** * Converts given text to the Roman (Latin) script. @@ -72,7 +79,7 @@ class Translit * @return string converted text * @throws Exception if handler not available or wrong (strict mode only) */ - public function convert($text, $code) + public function convert(string $text, string $code): string { foreach (explode(',', $code) as $code) { @@ -98,7 +105,7 @@ public function convert($text, $code) * * @return Translit */ - public static function object($dataPath = '') + public static function object(string $dataPath = '') { static $object = null; @@ -111,6 +118,11 @@ public static function object($dataPath = '') return $object; } + public function __invoke() + { + print_r(func_get_args()); + } + /** * Returns language handler - array or object. 
* @@ -123,7 +135,7 @@ public static function object($dataPath = '') * * @throws Exception if handler not available or wrong (strict mode only) */ - protected function getLanguage($code) + protected function getLanguage(string $code) { // all language codes prepended with exclamation mark really required $strict = false; @@ -135,11 +147,25 @@ protected function getLanguage($code) if ($code && !isset($this->languages[$code])) { - $dataFile = $this->getDataPath() . $code . '.php'; - $className = 'Translit' . ucfirst($code); - $classFile = $this->getClassPath() . $className . '.php'; - - if (file_exists($dataFile)) + if (class_exists($className = isset($this->classes[$code]) ? $this->classes[$code] : '')) + { + $language = new $className; + if (method_exists($language, 'convert')) + { + if (property_exists($language, 'translit')) + { + $language->translit = $this; + } + $this->languages[$code] = $language; + } + else if ($strict) + { + throw new Exception( + sprintf('class "%s" does not have convert() method', + $className)); + } + } + else if (file_exists($dataFile = $this->getDataPath() . $code . 
'.php')) { $language = include $dataFile; if (is_array($language)) @@ -156,36 +182,10 @@ protected function getLanguage($code) gettype($language))); } } - else + else if ($strict) { - if (!@class_exists($className) && file_exists($classFile)) - { - include $classFile; - } - - if (@class_exists($className)) - { - $language = new $className; - if (method_exists($language, 'convert')) - { - if (property_exists($language, 'translit')) - { - $language->translit = $this; - } - $this->languages[$code] = $language; - } - else if ($strict) - { - throw new Exception( - sprintf('class "%s" does not have convert() method', - $className)); - } - } - else if ($strict) - { - throw new Exception( - sprintf('language "%s" does not have handlers', $code)); - } + throw new Exception( + sprintf('language "%s" does not have handlers', $code)); } } @@ -197,32 +197,17 @@ protected function getLanguage($code) * * @return string */ - protected function getDataPath() + protected function getDataPath(): string { if (!$this->dataPath) { - $this->dataPath = dirname(__FILE__) . DIRECTORY_SEPARATOR . + $this->dataPath = __DIR__ . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR; } return $this->dataPath; } - /** - * Returns path to directory with transliteration classes - * - * @return string - */ - protected function getClassPath() - { - if (!$this->classPath) - { - $this->classPath = dirname(__FILE__) . DIRECTORY_SEPARATOR; - } - - return $this->classPath; - } - /** * Cached language handlers, could be an associative array or object. 
* diff --git a/TranslitAscii.php b/src/TranslitAscii.php similarity index 92% rename from TranslitAscii.php rename to src/TranslitAscii.php index 697d580..fde29a9 100644 --- a/TranslitAscii.php +++ b/src/TranslitAscii.php @@ -1,5 +1,7 @@ + */ + +return array ( + // upper case + 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'H', + 'Ґ' => 'G', 'ДЖ' => 'Dhz', 'Дж' => 'Dhz', 'ДЗ' => 'Dz', + 'Дз' => 'Dz', 'Д' => 'D', 'Е' => 'E', 'Ё' => 'Ë', + 'Ж' => 'Zh', 'З' => 'Z', 'І' => 'I', 'Й' => 'Ĭ', + 'К' => 'K', 'Л' => 'L', 'М' => 'M', 'Н' => 'N', + 'О' => 'O', 'П' => 'P', 'Р' => 'R', 'С' => 'S', + 'ТС' => 'T-s', 'Тс' => 'T-s', 'Т' => 'T', 'У' => 'U', + 'Ў' => 'W', 'Ф' => 'F', 'Х' => 'Kh', 'Ц' => 'Ts', + 'Ч' => 'Ch', 'Ш' => 'Sh', '’' => ', ', 'Ы' => 'Ȳ', + 'Ь' => '', 'Э' => 'É', 'Ю' => 'Yu', 'Я' => 'Ya', + // lower case + 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'h', + 'ґ' => 'g', 'дж' => 'dhz', 'дз' => 'dz', 'д' => 'd', + 'е' => 'e', 'ё' => 'ë', 'ж' => 'zh', 'з' => 'z', + 'і' => 'i', 'й' => 'ĭ', 'к' => 'k', 'л' => 'l', + 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', + 'р' => 'r', 'с' => 's', 'тс' => 't-s', 'т' => 't', + 'у' => 'u', 'ў' => 'w', 'ф' => 'f', 'х' => 'kh', + 'ц' => 'ts', 'ч' => 'ch', 'ш' => 'sh', 'ы' => 'ȳ', + 'ь' => '', 'э' => 'é', 'ю' => 'yu', 'я' => 'ya', +); diff --git a/data/bg.php b/src/data/bg.php similarity index 100% rename from data/bg.php rename to src/data/bg.php diff --git a/data/cyrillic.php b/src/data/cyrillic.php similarity index 100% rename from data/cyrillic.php rename to src/data/cyrillic.php diff --git a/data/digraph.php b/src/data/digraph.php similarity index 100% rename from data/digraph.php rename to src/data/digraph.php diff --git a/data/el.php b/src/data/el.php similarity index 100% rename from data/el.php rename to src/data/el.php diff --git a/data/hy.php b/src/data/hy.php similarity index 100% rename from data/hy.php rename to src/data/hy.php diff --git a/data/kk.php b/src/data/kk.php similarity index 100% rename from data/kk.php rename 
to src/data/kk.php diff --git a/data/latin.php b/src/data/latin.php similarity index 100% rename from data/latin.php rename to src/data/latin.php diff --git a/data/mk.php b/src/data/mk.php similarity index 100% rename from data/mk.php rename to src/data/mk.php diff --git a/data/ru.php b/src/data/ru.php similarity index 100% rename from data/ru.php rename to src/data/ru.php diff --git a/tests/TranslitTest.php b/tests/TranslitTest.php new file mode 100644 index 0000000..e6aa401 --- /dev/null +++ b/tests/TranslitTest.php @@ -0,0 +1,25 @@ +assertEquals('Rossiia', Translit::object()->convert('Россия', 'ru')); + $this->assertEquals('Rossiâ', Translit::object()->convert('Россия', 'cyrillic')); + $this->assertEquals('Rossia', Translit::object()->convert('Россия', 'cyrillic,latin')); + $this->assertEquals('Ukraina', Translit::object()->convert('Україна', 'uk')); + $this->assertEquals('Ukraïna', Translit::object()->convert('Україна', 'cyrillic')); + $this->assertEquals('Ukraina', Translit::object()->convert('Україна', 'cyrillic,latin')); + $this->assertEquals('Bielaruś', Translit::object()->convert('Беларусь', 'be')); + $this->assertEquals('Bielarus', Translit::object()->convert('Беларусь', 'be,latin')); + // test undefined language + $this->assertEquals('мир', Translit::object()->convert('мир', 'cyr')); + // and strict mode + $this->expectException(\Exception::class); + $this->assertEquals('мир', Translit::object()->convert('мир', '!cyr')); + } +}