diff --git a/libs/plugins/shared.mb_str_replace.php b/libs/plugins/shared.mb_str_replace.php index 226d9035d..6366205bf 100644 --- a/libs/plugins/shared.mb_str_replace.php +++ b/libs/plugins/shared.mb_str_replace.php @@ -44,9 +44,43 @@ function smarty_mb_str_replace($search, $replace, $subject, &$count = 0) } } } else { - $parts = mb_split(preg_quote($search), $subject) ?: array(); + $mb_reg_charset = mb_regex_encoding(); + // Check if mbstring regex is using UTF-8 + $reg_is_unicode = !strcasecmp($mb_reg_charset, "UTF-8"); + if(!$reg_is_unicode) { + // ...and set to UTF-8 if not + mb_regex_encoding("UTF-8"); + } + + // See if charset used by Smarty is matching one used by regex... + $current_charset = mb_regex_encoding(); + $convert_result = (bool)strcasecmp(Smarty::$_CHARSET, $current_charset); + if($convert_result) { + // ...convert to it if not. + $subject = mb_convert_encoding($subject, $current_charset, Smarty::$_CHARSET); + $search = mb_convert_encoding($search, $current_charset, Smarty::$_CHARSET); + $replace = mb_convert_encoding($replace, $current_charset, Smarty::$_CHARSET); + } + + $parts = mb_split(preg_quote($search), $subject); + // If original regex encoding was not unicode... + if(!$reg_is_unicode) { + // ...restore original regex encoding to avoid breaking the system. + mb_regex_encoding($mb_reg_charset); + } + if($parts === false) { + // This exception is thrown if call to mb_split failed. + // Usually it happens, when $search or $replace are not valid for given mb_regex_encoding(). + // There may be other cases for it to fail, please file an issue if you find a reproducible one. + throw new SmartyException("Source string is not a valid $current_charset sequence (probably)"); + } + $count = count($parts) - 1; $subject = implode($replace, $parts); + // Convert results back to charset used by Smarty, if needed. + if($convert_result) { + $subject = mb_convert_encoding($subject, Smarty::$_CHARSET, $current_charset); + } } return $subject; } diff --git a/tests/UnitTests/TemplateSource/_Issues/549/MbSplitEncodingIssue549Test.php b/tests/UnitTests/TemplateSource/_Issues/549/MbSplitEncodingIssue549Test.php new file mode 100644 index 000000000..7cb02ec78 --- /dev/null +++ b/tests/UnitTests/TemplateSource/_Issues/549/MbSplitEncodingIssue549Test.php @@ -0,0 +1,77 @@ + + */ + +/** + * class for compiler tests + * + * @runTestsInSeparateProcess + * @preserveGlobalState disabled + * @backupStaticAttributes enabled + * + * mb_split breaks if Smarty encoding is not the same as mbstring regex encoding. + */ +class MbSplitEncodingIssue549Test extends PHPUnit_Smarty +{ + /** @var string Saved Smarty charset */ + private $charset; + + /** @var array Source data for tests, hexed to protect from accidental reencoding */ + private $data = array( + "subject" => '4772c3bc6e6577616c64', // "Grünewald" + "pattern" => '77616c64', // "wald" + "replacement" => '7374c3bc726d', // "stürm" + "result" => '4772c3bc6e657374c3bc726d', // "Grünestürm" + ); + + public function setUp() + { + if(!\Smarty::$_MBSTRING) + { + return $this->markTestSkipped("mbstring extension is not in use by Smarty"); + } + + $this->charset = \Smarty::$_CHARSET; + $this->setUpSmarty(dirname(__FILE__)); + } + + public function tearDown() + { + \Smarty::$_CHARSET = $this->charset ?: \Smarty::$_CHARSET; + $this->cleanDirs(); + } + + /** Provider for testReplaceModifier + */ + public function encodingPairsProvider() + { + return array( + "with non-UNICODE src/non-UNICODE regex (PHP < 5.6 default)" => array("Windows-1252", "EUC-JP"), + "with UTF-8 src/non-UNICODE regex (PHP < 5.6 default)" => array("UTF-8", "EUC-JP"), + "with UTF-8 src/UTF-8 regex (PHP >= 5.6)" => array("UTF-8", "UTF-8"), + "with non-UNICODE src/UTF-8 regex" => array("Windows-1252", "UTF-8"), + ); + } + + /** Test behavior of `replace` modifier with different source and regex encodings + * + * @dataProvider encodingPairsProvider + */ + public function testReplaceModifier($mb_int_encoding, $mb_regex_encoding) + { + $data = $this->data; + \array_walk($data, function(&$value, $key) use($mb_int_encoding) { + $value = \mb_convert_encoding(pack("H*", $value), $mb_int_encoding, "UTF-8"); + }); + \extract($data, \EXTR_SKIP); + + \mb_regex_encoding($mb_regex_encoding); + \Smarty::$_CHARSET = $mb_int_encoding; + $this->assertEquals($result, $this->smarty->fetch("string:{\"$subject\"|replace:\"$pattern\":\"$replacement\"}")); + } + +} diff --git a/tests/UnitTests/TemplateSource/_Issues/549/cache/.gitignore b/tests/UnitTests/TemplateSource/_Issues/549/cache/.gitignore new file mode 100644 index 000000000..d88cc1446 --- /dev/null +++ b/tests/UnitTests/TemplateSource/_Issues/549/cache/.gitignore @@ -0,0 +1,2 @@ +# Ignore anything in here, but keep this directory +* diff --git a/tests/UnitTests/TemplateSource/_Issues/549/templates_c/.gitignore b/tests/UnitTests/TemplateSource/_Issues/549/templates_c/.gitignore new file mode 100644 index 000000000..d88cc1446 --- /dev/null +++ b/tests/UnitTests/TemplateSource/_Issues/549/templates_c/.gitignore @@ -0,0 +1,2 @@ +# Ignore anything in here, but keep this directory +*