From d69395649e0c9a196dbcc2c723d59899f35532a2 Mon Sep 17 00:00:00 2001 From: matthewcpp Date: Fri, 13 Oct 2023 11:16:44 -0700 Subject: [PATCH] SDFAssetPath: _ReadUTF8: Refactor character pointer parameter to be of type unsigned char. This fixes an issue where certain multibyte unicode characters would not be decoded correctly. This change is a result of Github issue #2560 reported by shigeno-y and solution proposed by syoyo. Fixes #2560 (Internal change: 2299146) --- pxr/usd/sdf/assetPath.cpp | 4 ++-- .../sdf/testenv/testSdfParsing.testenv/180_asset_paths.sdf | 4 ++++ .../testSdfParsing.testenv/baseline/180_asset_paths.sdf | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pxr/usd/sdf/assetPath.cpp b/pxr/usd/sdf/assetPath.cpp index 8af43b9f4b..071e5a9a08 100644 --- a/pxr/usd/sdf/assetPath.cpp +++ b/pxr/usd/sdf/assetPath.cpp @@ -56,7 +56,7 @@ static const char Delimiter = '@'; // advance 'cp' to the start of the next UTF-8 character. If 'cp' does not // point to a valid UTF-8 char, leave 'cp' unmodified and return -1. static int -_ReadUTF8(char const *&cp, std::string *errMsg) +_ReadUTF8(unsigned char const *&cp, std::string *errMsg) { // Return a byte with the high `n` bits set, rest clear. 
auto highBits = [](int n) { @@ -114,7 +114,7 @@ _ValidateAssetPathString(char const *path) (0x80 <= code && code <= 0x9f)); }; - char const *cp = path; + unsigned char const *cp = reinterpret_cast<unsigned char const *>(path); std::string err; int utf8Char = _ReadUTF8(cp, &err); int charNum = 1; diff --git a/pxr/usd/sdf/testenv/testSdfParsing.testenv/180_asset_paths.sdf b/pxr/usd/sdf/testenv/testSdfParsing.testenv/180_asset_paths.sdf index 59716cc9ea..10c3b20100 100644 --- a/pxr/usd/sdf/testenv/testSdfParsing.testenv/180_asset_paths.sdf +++ b/pxr/usd/sdf/testenv/testSdfParsing.testenv/180_asset_paths.sdf @@ -13,6 +13,7 @@ over "AssetPathTest" ( asset wversion = @//menv30/global/unit_defs.sdf#20@ asset allchars = @ !"#$%&'()*+,-./0123456789:;<=>?ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~@ asset utf8 = @aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː Можам да јадам стакло, а не ме штета ನನಗೆ ಹಾನಿ ಆಗದೆ, ನಾನು ಗಜನ್ನು ತಿನಬಹುದು@ + asset pathWithJapaneseCharacter = @path/to/響@ asset looksLikeEscapes = @\x01\x03\x7f@ asset[] array = [@@, @sdf/Model.sdf@, @//$STAGEDIR/sim/$MODELINSTANCE.sdf@, @//menv30/global/unit_defs.sdf#20@] } @@ -30,6 +31,7 @@ over "AssetPathTest" ( asset allchars = @ !"#$%&'()*+,-./0123456789:;<=>?ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~@ asset utf8 = @aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː Можам да јадам стакло, а не ме штета ನನಗೆ ಹಾನಿ ಆಗದೆ, ನಾನು ಗಜನ್ನು ತಿನಬಹುದು@ + asset pathWithJapaneseCharacter = @path/to/響@ asset looksLikeEscapes = @\x01\x03\x7f@ asset[] array = [@@, @sdf/Model.sdf@, @//$STAGEDIR/sim/$MODELINSTANCE.sdf@, @//menv30/global/unit_defs.sdf#20@] } @@ -53,6 +55,7 @@ over "AssetPathTest2" ( asset escapeddelimiter = @@@foo.sdf\@@@testing\\@@@@@@ asset beginandendwithextradelim = @@@@foo.sdf@@@@ asset utf8 = @@@aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː Можам да јадам стакло, а не ме штета ನನಗೆ ಹಾನಿ ಆಗದೆ, ನಾನು ಗಜನ್ನು ತಿನಬಹುದು@@@ + asset pathWithJapaneseCharacter = @path/to/響@ asset looksLikeEscapes = @@@\x01\x03\x7f@@@ asset[] 
array = [@@@@@@, @@@sdf/Model.sdf@@@, @@@//menv30/global/unit_defs.sdf@stable-prepro_0.42@@@, @@@//menv30/global/unit_defs.sdf@2012/06/20@@@] } @@ -78,6 +81,7 @@ over "AssetPathTest2" ( asset beginandendwithextradelim = @@@@foo.sdf@@@@ asset utf8 = @@@aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː Можам да јадам стакло, а не ме штета ನನಗೆ ಹಾನಿ ಆಗದೆ, ನಾನು ಗಜನ್ನು ತಿನಬಹುದು@@@ + asset pathWithJapaneseCharacter = @path/to/響@ asset looksLikeEscapes = @@@\x01\x03\x7f@@@ asset[] array = [@@@@@@, @@@sdf/Model.sdf@@@, @@@//menv30/global/unit_defs.sdf@stable-prepro_0.42@@@, @@@//menv30/global/unit_defs.sdf@2012/06/20@@@] } \ No newline at end of file diff --git a/pxr/usd/sdf/testenv/testSdfParsing.testenv/baseline/180_asset_paths.sdf b/pxr/usd/sdf/testenv/testSdfParsing.testenv/baseline/180_asset_paths.sdf index 569a727a1e..7a6e18afb6 100644 --- a/pxr/usd/sdf/testenv/testSdfParsing.testenv/baseline/180_asset_paths.sdf +++ b/pxr/usd/sdf/testenv/testSdfParsing.testenv/baseline/180_asset_paths.sdf @@ -8,6 +8,7 @@ over "AssetPathTest" ( asset empty = @@ asset looksLikeEscapes = @\x01\x03\x7f@ asset overlay = @///global/unit_defs.sdf@ + asset pathWithJapaneseCharacter = @path/to/響@ asset relative1 = @sdf/Model.sdf@ asset relative2 = @../sdf/Model.sdf@ asset repo = @//menv30/global/unit_defs.sdf@ @@ -24,6 +25,7 @@ over "AssetPathTest" ( asset empty = @@ asset looksLikeEscapes = @\x01\x03\x7f@ asset overlay = @///global/unit_defs.sdf@ + asset pathWithJapaneseCharacter = @path/to/響@ asset relative1 = @sdf/Model.sdf@ asset relative2 = @../sdf/Model.sdf@ asset repo = @//menv30/global/unit_defs.sdf@ @@ -43,6 +45,7 @@ over "AssetPathTest2" ( asset escapeddelimiter = @@@foo.sdf\@@@testing\\@@@@@@ asset looksLikeEscapes = @\x01\x03\x7f@ asset overlay = @///global/unit_defs.sdf@ + asset pathWithJapaneseCharacter = @path/to/響@ asset relative1 = @sdf/Model.sdf@ asset relative2 = @../sdf/Model.sdf@ asset repo = @//menv30/global/unit_defs.sdf@ @@ -65,6 +68,7 @@ over "AssetPathTest2" ( asset 
escapeddelimiter = @@@foo.sdf\@@@testing\\@@@@@@ asset looksLikeEscapes = @\x01\x03\x7f@ asset overlay = @///global/unit_defs.sdf@ + asset pathWithJapaneseCharacter = @path/to/響@ asset relative1 = @sdf/Model.sdf@ asset relative2 = @../sdf/Model.sdf@ asset repo = @//menv30/global/unit_defs.sdf@