Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Require string-style identifiers to be UTF-8 #6941

Merged
merged 3 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,13 @@ struct LexStrResult : LexResult {
// Allocate a string only if there are escape sequences, otherwise just use
// the original string_view.
std::optional<std::string> str;

std::string_view getStr() {
if (str) {
return *str;
}
return span;
}
};

struct LexStrCtx : LexCtx {
Expand Down Expand Up @@ -860,6 +867,9 @@ std::optional<LexIdResult> ident(std::string_view in) {
return {};
}
if (auto s = str(ctx.next())) {
if (!String::isUTF8(s->getStr())) {
return {};
}
ctx.isStr = true;
ctx.str = s->str;
ctx.take(*s);
Expand Down
7 changes: 5 additions & 2 deletions src/passes/StringLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,12 @@ struct StringGathering : public Pass {
[[maybe_unused]] bool valid =
String::convertWTF16ToWTF8(wtf8, string.str);
assert(valid);
// TODO: Use wtf8.view() once we have C++20.
// Then escape it because identifiers must be valid UTF-8.
// TODO: Use wtf8.view() and escaped.view() once we have C++20.
std::stringstream escaped;
String::printEscaped(escaped, wtf8.str());
auto name = Names::getValidGlobalName(
*module, std::string("string.const_") + std::string(wtf8.str()));
*module, std::string("string.const_") + std::string(escaped.str()));
globalName = name;
newNames.insert(name);
auto* stringConst = builder.makeStringConst(string);
Expand Down
8 changes: 8 additions & 0 deletions test/lit/parse-bad-identifier.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
;; RUN: not wasm-opt %s -S -o - 2>&1 | filecheck %s

;; CHECK: 7:10: error: expected valtype

(module
;; String identifiers must still be UTF-8.
(global $"\ff" i32 (i32.const 0))
)
36 changes: 18 additions & 18 deletions test/lit/passes/string-gathering.wast
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@

;; CHECK: (type $0 (func))

;; CHECK: (global $string.const_bar (ref string) (string.const "bar"))
;; CHECK: (global $"string.const_\"bar\"" (ref string) (string.const "bar"))

;; CHECK: (global $string.const_other (ref string) (string.const "other"))
;; CHECK: (global $"string.const_\"other\"" (ref string) (string.const "other"))

;; CHECK: (global $global (ref string) (string.const "foo"))
(global $global (ref string) (string.const "foo"))

;; CHECK: (global $global2 stringref (global.get $string.const_bar))
;; CHECK: (global $global2 stringref (global.get $"string.const_\"bar\""))
;; LOWER: (type $0 (array (mut i16)))

;; LOWER: (type $1 (func))
Expand All @@ -45,9 +45,9 @@

;; LOWER: (type $9 (func (param externref i32 i32) (result (ref extern))))

;; LOWER: (import "string.const" "0" (global $string.const_bar (ref extern)))
;; LOWER: (import "string.const" "0" (global $"string.const_\"bar\"" (ref extern)))

;; LOWER: (import "string.const" "1" (global $string.const_other (ref extern)))
;; LOWER: (import "string.const" "1" (global $"string.const_\"other\"" (ref extern)))

;; LOWER: (import "string.const" "2" (global $global (ref extern)))

Expand All @@ -69,20 +69,20 @@

;; LOWER: (import "wasm:js-string" "substring" (func $substring (type $9) (param externref i32 i32) (result (ref extern))))

;; LOWER: (global $global2 externref (global.get $string.const_bar))
;; LOWER: (global $global2 externref (global.get $"string.const_\"bar\""))
(global $global2 (ref null string) (string.const "bar"))

;; CHECK: (func $a (type $0)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $string.const_bar)
;; CHECK-NEXT: (global.get $"string.const_\"bar\"")
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $global)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; LOWER: (func $a (type $1)
;; LOWER-NEXT: (drop
;; LOWER-NEXT: (global.get $string.const_bar)
;; LOWER-NEXT: (global.get $"string.const_\"bar\"")
;; LOWER-NEXT: )
;; LOWER-NEXT: (drop
;; LOWER-NEXT: (global.get $global)
Expand All @@ -99,10 +99,10 @@

;; CHECK: (func $b (type $0)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $string.const_bar)
;; CHECK-NEXT: (global.get $"string.const_\"bar\"")
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $string.const_other)
;; CHECK-NEXT: (global.get $"string.const_\"other\"")
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $global)
Expand All @@ -113,10 +113,10 @@
;; CHECK-NEXT: )
;; LOWER: (func $b (type $1)
;; LOWER-NEXT: (drop
;; LOWER-NEXT: (global.get $string.const_bar)
;; LOWER-NEXT: (global.get $"string.const_\"bar\"")
;; LOWER-NEXT: )
;; LOWER-NEXT: (drop
;; LOWER-NEXT: (global.get $string.const_other)
;; LOWER-NEXT: (global.get $"string.const_\"other\"")
;; LOWER-NEXT: )
;; LOWER-NEXT: (drop
;; LOWER-NEXT: (global.get $global)
Expand Down Expand Up @@ -217,9 +217,9 @@
(module
;; CHECK: (type $0 (func))

;; CHECK: (global $string.const_foo (ref string) (string.const "foo"))
;; CHECK: (global $"string.const_\"foo\"" (ref string) (string.const "foo"))

;; CHECK: (global $global (mut (ref string)) (global.get $string.const_foo))
;; CHECK: (global $global (mut (ref string)) (global.get $"string.const_\"foo\""))
;; LOWER: (type $0 (array (mut i16)))

;; LOWER: (type $1 (func (param externref externref) (result i32)))
Expand All @@ -240,7 +240,7 @@

;; LOWER: (type $9 (func (param externref i32 i32) (result (ref extern))))

;; LOWER: (import "string.const" "0" (global $string.const_foo (ref extern)))
;; LOWER: (import "string.const" "0" (global $"string.const_\"foo\"" (ref extern)))

;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $3) (param (ref null $0) i32 i32) (result (ref extern))))

Expand All @@ -260,17 +260,17 @@

;; LOWER: (import "wasm:js-string" "substring" (func $substring (type $9) (param externref i32 i32) (result (ref extern))))

;; LOWER: (global $global (mut (ref extern)) (global.get $string.const_foo))
;; LOWER: (global $global (mut (ref extern)) (global.get $"string.const_\"foo\""))
(global $global (mut (ref string)) (string.const "foo"))

;; CHECK: (func $a (type $0)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $string.const_foo)
;; CHECK-NEXT: (global.get $"string.const_\"foo\"")
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; LOWER: (func $a (type $2)
;; LOWER-NEXT: (drop
;; LOWER-NEXT: (global.get $string.const_foo)
;; LOWER-NEXT: (global.get $"string.const_\"foo\"")
;; LOWER-NEXT: )
;; LOWER-NEXT: )
(func $a
Expand Down
26 changes: 16 additions & 10 deletions test/lit/passes/string-lowering-imports.wast
Original file line number Diff line number Diff line change
@@ -1,38 +1,44 @@
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.

;; RUN: wasm-opt %s -all --string-lowering-magic-imports --remove-unused-module-elements -S -o - | filecheck %s

;; Text round tripping
;; RUN: wasm-opt %s -all --string-lowering-magic-imports --remove-unused-module-elements -S -o - | wasm-opt -all -S -o - | filecheck %s
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a comment that this is checking roundtripping?


;; Binary round tripping
;; RUN: wasm-opt %s -all --string-lowering-magic-imports --remove-unused-module-elements --roundtrip -S -o - | filecheck %s --check-prefix=RTRIP


(module
;; CHECK: (type $0 (func))

;; CHECK: (import "\'" "bar" (global $string.const_bar (ref extern)))
;; CHECK: (import "\'" "bar" (global $"string.const_\"bar\"" (ref extern)))

;; CHECK: (import "\'" "foo" (global $string.const_foo (ref extern)))
;; CHECK: (import "\'" "foo" (global $"string.const_\"foo\"" (ref extern)))

;; CHECK: (import "\'" "needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (global $"string.const_needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (ref extern)))
;; CHECK: (import "\'" "needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae" (global $"string.const_\"needs\\tescaping\\00.\\\'#%- .\\r\\n\\\\08\\0c\\n\\r\\t.\\ea\\99\\ae\"" (ref extern)))

;; CHECK: (import "string.const" "0" (global $"string.const_unpaired high surrogate \ed\a0\80 " (ref extern)))
;; CHECK: (import "string.const" "0" (global $"string.const_\"unpaired high surrogate \\ed\\a0\\80 \"" (ref extern)))

;; CHECK: (import "string.const" "1" (global $"string.const_unpaired low surrogate \ed\bd\88 " (ref extern)))
;; CHECK: (import "string.const" "1" (global $"string.const_\"unpaired low surrogate \\ed\\bd\\88 \"" (ref extern)))

;; CHECK: (export "consts" (func $consts))

;; CHECK: (func $consts (type $0)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $string.const_foo)
;; CHECK-NEXT: (global.get $"string.const_\"foo\"")
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $string.const_bar)
;; CHECK-NEXT: (global.get $"string.const_\"bar\"")
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $"string.const_needs\tescaping\00.\'#%- .\r\n\\08\0c\n\r\t.\ea\99\ae")
;; CHECK-NEXT: (global.get $"string.const_\"needs\\tescaping\\00.\\\'#%- .\\r\\n\\\\08\\0c\\n\\r\\t.\\ea\\99\\ae\"")
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $"string.const_unpaired high surrogate \ed\a0\80 ")
;; CHECK-NEXT: (global.get $"string.const_\"unpaired high surrogate \\ed\\a0\\80 \"")
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (global.get $"string.const_unpaired low surrogate \ed\bd\88 ")
;; CHECK-NEXT: (global.get $"string.const_\"unpaired low surrogate \\ed\\bd\\88 \"")
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; RTRIP: (type $0 (func))
Expand Down
8 changes: 4 additions & 4 deletions test/lit/passes/string-lowering-instructions.wast
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@

;; CHECK: (type $25 (func (param externref i32 i32) (result (ref extern))))

;; CHECK: (import "string.const" "0" (global $string.const_exported (ref extern)))
;; CHECK: (import "string.const" "0" (global $"string.const_\"exported\"" (ref extern)))

;; CHECK: (import "string.const" "1" (global $string.const_value (ref extern)))
;; CHECK: (import "string.const" "1" (global $"string.const_\"value\"" (ref extern)))

;; CHECK: (import "colliding" "name" (func $fromCodePoint (type $0)))
(import "colliding" "name" (func $fromCodePoint))
Expand Down Expand Up @@ -269,7 +269,7 @@
)

;; CHECK: (func $exported-string-returner (type $16) (result externref)
;; CHECK-NEXT: (global.get $string.const_exported)
;; CHECK-NEXT: (global.get $"string.const_\"exported\"")
;; CHECK-NEXT: )
(func $exported-string-returner (export "export.1") (result stringref)
;; We should update the signature of this function even though it is public
Expand Down Expand Up @@ -368,7 +368,7 @@
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (struct.new $struct-of-string
;; CHECK-NEXT: (global.get $string.const_value)
;; CHECK-NEXT: (global.get $"string.const_\"value\"")
;; CHECK-NEXT: (i32.const 10)
;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
Expand Down
Loading