From 0e6c01284f469299273e257a42ce17baecd998bf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 8 Feb 2024 07:45:21 -0800 Subject: [PATCH] StringLowering: Lower all remaining important string operations (#6283) All those in the list from #6271 (comment) --- src/passes/StringLowering.cpp | 84 ++++++++++ test/lit/passes/string-gathering.wast | 64 ++++++-- .../passes/string-lowering-instructions.wast | 150 +++++++++++++++--- 3 files changed, 267 insertions(+), 31 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index e4b3ed865d5..e1bae85200a 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -239,13 +239,20 @@ struct StringLowering : public StringGathering { // Imported string functions. Name fromCharCodeArrayImport; + Name intoCharCodeArrayImport; Name fromCodePointImport; + Name equalsImport; + Name compareImport; + Name lengthImport; + Name codePointAtImport; + Name substringImport; // The name of the module to import string functions from. Name WasmStringsModule = "wasm:js-string"; // Common types used in imports. 
Type nullArray16 = Type(Array(Field(Field::i16, Mutable)), Nullable); + Type nullExt = Type(HeapType::ext, Nullable); Type nnExt = Type(HeapType::ext, NonNullable); // Creates an imported string function, returning its name (which is equal to @@ -269,6 +276,23 @@ struct StringLowering : public StringGathering { module, "fromCharCodeArray", {nullArray16, Type::i32, Type::i32}, nnExt); // string.fromCodePoint: codepoint -> ext fromCodePointImport = addImport(module, "fromCodePoint", Type::i32, nnExt); + // string.intoCharCodeArray: string, array, start -> num written + intoCharCodeArrayImport = addImport(module, + "intoCharCodeArray", + {nullExt, nullArray16, Type::i32}, + Type::i32); + // string.equals: string, string -> i32 + equalsImport = addImport(module, "equals", {nullExt, nullExt}, Type::i32); + // string.compare: string, string -> i32 + compareImport = addImport(module, "compare", {nullExt, nullExt}, Type::i32); + // string.length: string -> i32 + lengthImport = addImport(module, "length", nullExt, Type::i32); + // string.codePointAt: string, offset -> i32 + codePointAtImport = + addImport(module, "codePointAt", {nullExt, Type::i32}, Type::i32); + // string.substring: string, start, end -> string + substringImport = + addImport(module, "substring", {nullExt, Type::i32, Type::i32}, nnExt); // Replace the string instructions in parallel. struct Replacer : public WalkerPass> { @@ -304,6 +328,66 @@ struct StringLowering : public StringGathering { // strings: they are all just JS strings, so no conversion is needed. 
replaceCurrent(curr->ref); } + + /* Lowers string.encode: for StringEncodeWTF16Array, swaps the current expression for a call to the intoCharCodeArray import with (ref, ptr, start) as operands and an i32 result. Every other encode op is still a TODO and hits WASM_UNREACHABLE. */ void visitStringEncode(StringEncode* curr) { + Builder builder(*getModule()); + switch (curr->op) { + case StringEncodeWTF16Array: + replaceCurrent(builder.makeCall(lowering.intoCharCodeArrayImport, + {curr->ref, curr->ptr, curr->start}, + Type::i32)); + return; + default: + WASM_UNREACHABLE("TODO: all of string.encode*"); + } + } + + /* Lowers the string equality/comparison ops: StringEqEqual becomes a call to the equals import and StringEqCompare a call to the compare import, each taking (left, right) and producing an i32. Any other op value is treated as invalid. */ void visitStringEq(StringEq* curr) { + Builder builder(*getModule()); + switch (curr->op) { + case StringEqEqual: + replaceCurrent(builder.makeCall( + lowering.equalsImport, {curr->left, curr->right}, Type::i32)); + return; + case StringEqCompare: + replaceCurrent(builder.makeCall( + lowering.compareImport, {curr->left, curr->right}, Type::i32)); + return; + default: + WASM_UNREACHABLE("invalid string.eq*"); + } + } + + /* Lowers string.measure: only StringMeasureWTF16View is handled, becoming a call to the length import on the ref operand (i32 result); any other measure op reaches WASM_UNREACHABLE. */ void visitStringMeasure(StringMeasure* curr) { + Builder builder(*getModule()); + switch (curr->op) { + case StringMeasureWTF16View: + replaceCurrent( + builder.makeCall(lowering.lengthImport, {curr->ref}, Type::i32)); + return; + default: + WASM_UNREACHABLE("invalid string.measure*"); + } + } + + /* Lowers StringWTF16Get to a call of the codePointAt import with (ref, pos), typed i32. NOTE(review): the tests spell this instruction stringview_wtf16.get_codeunit, so it presumably reads a single 16-bit code unit, whereas JS codePointAt presumably merges a surrogate pair starting at pos into one code point - confirm whether charCodeAt is the import that should be used here. */ void visitStringWTF16Get(StringWTF16Get* curr) { + Builder builder(*getModule()); + replaceCurrent(builder.makeCall( + lowering.codePointAtImport, {curr->ref, curr->pos}, Type::i32)); + } + + /* Lowers string slicing: StringSliceWTF16 becomes a call to the substring import with (ref, start, end), typed as lowering.nnExt. Other slice ops are still a TODO and hit WASM_UNREACHABLE. */ void visitStringSliceWTF(StringSliceWTF* curr) { + Builder builder(*getModule()); + switch (curr->op) { + case StringSliceWTF16: + replaceCurrent(builder.makeCall(lowering.substringImport, + {curr->ref, curr->start, curr->end}, + lowering.nnExt)); + return; + default: + WASM_UNREACHABLE("TODO: all string.slice*"); + } + } }; Replacer replacer(*this); diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast index 8c315ddc140..a7d0418ecf2 100644 --- a/test/lit/passes/string-gathering.wast +++ b/test/lit/passes/string-gathering.wast @@ -27,11 +27,21 @@ ;; CHECK: (global $global2 stringref (global.get $string.const_bar)) ;; LOWER: (type $0 (func)) - ;; LOWER: (type $1 (array 
(mut i16))) + ;; LOWER: (type $1 (func (param externref externref) (result i32))) - ;; LOWER: (type $2 (func (param (ref null $1) i32 i32) (result (ref extern)))) + ;; LOWER: (type $2 (array (mut i16))) - ;; LOWER: (type $3 (func (param i32) (result (ref extern)))) + ;; LOWER: (type $3 (func (param (ref null $2) i32 i32) (result (ref extern)))) + + ;; LOWER: (type $4 (func (param i32) (result (ref extern)))) + + ;; LOWER: (type $5 (func (param externref (ref null $2) i32) (result i32))) + + ;; LOWER: (type $6 (func (param externref) (result i32))) + + ;; LOWER: (type $7 (func (param externref i32) (result i32))) + + ;; LOWER: (type $8 (func (param externref i32 i32) (result (ref extern)))) ;; LOWER: (import "string.const" "0" (global $string.const_bar (ref extern))) @@ -39,9 +49,21 @@ ;; LOWER: (import "string.const" "2" (global $global (ref extern))) - ;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $2) (param (ref null $1) i32 i32) (result (ref extern)))) + ;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $3) (param (ref null $2) i32 i32) (result (ref extern)))) - ;; LOWER: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $3) (param i32) (result (ref extern)))) + ;; LOWER: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $4) (param i32) (result (ref extern)))) + + ;; LOWER: (import "wasm:js-string" "intoCharCodeArray" (func $intoCharCodeArray (type $5) (param externref (ref null $2) i32) (result i32))) + + ;; LOWER: (import "wasm:js-string" "equals" (func $equals (type $1) (param externref externref) (result i32))) + + ;; LOWER: (import "wasm:js-string" "compare" (func $compare (type $1) (param externref externref) (result i32))) + + ;; LOWER: (import "wasm:js-string" "length" (func $length (type $6) (param externref) (result i32))) + + ;; LOWER: (import "wasm:js-string" "codePointAt" (func $codePointAt (type $7) (param externref i32) (result i32))) + 
+ ;; LOWER: (import "wasm:js-string" "substring" (func $substring (type $8) (param externref i32 i32) (result (ref extern)))) ;; LOWER: (global $global2 externref (global.get $string.const_bar)) (global $global2 (ref null string) (string.const "bar")) @@ -121,11 +143,21 @@ ;; Multiple possible reusable globals. Also test ignoring of imports. (module ;; CHECK: (import "a" "b" (global $import (ref string))) - ;; LOWER: (type $0 (array (mut i16))) + ;; LOWER: (type $0 (func (param externref externref) (result i32))) - ;; LOWER: (type $1 (func (param (ref null $0) i32 i32) (result (ref extern)))) + ;; LOWER: (type $1 (array (mut i16))) + + ;; LOWER: (type $2 (func (param (ref null $1) i32 i32) (result (ref extern)))) + + ;; LOWER: (type $3 (func (param i32) (result (ref extern)))) + + ;; LOWER: (type $4 (func (param externref (ref null $1) i32) (result i32))) + + ;; LOWER: (type $5 (func (param externref) (result i32))) + + ;; LOWER: (type $6 (func (param externref i32) (result i32))) - ;; LOWER: (type $2 (func (param i32) (result (ref extern)))) + ;; LOWER: (type $7 (func (param externref i32 i32) (result (ref extern)))) ;; LOWER: (import "a" "b" (global $import (ref extern))) (import "a" "b" (global $import (ref string))) @@ -138,9 +170,21 @@ ;; LOWER: (import "string.const" "1" (global $global4 (ref extern))) - ;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $1) (param (ref null $0) i32 i32) (result (ref extern)))) + ;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $2) (param (ref null $1) i32 i32) (result (ref extern)))) + + ;; LOWER: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $3) (param i32) (result (ref extern)))) + + ;; LOWER: (import "wasm:js-string" "intoCharCodeArray" (func $intoCharCodeArray (type $4) (param externref (ref null $1) i32) (result i32))) + + ;; LOWER: (import "wasm:js-string" "equals" (func $equals (type $0) (param externref externref) (result 
i32))) + + ;; LOWER: (import "wasm:js-string" "compare" (func $compare (type $0) (param externref externref) (result i32))) + + ;; LOWER: (import "wasm:js-string" "length" (func $length (type $5) (param externref) (result i32))) + + ;; LOWER: (import "wasm:js-string" "codePointAt" (func $codePointAt (type $6) (param externref i32) (result i32))) - ;; LOWER: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $2) (param i32) (result (ref extern)))) + ;; LOWER: (import "wasm:js-string" "substring" (func $substring (type $7) (param externref i32 i32) (result (ref extern)))) ;; LOWER: (global $global2 (ref extern) (global.get $global1)) (global $global2 (ref string) (string.const "foo")) diff --git a/test/lit/passes/string-lowering-instructions.wast b/test/lit/passes/string-lowering-instructions.wast index 05d555ef0df..8d00942310e 100644 --- a/test/lit/passes/string-lowering-instructions.wast +++ b/test/lit/passes/string-lowering-instructions.wast @@ -3,28 +3,60 @@ ;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s (module - ;; CHECK: (type $0 (func)) - ;; CHECK: (type $array16 (array (mut i16))) (type $array16 (array (mut i16))) + ;; CHECK: (type $1 (func (param externref externref) (result i32))) + ;; CHECK: (rec - ;; CHECK-NEXT: (type $2 (func (param (ref $array16)))) + ;; CHECK-NEXT: (type $2 (func (param externref) (result externref))) + + ;; CHECK: (type $3 (func (param externref) (result i32))) + + ;; CHECK: (type $4 (func (param externref externref) (result i32))) + + ;; CHECK: (type $5 (func (param externref (ref $array16)) (result i32))) + + ;; CHECK: (type $6 (func (result externref))) + + ;; CHECK: (type $7 (func (param (ref $array16)))) - ;; CHECK: (type $3 (func (param externref externref externref externref))) + ;; CHECK: (type $8 (func (param externref externref externref externref))) - ;; CHECK: (type $4 (func (param (ref null $array16) i32 i32) (result (ref extern)))) + ;; CHECK: (type $9 (func)) - ;; CHECK: 
(type $5 (func (param i32) (result (ref extern)))) + ;; CHECK: (type $10 (func (param (ref null $array16) i32 i32) (result (ref extern)))) - ;; CHECK: (import "colliding" "name" (func $fromCodePoint (type $0))) + ;; CHECK: (type $11 (func (param i32) (result (ref extern)))) + + ;; CHECK: (type $12 (func (param externref (ref null $array16) i32) (result i32))) + + ;; CHECK: (type $13 (func (param externref) (result i32))) + + ;; CHECK: (type $14 (func (param externref i32) (result i32))) + + ;; CHECK: (type $15 (func (param externref i32 i32) (result (ref extern)))) + + ;; CHECK: (import "colliding" "name" (func $fromCodePoint (type $9))) (import "colliding" "name" (func $fromCodePoint)) - ;; CHECK: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $4) (param (ref null $array16) i32 i32) (result (ref extern)))) + ;; CHECK: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $10) (param (ref null $array16) i32 i32) (result (ref extern)))) + + ;; CHECK: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint_11 (type $11) (param i32) (result (ref extern)))) + + ;; CHECK: (import "wasm:js-string" "intoCharCodeArray" (func $intoCharCodeArray (type $12) (param externref (ref null $array16) i32) (result i32))) - ;; CHECK: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint_5 (type $5) (param i32) (result (ref extern)))) + ;; CHECK: (import "wasm:js-string" "equals" (func $equals (type $1) (param externref externref) (result i32))) - ;; CHECK: (func $string.as (type $3) (param $a externref) (param $b externref) (param $c externref) (param $d externref) + ;; CHECK: (import "wasm:js-string" "compare" (func $compare (type $1) (param externref externref) (result i32))) + + ;; CHECK: (import "wasm:js-string" "length" (func $length (type $13) (param externref) (result i32))) + + ;; CHECK: (import "wasm:js-string" "codePointAt" (func $codePointAt (type $14) (param externref i32) (result i32))) + + ;; CHECK: (import 
"wasm:js-string" "substring" (func $substring (type $15) (param externref i32 i32) (result (ref extern)))) + + ;; CHECK: (func $string.as (type $8) (param $a externref) (param $b externref) (param $c externref) (param $d externref) ;; CHECK-NEXT: (local.set $b ;; CHECK-NEXT: (local.get $a) ;; CHECK-NEXT: ) @@ -59,7 +91,7 @@ ) ) - ;; CHECK: (func $string.new.gc (type $2) (param $array16 (ref $array16)) + ;; CHECK: (func $string.new.gc (type $7) (param $array16 (ref $array16)) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (call $fromCharCodeArray ;; CHECK-NEXT: (local.get $array16) @@ -78,18 +110,94 @@ ) ) - ;; CHECK: (func $string.from_code_point (type $0) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (call $fromCodePoint_5 - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: ) + ;; CHECK: (func $string.from_code_point (type $6) (result externref) + ;; CHECK-NEXT: (call $fromCodePoint_11 + ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $string.from_code_point - (drop - (string.from_code_point - (i32.const 1) - ) + (func $string.from_code_point (result stringref) + (string.from_code_point + (i32.const 1) + ) + ) + + ;; CHECK: (func $string.encode (type $5) (param $ref externref) (param $array16 (ref $array16)) (result i32) + ;; CHECK-NEXT: (call $intoCharCodeArray + ;; CHECK-NEXT: (local.get $ref) + ;; CHECK-NEXT: (local.get $array16) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.encode (param $ref stringref) (param $array16 (ref $array16)) (result i32) + (string.encode_wtf16_array + (local.get $ref) + (local.get $array16) + (i32.const 10) + ) + ) + + ;; CHECK: (func $string.eq (type $4) (param $a externref) (param $b externref) (result i32) + ;; CHECK-NEXT: (call $equals + ;; CHECK-NEXT: (local.get $a) + ;; CHECK-NEXT: (local.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.eq (param $a stringref) (param $b stringref) (result i32) + (string.eq + (local.get $a) + (local.get $b) + ) + ) + + ;; CHECK: (func 
$string.compare (type $4) (param $a externref) (param $b externref) (result i32) + ;; CHECK-NEXT: (call $compare + ;; CHECK-NEXT: (local.get $a) + ;; CHECK-NEXT: (local.get $b) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.compare (param $a stringref) (param $b stringref) (result i32) + (string.compare + (local.get $a) + (local.get $b) + ) + ) + + ;; CHECK: (func $string.length (type $3) (param $ref externref) (result i32) + ;; CHECK-NEXT: (call $length + ;; CHECK-NEXT: (local.get $ref) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.length (param $ref stringview_wtf16) (result i32) + (stringview_wtf16.length + (local.get $ref) + ) + ) + + ;; CHECK: (func $string.get_codeunit (type $3) (param $ref externref) (result i32) + ;; CHECK-NEXT: (call $codePointAt + ;; CHECK-NEXT: (local.get $ref) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.get_codeunit (param $ref stringview_wtf16) (result i32) + (stringview_wtf16.get_codeunit + (local.get $ref) + (i32.const 2) + ) + ) + + ;; CHECK: (func $string.slice (type $2) (param $ref externref) (result externref) + ;; CHECK-NEXT: (call $substring + ;; CHECK-NEXT: (local.get $ref) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.slice (param $ref stringview_wtf16) (result stringref) + (stringview_wtf16.slice + (local.get $ref) + (i32.const 2) + (i32.const 3) ) ) )