Skip to content

Commit

Permalink
feat(stdlib): Add forEachChar, forEachCharI, map and mapi to …
Browse files Browse the repository at this point in the history
…String module (#1864)

Co-authored-by: Oscar Spencer <oscar@grain-lang.org>
  • Loading branch information
spotandjake and ospencer authored Jul 28, 2024
1 parent d822c87 commit 4305e82
Show file tree
Hide file tree
Showing 3 changed files with 281 additions and 2 deletions.
33 changes: 33 additions & 0 deletions compiler/test/stdlib/string.test.gr
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,39 @@ assert String.decode(bytes, String.UTF16_LE) == "¢"
Array.mapi((c, i) => (c, i), codes)
}

// char iteration tests
// conveniently reusing data from `explode` tests
{
  // Each visited char is prepended, so `visited` ends up in reverse order
  let mut visited = []
  let collect = char => {
    visited = [char, ...visited]
  }
  String.forEachChar(collect, emojis)
  // Restore visiting order before comparing against `explode`
  let inOrder = Array.reverse(Array.fromList(visited))
  assert inOrder == String.explode(emojis)
}

{
  // Each visited (char, index) pair is prepended, so `visited` ends up in
  // reverse order
  let mut visited = []
  let collect = (char, idx) => {
    visited = [(char, idx), ...visited]
  }
  String.forEachChari(collect, emojis)
  // Restore visiting order and compare with the indexed `explode` result
  let inOrder = Array.reverse(Array.fromList(visited))
  let expected = Array.mapi((c, i) => (c, i), String.explode(emojis))
  assert inOrder == expected
}

// String.map
{
  // Mapping function that ignores its input and always yields 'a'
  let toA = c => 'a'
  // Mapping function that returns each char unchanged
  let keep = c => c
  assert String.map(toA, "") == ""
  assert String.map(toA, "Hello world") == "aaaaaaaaaaa"
  assert String.map(keep, "Hello world") == "Hello world"
}

// String.mapi
{
  // Replaces each char with the first character of its stringified index,
  // so index 10 contributes '1'
  let firstIndexDigit = (char, index) => String.charAt(0, toString(index))
  assert String.mapi(firstIndexDigit, "") == ""
  assert String.mapi(firstIndexDigit, "Hello world") == "01234567891"
  assert String.mapi((char, index) => char, "Hello world") == "Hello world"
}

// String.trimStart
assert String.trimStart("t test") == "t test"
assert String.trimStart(" test") == "test"
Expand Down
142 changes: 140 additions & 2 deletions stdlib/string.gr
Original file line number Diff line number Diff line change
Expand Up @@ -1953,7 +1953,6 @@ provide let forEachCodePoint = (fn: Number => Void, str: String) => {
let mut ptr = strPtr + 8n
let end = ptr + byteSize

let mut idx = 0n
while (ptr < end) {
let byte = WasmI32.load8U(ptr, 0n)
let codePointByteCount = if ((byte & 0x80n) == 0x00n) {
Expand All @@ -1977,7 +1976,6 @@ provide let forEachCodePoint = (fn: Number => Void, str: String) => {
fn(tagSimpleNumber(codePoint))

ptr += codePointByteCount
idx += 1n
}

ignore(str)
Expand Down Expand Up @@ -2040,6 +2038,146 @@ provide let forEachCodePointi = (fn: (Number, Number) => Void, str: String) => {
void
}

/**
 * Iterates over Unicode characters in a string.
 *
 * @param fn: The iterator function
 * @param str: The string to iterate
 *
 * @example String.forEachChar(print, "Hello world")
 *
 * @since v0.6.5
 */
@unsafe
provide let forEachChar = (fn: Char => Void, str: String) => {
  use WasmI32.{ (+), (&), ltU as (<), (==) }

  let strPtr = WasmI32.fromGrain(str)

  // The byte length is read from offset 4; iteration starts at offset 8
  let byteSize = WasmI32.load(strPtr, 4n)

  let mut ptr = strPtr + 8n
  let end = ptr + byteSize

  while (ptr < end) {
    // The high bits of the leading byte select how many bytes this code
    // point occupies (1 to 4)
    let byte = WasmI32.load8U(ptr, 0n)
    let codePointByteCount = if ((byte & 0x80n) == 0x00n) {
      1n
    } else if ((byte & 0xF0n) == 0xF0n) {
      4n
    } else if ((byte & 0xE0n) == 0xE0n) {
      3n
    } else {
      2n
    }

    // Decode the code point starting at `ptr` and hand it to the callback
    // as a `Char`. The highest allowed code point is 0x10FFFF (21 bits);
    // `getCodePoint` is expected to throw a MalformedUnicode exception for
    // values exceeding this limit.
    let codePoint = getCodePoint(ptr)
    fn(tagChar(codePoint))

    ptr += codePointByteCount
  }

  // Reference `str` after the loop, as `forEachCodePoint` does, so the
  // string presumably stays alive while it is read through the raw pointer.
  ignore(str)
  void
}

/**
 * Iterates over Unicode characters in a string. This is the same as
 * `forEachChar`, but provides the character's index in the string
 * as the second argument to the iterator function.
 *
 * @param fn: The iterator function
 * @param str: The string to iterate
 *
 * @example String.forEachChari((char, index) => print((char, index)), "Hello world")
 *
 * @since v0.6.5
 */
@unsafe
provide let forEachChari = (fn: (Char, Number) => Void, str: String) => {
  use WasmI32.{ (+), (&), ltU as (<), (==) }

  let strPtr = WasmI32.fromGrain(str)

  // The byte length is read from offset 4; iteration starts at offset 8
  let byteSize = WasmI32.load(strPtr, 4n)

  let mut ptr = strPtr + 8n
  let end = ptr + byteSize

  // Counts characters visited so far (not bytes)
  let mut idx = 0n
  while (ptr < end) {
    // The high bits of the leading byte select how many bytes this code
    // point occupies (1 to 4)
    let byte = WasmI32.load8U(ptr, 0n)
    let codePointByteCount = if ((byte & 0x80n) == 0x00n) {
      1n
    } else if ((byte & 0xF0n) == 0xF0n) {
      4n
    } else if ((byte & 0xE0n) == 0xE0n) {
      3n
    } else {
      2n
    }

    // Decode the code point starting at `ptr` and hand it to the callback
    // as a `Char`, together with the character index tagged as a simple
    // number. The highest allowed code point is 0x10FFFF (21 bits);
    // `getCodePoint` is expected to throw a MalformedUnicode exception for
    // values exceeding this limit.
    let codePoint = getCodePoint(ptr)
    fn(tagChar(codePoint), tagSimpleNumber(idx))

    ptr += codePointByteCount
    idx += 1n
  }

  // Reference `str` after the loop, as `forEachCodePoint` does, so the
  // string presumably stays alive while it is read through the raw pointer.
  ignore(str)
  void
}

/**
 * Builds a new string by mapping Unicode characters.
 *
 * @param fn: The mapping function
 * @param str: The string to map
 *
 * @example assert String.map((c) => 'a', "Hello world") == "aaaaaaaaaaa"
 *
 * @since v0.6.5
 */
provide let map = (fn: Char => Char, str: String) => {
  // Split into a char array, rewrite every slot in place, then rejoin
  let mapped = explode(str)
  let count = arrayLength(mapped)
  let mut pos = 0
  while (pos < count) {
    mapped[pos] = fn(mapped[pos])
    pos += 1
  }
  implode(mapped)
}

/**
 * Builds a new string by mapping Unicode characters. This is the same as
 * `map`, but provides the character's index in the string
 * as the second argument to the mapping function.
 *
 * @param fn: The mapping function
 * @param str: The string to map
 *
 * @example assert String.mapi((char, index) => String.charAt(0, toString(index)), "Hello world") == "01234567891"
 *
 * @since v0.6.5
 */
provide let mapi = (fn: (Char, Number) => Char, str: String) => {
  // Split into a char array, rewrite every slot in place using the char and
  // its position in the array, then rejoin
  let chars = explode(str)
  let arrLen = arrayLength(chars)
  for (let mut i = 0; i < arrLen; i += 1) {
    chars[i] = fn(chars[i], i)
  }
  implode(chars)
}

@unsafe
let trimString = (stringPtr: WasmI32, byteLength: WasmI32, fromEnd: Bool) => {
use WasmI32.{ (+), (-), (*), (>>>), ltU as (<), (==), (!=) }
Expand Down
108 changes: 108 additions & 0 deletions stdlib/string.md
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,114 @@ Examples:
String.forEachCodePointi((codepoint, index) => print((codepoint, index)), "Hello world")
```

### String.**forEachChar**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
forEachChar : (fn: (Char => Void), str: String) => Void
```

Iterates over Unicode characters in a string.

Parameters:

|param|type|description|
|-----|----|-----------|
|`fn`|`Char => Void`|The iterator function|
|`str`|`String`|The string to iterate|

Examples:

```grain
String.forEachChar(print, "Hello world")
```

### String.**forEachChari**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
forEachChari : (fn: ((Char, Number) => Void), str: String) => Void
```

Iterates over Unicode characters in a string. This is the same as
`forEachChar`, but provides the character's index in the string
as the second argument to the iterator function.

Parameters:

|param|type|description|
|-----|----|-----------|
|`fn`|`(Char, Number) => Void`|The iterator function|
|`str`|`String`|The string to iterate|

Examples:

```grain
String.forEachChari((char, index) => print((char, index)), "Hello world")
```

### String.**map**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
map : (fn: (Char => Char), str: String) => String
```

Builds a new string by mapping Unicode characters.

Parameters:

|param|type|description|
|-----|----|-----------|
|`fn`|`Char => Char`|The mapping function|
|`str`|`String`|The string to map|

Examples:

```grain
assert String.map((c) => 'a', "Hello world") == "aaaaaaaaaaa"
```

### String.**mapi**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
mapi : (fn: ((Char, Number) => Char), str: String) => String
```

Builds a new string by mapping Unicode characters. This is the same as
`map`, but provides the character's index in the string
as the second argument to the mapping function.

Parameters:

|param|type|description|
|-----|----|-----------|
|`fn`|`(Char, Number) => Char`|The mapping function|
|`str`|`String`|The string to map|

Examples:

```grain
assert String.mapi((char, index) => String.charAt(0, toString(index)), "Hello world") == "01234567891"
```

### String.**trimStart**

<details disabled>
Expand Down

0 comments on commit 4305e82

Please sign in to comment.