Skip to content
This repository has been archived by the owner on Nov 15, 2024. It is now read-only.

Commit

Permalink
feat: support expandNestedCharacterClass
Browse files Browse the repository at this point in the history
  • Loading branch information
antfu committed Sep 12, 2024
1 parent e40e961 commit 3c4c544
Show file tree
Hide file tree
Showing 8 changed files with 1,701 additions and 272 deletions.
1 change: 1 addition & 0 deletions src/convert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export function onigurumaToRegexp(
convertHexDigitsShorthand: true,
convertUnicodeCategory: true,
useRegex: true,
expandNestedCharacterClass: true,
...options,
})

Expand Down
26 changes: 26 additions & 0 deletions src/lowering.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ export interface SyntaxLoweringOptions {
* @default false
*/
convertUnicodeCategory?: boolean

/**
* Expand nested character classes, e.g. rewrite `[a-z[0-9]]` as `[a-z0-9]`.
*
* @default false
*/
expandNestedCharacterClass?: boolean
}

export interface SyntaxLoweringResult {
Expand All @@ -107,6 +114,7 @@ export function syntaxLowering(
removeAtomicGroup = false,
convertHexDigitsShorthand = false,
convertUnicodeCategory = false,
expandNestedCharacterClass = false,
} = options

let output = ''
Expand All @@ -117,6 +125,7 @@ export function syntaxLowering(

const freeSpacingLocal: number[] = []
let freeSpacingGlobal = false
let isInNestedCharClass = false

let i = 0
try {
Expand Down Expand Up @@ -339,13 +348,30 @@ export function syntaxLowering(
stack.unshift(char)
}

// Nested character class
if (head === '[' && expandNestedCharacterClass) {
isInNestedCharClass = true
i += 1
// Nested character class starting with `-`
if (input[i] === '-') {
output += '\\-'
i += 1
}
continue
}

output += char
i += 1
continue
}

// Alternation close bracket
if (char === ']') {
if (isInNestedCharClass) {
isInNestedCharClass = false
i += 1
continue
}
if (head === '[')
stack.shift()
output += char
Expand Down
2 changes: 1 addition & 1 deletion test-generated/codeql.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ it('unexpected match: 0', () => {
0,
)
expect.soft(regex.source).toMatchInlineSnapshot(`"(^)\\s*([^*]|\\*(?!\\/))(?=([^*]|[*](?!\\/))*$)"`)
expect.soft(regex.flags).toMatchInlineSnapshot(`"dmy"`)
expect.soft(regex.flags).toMatchInlineSnapshot(`"dgmy"`)
expect.soft(match).toMatchInlineSnapshot(`null`)
expect.soft(indices).toMatchInlineSnapshot(`[]`)
expect(match).toBe(null)
Expand Down
178 changes: 166 additions & 12 deletions test-generated/haskell.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,19 @@ it('expected match: 0', () => {
'data WebApp = WebApp\n',
0,
)
expect.soft(regex.source).toMatchInlineSnapshot()
expect.soft(indices).toMatchInlineSnapshot()
expect.soft(regex.source).toMatchInlineSnapshot(`"(?=\\b(?<!')(where)\\b(?!'))|(?=\\}|;)|^(?!\\s+\\S|\\s*(?:$|\\{-[^@]|--+(?![\\p{S}\\p{P}&&^(),;\\[\\]{}\`_"']).*$))"`)
expect.soft(indices).toMatchInlineSnapshot(`
[
[
0,
0,
],
[
4294967295,
4294967295,
],
]
`)
expect(indices).toMatchObject([[0, 0], [4294967295, 4294967295]])
})

Expand All @@ -29,8 +40,23 @@ it('expected match: 2', () => {
'data WebApp = WebApp\n',
12,
)
expect.soft(regex.source).toMatchInlineSnapshot()
expect.soft(indices).toMatchInlineSnapshot()
expect.soft(regex.source).toMatchInlineSnapshot(`"(?<![\\p{S}\\p{P}&&^(),;\\[\\]\`{}_"'])(?:(=)|(\\|))(?![\\p{S}\\p{P}&&^(),;\\[\\]\`{}_"'])"`)
expect.soft(indices).toMatchInlineSnapshot(`
[
[
12,
13,
],
[
12,
13,
],
[
4294967295,
4294967295,
],
]
`)
expect(indices).toMatchObject([[12, 13], [12, 13], [4294967295, 4294967295]])
})

Expand All @@ -51,8 +77,15 @@ it('expected match: 4', () => {
'instance Yesod WebApp\n',
0,
)
expect.soft(regex.source).toMatchInlineSnapshot()
expect.soft(indices).toMatchInlineSnapshot()
expect.soft(regex.source).toMatchInlineSnapshot(`"(?=\\}|;)|^(?!\\s+\\S|\\s*(?:$|\\{-[^@]|--+(?![\\p{S}\\p{P}&&^(),;\\[\\]{}\`_"']).*$))"`)
expect.soft(indices).toMatchInlineSnapshot(`
[
[
0,
0,
],
]
`)
expect(indices).toMatchObject([[0, 0]])
})

Expand All @@ -62,8 +95,19 @@ it('expected match: 5', () => {
'mkYesod "WebApp" [parseRoutes|\n',
0,
)
expect.soft(regex.source).toMatchInlineSnapshot()
expect.soft(indices).toMatchInlineSnapshot()
expect.soft(regex.source).toMatchInlineSnapshot(`"(?=\\b(?<!')(where)\\b(?!'))|(?=\\}|;)|^(?!\\s+\\S|\\s*(?:$|\\{-[^@]|--+(?![\\p{S}\\p{P}&&^(),;\\[\\]{}\`_"']).*$))"`)
expect.soft(indices).toMatchInlineSnapshot(`
[
[
0,
0,
],
[
4294967295,
4294967295,
],
]
`)
expect(indices).toMatchObject([[0, 0], [4294967295, 4294967295]])
})

Expand All @@ -73,8 +117,63 @@ it('expected match: 6', () => {
'getHomeR = defaultLayout [whamlet|\n',
0,
)
expect.soft(regex.source).toMatchInlineSnapshot()
expect.soft(indices).toMatchInlineSnapshot()
expect.soft(regex.source).toMatchInlineSnapshot(`"(?<![\\p{S}\\p{P}&&^(),;\\[\\]\`{}_"''])(?:(\\.\\.)|(:)|(=)|(\\\\)|(\\|)|(<-|←)|(->|→)|(-<|↢)|(-<<|⤛)|(>-|⤚)|(>>-|⤜)|(∀))(?![\\p{S}\\p{P}&&^(),;\\[\\]\`{}_"''])"`)
expect.soft(indices).toMatchInlineSnapshot(`
[
[
9,
10,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
9,
10,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
]
`)
expect(indices).toMatchObject([[9, 10], [4294967295, 4294967295], [4294967295, 4294967295], [9, 10], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295]])
})

Expand All @@ -84,7 +183,62 @@ it('expected match: 7', () => {
'main = warpEnv WebApp\n',
0,
)
expect.soft(regex.source).toMatchInlineSnapshot()
expect.soft(indices).toMatchInlineSnapshot()
expect.soft(regex.source).toMatchInlineSnapshot(`"(?<![\\p{S}\\p{P}&&^(),;\\[\\]\`{}_"''])(?:(\\.\\.)|(:)|(=)|(\\\\)|(\\|)|(<-|←)|(->|→)|(-<|↢)|(-<<|⤛)|(>-|⤚)|(>>-|⤜)|(∀))(?![\\p{S}\\p{P}&&^(),;\\[\\]\`{}_"''])"`)
expect.soft(indices).toMatchInlineSnapshot(`
[
[
5,
6,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
5,
6,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
[
4294967295,
4294967295,
],
]
`)
expect(indices).toMatchObject([[5, 6], [4294967295, 4294967295], [4294967295, 4294967295], [5, 6], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295], [4294967295, 4294967295]])
})
Loading

0 comments on commit 3c4c544

Please sign in to comment.