diff --git a/.travis.yml b/.travis.yml index c608053..7a6cd42 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,10 +12,7 @@ addons: - g++-4.8 node_js: - - "iojs" - "node" - - "0.10" - - "0.12" install: - export CXX=g++-4.8 diff --git a/Gruntfile.coffee b/Gruntfile.coffee deleted file mode 100644 index 9c9acd8..0000000 --- a/Gruntfile.coffee +++ /dev/null @@ -1,57 +0,0 @@ -module.exports = (grunt) -> - grunt.initConfig - pkg: grunt.file.readJSON('package.json') - - coffee: - glob_to_multiple: - expand: true - cwd: 'src' - src: ['*.coffee'] - dest: 'lib' - ext: '.js' - - coffeelint: - options: - max_line_length: - level: 'ignore' - src: ['src/**/*.coffee'] - test: ['spec/**/*.coffee'] - - cpplint: - files: [ - 'src/**/*.cc' - 'src/**/*.h' - ] - reporter: 'spec' - verbosity: 1 - filters: - build: - include: false - legal: - copyright: false - whitespace: - line_length: false - - shell: - rebuild: - command: 'npm build .' - options: - stdout: true - stderr: true - failOnError: true - - test: - command: 'node node_modules/jasmine-focused/bin/jasmine-focused --captureExceptions --coffee spec/' - options: - stdout: true - stderr: true - failOnError: true - - grunt.loadNpmTasks('grunt-contrib-coffee') - grunt.loadNpmTasks('grunt-coffeelint') - grunt.loadNpmTasks('grunt-shell') - grunt.loadNpmTasks('node-cpplint') - grunt.registerTask('default', ['lint', 'coffee', 'shell:rebuild']) - grunt.registerTask('test', ['default', 'shell:test']) - grunt.registerTask('lint', ['coffeelint', 'cpplint']) - grunt.registerTask('publish', ['coffee', 'lint']) diff --git a/benchmark/benchmark.coffee b/benchmark/benchmark.coffee deleted file mode 100755 index 9f62c8b..0000000 --- a/benchmark/benchmark.coffee +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env coffee - -fs = require 'fs' -path = require 'path' -{OnigScanner} = require '../src/oniguruma' - -runBenchmarkSync = (lines, scanner) -> - startTime = Date.now() - matches = 0 - - for line in lines - for position in [0..line.length] - 
matches++ if scanner.findNextMatchSync(line, position) - - console.log "sync: #{matches} matches in #{Date.now() - startTime}ms" - -runBenchmarkAsync = (lines, scanner) -> - matches = 0 - callsInProgress = 0 - - callback = (error, match) -> - matches++ if match? - if --callsInProgress is 0 - console.log "async: #{matches} matches in #{Date.now() - startTime}ms" - - startTime = Date.now() - for line in lines - for position in [0..line.length] - callsInProgress++ - scanner.findNextMatch(line, position, callback) - -console.log 'oneline.js' -runBenchmarkSync(fs.readFileSync(path.join(__dirname, 'oneline.js'), 'utf8').split('\n'), - new OnigScanner(['\\[', '\\]', '\\{', '\\}'])) -console.log 'large.js' -runBenchmarkSync(fs.readFileSync(path.join(__dirname, 'large.js'), 'utf8').split('\n'), - new OnigScanner(['this', 'var', 'selector', 'window'])) -runBenchmarkAsync(fs.readFileSync(path.join(__dirname, 'large.js'), 'utf8').split('\n'), - new OnigScanner(['this', 'var', 'selector', 'window'])) diff --git a/benchmark/benchmark.js b/benchmark/benchmark.js new file mode 100755 index 0000000..bc62a57 --- /dev/null +++ b/benchmark/benchmark.js @@ -0,0 +1,57 @@ +#!/usr/bin/env node + +const fs = require('fs') +const path = require('path') +const {OnigScanner} = require('..') + +function runBenchmarkSync (lines, scanner) { + let startTime = Date.now() + let matches = 0 + + for (let row = 0, rowCount = lines.length; row < rowCount; row++) { + const line = lines[row] + for (let position = 0, length = line.length; position <= length; position++) { + if (scanner.findNextMatchSync(line, position)) matches++ + } + } + + console.log(`sync: ${matches} matches in ${Date.now() - startTime}ms`) +} + +function runBenchmarkAsync (lines, scanner) { + let matches = 0 + let callsInProgress = 0 + + function callback (error, match) { + if (match != null) { matches++ } + if (--callsInProgress === 0) { + console.log(`async: ${matches} matches in ${Date.now() - startTime}ms`) + } + }; + + var 
startTime = Date.now() + for (let row = 0, rowCount = lines.length; row < rowCount; row++) { + const line = lines[row] + for (let position = 0, length = line.length; position <= length; position++) { + callsInProgress++ + scanner.findNextMatch(line, position, callback) + } + } +} + +console.log('oneline.js') +runBenchmarkSync( + fs.readFileSync(path.join(__dirname, 'oneline.js'), 'utf8').split('\n'), + new OnigScanner(['\\[', '\\]', '\\{', '\\}']) +) + +console.log('large.js') +runBenchmarkSync( + fs.readFileSync(path.join(__dirname, 'large.js'), 'utf8').split('\n'), + new OnigScanner(['this', 'var', 'selector', 'window']) +) + +runBenchmarkAsync( + fs.readFileSync(path.join(__dirname, 'large.js'), 'utf8').split('\n'), + new OnigScanner(['this', 'var', 'selector', 'window']) +) diff --git a/package.json b/package.json index 21b8fea..f974116 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,8 @@ { - "main": "./lib/oniguruma.js", + "main": "./src/oniguruma.js", "name": "oniguruma", "description": "oniguruma regular expression library", - "version": "6.1.1", + "version": "6.2.0-1", "licenses": [ { "type": "MIT", @@ -26,22 +26,13 @@ ], "devDependencies": { "async": "~0.9.0", - "coffee-script": "~1.9.1", - "coffeelint": "~1.9.2", - "grunt": "~0.4.5", - "grunt-cli": "~0.1.13", - "grunt-coffeelint": "0.0.13", - "grunt-contrib-coffee": "~0.13.0", - "grunt-shell": "~1.1.2", - "jasmine-focused": "~1.0.7", - "node-cpplint": "~0.4.0" + "jasmine-focused": "~1.0.7" }, "dependencies": { "nan": "^2.0.9" }, "scripts": { - "prepublish": "grunt publish", - "test": "grunt test", + "test": "jasmine-focused --captureExceptions spec/*", "benchmark": "benchmark/benchmark.coffee" } } diff --git a/spec/onig-reg-exp-spec.coffee b/spec/onig-reg-exp-spec.coffee deleted file mode 100644 index a48bf45..0000000 --- a/spec/onig-reg-exp-spec.coffee +++ /dev/null @@ -1,173 +0,0 @@ -{OnigRegExp} = require '../lib/oniguruma' - -describe 'OnigRegExp', -> - describe '::search(string, index, 
callback)', -> - it 'returns an array of the match and all capture groups', -> - regex = new OnigRegExp('\\w(\\d+)') - searchCallback = jasmine.createSpy('searchCallback') - result = regex.search('----a123----', searchCallback) - - waitsFor -> - searchCallback.callCount is 1 - - runs -> - result = searchCallback.argsForCall[0][1] - expect(result.length).toBe 2 - expect(result[0].match).toBe 'a123' - expect(result[0].start).toBe 4 - expect(result[0].end).toBe 8 - expect(result[0].index).toBe 0 - expect(result[0].length).toBe 4 - expect(result[1].match).toBe '123' - expect(result[1].start).toBe 5 - expect(result[1].end).toBe 8 - expect(result[1].index).toBe 1 - expect(result[1].length).toBe 3 - - it 'returns null if it does not match', -> - regex = new OnigRegExp('\\w(\\d+)') - searchCallback = jasmine.createSpy('searchCallback') - result = regex.search('--------', searchCallback) - - waitsFor -> - searchCallback.callCount is 1 - - runs -> - result = searchCallback.argsForCall[0][1] - expect(result).toBeNull() - - describe 'when the string being searched contains a unicode character', -> - it 'returns correct indices and lengths', -> - regex = new OnigRegExp('a') - searchCallback = jasmine.createSpy('searchCallback') - regex.search('ç√Ωa', 0, searchCallback) - - waitsFor -> - searchCallback.callCount is 1 - - runs -> - firstMatch = searchCallback.argsForCall[0][1] - expect(firstMatch[0].start).toBe 3 - expect(firstMatch[0].match).toBe 'a' - regex.search('ç√Ωabcd≈ßåabcd', 5, searchCallback) - - waitsFor -> - searchCallback.callCount is 2 - - runs -> - secondMatch = searchCallback.argsForCall[1][1] - expect(secondMatch[0].start).toBe 10 - expect(secondMatch[0].match).toBe 'a' - - describe 'when the string being searched contains non-Basic Multilingual Plane characters', -> - it 'returns correct indices and matches', -> - regex = new OnigRegExp("'") - searchCallback = jasmine.createSpy('searchCallback') - regex.search("'\uD835\uDF97'", 0, searchCallback) - - waitsFor -> 
- searchCallback.callCount is 1 - - runs -> - match = searchCallback.argsForCall[0][1] - expect(match[0].start).toBe 0 - expect(match[0].match).toBe "'" - regex.search("'\uD835\uDF97'", 1, searchCallback) - - waitsFor -> - searchCallback.callCount is 2 - - runs -> - match = searchCallback.argsForCall[1][1] - expect(match[0].start).toBe 3 - expect(match[0].match).toBe "'" - regex.search("'\uD835\uDF97'", 2, searchCallback) - - waitsFor -> - searchCallback.callCount is 3 - - runs -> - match = searchCallback.argsForCall[2][1] - expect(match[0].start).toBe 3 - expect(match[0].match).toBe "'" - - describe '::searchSync(string, index)', -> - it 'returns an array of the match and all capture groups', -> - regex = new OnigRegExp('\\w(\\d+)') - result = regex.searchSync('----a123----') - expect(result.length).toBe 2 - expect(result[0].match).toBe 'a123' - expect(result[0].start).toBe 4 - expect(result[0].end).toBe 8 - expect(result[0].index).toBe 0 - expect(result[0].length).toBe 4 - expect(result[1].match).toBe '123' - expect(result[1].start).toBe 5 - expect(result[1].end).toBe 8 - expect(result[1].index).toBe 1 - expect(result[1].length).toBe 3 - - it 'returns null if it does not match', -> - regex = new OnigRegExp('\\w(\\d+)') - result = regex.searchSync('--------') - expect(result).toBeNull() - - describe 'when the string being searched contains a unicode character', -> - it 'returns correct indices and lengths', -> - regex = new OnigRegExp('a') - - firstMatch = regex.searchSync('ç√Ωa', 0) - expect(firstMatch[0].start).toBe 3 - expect(firstMatch[0].match).toBe 'a' - - secondMatch = regex.searchSync('ç√Ωabcd≈ßåabcd', 5) - expect(secondMatch[0].start).toBe 10 - expect(secondMatch[0].match).toBe 'a' - - describe 'when the string being searched contains non-Basic Multilingual Plane characters', -> - it 'returns correct indices and matches', -> - regex = new OnigRegExp("'") - - match = regex.searchSync("'\uD835\uDF97'", 0) - expect(match[0].start).toBe 0 - 
expect(match[0].match).toBe "'" - - match = regex.searchSync("'\uD835\uDF97'", 1) - expect(match[0].start).toBe 3 - expect(match[0].match).toBe "'" - - match = regex.searchSync("'\uD835\uDF97'", 2) - expect(match[0].start).toBe 3 - expect(match[0].match).toBe "'" - - match = regex.searchSync("'\uD835\uDF97'", 3) - expect(match[0].start).toBe 3 - expect(match[0].match).toBe "'" - - describe '::testSync(string)', -> - it 'returns true if the string matches the pattern', -> - expect(new OnigRegExp("a[b-d]c").testSync('aec')).toBe false - expect(new OnigRegExp("a[b-d]c").testSync('abc')).toBe true - expect(new OnigRegExp(false).testSync(false)).toBe true - expect(new OnigRegExp(false).testSync(true)).toBe false - - describe '::test(string, callback)', -> - it 'calls back with true if the string matches the pattern', -> - testCallback = jasmine.createSpy('testCallback') - - new OnigRegExp("a[b-d]c").test('aec', testCallback) - - waitsFor -> - testCallback.callCount is 1 - - runs -> - expect(testCallback.argsForCall[0][0]).toBeNull() - expect(testCallback.argsForCall[0][1]).toBe false - new OnigRegExp("a[b-d]c").test('abc', testCallback) - - waitsFor -> - testCallback.callCount is 2 - - runs -> - expect(testCallback.argsForCall[1][0]).toBeNull() - expect(testCallback.argsForCall[1][1]).toBe true diff --git a/spec/onig-reg-exp-spec.js b/spec/onig-reg-exp-spec.js new file mode 100644 index 0000000..7f9b83e --- /dev/null +++ b/spec/onig-reg-exp-spec.js @@ -0,0 +1,192 @@ +const {OnigRegExp} = require('..') + +describe('OnigRegExp', () => { + describe('::search(string, index, callback)', () => { + it('returns an array of the match and all capture groups', () => { + let regex = new OnigRegExp('\\w(\\d+)') + let searchCallback = jasmine.createSpy('searchCallback') + let result = regex.search('----a123----', searchCallback) + + waitsFor(() => searchCallback.callCount === 1) + + runs(() => { + result = searchCallback.argsForCall[0][1] + expect(result.length).toBe(2) + 
expect(result[0].match).toBe('a123') + expect(result[0].start).toBe(4) + expect(result[0].end).toBe(8) + expect(result[0].index).toBe(0) + expect(result[0].length).toBe(4) + expect(result[1].match).toBe('123') + expect(result[1].start).toBe(5) + expect(result[1].end).toBe(8) + expect(result[1].index).toBe(1) + expect(result[1].length).toBe(3) + }) + }) + + it('returns null if it does not match', () => { + let regex = new OnigRegExp('\\w(\\d+)') + let searchCallback = jasmine.createSpy('searchCallback') + let result = regex.search('--------', searchCallback) + + waitsFor(() => searchCallback.callCount === 1) + + runs(() => { + result = searchCallback.argsForCall[0][1] + expect(result).toBeNull() + }) + }) + + describe('when the string being searched contains a unicode character', () => + it('returns correct indices and lengths', () => { + let regex = new OnigRegExp('a') + let searchCallback = jasmine.createSpy('searchCallback') + regex.search('ç√Ωa', 0, searchCallback) + + waitsFor(() => searchCallback.callCount === 1) + + runs(() => { + let firstMatch = searchCallback.argsForCall[0][1] + expect(firstMatch[0].start).toBe(3) + expect(firstMatch[0].match).toBe('a') + regex.search('ç√Ωabcd≈ßåabcd', 5, searchCallback) + }) + + waitsFor(() => searchCallback.callCount === 2) + + runs(() => { + let secondMatch = searchCallback.argsForCall[1][1] + expect(secondMatch[0].start).toBe(10) + expect(secondMatch[0].match).toBe('a') + }) + }) + ) + + describe('when the string being searched contains non-Basic Multilingual Plane characters', () => + it('returns correct indices and matches', () => { + let regex = new OnigRegExp("'") + let searchCallback = jasmine.createSpy('searchCallback') + regex.search("'\uD835\uDF97'", 0, searchCallback) + + waitsFor(() => searchCallback.callCount === 1) + + runs(() => { + let match = searchCallback.argsForCall[0][1] + expect(match[0].start).toBe(0) + expect(match[0].match).toBe("'") + regex.search("'\uD835\uDF97'", 1, searchCallback) + }) + + 
waitsFor(() => searchCallback.callCount === 2) + + runs(() => { + let match = searchCallback.argsForCall[1][1] + expect(match[0].start).toBe(3) + expect(match[0].match).toBe("'") + regex.search("'\uD835\uDF97'", 2, searchCallback) + }) + + waitsFor(() => searchCallback.callCount === 3) + + runs(() => { + let match = searchCallback.argsForCall[2][1] + expect(match[0].start).toBe(3) + expect(match[0].match).toBe("'") + }) + }) + ) + }) + + describe('::searchSync(string, index)', () => { + it('returns an array of the match and all capture groups', () => { + let regex = new OnigRegExp('\\w(\\d+)') + let result = regex.searchSync('----a123----') + expect(result.length).toBe(2) + expect(result[0].match).toBe('a123') + expect(result[0].start).toBe(4) + expect(result[0].end).toBe(8) + expect(result[0].index).toBe(0) + expect(result[0].length).toBe(4) + expect(result[1].match).toBe('123') + expect(result[1].start).toBe(5) + expect(result[1].end).toBe(8) + expect(result[1].index).toBe(1) + expect(result[1].length).toBe(3) + }) + + it('returns null if it does not match', () => { + let regex = new OnigRegExp('\\w(\\d+)') + let result = regex.searchSync('--------') + expect(result).toBeNull() + }) + + describe('when the string being searched contains a unicode character', () => + it('returns correct indices and lengths', () => { + let regex = new OnigRegExp('a') + + let firstMatch = regex.searchSync('ç√Ωa', 0) + expect(firstMatch[0].start).toBe(3) + expect(firstMatch[0].match).toBe('a') + + let secondMatch = regex.searchSync('ç√Ωabcd≈ßåabcd', 5) + expect(secondMatch[0].start).toBe(10) + expect(secondMatch[0].match).toBe('a') + }) + ) + + describe('when the string being searched contains non-Basic Multilingual Plane characters', () => + it('returns correct indices and matches', () => { + let regex = new OnigRegExp("'") + + let match = regex.searchSync("'\uD835\uDF97'", 0) + expect(match[0].start).toBe(0) + expect(match[0].match).toBe("'") + + match = 
regex.searchSync("'\uD835\uDF97'", 1) + expect(match[0].start).toBe(3) + expect(match[0].match).toBe("'") + + match = regex.searchSync("'\uD835\uDF97'", 2) + expect(match[0].start).toBe(3) + expect(match[0].match).toBe("'") + + match = regex.searchSync("'\uD835\uDF97'", 3) + expect(match[0].start).toBe(3) + expect(match[0].match).toBe("'") + }) + ) + }) + + describe('::testSync(string)', () => + it('returns true if the string matches the pattern', () => { + expect(new OnigRegExp('a[b-d]c').testSync('aec')).toBe(false) + expect(new OnigRegExp('a[b-d]c').testSync('abc')).toBe(true) + expect(new OnigRegExp(false).testSync(false)).toBe(true) + expect(new OnigRegExp(false).testSync(true)).toBe(false) + }) + ) + + describe('::test(string, callback)', () => + it('calls back with true if the string matches the pattern', () => { + let testCallback = jasmine.createSpy('testCallback') + + new OnigRegExp('a[b-d]c').test('aec', testCallback) + + waitsFor(() => testCallback.callCount === 1) + + runs(() => { + expect(testCallback.argsForCall[0][0]).toBeNull() + expect(testCallback.argsForCall[0][1]).toBe(false) + new OnigRegExp('a[b-d]c').test('abc', testCallback) + }) + + waitsFor(() => testCallback.callCount === 2) + + runs(() => { + expect(testCallback.argsForCall[1][0]).toBeNull() + expect(testCallback.argsForCall[1][1]).toBe(true) + }) + }) + ) +}) diff --git a/spec/onig-scanner-spec.coffee b/spec/onig-scanner-spec.coffee deleted file mode 100644 index a61ec31..0000000 --- a/spec/onig-scanner-spec.coffee +++ /dev/null @@ -1,173 +0,0 @@ -{OnigScanner} = require '../lib/oniguruma' - -describe "OnigScanner", -> - describe "::findNextMatchSync", -> - it "returns the index of the matching pattern", -> - scanner = new OnigScanner(["a", "b", "c"]) - expect(scanner.findNextMatchSync("x", 0)).toBe null - expect(scanner.findNextMatchSync("xxaxxbxxc", 0).index).toBe 0 - expect(scanner.findNextMatchSync("xxaxxbxxc", 4).index).toBe 1 - expect(scanner.findNextMatchSync("xxaxxbxxc", 
7).index).toBe 2 - expect(scanner.findNextMatchSync("xxaxxbxxc", 9)).toBe null - - it "includes the scanner with the results", -> - scanner = new OnigScanner(["a"]) - expect(scanner.findNextMatchSync("a", 0).scanner).toBe scanner - - describe "when the string searched contains unicode characters", -> - it "returns the correct matching pattern", -> - scanner = new OnigScanner(["1", "2"]) - match = scanner.findNextMatchSync('ab…cde21', 5) - expect(match.index).toBe 1 - - scanner = new OnigScanner(['\"']) - match = scanner.findNextMatchSync('{"…": 1}', 1) - expect(match.captureIndices).toEqual [{index: 0, start: 1, end: 2, length: 1}] - - describe "when the string searched contains surrogate pairs", -> - it "counts paired characters as 2 characters in both arguments and return values", -> - scanner = new OnigScanner(["Y", "X"]) - match = scanner.findNextMatchSync('a💻bYX', 0) - expect(match.captureIndices).toEqual [{index: 0, start: 4, end: 5, length: 1}] - - match = scanner.findNextMatchSync('a💻bYX', 1) - expect(match.captureIndices).toEqual [{index: 0, start: 4, end: 5, length: 1}] - - match = scanner.findNextMatchSync('a💻bYX', 3) - expect(match.captureIndices).toEqual [{index: 0, start: 4, end: 5, length: 1}] - - match = scanner.findNextMatchSync('a💻bYX', 4) - expect(match.captureIndices).toEqual [{index: 0, start: 4, end: 5, length: 1}] - - match = scanner.findNextMatchSync('a💻bYX', 5) - expect(match.index).toBe 1 - expect(match.captureIndices).toEqual [{index: 0, start: 5, end: 6, length: 1}] - - it "returns false when the input string isn't a string", -> - scanner = new OnigScanner(["1"]) - expect(scanner.findNextMatchSync()).toBe null - expect(scanner.findNextMatchSync(null)).toBe null - expect(scanner.findNextMatchSync(2)).toBe null - expect(scanner.findNextMatchSync(false)).toBe null - - it "uses 0 as the start position when the input start position isn't a valid number", -> - scanner = new OnigScanner(["1"]) - expect(scanner.findNextMatchSync('a1', 
Infinity).index).toBe 0 - expect(scanner.findNextMatchSync('a1', -1).index).toBe 0 - expect(scanner.findNextMatchSync('a1', false).index).toBe 0 - expect(scanner.findNextMatchSync('a1', 'food').index).toBe 0 - - describe "when the regular expression contains double byte characters", -> - it "returns the correct match length", -> - scanner = new OnigScanner(["Возврат"]) - match = scanner.findNextMatchSync('Возврат long_var_name;', 0) - expect(match.captureIndices).toEqual [{index: 0, start: 0, end: 7, length: 7}] - - describe "when the input string contains invalid surrogate pairs", -> - it "interprets them as a code point", -> - scanner = new OnigScanner(["X"]) - match = scanner.findNextMatchSync('X' + String.fromCharCode(0xd83c) + 'X', 0) - expect(match.captureIndices).toEqual [{index: 0, start: 0, end: 1, length: 1}] - - match = scanner.findNextMatchSync('X' + String.fromCharCode(0xd83c) + 'X', 1) - expect(match.captureIndices).toEqual [{index: 0, start: 2, end: 3, length: 1}] - - match = scanner.findNextMatchSync('X' + String.fromCharCode(0xd83c) + 'X', 2) - expect(match.captureIndices).toEqual [{index: 0, start: 2, end: 3, length: 1}] - - match = scanner.findNextMatchSync('X' + String.fromCharCode(0xdfff) + 'X', 0) - expect(match.captureIndices).toEqual [{index: 0, start: 0, end: 1, length: 1}] - - match = scanner.findNextMatchSync('X' + String.fromCharCode(0xdfff) + 'X', 1) - expect(match.captureIndices).toEqual [{index: 0, start: 2, end: 3, length: 1}] - - match = scanner.findNextMatchSync('X' + String.fromCharCode(0xdfff) + 'X', 2) - expect(match.captureIndices).toEqual [{index: 0, start: 2, end: 3, length: 1}] - - # These are actually valid, just testing the min & max - match = scanner.findNextMatchSync('X' + String.fromCharCode(0xd800) + String.fromCharCode(0xdc00) + 'X', 2) - expect(match.captureIndices).toEqual [{index: 0, start: 3, end: 4, length: 1}] - - match = scanner.findNextMatchSync('X' + String.fromCharCode(0xdbff) + String.fromCharCode(0xdfff) + 
'X', 2) - expect(match.captureIndices).toEqual [{index: 0, start: 3, end: 4, length: 1}] - - describe "when the start offset is out of bounds", -> - it "it gets clamped", -> - scanner = new OnigScanner(["X"]) - match = scanner.findNextMatchSync('X💻X', -1000) - expect(match.captureIndices).toEqual [{index: 0, start: 0, end: 1, length: 1}] - - match = scanner.findNextMatchSync('X💻X', 1000) - expect(match).toEqual null - - describe "::findNextMatch", -> - matchCallback = null - - beforeEach -> - matchCallback = jasmine.createSpy('matchCallback') - - it "returns the index of the matching pattern", -> - scanner = new OnigScanner(["a", "b", "c"]) - scanner.findNextMatch("x", 0, matchCallback) - - waitsFor -> - matchCallback.callCount is 1 - - runs -> - expect(matchCallback.argsForCall[0][0]).toBeNull() - expect(matchCallback.argsForCall[0][1]).toBeNull() - scanner.findNextMatch("xxaxxbxxc", 0, matchCallback) - - waitsFor -> - matchCallback.callCount is 2 - - runs -> - expect(matchCallback.argsForCall[1][0]).toBeNull() - expect(matchCallback.argsForCall[1][1].index).toBe 0 - scanner.findNextMatch("xxaxxbxxc", 4, matchCallback) - - waitsFor -> - matchCallback.callCount is 3 - - runs -> - expect(matchCallback.argsForCall[2][0]).toBeNull() - expect(matchCallback.argsForCall[2][1].index).toBe 1 - scanner.findNextMatch("xxaxxbxxc", 7, matchCallback) - - waitsFor -> - matchCallback.callCount is 4 - - runs -> - expect(matchCallback.argsForCall[3][0]).toBeNull() - expect(matchCallback.argsForCall[3][1].index).toBe 2 - scanner.findNextMatch("xxaxxbxxc", 9, matchCallback) - - waitsFor -> - matchCallback.callCount is 5 - - runs -> - expect(matchCallback.argsForCall[4][0]).toBeNull() - expect(matchCallback.argsForCall[4][1]).toBeNull() - - it "includes the scanner with the results", -> - scanner = new OnigScanner(["a"]) - scanner.findNextMatch("a", 0, matchCallback) - - waitsFor -> - matchCallback.callCount is 1 - - runs -> - expect(matchCallback.argsForCall[0][0]).toBeNull() - 
expect(matchCallback.argsForCall[0][1].scanner).toBe scanner - - describe "when the string searched contains unicode characters", -> - it "returns the correct matching pattern", -> - scanner = new OnigScanner(["1", "2"]) - scanner.findNextMatch('ab…cde21', 5, matchCallback) - - waitsFor -> - matchCallback.callCount is 1 - - runs -> - expect(matchCallback.argsForCall[0][0]).toBeNull() - expect(matchCallback.argsForCall[0][1].index).toBe 1 diff --git a/spec/onig-scanner-spec.js b/spec/onig-scanner-spec.js new file mode 100644 index 0000000..7b2e8bb --- /dev/null +++ b/spec/onig-scanner-spec.js @@ -0,0 +1,190 @@ +const {OnigScanner} = require('..') + +describe('OnigScanner', () => { + describe('::findNextMatchSync', () => { + it('returns the index of the matching pattern', () => { + let scanner = new OnigScanner(['a', 'b', 'c']) + expect(scanner.findNextMatchSync('x', 0)).toBe(null) + expect(scanner.findNextMatchSync('xxaxxbxxc', 0).index).toBe(0) + expect(scanner.findNextMatchSync('xxaxxbxxc', 4).index).toBe(1) + expect(scanner.findNextMatchSync('xxaxxbxxc', 7).index).toBe(2) + expect(scanner.findNextMatchSync('xxaxxbxxc', 9)).toBe(null) + }) + + it('includes the scanner with the results', () => { + let scanner = new OnigScanner(['a']) + expect(scanner.findNextMatchSync('a', 0).scanner).toBe(scanner) + }) + + describe('when the string searched contains unicode characters', () => + it('returns the correct matching pattern', () => { + let scanner = new OnigScanner(['1', '2']) + let match = scanner.findNextMatchSync('ab…cde21', 5) + expect(match.index).toBe(1) + + scanner = new OnigScanner(['\"']) + match = scanner.findNextMatchSync('{"…": 1}', 1) + expect(match.captureIndices).toEqual([{index: 0, start: 1, end: 2, length: 1}]) + })) + + describe('when the string searched contains surrogate pairs', () => + it('counts paired characters as 2 characters in both arguments and return values', () => { + let scanner = new OnigScanner(['Y', 'X']) + let match = 
scanner.findNextMatchSync('a💻bYX', 0) + expect(match.captureIndices).toEqual([{index: 0, start: 4, end: 5, length: 1}]) + + match = scanner.findNextMatchSync('a💻bYX', 1) + expect(match.captureIndices).toEqual([{index: 0, start: 4, end: 5, length: 1}]) + + match = scanner.findNextMatchSync('a💻bYX', 3) + expect(match.captureIndices).toEqual([{index: 0, start: 4, end: 5, length: 1}]) + + match = scanner.findNextMatchSync('a💻bYX', 4) + expect(match.captureIndices).toEqual([{index: 0, start: 4, end: 5, length: 1}]) + + match = scanner.findNextMatchSync('a💻bYX', 5) + expect(match.index).toBe(1) + expect(match.captureIndices).toEqual([{index: 0, start: 5, end: 6, length: 1}]) + })) + + it("returns false when the input string isn't a string", () => { + let scanner = new OnigScanner(['1']) + expect(scanner.findNextMatchSync()).toBe(null) + expect(scanner.findNextMatchSync(null)).toBe(null) + expect(scanner.findNextMatchSync(2)).toBe(null) + expect(scanner.findNextMatchSync(false)).toBe(null) + }) + + it("uses 0 as the start position when the input start position isn't a valid number", () => { + let scanner = new OnigScanner(['1']) + expect(scanner.findNextMatchSync('a1', Infinity).index).toBe(0) + expect(scanner.findNextMatchSync('a1', -1).index).toBe(0) + expect(scanner.findNextMatchSync('a1', false).index).toBe(0) + expect(scanner.findNextMatchSync('a1', 'food').index).toBe(0) + }) + }) + + describe('when the regular expression contains double byte characters', () => + it('returns the correct match length', () => { + let scanner = new OnigScanner(['Возврат']) + let match = scanner.findNextMatchSync('Возврат long_var_name;', 0) + expect(match.captureIndices).toEqual([{index: 0, start: 0, end: 7, length: 7}]) + })) + + describe('when the input string contains invalid surrogate pairs', () => + it('interprets them as a code point', () => { + let scanner = new OnigScanner(['X']) + let match = scanner.findNextMatchSync(`X${String.fromCharCode(0xd83c)}X`, 0) + 
expect(match.captureIndices).toEqual([{index: 0, start: 0, end: 1, length: 1}]) + + // Characters after unmatched high surrogates are not found. + match = scanner.findNextMatchSync(`X${String.fromCharCode(0xd83c)}X`, 1) + expect(match).toBeNull() + + match = scanner.findNextMatchSync(`X${String.fromCharCode(0xd83c)}X`, 2) + expect(match.captureIndices).toEqual([{index: 0, start: 2, end: 3, length: 1}]) + + match = scanner.findNextMatchSync(`X${String.fromCharCode(0xdfff)}X`, 0) + expect(match.captureIndices).toEqual([{index: 0, start: 0, end: 1, length: 1}]) + + match = scanner.findNextMatchSync(`X${String.fromCharCode(0xdfff)}X`, 1) + expect(match.captureIndices).toEqual([{index: 0, start: 2, end: 3, length: 1}]) + + match = scanner.findNextMatchSync(`X${String.fromCharCode(0xdfff)}X`, 2) + expect(match.captureIndices).toEqual([{index: 0, start: 2, end: 3, length: 1}]) + + // These are actually valid, just testing the min & max + match = scanner.findNextMatchSync(`X${String.fromCharCode(0xd800)}${String.fromCharCode(0xdc00)}X`, 2) + expect(match.captureIndices).toEqual([{index: 0, start: 3, end: 4, length: 1}]) + + match = scanner.findNextMatchSync(`X${String.fromCharCode(0xdbff)}${String.fromCharCode(0xdfff)}X`, 2) + expect(match.captureIndices).toEqual([{index: 0, start: 3, end: 4, length: 1}]) + })) + + describe('when the start offset is out of bounds', () => + it('it gets clamped', () => { + let scanner = new OnigScanner(['X']) + let match = scanner.findNextMatchSync('X💻X', -1000) + expect(match.captureIndices).toEqual([{index: 0, start: 0, end: 1, length: 1}]) + + match = scanner.findNextMatchSync('X💻X', 1000) + expect(match).toEqual(null) + }) + ) + + describe('::findNextMatch', () => { + let matchCallback + + beforeEach(() => matchCallback = jasmine.createSpy('matchCallback')) + + it('returns the index of the matching pattern', () => { + let scanner = new OnigScanner(['a', 'b', 'c']) + scanner.findNextMatch('x', 0, matchCallback) + + waitsFor(() => 
matchCallback.callCount === 1) + + runs(() => { + expect(matchCallback.argsForCall[0][0]).toBeNull() + expect(matchCallback.argsForCall[0][1]).toBeNull() + scanner.findNextMatch('xxaxxbxxc', 0, matchCallback) + }) + + waitsFor(() => matchCallback.callCount === 2) + + runs(() => { + expect(matchCallback.argsForCall[1][0]).toBeNull() + expect(matchCallback.argsForCall[1][1].index).toBe(0) + scanner.findNextMatch('xxaxxbxxc', 4, matchCallback) + }) + + waitsFor(() => matchCallback.callCount === 3) + + runs(() => { + expect(matchCallback.argsForCall[2][0]).toBeNull() + expect(matchCallback.argsForCall[2][1].index).toBe(1) + scanner.findNextMatch('xxaxxbxxc', 7, matchCallback) + }) + + waitsFor(() => matchCallback.callCount === 4) + + runs(() => { + expect(matchCallback.argsForCall[3][0]).toBeNull() + expect(matchCallback.argsForCall[3][1].index).toBe(2) + scanner.findNextMatch('xxaxxbxxc', 9, matchCallback) + }) + + waitsFor(() => matchCallback.callCount === 5) + + runs(() => { + expect(matchCallback.argsForCall[4][0]).toBeNull() + expect(matchCallback.argsForCall[4][1]).toBeNull() + }) + }) + + it('includes the scanner with the results', () => { + let scanner = new OnigScanner(['a']) + scanner.findNextMatch('a', 0, matchCallback) + + waitsFor(() => matchCallback.callCount === 1) + + runs(() => { + expect(matchCallback.argsForCall[0][0]).toBeNull() + expect(matchCallback.argsForCall[0][1].scanner).toBe(scanner) + }) + }) + + describe('when the string searched contains unicode characters', () => + it('returns the correct matching pattern', () => { + let scanner = new OnigScanner(['1', '2']) + scanner.findNextMatch('ab…cde21', 5, matchCallback) + + waitsFor(() => matchCallback.callCount === 1) + + runs(() => { + expect(matchCallback.argsForCall[0][0]).toBeNull() + expect(matchCallback.argsForCall[0][1].index).toBe(1) + }) + }) + ) + }) +}) diff --git a/spec/onig-string-spec.js b/spec/onig-string-spec.js new file mode 100644 index 0000000..957305d --- /dev/null +++ 
b/spec/onig-string-spec.js @@ -0,0 +1,28 @@ +const {OnigString} = require('..') + +describe('OnigString', () => { + it('has a length property', () => { + expect(new OnigString('abc').length).toBe(3) + }) + + it('can be converted back into a string', () => { + expect(new OnigString('abc').toString()).toBe('abc') + }) + + it('can retrieve substrings (for conveniently inspecting captured text)', () => { + const string = 'abcdef' + const onigString = new OnigString(string) + expect(onigString.substring(2, 3)).toBe(string.substring(2, 3)) + expect(onigString.substring(2)).toBe(string.substring(2)) + expect(onigString.substring()).toBe(string.substring()) + expect(onigString.substring(-1)).toBe(string.substring(-1)) + expect(onigString.substring(-1, -2)).toBe(string.substring(-1, -2)) + + onigString.substring({}) + onigString.substring(null, undefined) + }) + + it('handles invalid arguments', () => { + expect(() => new OnigString(undefined)).toThrow('Argument must be a string') + }) +}) diff --git a/src/onig-reg-exp.cc b/src/onig-reg-exp.cc index 78609eb..c962de1 100644 --- a/src/onig-reg-exp.cc +++ b/src/onig-reg-exp.cc @@ -3,17 +3,17 @@ using ::v8::Exception; using ::v8::String; -OnigRegExp::OnigRegExp(const string& source) - : source_(source), - regex_(NULL) { +OnigRegExp::OnigRegExp(const OnigString &source) : regex_(NULL) { lastSearchStrUniqueId = -1; lastSearchPosition = -1; lastSearchResult = NULL; OnigErrorInfo error; - const UChar* sourceData = (const UChar*)source.data(); - int status = onig_new(&regex_, sourceData, sourceData + source.length(), - ONIG_OPTION_CAPTURE_GROUP, ONIG_ENCODING_UTF8, + + const UChar* sourceStart = reinterpret_cast<const UChar*>(source.content.data()); + const UChar* sourceEnd = reinterpret_cast<const UChar*>(source.content.data() + source.content.size()); + int status = onig_new(&regex_, sourceStart, sourceEnd, + ONIG_OPTION_CAPTURE_GROUP, ONIG_ENCODING_UTF16_LE, ONIG_SYNTAX_DEFAULT, &error); if (status != ONIG_NORMAL) { @@ -28,35 +28,37 @@ OnigRegExp::~OnigRegExp() { 
} shared_ptr<OnigResult> OnigRegExp::Search(OnigString* str, int position) { - if (lastSearchStrUniqueId == str->uniqueId() && lastSearchPosition <= position) { + if (lastSearchStrUniqueId == str->uniqueId && lastSearchPosition <= position) { if (lastSearchResult == NULL || lastSearchResult->LocationAt(0) >= position) { return lastSearchResult; } } - lastSearchStrUniqueId = str->uniqueId(); + lastSearchStrUniqueId = str->uniqueId; lastSearchPosition = position; - lastSearchResult = Search(str->utf8_value(), position, str->utf8_length()); - return lastSearchResult; -} -shared_ptr<OnigResult> OnigRegExp::Search(const char* data, - size_t position, size_t end) { if (!regex_) { Nan::ThrowError(Exception::Error(Nan::New("RegExp is not valid").ToLocalChecked())); return shared_ptr<OnigResult>(); } - const UChar* searchData = reinterpret_cast<const UChar*>(data); + const uint16_t *data = str->content.data(); + const UChar* searchStart = reinterpret_cast<const UChar*>(data); + const UChar* searchPosition = reinterpret_cast<const UChar*>(data + position); + const UChar* searchEnd = reinterpret_cast<const UChar*>(data + str->content.size()); + OnigRegion* region = onig_region_new(); - int status = onig_search(regex_, searchData, searchData + end, - searchData + position, searchData + end, region, - ONIG_OPTION_NONE); + int status = onig_search( + regex_, searchStart, searchEnd, searchPosition, searchEnd, region, + ONIG_OPTION_NONE + ); if (status != ONIG_MISMATCH) { - return shared_ptr<OnigResult>(new OnigResult(region, -1)); + lastSearchResult = shared_ptr<OnigResult>(new OnigResult(region, -1)); } else { onig_region_free(region, 1); - return shared_ptr<OnigResult>(); + lastSearchResult = shared_ptr<OnigResult>(); } + + return lastSearchResult; } diff --git a/src/onig-reg-exp.coffee b/src/onig-reg-exp.coffee deleted file mode 100644 index c159098..0000000 --- a/src/onig-reg-exp.coffee +++ /dev/null @@ -1,33 +0,0 @@ -{OnigScanner} = require '../build/Release/onig_scanner.node' - -module.exports = -class OnigRegExp - constructor: (@source) -> - @scanner = new OnigScanner([@source]) - - captureIndicesForMatch:
(string, match) -> - if match? - {captureIndices} = match - string = @scanner.convertToString(string) - for capture in captureIndices - capture.match = string[capture.start...capture.end] - captureIndices - else - null - - searchSync: (string, startPosition=0) -> - match = @scanner.findNextMatchSync(string, startPosition) - @captureIndicesForMatch(string, match) - - search: (string, startPosition=0, callback) -> - if typeof startPosition is 'function' - callback = startPosition - startPosition = 0 - - @scanner.findNextMatch string, startPosition, (error, match) => - callback?(error, @captureIndicesForMatch(string, match)) - - testSync: (string) -> @searchSync(string)? - - test: (string, callback) -> - @search string, 0, (error, result) -> callback?(error, result?) diff --git a/src/onig-reg-exp.h b/src/onig-reg-exp.h index 0f707a4..3bf66c1 100644 --- a/src/onig-reg-exp.h +++ b/src/onig-reg-exp.h @@ -12,7 +12,7 @@ using ::std::string; class OnigRegExp { public: - explicit OnigRegExp(const string& source); + explicit OnigRegExp(const OnigString &source); ~OnigRegExp(); shared_ptr Search(OnigString* str, int position); @@ -23,7 +23,6 @@ class OnigRegExp { shared_ptr Search(const char* data, size_t position, size_t end); - string source_; regex_t* regex_; int lastSearchStrUniqueId; diff --git a/src/onig-result.cc b/src/onig-result.cc index 3fed531..5645d35 100644 --- a/src/onig-result.cc +++ b/src/onig-result.cc @@ -1,5 +1,7 @@ #include "onig-result.h" +static const unsigned BYTES_PER_UTF16_CHARACTER = 2; + OnigResult::OnigResult(OnigRegion* region, int indexInScanner) : region_(region), indexInScanner(indexInScanner) {} @@ -14,7 +16,7 @@ int OnigResult::Count() { int OnigResult::LocationAt(int index) { int bytes = *(region_->beg + index); if (bytes > 0) - return bytes; + return bytes / BYTES_PER_UTF16_CHARACTER; else return 0; } @@ -22,7 +24,7 @@ int OnigResult::LocationAt(int index) { int OnigResult::LengthAt(int index) { int bytes = *(region_->end + index) - 
*(region_->beg + index); if (bytes > 0) - return bytes; + return bytes / BYTES_PER_UTF16_CHARACTER; else return 0; } diff --git a/src/onig-scanner-worker.cc b/src/onig-scanner-worker.cc index ead8539..cbf4c2a 100644 --- a/src/onig-scanner-worker.cc +++ b/src/onig-scanner-worker.cc @@ -1,8 +1,6 @@ #include "onig-scanner-worker.h" -using ::v8::Array; -using ::v8::Number; -using ::v8::Value; +using namespace v8; void OnigScannerWorker::Execute() { bestResult = searcher->Search(source, charOffset); @@ -18,8 +16,8 @@ void OnigScannerWorker::HandleOKCallback() { int resultCount = bestResult->Count(); Local captures = Nan::New(resultCount); for (int index = 0; index < resultCount; index++) { - int captureStart = source->ConvertUtf8OffsetToUtf16(bestResult->LocationAt(index)); - int captureEnd = source->ConvertUtf8OffsetToUtf16(bestResult->LocationAt(index) + bestResult->LengthAt(index)); + int captureStart = bestResult->LocationAt(index); + int captureEnd = bestResult->LocationAt(index) + bestResult->LengthAt(index); Local capture = Nan::New(); capture->Set(Nan::New("index").ToLocalChecked(), Nan::New(index)); diff --git a/src/onig-scanner-worker.h b/src/onig-scanner-worker.h index 44f9996..3a482dd 100644 --- a/src/onig-scanner-worker.h +++ b/src/onig-scanner-worker.h @@ -8,14 +8,11 @@ #include "onig-reg-exp.h" #include "onig-searcher.h" -using ::std::shared_ptr; -using ::std::vector; - class OnigScannerWorker : public Nan::AsyncWorker { public: OnigScannerWorker(Nan::Callback *callback, - vector> regExps, - Local v8String, + std::vector> regExps, + v8::Local v8String, int charOffset) : Nan::AsyncWorker(callback), charOffset(charOffset) { @@ -33,8 +30,8 @@ class OnigScannerWorker : public Nan::AsyncWorker { private: OnigString* source; int charOffset; - shared_ptr searcher; - shared_ptr bestResult; + std::shared_ptr searcher; + std::shared_ptr bestResult; }; #endif // SRC_ONIG_SCANNER_WORKER_H_ diff --git a/src/onig-scanner.cc b/src/onig-scanner.cc index 5db6cc3..f2a60cc 
100644 --- a/src/onig-scanner.cc +++ b/src/onig-scanner.cc @@ -1,7 +1,7 @@ #include "onig-scanner.h" #include "onig-scanner-worker.h" -using ::v8::FunctionTemplate; +using namespace v8; void OnigScanner::Init(Local target) { Local tpl = Nan::New(OnigScanner::New); @@ -58,8 +58,8 @@ OnigScanner::OnigScanner(Local sources) { regExps.resize(length); for (int i = 0; i < length; i++) { - String::Utf8Value utf8Value(sources->Get(i)); - regExps[i] = shared_ptr(new OnigRegExp(string(*utf8Value))); + Local source = Local::Cast(sources->Get(i)); + regExps[i] = shared_ptr(new OnigRegExp(OnigString(source))); } searcher = shared_ptr(new OnigSearcher(regExps)); @@ -101,8 +101,8 @@ Local OnigScanner::CaptureIndicesForMatch(OnigResult* result, OnigString* Local captures = Nan::New(resultCount); for (int index = 0; index < resultCount; index++) { - int captureStart = source->ConvertUtf8OffsetToUtf16(result->LocationAt(index)); - int captureEnd = source->ConvertUtf8OffsetToUtf16(result->LocationAt(index) + result->LengthAt(index)); + int captureStart = result->LocationAt(index); + int captureEnd = result->LocationAt(index) + result->LengthAt(index); Local capture = Nan::New(); capture->Set(Nan::New("index").ToLocalChecked(), Nan::New(index)); diff --git a/src/onig-searcher.cc b/src/onig-searcher.cc index 7709d2f..4f6c851 100644 --- a/src/onig-searcher.cc +++ b/src/onig-searcher.cc @@ -1,8 +1,6 @@ #include "onig-searcher.h" shared_ptr OnigSearcher::Search(OnigString* source, int charOffset) { - int byteOffset = source->ConvertUtf16OffsetToUtf8(charOffset); - int bestLocation = 0; shared_ptr bestResult; @@ -10,7 +8,7 @@ shared_ptr OnigSearcher::Search(OnigString* source, int charOffset) int index = 0; while (iter < regExps.end()) { OnigRegExp *regExp = (*iter).get(); - shared_ptr result = regExp->Search(source, byteOffset); + shared_ptr result = regExp->Search(source, charOffset); if (result != NULL && result->Count() > 0) { int location = result->LocationAt(0); @@ -20,7 +18,7 @@ 
shared_ptr OnigSearcher::Search(OnigString* source, int charOffset) bestResult->SetIndex(index); } - if (location == byteOffset) { + if (location == charOffset) { break; } } diff --git a/src/onig-string.cc b/src/onig-string.cc index f32619f..f9e16ad 100644 --- a/src/onig-string.cc +++ b/src/onig-string.cc @@ -1,114 +1,71 @@ #include "onig-string.h" -using ::v8::FunctionTemplate; +using namespace v8; void OnigString::Init(Local target) { Local tpl = Nan::New(OnigString::New); tpl->SetClassName(Nan::New("OnigString").ToLocalChecked()); tpl->InstanceTemplate()->SetInternalFieldCount(1); - + tpl->PrototypeTemplate()->Set(Nan::New("toString").ToLocalChecked(), Nan::New(ToString)); + tpl->PrototypeTemplate()->Set(Nan::New("substring").ToLocalChecked(), Nan::New(Substring)); + Nan::SetAccessor(tpl->InstanceTemplate(), Nan::New("length").ToLocalChecked(), Length); target->Set(Nan::New("OnigString").ToLocalChecked(), tpl->GetFunction()); } -NAN_METHOD(OnigString::New) { - Nan::HandleScope scope; - OnigString* scanner = new OnigString(Local::Cast(info[0])); - scanner->Wrap(info.This()); - info.GetReturnValue().SetUndefined(); +void OnigString::New(const Nan::FunctionCallbackInfo &info) { + if (info[0]->IsString()) { + OnigString* scanner = new OnigString(Local::Cast(info[0])); + scanner->Wrap(info.This()); + } else { + Nan::ThrowTypeError("Argument must be a string"); + } } -OnigString::OnigString(Local value) - : utf8Value(value), utf8_length_(utf8Value.length()) { - static int idGenerator = 0; - uniqueId_ = ++idGenerator; - - hasMultiByteChars = (value->Length() != utf8_length_); - - if (hasMultiByteChars) { - String::Value utf16Value(value); - utf16_length_ = utf16Value.length(); - - utf16OffsetToUtf8 = new int[utf16_length_ + 1]; - utf16OffsetToUtf8[utf16_length_] = utf8_length_; - - utf8OffsetToUtf16 = new int[utf8_length_ + 1]; - utf8OffsetToUtf16[utf8_length_] = utf16_length_; +void OnigString::ToString(const Nan::FunctionCallbackInfo &info) { + OnigString *string = 
node::ObjectWrap::Unwrap(info.This()); + Local result; + if (Nan::New(string->content.data(), string->content.size()).ToLocal(&result)) { + info.GetReturnValue().Set(result); + } +} - // http://stackoverflow.com/a/148766 - int i8 = 0; - for (int i16 = 0, len = utf16_length_; i16 < len; i16++) { - uint16_t in = (*utf16Value)[i16]; +static int32_t constrain(int32_t min, int32_t value, int32_t max) { + if (value < min) return min; + if (value > max) return max; + return value; +} - unsigned int codepoint = in; - bool wasSurrogatePair = false; +void OnigString::Substring(const Nan::FunctionCallbackInfo &info) { + OnigString *string = node::ObjectWrap::Unwrap(info.This()); + Local result; - if (in >= 0xd800 && in <= 0xdbff) { - // Hit a high surrogate, try to look for a matching low surrogate - if (i16 + 1 < len) { - uint16_t next = (*utf16Value)[i16 + 1]; - if (next >= 0xdc00 && next <= 0xdfff) { - // Found the matching low surrogate - codepoint = (((in - 0xd800) << 10) + 0x10000) | (next - 0xdc00); - wasSurrogatePair = true; - } - } - } + const uint16_t *data = string->content.data(); + const uint32_t length = string->content.size(); - utf16OffsetToUtf8[i16] = i8; + int32_t start_index = 0; + int32_t end_index = length; - if (codepoint <= 0x7f) { - utf8OffsetToUtf16[i8++] = i16; - } else if (codepoint <= 0x7ff) { - utf8OffsetToUtf16[i8++] = i16; - utf8OffsetToUtf16[i8++] = i16; - } else if (codepoint <= 0xffff) { - utf8OffsetToUtf16[i8++] = i16; - utf8OffsetToUtf16[i8++] = i16; - utf8OffsetToUtf16[i8++] = i16; - } else { - utf8OffsetToUtf16[i8++] = i16; - utf8OffsetToUtf16[i8++] = i16; - utf8OffsetToUtf16[i8++] = i16; - utf8OffsetToUtf16[i8++] = i16; - } + if (info[0]->IsNumber()) { + start_index = constrain(0, info[0]->IntegerValue(), length); - if (wasSurrogatePair) { - utf16OffsetToUtf8[i16 + 1] = utf16OffsetToUtf8[i16]; - i16++; - } + if (info[1]->IsNumber()) { + end_index = constrain(0, info[1]->IntegerValue(), length); } } -} -OnigString::~OnigString() { - if 
(hasMultiByteChars) { - delete []utf16OffsetToUtf8; - delete []utf8OffsetToUtf16; + if (Nan::New(data + start_index, end_index - start_index).ToLocal(&result)) { + info.GetReturnValue().Set(result); } } -int OnigString::ConvertUtf8OffsetToUtf16(int utf8Offset) { - if (hasMultiByteChars) { - if (utf8Offset < 0) { - return 0; - } - if ((size_t)utf8Offset > utf8_length_) { - return utf16_length_; - } - return utf8OffsetToUtf16[utf8Offset]; - } - return utf8Offset; +void OnigString::Length(v8::Local, const Nan::PropertyCallbackInfo &info) { + OnigString *string = node::ObjectWrap::Unwrap(info.This()); + info.GetReturnValue().Set(Nan::New(string->content.size())); } -int OnigString::ConvertUtf16OffsetToUtf8(int utf16Offset) { - if (hasMultiByteChars) { - if (utf16Offset < 0) { - return 0; - } - if ((size_t)utf16Offset > utf16_length_) { - return utf8_length_; - } - return utf16OffsetToUtf8[utf16Offset]; - } - return utf16Offset; +OnigString::OnigString(Local value) { + static int nextId = 0; + uniqueId = ++nextId; + content.resize(value->Length()); + value->Write(content.data(), 0, -1, String::NO_NULL_TERMINATION); } diff --git a/src/onig-string.h b/src/onig-string.h index 28dae9d..b8753ae 100644 --- a/src/onig-string.h +++ b/src/onig-string.h @@ -2,41 +2,21 @@ #define SRC_ONIG_STRING_H_ #include - #include "nan.h" -using ::std::shared_ptr; - -using ::v8::Local; -using ::v8::Object; -using ::v8::String; - class OnigString : public node::ObjectWrap { public: - static void Init(Local target); - explicit OnigString(Local value); - ~OnigString(); - - int uniqueId() { return uniqueId_; } + static void Init(v8::Local target); + explicit OnigString(v8::Local value); - const char* utf8_value() const { return *utf8Value; } - size_t utf8_length() const { return utf8_length_; } - - int ConvertUtf8OffsetToUtf16(int utf8Offset); - int ConvertUtf16OffsetToUtf8(int utf16Offset); + int uniqueId; + std::vector content; private: - static NAN_METHOD(New); - - int uniqueId_; - 
String::Utf8Value utf8Value; - size_t utf8_length_; - bool hasMultiByteChars; - - // - the following members are used only if hasMultiByteChars is true - size_t utf16_length_; - int *utf16OffsetToUtf8; - int *utf8OffsetToUtf16; + static void New(const Nan::FunctionCallbackInfo &info); + static void ToString(const Nan::FunctionCallbackInfo &info); + static void Substring(const Nan::FunctionCallbackInfo &info); + static void Length(v8::Local, const Nan::PropertyCallbackInfo &); }; #endif // SRC_ONIG_STRING_H_ diff --git a/src/oniguruma.coffee b/src/oniguruma.coffee deleted file mode 100644 index 5237fed..0000000 --- a/src/oniguruma.coffee +++ /dev/null @@ -1,41 +0,0 @@ -{OnigScanner} = require '../build/Release/onig_scanner.node' -{OnigString} = require '../build/Release/onig_scanner.node' -OnigRegExp = require './onig-reg-exp' - -OnigScanner::findNextMatch = (string, startPosition=0, callback) -> - if typeof startPosition is 'function' - callback = startPosition - startPosition = 0 - - string = @convertToString(string) - startPosition = @convertToNumber(startPosition) - - @_findNextMatch string, startPosition, (error, match) => - match?.scanner = this - callback?(error, match) - -OnigScanner::findNextMatchSync = (string, startPosition=0) -> - string = @convertToString(string) - startPosition = @convertToNumber(startPosition) - - match = @_findNextMatchSync(string, startPosition) - match?.scanner = this - match - -OnigScanner::convertToString = (value) -> - if value is undefined - 'undefined' - else if value is null - 'null' - else - value.toString() - -OnigScanner::convertToNumber = (value) -> - value = parseInt(value) - value = 0 unless isFinite(value) - value = Math.max(value, 0) - value - -exports.OnigString = OnigString -exports.OnigScanner = OnigScanner -exports.OnigRegExp = OnigRegExp diff --git a/src/oniguruma.js b/src/oniguruma.js new file mode 100644 index 0000000..a9a5575 --- /dev/null +++ b/src/oniguruma.js @@ -0,0 +1,91 @@ +const {OnigScanner, 
OnigString} = require('../build/Release/onig_scanner.node') + +class OnigRegExp { + constructor (source) { + this.source = source.toString() + this.scanner = new OnigScanner([this.source]) + } + + captureIndicesForMatch (string, match) { + if (match) { + let {captureIndices} = match + string = this.scanner.convertToString(string) + for (let capture of Array.from(captureIndices)) { + capture.match = string.slice(capture.start, capture.end) + } + return captureIndices + } else { + return null + } + } + + searchSync (string, startPosition) { + if (startPosition == null) startPosition = 0 + let match = this.scanner.findNextMatchSync(string, startPosition) + return this.captureIndicesForMatch(string, match) + } + + search (string, startPosition, callback) { + if (startPosition == null) { startPosition = 0 } + if (typeof startPosition === 'function') { + callback = startPosition + startPosition = 0 + } + + this.scanner.findNextMatch(string, startPosition, (error, match) => { + callback(error, this.captureIndicesForMatch(string, match)) + }) + } + + testSync (string) { + return this.searchSync(string) != null + } + + test (string, callback) { + this.search(string, 0, (error, result) => callback(error, result != null)) + } +} + +OnigScanner.prototype.findNextMatch = function (string, startPosition, callback) { + if (startPosition == null) startPosition = 0 + if (typeof startPosition === 'function') { + callback = startPosition + startPosition = 0 + } + + string = this.convertToString(string) + startPosition = this.convertToNumber(startPosition) + + this._findNextMatch(string, startPosition, (error, match) => { + if (match) match.scanner = this + return callback(error, match) + }) +} + +OnigScanner.prototype.findNextMatchSync = function (string, startPosition) { + if (startPosition == null) { startPosition = 0 } + string = this.convertToString(string) + startPosition = this.convertToNumber(startPosition) + + let match = this._findNextMatchSync(string, startPosition) + if 
(match) match.scanner = this + return match +} + +OnigScanner.prototype.convertToString = function (value) { + if (value === undefined) return 'undefined' + if (value === null) return 'null' + if (value.constructor == OnigString) return value + return value.toString() +} + +OnigScanner.prototype.convertToNumber = function (value) { + value = parseInt(value) + if (!isFinite(value)) { value = 0 } + value = Math.max(value, 0) + return value +} + +exports.OnigScanner = OnigScanner +exports.OnigRegExp = OnigRegExp +exports.OnigString = OnigString