From 2c5eb8b229d4937a16a1bd17c5e2b137ce3b4597 Mon Sep 17 00:00:00 2001 From: Cal Henderson Date: Fri, 4 Mar 2022 16:19:49 -0800 Subject: [PATCH] Throw on bad syntax (#23) * detect when we've reached the end of input when lexing an unterminated string * added a mode where we throw exceptions for invalid syntax * added tests for invalid syntax, in both default and exception mode * documented `$parser->throw_on_bad_syntax` * get tests running again on php 8.1 (broken by https://github.com/sebastianbergmann/phpunit/issues/4740) --- .github/workflows/php.yml | 2 +- README.md | 4 ++++ src/SQLParser.php | 11 ++++++++++ tests/InvalidTest.php | 43 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 tests/InvalidTest.php diff --git a/.github/workflows/php.yml b/.github/workflows/php.yml index 7a68055..b098f1f 100644 --- a/.github/workflows/php.yml +++ b/.github/workflows/php.yml @@ -37,7 +37,7 @@ jobs: phpunit-versions: '7.5.20' coverage: true - php-versions: '8.1' - phpunit-versions: '9.5.4' + phpunit-versions: '9.5.16' steps: - name: Setup PHP diff --git a/README.md b/README.md index d61b12a..1e2e73f 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,10 @@ The `tokens` property contains an array of tokens. SQL keywords are returned as with multi-word terms (e.g. `DEFAULT CHARACTER SET`) as a single token. Strings and escaped identifiers are not further processed; they are returned exactly as expressed in the input SQL. +By default, the tokenizer will ignore unterminated comments and strings, and stop parsing at +that point, producing no further tokens. You can set `$parser->throw_on_bad_syntax = true;` to +throw an exception of type `iamcal\SQLParserSyntaxException` instead. + ## Performance diff --git a/src/SQLParser.php b/src/SQLParser.php index ed4eb15..b1e52af 100644 --- a/src/SQLParser.php +++ b/src/SQLParser.php @@ -2,6 +2,8 @@ namespace iamcal; +class SQLParserSyntaxException extends \Exception { } + class SQLParser{ # @@ -13,6 +15,7 @@ class SQLParser{ public $source_map = array(); public $find_single_table = false; + public $throw_on_bad_syntax = false; public function parse($sql){ @@ -59,6 +62,7 @@ private function _lex($sql){ if (preg_match('!--!A', $sql, $m, 0, $pos)){ $p2 = strpos($sql, "\n", $pos); if ($p2 === false){ + if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated comment at position $pos"); $pos = $len; }else{ $pos = $p2+1; @@ -68,6 +72,7 @@ private function _lex($sql){ if (preg_match('!/\\*!A', $sql, $m, 0, $pos)){ $p2 = strpos($sql, "*/", $pos); if ($p2 === false){ + if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated comment at position $pos"); $pos = $len; }else{ $pos = $p2+2; @@ -88,6 +93,7 @@ private function _lex($sql){ if (substr($sql, $pos, 1) == '`'){ $p2 = strpos($sql, "`", $pos+1); if ($p2 === false){ + if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated backtick at position $pos"); $pos = $len; }else{ $source_map[] = array($pos, 1+$p2-$pos); @@ -113,6 +119,7 @@ private function _lex($sql){ # if ($sql[$pos] == "'" || $sql[$pos] == '"'){ + $str_start_pos = $pos; $c = $pos+1; $q = $sql[$pos]; while ($c < strlen($sql)){ @@ -128,6 +135,10 @@ private function _lex($sql){ } $c++; } + if ($c >= strlen($sql)){ + if ($this->throw_on_bad_syntax) throw new SQLParserSyntaxException("Unterminated string at position $str_start_pos"); + $pos = $len; + } continue; } diff --git a/tests/InvalidTest.php b/tests/InvalidTest.php new file mode 100644 index 0000000..20e8dce --- /dev/null +++ b/tests/InvalidTest.php @@ -0,0 +1,43 @@ +lex("CREATE TABLE `users ( id int(10) )"); + $this->assertEquals(count($tokens), 1); + + $tokens = $obj->lex("CREATE TABLE `users` ' ( `id` int(10) )"); + $this->assertEquals(count($tokens), 2); + } + + function testBrokenSyntaxException(){ + + // in exception mode, it throws an exception... + + $obj = new iamcal\SQLParser(); + $obj->throw_on_bad_syntax = true; + + try { + $obj->lex("CREATE TABLE `users ( id int(10) )"); + $this->fail("Expected Exception has not been raised"); + } catch (Exception $ex) { + $this->assertInstanceOf('iamcal\SQLParserSyntaxException', $ex); + } + + try { + $obj->lex("CREATE TABLE `users` ' ( `id` int(10) )"); + $this->fail("Expected Exception has not been raised"); + } catch (Exception $ex) { + $this->assertInstanceOf('iamcal\SQLParserSyntaxException', $ex); + } + + } + }