Small contribution - code readability improvements (#306)

* Some code improvements - tests same as at the beginning
* isUTF8Invalid returns a boolean
* Changes so that tests pass
* Revert change + scrutinizer to flaky
* Null coalescing on token to return a null token
* Psalm passes and PHPUnit too (PHP 7.4)
* Minor change to set lookahead to nullToken
egulias · Nov 20, 2021 · 007bfd4 · 007bfd4
1 parent 5cc0471
commit 007bfd4
Show file tree

Hide file tree

Showing 9 changed files with 140 additions and 157 deletions.
diff --git a/.scrutinizer.yml b/.scrutinizer.yml
@@ -7,7 +7,7 @@ build:
     tests:
         override:
             -
-                command: 'vendor/bin/phpunit --coverage-clover=clover.xml --exclude-group slow'
+                command: 'vendor/bin/phpunit --coverage-clover=clover.xml --exclude-group flaky'
                 coverage:
                     file: 'clover.xml'
                     format: 'clover'

diff --git a/src/EmailLexer.php b/src/EmailLexer.php
@@ -61,7 +61,7 @@ class EmailLexer extends AbstractLexer
      *
      * @var array
      */
-    protected $charValue = array(
+    protected $charValue = [
         '{'    => self::S_OPENCURLYBRACES,
         '}'    => self::S_CLOSECURLYBRACES,
         '('    => self::S_OPENPARENTHESIS,
@@ -105,11 +105,29 @@ class EmailLexer extends AbstractLexer
         '?'    => self::QUESTIONMARK,
         '#'    => self::NUMBER_SIGN,
         '¡'    => self::INVERT_EXCLAMATION,
-    );
+    ];
 
-    /**
-     * @var bool
-     */
+    const INVALID_CHARS_REGEX = "/[^\p{S}\p{C}\p{Cc}]+/iu";
+
+    const VALID_UTF8_REGEX = '/\p{Cc}+/u';
+
+    const CATCHABLE_PATTERNS = [
+        '[a-zA-Z]+[46]?', //ASCII and domain literal
+        '[^\x00-\x7F]',  //UTF-8
+        '[0-9]+',
+        '\r\n',
+        '::',
+        '\s+?',
+        '.',
+    ];
+
+    const NON_CATCHABLE_PATTERNS = [
+        '[\xA0-\xff]+',
+    ];
+
+    const MODIFIERS = 'iu';
+
+    /** @var bool */
     protected $hasInvalidTokens = false;
 
     /**
@@ -137,23 +155,17 @@ class EmailLexer extends AbstractLexer
      */
     public $lookahead;
 
-    /**
-     * @psalm-var array{value:'', type:null, position:0}
-     */
+    /** @psalm-var array{value:'', type:null, position:0} */
     private static $nullToken = [
         'value' => '',
         'type' => null,
         'position' => 0,
     ];
 
-    /**
-     * @var string
-     */
+    /** @var string */
     private $accumulator = '';
 
-    /**
-     * @var bool
-     */
+    /** @var bool */
     private $hasToRecord = false;
 
     public function __construct()
@@ -162,32 +174,21 @@ public function __construct()
         $this->lookahead = null;
     }
 
-    /**
-     * @return void
-     */
-    public function reset()
+    public function reset() : void
     {
         $this->hasInvalidTokens = false;
         parent::reset();
         $this->previous = $this->token = self::$nullToken;
     }
 
-    /**
-     * @return bool
-     */
-    public function hasInvalidTokens()
-    {
-        return $this->hasInvalidTokens;
-    }
-
     /**
      * @param int $type
      * @throws \UnexpectedValueException
      * @return boolean
      *
      * @psalm-suppress InvalidScalarArgument
      */
-    public function find($type)
+    public function find($type) : bool
     {
         $search = clone $this;
         $search->skipUntil($type);
@@ -198,30 +199,24 @@ public function find($type)
         return true;
     }
 
-    /**
-     * getPrevious
-     *
-     * @return array
-     */
-    public function getPrevious()
-    {
-        return $this->previous;
-    }
-
     /**
      * moveNext
      *
      * @return boolean
      */
-    public function moveNext()
+    public function moveNext() : bool
     {
         if ($this->hasToRecord && $this->previous === self::$nullToken) {
             $this->accumulator .= $this->token['value'];
         }
 
         $this->previous = $this->token;
+
+        if($this->lookahead === null) {
+            $this->lookahead = self::$nullToken;
+        }
+
         $hasNext = parent::moveNext();
-        $this->token = $this->token ?: self::$nullToken;
 
         if ($this->hasToRecord) {
             $this->accumulator .= $this->token['value'];
@@ -230,36 +225,6 @@ public function moveNext()
         return $hasNext;
     }
 
-    /**
-     * Lexical catchable patterns.
-     *
-     * @return string[]
-     */
-    protected function getCatchablePatterns()
-    {
-        return array(
-            '[a-zA-Z]+[46]?', //ASCII and domain literal
-            '[^\x00-\x7F]',  //UTF-8
-            '[0-9]+',
-            '\r\n',
-            '::',
-            '\s+?',
-            '.',
-            );
-    }
-
-    /**
-     * Lexical non-catchable patterns.
-     *
-     * @return string[]
-     */
-    protected function getNonCatchablePatterns()
-    {
-        return [
-            '[\xA0-\xff]+',
-        ];
-    }
-
     /**
      * Retrieve token type. Also processes the token value if necessary.
      *
@@ -292,51 +257,64 @@ protected function getType(&$value)
         return  self::GENERIC;
     }
 
+    protected function isValid(string $value) : bool
+    {
+        return isset($this->charValue[$value]);
+    }
+
+    protected function isNullType(string $value) : bool
+    {
+        return $value === "\0";
+    }
+
     protected function isInvalidChar(string $value) : bool
     {
-        if(preg_match("/[^\p{S}\p{C}\p{Cc}]+/iu", $value) ) {
-            return false;
-        }
-        return true;
+        return !preg_match(self::INVALID_CHARS_REGEX, $value);
     }
 
-    protected function isValid(string $value) : bool
+    protected function isUTF8Invalid(string $value) : bool
     {
-        if (isset($this->charValue[$value])) {
-            return true;
-        }
+        return preg_match(self::VALID_UTF8_REGEX, $value) !== false;
+    }
 
-        return false;
+    public function hasInvalidTokens() : bool
+    {
+        return $this->hasInvalidTokens;
     }
 
     /**
-     * @param string $value
-     * @return bool
+     * getPrevious
+     *
+     * @return array
      */
-    protected function isNullType($value)
+    public function getPrevious() : array
     {
-        if ($value === "\0") {
-            return true;
-        }
-
-        return false;
+        return $this->previous;
     }
 
-    protected function isUTF8Invalid(string $value) : bool
+    /**
+     * Lexical catchable patterns.
+     *
+     * @return string[]
+     */
+    protected function getCatchablePatterns() : array
     {
-        if (preg_match('/\p{Cc}+/u', $value)) {
-            return true;
-        }
-
-        return false;
+        return self::CATCHABLE_PATTERNS;
     }
 
     /**
-     * @return string
+     * Lexical non-catchable patterns.
+     *
+     * @return string[]
      */
-    protected function getModifiers()
+    protected function getNonCatchablePatterns() : array
+    {
+        return self::NON_CATCHABLE_PATTERNS;
+    }
+
+    protected function getModifiers() : string
     {
-        return 'iu';
+        return self::MODIFIERS;
     }
 
     public function getAccumulatedValues() : string

diff --git a/src/MessageIDParser.php b/src/MessageIDParser.php
@@ -3,7 +3,6 @@
 namespace Egulias\EmailValidator;
 
 use Egulias\EmailValidator\Parser;
-use Egulias\EmailValidator\EmailLexer;
 use Egulias\EmailValidator\Result\Result;
 use Egulias\EmailValidator\Parser\IDLeftPart;
 use Egulias\EmailValidator\Parser\IDRightPart;

diff --git a/src/Parser/DomainLiteral.php b/src/Parser/DomainLiteral.php
@@ -22,6 +22,15 @@
 
 class DomainLiteral extends PartParser
 {
+    const IPV4_REGEX = '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/';
+
+    const OBSOLETE_WARNINGS = [
+        EmailLexer::INVALID,
+        EmailLexer::C_DEL,
+        EmailLexer::S_LF,
+        EmailLexer::S_BACKSLASH
+    ];
+
     public function parse() : Result
     {
         $this->addTagWarnings();
@@ -138,11 +147,8 @@ public function checkIPV6Tag($addressLiteral, $maxGroups = 8) : void
 
     public function convertIPv4ToIPv6(string $addressLiteralIPv4) : string
     {
-        $matchesIP  = array();
-        $IPv4Match = preg_match(
-            '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
-            $addressLiteralIPv4,
-            $matchesIP);
+        $matchesIP  = [];
+        $IPv4Match = preg_match(self::IPV4_REGEX, $addressLiteralIPv4, $matchesIP);
 
         // Extract IPv4 part from the end of the address-literal (if there is one)
         if ($IPv4Match > 0) {
@@ -164,11 +170,8 @@ public function convertIPv4ToIPv6(string $addressLiteralIPv4) : string
      */
     protected function checkIPV4Tag($addressLiteral) : bool
     {
-        $matchesIP  = array();
-        $IPv4Match = preg_match(
-            '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
-            $addressLiteral,
-            $matchesIP);
+        $matchesIP  = [];
+        $IPv4Match = preg_match(self::IPV4_REGEX, $addressLiteral, $matchesIP);
 
         // Extract IPv4 part from the end of the address-literal (if there is one)
 
@@ -186,11 +189,7 @@ protected function checkIPV4Tag($addressLiteral) : bool
 
     private function addObsoleteWarnings() : void
     {
-        if ($this->lexer->token['type'] === EmailLexer::INVALID ||
-            $this->lexer->token['type'] === EmailLexer::C_DEL   ||
-            $this->lexer->token['type'] === EmailLexer::S_LF ||
-            $this->lexer->token['type'] === EmailLexer::S_BACKSLASH
-        ) {
+        if(in_array($this->lexer->token['type'], self::OBSOLETE_WARNINGS)) {
             $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
         }
     }

diff --git a/src/Parser/DoubleQuote.php b/src/Parser/DoubleQuote.php
@@ -19,18 +19,19 @@ public function parse() : Result
         $validQuotedString = $this->checkDQUOTE();
         if($validQuotedString->isInvalid()) return $validQuotedString;
 
-        $special = array(
+        $special = [
             EmailLexer::S_CR => true,
             EmailLexer::S_HTAB => true,
             EmailLexer::S_LF => true
-        );
+        ];
 
-        $invalid = array(
+        $invalid = [
             EmailLexer::C_NUL => true,
             EmailLexer::S_HTAB => true,
             EmailLexer::S_CR => true,
             EmailLexer::S_LF => true
-        );
+        ];
+
         $setSpecialsWarning = true;
 
         $this->lexer->moveNext();

diff --git a/src/Parser/FoldingWhiteSpace.php b/src/Parser/FoldingWhiteSpace.php
@@ -15,6 +15,14 @@
 
 class  FoldingWhiteSpace extends PartParser
 {
+    const FWS_TYPES = [
+        EmailLexer::S_SP,
+        EmailLexer::S_HTAB,
+        EmailLexer::S_CR,
+        EmailLexer::S_LF,
+        EmailLexer::CRLF
+    ];
+
     public function parse() : Result
     {
         if (!$this->isFWS()) {
@@ -73,10 +81,6 @@ protected function isFWS() : bool
             return false;
         }
 
-        return $this->lexer->token['type'] === EmailLexer::S_SP ||
-            $this->lexer->token['type'] === EmailLexer::S_HTAB ||
-            $this->lexer->token['type'] === EmailLexer::S_CR ||
-            $this->lexer->token['type'] === EmailLexer::S_LF ||
-            $this->lexer->token['type'] === EmailLexer::CRLF;
+        return in_array($this->lexer->token['type'], self::FWS_TYPES);
     }
 }