Skip to content

Commit

Permalink
Small contribution - code readability improvements (#306)
Browse files Browse the repository at this point in the history
* Some code improvements - tests same as beginning

* isUTF8Invalid returns a boolean

* Changes to make the tests pass

* Revert change + scrutinizer to flaky

* Null coalescing on token to return a null token

* Psalm and PHPUnit both pass (PHP 7.4)

* Minor change to set lookahead to nullToken
  • Loading branch information
JuGid authored Nov 20, 2021
1 parent 5cc0471 commit 007bfd4
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 157 deletions.
2 changes: 1 addition & 1 deletion .scrutinizer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build:
tests:
override:
-
command: 'vendor/bin/phpunit --coverage-clover=clover.xml --exclude-group slow'
command: 'vendor/bin/phpunit --coverage-clover=clover.xml --exclude-group flaky'
coverage:
file: 'clover.xml'
format: 'clover'
Expand Down
168 changes: 73 additions & 95 deletions src/EmailLexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class EmailLexer extends AbstractLexer
*
* @var array
*/
protected $charValue = array(
protected $charValue = [
'{' => self::S_OPENCURLYBRACES,
'}' => self::S_CLOSECURLYBRACES,
'(' => self::S_OPENPARENTHESIS,
Expand Down Expand Up @@ -105,11 +105,29 @@ class EmailLexer extends AbstractLexer
'?' => self::QUESTIONMARK,
'#' => self::NUMBER_SIGN,
'¡' => self::INVERT_EXCLAMATION,
);
];

/**
 * Pattern passed to preg_match() by isInvalidChar(). It matches a run of one
 * or more characters that fall outside the Unicode categories S (symbol),
 * C (other) and Cc (control).
 */
const INVALID_CHARS_REGEX = "/[^\p{S}\p{C}\p{Cc}]+/iu";

/**
 * Pattern passed to preg_match() by isUTF8Invalid(). It matches a run of one
 * or more Unicode control characters (category Cc).
 *
 * NOTE(review): despite the constant's name, a match means control
 * characters are present in the subject.
 */
const VALID_UTF8_REGEX = '/\p{Cc}+/u';

/**
 * Regex fragments returned by getCatchablePatterns().
 */
const CATCHABLE_PATTERNS = [
'[a-zA-Z]+[46]?', //ASCII and domain literal
'[^\x00-\x7F]', //UTF-8
'[0-9]+',
'\r\n',
'::',
'\s+?',
'.',
];

/**
 * Regex fragments returned by getNonCatchablePatterns().
 */
const NON_CATCHABLE_PATTERNS = [
'[\xA0-\xff]+',
];

/**
 * Regex modifier string returned by getModifiers().
 */
const MODIFIERS = 'iu';

/** @var bool */
protected $hasInvalidTokens = false;

/**
Expand Down Expand Up @@ -137,23 +155,17 @@ class EmailLexer extends AbstractLexer
*/
public $lookahead;

/**
* @psalm-var array{value:'', type:null, position:0}
*/
/** @psalm-var array{value:'', type:null, position:0} */
private static $nullToken = [
'value' => '',
'type' => null,
'position' => 0,
];

/**
* @var string
*/
/** @var string */
private $accumulator = '';

/**
* @var bool
*/
/** @var bool */
private $hasToRecord = false;

public function __construct()
Expand All @@ -162,32 +174,21 @@ public function __construct()
$this->lookahead = null;
}

/**
 * Puts the lexer back into its initial state.
 *
 * Clears the invalid-token flag, delegates to the parent reset(), then points
 * both the current and the previous token at the shared null token.
 */
public function reset() : void
{
    $this->hasInvalidTokens = false;
    parent::reset();

    // Both slots receive the same null-token value (arrays are copied by value).
    $this->token = self::$nullToken;
    $this->previous = self::$nullToken;
}

/**
* @return bool
*/
public function hasInvalidTokens()
{
return $this->hasInvalidTokens;
}

/**
* @param int $type
* @throws \UnexpectedValueException
* @return boolean
*
* @psalm-suppress InvalidScalarArgument
*/
public function find($type)
public function find($type) : bool
{
$search = clone $this;
$search->skipUntil($type);
Expand All @@ -198,30 +199,24 @@ public function find($type)
return true;
}

/**
* getPrevious
*
* @return array
*/
public function getPrevious()
{
return $this->previous;
}

/**
* moveNext
*
* @return boolean
*/
public function moveNext()
public function moveNext() : bool
{
if ($this->hasToRecord && $this->previous === self::$nullToken) {
$this->accumulator .= $this->token['value'];
}

$this->previous = $this->token;

if($this->lookahead === null) {
$this->lookahead = self::$nullToken;
}

$hasNext = parent::moveNext();
$this->token = $this->token ?: self::$nullToken;

if ($this->hasToRecord) {
$this->accumulator .= $this->token['value'];
Expand All @@ -230,36 +225,6 @@ public function moveNext()
return $hasNext;
}

/**
* Lexical catchable patterns.
*
* @return string[]
*/
protected function getCatchablePatterns()
{
return array(
'[a-zA-Z]+[46]?', //ASCII and domain literal
'[^\x00-\x7F]', //UTF-8
'[0-9]+',
'\r\n',
'::',
'\s+?',
'.',
);
}

/**
* Lexical non-catchable patterns.
*
* @return string[]
*/
protected function getNonCatchablePatterns()
{
return [
'[\xA0-\xff]+',
];
}

/**
* Retrieve token type. Also processes the token value if necessary.
*
Expand Down Expand Up @@ -292,51 +257,64 @@ protected function getType(&$value)
return self::GENERIC;
}

/**
 * Tells whether $value has a non-null entry in the $charValue lookup table.
 *
 * @param string $value
 * @return bool
 */
protected function isValid(string $value) : bool
{
    $hasMappedType = isset($this->charValue[$value]);

    return $hasMappedType;
}

/**
 * Tells whether $value is exactly the single NUL character.
 *
 * @param string $value
 * @return bool
 */
protected function isNullType(string $value) : bool
{
    $nulCharacter = "\0";

    return $nulCharacter === $value;
}

/**
 * Tells whether $value contains no character outside the Unicode categories
 * S (symbol), C (other) and Cc (control).
 *
 * The result is true unless preg_match() reports a match for
 * self::INVALID_CHARS_REGEX. An empty string therefore yields true, and so
 * does a failed match attempt (preg_match() returning false).
 *
 * @param string $value
 * @return bool
 */
protected function isInvalidChar(string $value) : bool
{
    // preg_match() returns 1 on a match, 0 on no match and false on failure;
    // only an actual match makes the value acceptable.
    return preg_match(self::INVALID_CHARS_REGEX, $value) !== 1;
}

/**
 * Tells whether $value contains at least one Unicode control character
 * (category Cc), as matched by self::VALID_UTF8_REGEX.
 *
 * Returns false both when there is no match and when preg_match() itself
 * fails (returns false).
 *
 * @param string $value
 * @return bool
 */
protected function isUTF8Invalid(string $value) : bool
{
    // Compare against 1, not `!== false`: preg_match() returns 0 when nothing
    // matches, and 0 !== false would wrongly report every value as invalid.
    return preg_match(self::VALID_UTF8_REGEX, $value) === 1;
}

return false;
/**
 * Returns the current value of the $hasInvalidTokens flag.
 *
 * The flag starts as false and reset() sets it back to false.
 *
 * @return bool
 */
public function hasInvalidTokens() : bool
{
return $this->hasInvalidTokens;
}

/**
 * Returns the token stored in $previous.
 *
 * moveNext() copies the current token into $previous before advancing, and
 * reset() sets it to the null token.
 *
 * @return array
 */
public function getPrevious() : array
{
    return $this->previous;
}

/**
 * Lexical catchable patterns.
 *
 * @return string[] the regex fragments listed in self::CATCHABLE_PATTERNS
 */
protected function getCatchablePatterns() : array
{
    return self::CATCHABLE_PATTERNS;
}

/**
 * Lexical non-catchable patterns.
 *
 * @return string[] the regex fragments listed in self::NON_CATCHABLE_PATTERNS
 */
protected function getNonCatchablePatterns() : array
{
    return self::NON_CATCHABLE_PATTERNS;
}

/**
 * Regex modifiers for the lexer patterns.
 *
 * @return string the value of self::MODIFIERS ('iu')
 */
protected function getModifiers() : string
{
    return self::MODIFIERS;
}

public function getAccumulatedValues() : string
Expand Down
1 change: 0 additions & 1 deletion src/MessageIDParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
namespace Egulias\EmailValidator;

use Egulias\EmailValidator\Parser;
use Egulias\EmailValidator\EmailLexer;
use Egulias\EmailValidator\Result\Result;
use Egulias\EmailValidator\Parser\IDLeftPart;
use Egulias\EmailValidator\Parser\IDRightPart;
Expand Down
29 changes: 14 additions & 15 deletions src/Parser/DomainLiteral.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@

class DomainLiteral extends PartParser
{
/**
 * Pattern used by convertIPv4ToIPv6() and checkIPV4Tag(). It matches a
 * dotted-quad IPv4 address (four groups, each 0-255) anchored at the end of
 * the subject.
 */
const IPV4_REGEX = '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/';

/**
 * Lexer token types for which addObsoleteWarnings() records an
 * ObsoleteDTEXT warning.
 */
const OBSOLETE_WARNINGS = [
EmailLexer::INVALID,
EmailLexer::C_DEL,
EmailLexer::S_LF,
EmailLexer::S_BACKSLASH
];

public function parse() : Result
{
$this->addTagWarnings();
Expand Down Expand Up @@ -138,11 +147,8 @@ public function checkIPV6Tag($addressLiteral, $maxGroups = 8) : void

public function convertIPv4ToIPv6(string $addressLiteralIPv4) : string
{
$matchesIP = array();
$IPv4Match = preg_match(
'/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
$addressLiteralIPv4,
$matchesIP);
$matchesIP = [];
$IPv4Match = preg_match(self::IPV4_REGEX, $addressLiteralIPv4, $matchesIP);

// Extract IPv4 part from the end of the address-literal (if there is one)
if ($IPv4Match > 0) {
Expand All @@ -164,11 +170,8 @@ public function convertIPv4ToIPv6(string $addressLiteralIPv4) : string
*/
protected function checkIPV4Tag($addressLiteral) : bool
{
$matchesIP = array();
$IPv4Match = preg_match(
'/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
$addressLiteral,
$matchesIP);
$matchesIP = [];
$IPv4Match = preg_match(self::IPV4_REGEX, $addressLiteral, $matchesIP);

// Extract IPv4 part from the end of the address-literal (if there is one)

Expand All @@ -186,11 +189,7 @@ protected function checkIPV4Tag($addressLiteral) : bool

/**
 * Records an ObsoleteDTEXT warning when the type of the lexer's current
 * token is one of the types listed in self::OBSOLETE_WARNINGS.
 */
private function addObsoleteWarnings() : void
{
    // Strict mode: the token type must be identical to a listed constant,
    // with no type juggling, exactly as a chain of `===` comparisons would be.
    if (in_array($this->lexer->token['type'], self::OBSOLETE_WARNINGS, true)) {
        $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
    }
}
Expand Down
9 changes: 5 additions & 4 deletions src/Parser/DoubleQuote.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,19 @@ public function parse() : Result
$validQuotedString = $this->checkDQUOTE();
if($validQuotedString->isInvalid()) return $validQuotedString;

$special = array(
$special = [
EmailLexer::S_CR => true,
EmailLexer::S_HTAB => true,
EmailLexer::S_LF => true
);
];

$invalid = array(
$invalid = [
EmailLexer::C_NUL => true,
EmailLexer::S_HTAB => true,
EmailLexer::S_CR => true,
EmailLexer::S_LF => true
);
];

$setSpecialsWarning = true;

$this->lexer->moveNext();
Expand Down
14 changes: 9 additions & 5 deletions src/Parser/FoldingWhiteSpace.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@

class FoldingWhiteSpace extends PartParser
{
/**
 * Lexer token types that isFWS() checks the current token against:
 * space, horizontal tab, CR, LF and the combined CRLF token.
 */
const FWS_TYPES = [
EmailLexer::S_SP,
EmailLexer::S_HTAB,
EmailLexer::S_CR,
EmailLexer::S_LF,
EmailLexer::CRLF
];

public function parse() : Result
{
if (!$this->isFWS()) {
Expand Down Expand Up @@ -73,10 +81,6 @@ protected function isFWS() : bool
return false;
}

return $this->lexer->token['type'] === EmailLexer::S_SP ||
$this->lexer->token['type'] === EmailLexer::S_HTAB ||
$this->lexer->token['type'] === EmailLexer::S_CR ||
$this->lexer->token['type'] === EmailLexer::S_LF ||
$this->lexer->token['type'] === EmailLexer::CRLF;
return in_array($this->lexer->token['type'], self::FWS_TYPES);
}
}
Loading

0 comments on commit 007bfd4

Please sign in to comment.