From 76150ae7512439b4e6903db834e4a327596b617d Mon Sep 17 00:00:00 2001 From: Richard van Velzen Date: Fri, 10 Jun 2022 11:21:06 +0200 Subject: [PATCH] Optimize generated regex --- src/Lexer/Lexer.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Lexer/Lexer.php b/src/Lexer/Lexer.php index 40747d95..89501e03 100644 --- a/src/Lexer/Lexer.php +++ b/src/Lexer/Lexer.php @@ -121,6 +121,12 @@ public function tokenize(string $s): array private function initialize(): void { $patterns = [ + self::TOKEN_HORIZONTAL_WS => '[\\x09\\x20]++', + + self::TOKEN_IDENTIFIER => '(?:[\\\\]?+[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF-]*+)++', + self::TOKEN_THIS_VARIABLE => '\\$this(?![0-9a-z_\\x80-\\xFF])', + self::TOKEN_VARIABLE => '\\$[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF]*+', + // '&' followed by TOKEN_VARIADIC, TOKEN_VARIABLE, TOKEN_EQUAL, TOKEN_EQUAL or TOKEN_CLOSE_PARENTHESES self::TOKEN_REFERENCE => '&(?=\\s*+(?:[.,=)]|(?:\\$(?!this(?![0-9a-z_\\x80-\\xFF])))))', self::TOKEN_UNION => '\\|', @@ -154,12 +160,6 @@ private function initialize(): void self::TOKEN_SINGLE_QUOTED_STRING => '\'(?:\\\\[^\\r\\n]|[^\'\\r\\n\\\\])*+\'', self::TOKEN_DOUBLE_QUOTED_STRING => '"(?:\\\\[^\\r\\n]|[^"\\r\\n\\\\])*+"', - self::TOKEN_IDENTIFIER => '(?:[\\\\]?+[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF-]*+)++', - self::TOKEN_THIS_VARIABLE => '\\$this(?![0-9a-z_\\x80-\\xFF])', - self::TOKEN_VARIABLE => '\\$[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF]*+', - - self::TOKEN_HORIZONTAL_WS => '[\\x09\\x20]++', - self::TOKEN_WILDCARD => '\\*', // anything but TOKEN_CLOSE_PHPDOC or TOKEN_HORIZONTAL_WS or TOKEN_EOL