refactor php7

This commit is contained in:
Vadym Slizov
2020-05-13 00:16:36 +03:00
parent aab9da03f0
commit 6a84d58ee6
54 changed files with 29034 additions and 28821 deletions

227
internal/php5/parser.go Normal file
View File

@@ -0,0 +1,227 @@
package php5
import (
"github.com/z7zmey/php-parser/freefloating"
"github.com/z7zmey/php-parser/node"
"strings"
"github.com/z7zmey/php-parser/internal/positionbuilder"
"github.com/z7zmey/php-parser/pkg/ast"
"github.com/z7zmey/php-parser/pkg/errors"
"github.com/z7zmey/php-parser/pkg/position"
"github.com/z7zmey/php-parser/scanner"
)
// Token stores t in the token field of the symbol value.
func (lval *yySymType) Token(t *scanner.Token) {
lval.token = t
}
// Parser structure. It bundles the lexer with the state the methods in this
// file keep between calls.
// NewParser fills this struct positionally, so the field order matters.
type Parser struct {
Lexer scanner.Scanner
currentToken *scanner.Token // token seen by the latest Lex call; Error reads its position
positionBuilder *positionbuilder.PositionBuilder // replaced with a fresh builder by Parse
rootNode ast.Vertex // reset to nil by Parse, returned by GetRootNode
}
// NewParser creates and returns a new Parser that reads src through a freshly
// created lexer whose PHPVersion is set to v. The remaining parser fields are
// left at their zero values.
func NewParser(src []byte, v string) *Parser {
	lexer := scanner.NewLexer(src)
	lexer.PHPVersion = v

	return &Parser{Lexer: lexer}
}
// Lex forwards to the lexer's Lex and remembers the token that was stored in
// lval, so that Error can report its position. The lexer's result is returned
// unchanged.
func (l *Parser) Lex(lval *yySymType) int {
	result := l.Lexer.Lex(lval)
	l.currentToken = lval.token

	return result
}
// Error adds msg to the lexer's error list, positioned at the token remembered
// by the latest Lex call.
func (l *Parser) Error(msg string) {
	tkn := l.currentToken

	l.Lexer.AddError(errors.NewError(msg, &position.Position{
		StartLine: tkn.StartLine,
		EndLine:   tkn.EndLine,
		StartPos:  tkn.StartPos,
		EndPos:    tkn.EndPos,
	}))
}
// WithTokens switches on the lexer's free-floating option by calling
// SetWithFreeFloating(true). The other methods in this file return early
// while GetWithFreeFloating reports false.
func (l *Parser) WithTokens() {
l.Lexer.SetWithFreeFloating(true)
}
// Parse is the php5 Parser entrypoint. It clears the lexer's errors and the
// previous root node, installs a fresh position builder and then returns the
// result of yyParse.
func (l *Parser) Parse() int {
	// drop whatever an earlier run left behind
	l.Lexer.SetErrors(nil)
	l.rootNode = nil
	l.positionBuilder = new(positionbuilder.PositionBuilder)

	return yyParse(l)
}
// GetRootNode returns the root node currently stored on the parser. Parse
// resets it to nil before running, so it is nil until something assigns it.
func (l *Parser) GetRootNode() ast.Vertex {
return l.rootNode
}
// GetErrors returns the error list held by the lexer; Error adds to that list
// and Parse clears it.
func (l *Parser) GetErrors() []*errors.Error {
return l.Lexer.GetErrors()
}
// helpers

// lastNode returns the final element of nn, or nil when nn is empty.
func lastNode(nn []node.Node) node.Node {
	if count := len(nn); count > 0 {
		return nn[count-1]
	}

	return nil
}
// firstNode returns the first element of nn, or nil when nn is empty, which
// matches how lastNode treats an empty list instead of panicking on nn[0].
func firstNode(nn []node.Node) node.Node {
	if len(nn) == 0 {
		return nil
	}

	return nn[0]
}
// isDollar reports whether r is the '$' character.
func isDollar(r rune) bool {
	const dollar = '$'

	return r == dollar
}
// MoveFreeFloating hands the strings stored under freefloating.Start on src
// over to dst and removes that entry from src. It returns without touching
// either node when the lexer's free-floating option is off or when src has no
// free-floating collection.
func (l *Parser) MoveFreeFloating(src node.Node, dst node.Node) {
	if !l.Lexer.GetWithFreeFloating() || src.GetFreeFloating() == nil {
		return
	}

	startStrings := (*src.GetFreeFloating())[freefloating.Start]
	l.setFreeFloating(dst, freefloating.Start, startStrings)

	// the entry leaves src even when there was nothing to copy
	delete(*src.GetFreeFloating(), freefloating.Start)
}
// setFreeFloating stores strings under position p in dst's free-floating
// collection, creating the collection first when it is still nil. Any value
// already stored under p is overwritten. Nothing happens when the lexer's
// free-floating option is off or strings is empty.
func (l *Parser) setFreeFloating(dst node.Node, p freefloating.Position, strings []freefloating.String) {
	if !l.Lexer.GetWithFreeFloating() || len(strings) == 0 {
		return
	}

	collection := dst.GetFreeFloating()
	if *collection == nil {
		*collection = make(freefloating.Collection)
	}

	(*collection)[p] = strings
}
// GetFreeFloatingToken returns whatever t.GetFreeFloatingToken yields when the
// lexer's free-floating option is on, and an empty non-nil slice otherwise.
func (l *Parser) GetFreeFloatingToken(t *scanner.Token) []freefloating.String {
	if !l.Lexer.GetWithFreeFloating() {
		return []freefloating.String{}
	}

	fromToken := t.GetFreeFloatingToken()

	return fromToken
}
// addDollarToken records a one-character "$" token under freefloating.Dollar
// on v. The token sits on v's start line and covers the single position
// starting at v's StartPos. Nothing is recorded when the lexer's
// free-floating option is off.
func (l *Parser) addDollarToken(v node.Node) {
	if !l.Lexer.GetWithFreeFloating() {
		return
	}

	dollar := freefloating.String{
		StringType: freefloating.TokenType,
		Value:      "$",
		Position: &position.Position{
			StartLine: v.GetPosition().StartLine,
			EndLine:   v.GetPosition().StartLine,
			StartPos:  v.GetPosition().StartPos,
			EndPos:    v.GetPosition().StartPos + 1,
		},
	}

	l.setFreeFloating(v, freefloating.Dollar, []freefloating.String{dollar})
}
// splitSemiColonAndPhpCloseTag takes the first string stored under
// freefloating.SemiColon on prevNode and splits it into up to three pieces:
// a leading ";" (kept on prevNode), the text between that first character and
// the trailing close tag, and the close tag itself. The last two pieces are
// prepended to the freefloating.Start strings of htmlNode.
// It does nothing when the lexer's free-floating option is off or prevNode has
// no SemiColon strings.
func (l *Parser) splitSemiColonAndPhpCloseTag(htmlNode node.Node, prevNode node.Node) {
if l.Lexer.GetWithFreeFloating() == false {
return
}
// NOTE(review): unlike MoveFreeFloating, GetFreeFloating() is dereferenced here without a nil check
semiColon := (*prevNode.GetFreeFloating())[freefloating.SemiColon]
// the entry is removed first; it is re-added below only as a bare ";"
delete((*prevNode.GetFreeFloating()), freefloating.SemiColon)
if len(semiColon) == 0 {
return
}
p := semiColon[0].Position
// NOTE(review): Value[0] panics on an empty Value — confirm the lexer never produces one
if semiColon[0].Value[0] == ';' {
// keep only the ";" itself on prevNode, one position wide, on the start line
l.setFreeFloating(prevNode, freefloating.SemiColon, []freefloating.String{
{
StringType: freefloating.TokenType,
Value: ";",
Position: &position.Position{
StartLine: p.StartLine,
EndLine: p.StartLine,
StartPos: p.StartPos,
EndPos: p.StartPos + 1,
},
},
})
}
vlen := len(semiColon[0].Value)
// tlen is the length of the trailing close tag: 2, or 3 when the value ends with "?>\n"
// NOTE(review): a value ending in "?>\r\n" would still be treated as 2 — verify against the lexer
tlen := 2
if strings.HasSuffix(semiColon[0].Value, "?>\n") {
tlen = 3
}
phpCloseTag := []freefloating.String{}
// anything between index 1 and the close tag becomes a whitespace string
// NOTE(review): starting at index 1 presumes the first character is the ";" — confirm
if vlen-tlen > 1 {
phpCloseTag = append(phpCloseTag, freefloating.String{
StringType: freefloating.WhiteSpaceType,
Value: semiColon[0].Value[1 : vlen-tlen],
Position: &position.Position{
StartLine: p.StartLine,
EndLine: p.EndLine,
StartPos: p.StartPos + 1,
EndPos: p.EndPos - tlen,
},
})
}
// the last tlen characters; note they are also tagged WhiteSpaceType
// NOTE(review): slicing at vlen-tlen panics if the value is shorter than tlen
phpCloseTag = append(phpCloseTag, freefloating.String{
StringType: freefloating.WhiteSpaceType,
Value: semiColon[0].Value[vlen-tlen:],
Position: &position.Position{
StartLine: p.EndLine,
EndLine: p.EndLine,
StartPos: p.EndPos - tlen,
EndPos: p.EndPos,
},
})
// the new pieces go in front of whatever htmlNode already has under Start
l.setFreeFloating(htmlNode, freefloating.Start, append(phpCloseTag, (*htmlNode.GetFreeFloating())[freefloating.Start]...))
}
// returnTokenToPool hands every non-nil token in yyDollar[1:] back to the
// lexer via ReturnTokenToPool and clears those token fields, then clears the
// token field of yyVAL. Index 0 of yyDollar is not visited.
func (l *Parser) returnTokenToPool(yyDollar []yySymType, yyVAL *yySymType) {
	for i := 1; i < len(yyDollar); i++ {
		sym := &yyDollar[i]
		if tkn := sym.token; tkn != nil {
			l.Lexer.ReturnTokenToPool(tkn)
		}
		sym.token = nil
	}

	yyVAL.token = nil
}

9823
internal/php5/php5.go Normal file

File diff suppressed because it is too large Load Diff

7205
internal/php5/php5.y Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,419 @@
package php5_test
import (
"testing"
"github.com/z7zmey/php-parser/php5"
)
// BenchmarkPhp5 times building a php5 parser for the fixed PHP source below
// (version string "5.6") and running Parse on it, once per iteration.
// The result of Parse and any collected errors are not inspected.
func BenchmarkPhp5(b *testing.B) {
src := `<?
foo($a, ...$b);
$foo($a, ...$b);
$foo->bar($a, ...$b);
foo::bar($a, ...$b);
$foo::bar($a, ...$b);
new foo($a, ...$b);
function foo(bar $bar=null, baz &...$baz) {}
class foo {public function foo(bar $bar=null, baz &...$baz) {}}
function(bar $bar=null, baz &...$baz) {};
static function(bar $bar=null, baz &...$baz) {};
"test";
"\$test";
"
test
";
'$test';
'
$test
';
<<<CAD
CAD;
<<<CAD
hello
CAD;
<<<"CAD"
hello
CAD;
<<<"CAD"
hello $world
CAD;
<<<'CAD'
hello $world
CAD;
1234567890123456789;
12345678901234567890;
0.;
0b0111111111111111111111111111111111111111111111111111111111111111;
0b1111111111111111111111111111111111111111111111111111111111111111;
0x007111111111111111;
0x8111111111111111;
__CLASS__;
__DIR__;
__FILE__;
__FUNCTION__;
__LINE__;
__NAMESPACE__;
__METHOD__;
__TRAIT__;
"test $var";
"test $var[1]";
"test $var[1234567890123456789012345678901234567890]";
"test $var[bar]";
"test $var[$bar]";
"$foo $bar";
"test $foo->bar()";
"test ${foo}";
"test ${foo[0]}";
"test {$foo->bar()}";
if ($a) :
endif;
if ($a) :
elseif ($b):
endif;
if ($a) :
else:
endif;
if ($a) :
elseif ($b):
elseif ($c):
else:
endif;
while (1) { break; }
while (1) { break 2; }
while (1) : break(3); endwhile;
class foo{ const FOO = 1, BAR = 2; }
class foo{ function bar() {} }
class foo{ public static function &bar() {} }
class foo{ final private function bar() {} protected function baz() {} }
abstract class foo{ abstract public function bar(); }
final class foo extends bar { }
final class foo implements bar { }
final class foo implements bar, baz { }
const FOO = 1, BAR = 2;
while (1) { continue; }
while (1) { continue 2; }
while (1) { continue(3); }
declare(ticks=1);
declare(ticks=1, strict_types=1) {}
declare(ticks=1): enddeclare;
do {} while(1);
echo $a, 1;
echo($a);
for($i = 0; $i < 10; $i++, $i++) {}
for(; $i < 10; $i++) : endfor;
foreach ($a as $v) {}
foreach ([] as $v) {}
foreach ($a as $v) : endforeach;
foreach ($a as $k => $v) {}
foreach ([] as $k => $v) {}
foreach ($a as $k => &$v) {}
foreach ($a as $k => list($v)) {}
function foo() {}
function foo() {
__halt_compiler();
function bar() {}
class Baz {}
return $a;
}
function foo(array $a, callable $b) {return;}
function &foo() {return 1;}
function &foo() {}
global $a, $b, $$c, ${foo()};
a:
goto a;
__halt_compiler();
if ($a) {}
if ($a) {} elseif ($b) {}
if ($a) {} else {}
if ($a) {} elseif ($b) {} elseif ($c) {} else {}
if ($a) {} elseif ($b) {} else if ($c) {} else {}
?> <div></div> <?
interface Foo {}
interface Foo extends Bar {}
interface Foo extends Bar, Baz {}
namespace Foo;
namespace Foo\Bar {}
namespace {}
class foo {var $a;}
class foo {public static $a, $b = 1;}
class foo {public static $a = 1, $b;}
static $a, $b = 1;
static $a = 1, $b;
switch (1) :
case 1:
default:
case 2:
endswitch;
switch (1) :;
case 1;
case 2;
endswitch;
switch (1) {
case 1: break;
case 2: break;
}
switch (1) {;
case 1; break;
case 2; break;
}
throw $e;
trait Foo {}
class Foo { use Bar; }
class Foo { use Bar, Baz {} }
class Foo { use Bar, Baz { one as public; } }
class Foo { use Bar, Baz { one as public two; } }
class Foo { use Bar, Baz { Bar::one insteadof Baz, Quux; Baz::one as two; } }
try {}
try {} catch (Exception $e) {}
try {} catch (Exception $e) {} catch (RuntimeException $e) {}
try {} catch (Exception $e) {} catch (\RuntimeException $e) {} catch (namespace\AdditionException $e) {}
try {} catch (Exception $e) {} finally {}
unset($a, $b);
use Foo;
use \Foo;
use \Foo as Bar;
use Foo, Bar;
use Foo, Bar as Baz;
use function Foo, \Bar;
use function Foo as foo, \Bar as bar;
use const Foo, \Bar;
use const Foo as foo, \Bar as bar;
$a[1];
$a[1][2];
array();
array(1);
array(1=>1, &$b,);
array(3 =>&$b);
array(&$b, 1=>1, 1, 3 =>&$b);
~$a;
!$a;
Foo::Bar;
clone($a);
clone $a;
function(){};
function($a, $b) use ($c, &$d) {};
function($a, $b) use (&$c, $d) {};
function() {};
foo;
namespace\foo;
\foo;
empty($a);
empty(Foo);
@$a;
eval($a);
exit;
exit($a);
die();
die($a);
foo();
namespace\foo(&$a);
\foo([]);
$foo(yield $a);
$a--;
$a++;
--$a;
++$a;
include $a;
include_once $a;
require $a;
require_once $a;
$a instanceof Foo;
$a instanceof namespace\Foo;
$a instanceof \Foo;
isset($a, $b);
isset(Foo);
list() = $b;
list($a, $b) = $b;
list($a[]) = $b;
list(list($a)) = $b;
$a->foo();
new Foo;
new namespace\Foo();
new \Foo();
print($a);
$a->foo;
$a->foo[1];
$a->foo->bar->baz()->quux[0];
$a->foo()[1][1];
` + "`cmd $a`;" + `
` + "`cmd`;" + `
` + "``;" + `
[];
[1];
[1=>1, &$b,];
Foo::bar();
namespace\Foo::bar();
\Foo::bar();
Foo::$bar();
$foo::$bar();
Foo::$bar;
namespace\Foo::$bar;
\Foo::$bar;
$a ? $b : $c;
$a ? : $c;
$a ? $b ? $c : $d : $e;
$a ? $b : $c ? $d : $e;
-$a;
+$a;
$$a;
$$$a;
yield;
yield $a;
yield $a => $b;
yield Foo::class;
yield $a => Foo::class;
(array)$a;
(boolean)$a;
(bool)$a;
(double)$a;
(float)$a;
(integer)$a;
(int)$a;
(object)$a;
(string)$a;
(unset)$a;
$a & $b;
$a | $b;
$a ^ $b;
$a && $b;
$a || $b;
$a . $b;
$a / $b;
$a == $b;
$a >= $b;
$a > $b;
$a === $b;
$a and $b;
$a or $b;
$a xor $b;
$a - $b;
$a % $b;
$a * $b;
$a != $b;
$a !== $b;
$a + $b;
$a ** $b;
$a << $b;
$a >> $b;
$a <= $b;
$a < $b;
$a =& $b;
$a =& new Foo;
$a =& new Foo($b);
$a = $b;
$a &= $b;
$a |= $b;
$a ^= $b;
$a .= $b;
$a /= $b;
$a -= $b;
$a %= $b;
$a *= $b;
$a += $b;
$a **= $b;
$a <<= $b;
$a >>= $b;
(new \Foo());
(new \Foo())->bar()->baz;
(new \Foo())[0][0];
(new \Foo())[0]->bar();
array([0])[0][0];
"foo"[0];
foo[0];
static::foo;
new $foo;
new $foo::$bar;
new $a->b[0];
new $a->b{$b ?: null}->$c->d[0];static $a = [1][0];
static $a = !1;
static $a = ~1;
static $a = +1;
static $a = -1;
static $a = (1);
static $a = 1 ?: 2;
static $a = 1 ? 2 : 3;
static $a = 1 & 2;
static $a = 1 | 2;
static $a = 1 ^ 2;
static $a = 1 && 2;
static $a = 1 || 2;
static $a = 1 . 2;
static $a = 1 / 2;
static $a = 1 == 2;
static $a = 1 >= 2;
static $a = 1 > 2;
static $a = 1 === 2;
static $a = 1 and 2;
static $a = 1 or 2;
static $a = 1 xor 2;
static $a = 1 - 2;
static $a = 1 % 2;
static $a = 1 * 2;
static $a = 1 != 2;
static $a = 1 !== 2;
static $a = 1 + 2;
static $a = 1 ** 2;
static $a = 1 << 2;
static $a = 1 >> 2;
static $a = 1 <= 2;
static $a = 1 < 2;
static $a = Foo::bar;
static $a = Foo::class;
static $a = __CLASS__;
static $a = Foo;
static $a = namespace\Foo;
static $a = \Foo;
static $a = array();
static $a = array(1 => 1, 2);
static $a = [1, 2 => 2][0];
if (yield 1) {}
Foo::$$bar;
$foo();
$foo()[0][0];
$a{$b};
${$a};
$foo::{$bar}();
$foo::bar;
`
// the []byte conversion and the parser construction are repeated inside the timed loop
for n := 0; n < b.N; n++ {
php5parser := php5.NewParser([]byte(src), "5.6")
php5parser.Parse()
}
}

18733
internal/php5/php5_test.go Normal file

File diff suppressed because it is too large Load Diff

190
internal/php7/parser.go Normal file
View File

@@ -0,0 +1,190 @@
package php7
import (
"bytes"
"github.com/z7zmey/php-parser/internal/positionbuilder"
"github.com/z7zmey/php-parser/internal/scanner"
"github.com/z7zmey/php-parser/pkg/ast"
"github.com/z7zmey/php-parser/pkg/errors"
"github.com/z7zmey/php-parser/pkg/position"
"github.com/z7zmey/php-parser/pkg/token"
)
// Token stores t in the token field of the symbol value.
func (lval *yySymType) Token(t *scanner.Token) {
lval.token = t
}
// Parser structure. It bundles the lexer with the state the methods in this
// file keep between calls.
// NewParser fills this struct positionally, so the field order matters.
type Parser struct {
Lexer scanner.Scanner
currentToken *scanner.Token // token seen by the latest Lex call; Error reads its position
positionBuilder *positionbuilder.PositionBuilder // replaced with a fresh builder by Parse
rootNode ast.Vertex // reset to nil by Parse, returned by GetRootNode
}
// NewParser creates and returns a new Parser that reads src through a freshly
// created lexer whose PHPVersion is set to v. The remaining parser fields are
// left at their zero values.
func NewParser(src []byte, v string) *Parser {
	lexer := scanner.NewLexer(src)
	lexer.PHPVersion = v

	return &Parser{Lexer: lexer}
}
// Lex forwards to the lexer's Lex and remembers the token that was stored in
// lval, so that Error can report its position. The lexer's result is returned
// unchanged.
func (l *Parser) Lex(lval *yySymType) int {
	result := l.Lexer.Lex(lval)
	l.currentToken = lval.token

	return result
}
// Error adds msg to the lexer's error list, positioned at the token remembered
// by the latest Lex call.
func (l *Parser) Error(msg string) {
	tkn := l.currentToken

	l.Lexer.AddError(errors.NewError(msg, &position.Position{
		StartLine: tkn.StartLine,
		EndLine:   tkn.EndLine,
		StartPos:  tkn.StartPos,
		EndPos:    tkn.EndPos,
	}))
}
// WithTokens calls SetWithTokens(true) on the lexer.
// NOTE(review): every other method in this file consults
// Lexer.GetWithFreeFloating(); confirm that SetWithTokens drives that same
// setting, otherwise the free-floating helpers stay disabled.
func (l *Parser) WithTokens() {
l.Lexer.SetWithTokens(true)
}
// Parse is the php7 Parser entrypoint. It clears the lexer's errors and the
// previous root node, installs a fresh position builder and then returns the
// result of yyParse.
func (l *Parser) Parse() int {
	// drop whatever an earlier run left behind
	l.Lexer.SetErrors(nil)
	l.rootNode = nil
	l.positionBuilder = new(positionbuilder.PositionBuilder)

	return yyParse(l)
}
// GetRootNode returns the root node currently stored on the parser. Parse
// resets it to nil before running, so it is nil until something assigns it.
func (l *Parser) GetRootNode() ast.Vertex {
return l.rootNode
}
// GetErrors returns the error list held by the lexer; Error adds to that list
// and Parse clears it.
func (l *Parser) GetErrors() []*errors.Error {
return l.Lexer.GetErrors()
}
// helpers

// lastNode returns the final element of nn, or nil when nn is empty.
func lastNode(nn []ast.Vertex) ast.Vertex {
	if count := len(nn); count > 0 {
		return nn[count-1]
	}

	return nil
}
// firstNode returns the first element of nn, or nil when nn is empty, which
// matches how lastNode treats an empty list instead of panicking on nn[0].
func firstNode(nn []ast.Vertex) ast.Vertex {
	if len(nn) == 0 {
		return nil
	}

	return nn[0]
}
// isDollar reports whether r is the '$' character.
func isDollar(r rune) bool {
	const dollar = '$'

	return r == dollar
}
// MoveFreeFloating hands the tokens stored under token.Start on src over to
// dst and removes that entry from src. It returns without touching either
// vertex when the lexer's free-floating option is off or when src has no
// token collection.
func (l *Parser) MoveFreeFloating(src ast.Vertex, dst ast.Vertex) {
	if !l.Lexer.GetWithFreeFloating() || src.GetNode().Tokens == nil {
		return
	}

	startTokens := src.GetNode().Tokens[token.Start]
	l.setFreeFloating(dst, token.Start, startTokens)

	// the entry leaves src even when there was nothing to copy
	delete(src.GetNode().Tokens, token.Start)
}
// setFreeFloating stores strings under position p in dst's token collection,
// creating the collection first when it is still nil. Any value already
// stored under p is overwritten. Nothing happens when the lexer's
// free-floating option is off or strings is empty.
func (l *Parser) setFreeFloating(dst ast.Vertex, p token.Position, strings []token.Token) {
	if !l.Lexer.GetWithFreeFloating() || len(strings) == 0 {
		return
	}

	n := dst.GetNode()
	if n.Tokens == nil {
		n.Tokens = make(token.Collection)
	}

	n.Tokens[p] = strings
}
// GetFreeFloatingToken returns a copy of t.Tokens when the lexer's
// free-floating option is on, and an empty non-nil slice otherwise. The copy
// has its own backing array, so later changes to t.Tokens do not show through.
func (l *Parser) GetFreeFloatingToken(t *scanner.Token) []token.Token {
	if !l.Lexer.GetWithFreeFloating() {
		return []token.Token{}
	}

	cloned := make([]token.Token, len(t.Tokens))
	copy(cloned, t.Tokens)

	return cloned
}
// splitSemiColonAndPhpCloseTag takes the first token stored under
// token.SemiColon on prevNode and splits it into up to three pieces: a
// leading ";" (kept on prevNode), the bytes between that first byte and the
// trailing close tag, and the close tag itself. The last two pieces are
// prepended to the token.Start tokens of htmlNode.
// It does nothing when the lexer's free-floating option is off or prevNode has
// no SemiColon tokens.
func (l *Parser) splitSemiColonAndPhpCloseTag(htmlNode ast.Vertex, prevNode ast.Vertex) {
if l.Lexer.GetWithFreeFloating() == false {
return
}
semiColon := prevNode.GetNode().Tokens[token.SemiColon]
// the entry is removed first; it is re-added below only as a bare ";"
delete(prevNode.GetNode().Tokens, token.SemiColon)
if len(semiColon) == 0 {
return
}
// NOTE(review): Value[0] panics on an empty Value — confirm the lexer never produces one
if semiColon[0].Value[0] == ';' {
// keep only the first byte on prevNode; the new Value is a sub-slice of the original, not a copy
l.setFreeFloating(prevNode, token.SemiColon, []token.Token{
{
ID: token.ID(';'),
Value: semiColon[0].Value[0:1],
},
})
}
vlen := len(semiColon[0].Value)
// tlen is the length of the trailing close tag: 2, or 3 when the value ends with "?>\n"
// NOTE(review): a value ending in "?>\r\n" would still be treated as 2 — verify against the lexer
tlen := 2
if bytes.HasSuffix(semiColon[0].Value, []byte("?>\n")) {
tlen = 3
}
phpCloseTag := []token.Token{}
// anything between index 1 and the close tag becomes a whitespace token
// NOTE(review): starting at index 1 presumes the first byte is the ";" — confirm
if vlen-tlen > 1 {
phpCloseTag = append(phpCloseTag, token.Token{
ID: token.T_WHITESPACE,
Value: semiColon[0].Value[1 : vlen-tlen],
})
}
// NOTE(review): T_CLOSE_TAG is unqualified here while the whitespace ID above comes from
// package token — confirm the two constants are meant to share one numbering
// NOTE(review): slicing at vlen-tlen panics if the value is shorter than tlen
phpCloseTag = append(phpCloseTag, token.Token{
ID: T_CLOSE_TAG,
Value: semiColon[0].Value[vlen-tlen:],
})
// the new pieces go in front of whatever htmlNode already has under Start
l.setFreeFloating(htmlNode, token.Start, append(phpCloseTag, htmlNode.GetNode().Tokens[token.Start]...))
}
// returnTokenToPool hands every non-nil token in yyDollar[1:] back to the
// lexer via ReturnTokenToPool and clears those token fields, then clears the
// token field of yyVAL. Index 0 of yyDollar is not visited.
func (l *Parser) returnTokenToPool(yyDollar []yySymType, yyVAL *yySymType) {
	for i := 1; i < len(yyDollar); i++ {
		sym := &yyDollar[i]
		if tkn := sym.token; tkn != nil {
			l.Lexer.ReturnTokenToPool(tkn)
		}
		sym.token = nil
	}

	yyVAL.token = nil
}

8382
internal/php7/php7.go Normal file

File diff suppressed because it is too large Load Diff

5655
internal/php7/php7.y Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,387 @@
package php7_test
import (
"testing"
"github.com/z7zmey/php-parser/internal/php7"
)
// BenchmarkPhp7 times building a php7 parser for the fixed PHP source below
// (version string "7.4") and running Parse on it, once per iteration.
// The result of Parse and any collected errors are not inspected.
func BenchmarkPhp7(b *testing.B) {
src := `<?
foo($a, ...$b);
$foo($a, ...$b);
$foo->bar($a, ...$b);
foo::bar($a, ...$b);
$foo::bar($a, ...$b);
new foo($a, ...$b);
/** anonymous class */
new class ($a, ...$b) {};
new class {};
new $foo;
new $foo[1];
new $foo{$bar};
new $foo->bar;
new $foo::$bar;
new static::$bar;
function foo(?bar $bar=null, baz &...$baz) {}
class foo {public function foo(?bar $bar=null, baz &...$baz) {}}
function(?bar $bar=null, baz &...$baz) {};
static function(?bar $bar=null, baz &...$baz) {};
"test";
"\$test";
"
test
";
'$test';
'
$test
';
<<<CAD
CAD;
<<<CAD
hello
CAD;
<<<"CAD"
hello
CAD;
<<<"CAD"
hello $world
CAD;
<<<'CAD'
hello $world
CAD;
1234567890123456789;
12345678901234567890;
0.;
0b0111111111111111111111111111111111111111111111111111111111111111;
0b1111111111111111111111111111111111111111111111111111111111111111;
0x007111111111111111;
0x8111111111111111;
__CLASS__;
__DIR__;
__FILE__;
__FUNCTION__;
__LINE__;
__NAMESPACE__;
__METHOD__;
__TRAIT__;
"test $var";
"test $var[1]";
"test $var[-1]";
"test $var[1234567890123456789012345678901234567890]";
"test $var[-1234567890123456789012345678901234567890]";
"test $var[bar]";
"test $var[$bar]";
"$foo $bar";
"test $foo->bar()";
"test ${foo}";
"test ${foo[0]}";
"test ${$foo}";
"test {$foo->bar()}";
if ($a) :
endif;
if ($a) :
elseif ($b):
endif;
if ($a) :
else:
endif;
if ($a) :
elseif ($b):
elseif ($c):
else:
endif;
while (1) { break; }
while (1) { break 2; }
while (1) : break(3); endwhile;
class foo{ public const FOO = 1, BAR = 2; }
class foo{ const FOO = 1, BAR = 2; }
class foo{ function bar() {} }
class foo{ public static function &bar() {} }
class foo{ public static function &bar(): void {} }
abstract class foo{ }
final class foo extends bar { }
final class foo implements bar { }
final class foo implements bar, baz { }
new class() extends foo implements bar, baz { };
const FOO = 1, BAR = 2;
while (1) { continue; }
while (1) { continue 2; }
while (1) { continue(3); }
declare(ticks=1);
declare(ticks=1) {}
declare(ticks=1): enddeclare;
do {} while(1);
echo $a, 1;
echo($a);
for($i = 0; $i < 10; $i++, $i++) {}
for(; $i < 10; $i++, $i++) : endfor;
foreach ($a as $v) {}
foreach ($a as $v) : endforeach;
foreach ($a as $k => $v) {}
foreach ($a as $k => &$v) {}
foreach ($a as $k => list($v)) {}
foreach ($a as $k => [$v]) {}
function foo() {}
function foo() {return;}
function &foo() {return 1;}
function &foo(): void {}
global $a, $b;
a:
goto a;
__halt_compiler();
if ($a) {}
if ($a) {} elseif ($b) {}
if ($a) {} else {}
if ($a) {} elseif ($b) {} elseif ($c) {} else {}
if ($a) {} elseif ($b) {} else if ($c) {} else {}
?> <div></div> <?
interface Foo {}
interface Foo extends Bar {}
interface Foo extends Bar, Baz {}
namespace Foo;
namespace Foo {}
namespace {}
class foo {var $a;}
class foo {public static $a, $b = 1;}
static $a, $b = 1;
switch (1) :
case 1:
default:
case 2:
endswitch;
switch (1) :;
case 1;
case 2;
endswitch;
switch (1) {
case 1: break;
case 2: break;
}
switch (1) {;
case 1; break;
case 2; break;
}
throw $e;
trait Foo {}
class Foo { use Bar; }
class Foo { use Bar, Baz {} }
class Foo { use Bar, Baz { one as include; } }
class Foo { use Bar, Baz { one as public; } }
class Foo { use Bar, Baz { one as public two; } }
class Foo { use Bar, Baz { Bar::one insteadof Baz, Quux; Baz::one as two; } }
try {}
try {} catch (Exception $e) {}
try {} catch (Exception|RuntimeException $e) {}
try {} catch (Exception $e) {} catch (RuntimeException $e) {}
try {} catch (Exception $e) {} finally {}
unset($a, $b,);
use Foo;
use \Foo;
use \Foo as Bar;
use Foo, Bar;
use Foo, Bar as Baz;
use function Foo, \Bar;
use function Foo as foo, \Bar as bar;
use const Foo, \Bar;
use const Foo as foo, \Bar as bar;
use \Foo\{Bar, Baz};
use Foo\{Bar, Baz as quux};
use function Foo\{Bar, Baz};
use const \Foo\{Bar, Baz};
use Foo\{const Bar, function Baz};
$a[1];
$a[1][2];
array();
array(1);
array(1=>1, &$b,);
~$a;
!$a;
Foo::Bar;
$foo::Bar;
clone($a);
clone $a;
function(){};
function($a, $b) use ($c, &$d) {};
function(): void {};
foo;
namespace\foo;
\foo;
empty($a);
@$a;
eval($a);
exit;
exit($a);
die;
die($a);
foo();
namespace\foo();
\foo();
$foo();
$a--;
$a++;
--$a;
++$a;
include $a;
include_once $a;
require $a;
require_once $a;
$a instanceof Foo;
$a instanceof namespace\Foo;
$a instanceof \Foo;
isset($a, $b);
list($a) = $b;
list($a[]) = $b;
list(list($a)) = $b;
$a->foo();
new Foo();
new namespace\Foo();
new \Foo();
new class ($a, ...$b) {};
print($a);
$a->foo;
` + "`cmd $a`;" + `
` + "`cmd`;" + `
` + "``;" + `
[];
[1];
[1=>1, &$b,];
[$a] = $b;
[$a[]] = $b;
[list($a)] = $b;
Foo::bar();
namespace\Foo::bar();
\Foo::bar();
Foo::$bar;
$foo::$bar;
namespace\Foo::$bar;
\Foo::$bar;
$a ? $b : $c;
$a ? : $c;
$a ? $b ? $c : $d : $e;
$a ? $b : $c ? $d : $e;
-$a;
+$a;
$$a;
yield;
yield $a;
yield $a => $b;
yield from $a;
(array)$a;
(boolean)$a;
(bool)$a;
(double)$a;
(float)$a;
(integer)$a;
(int)$a;
(object)$a;
(string)$a;
(unset)$a;
$a & $b;
$a | $b;
$a ^ $b;
$a && $b;
$a || $b;
$a ?? $b;
$a . $b;
$a / $b;
$a == $b;
$a >= $b;
$a > $b;
$a === $b;
$a and $b;
$a or $b;
$a xor $b;
$a - $b;
$a % $b;
$a * $b;
$a != $b;
$a !== $b;
$a + $b;
$a ** $b;
$a << $b;
$a >> $b;
$a <= $b;
$a < $b;
$a <=> $b;
$a =& $b;
$a = $b;
$a &= $b;
$a |= $b;
$a ^= $b;
$a .= $b;
$a /= $b;
$a -= $b;
$a %= $b;
$a *= $b;
$a += $b;
$a **= $b;
$a <<= $b;
$a >>= $b;
class foo {public function class() {} }
\foo\bar();
function foo(&$a, ...$b) {
__halt_compiler();
function bar() {}
class Baz {}
trait Quux{}
interface Quuux {}
}
function foo(&$a = 1, ...$b = 1, $c = 1) {}
function foo(array $a, callable $b) {}
abstract final class foo { abstract protected static function bar(); final private function baz() {} }
(new Foo)->bar;
(new Foo)();
[$foo][0]();
foo[1]();
"foo"();
[1]{$foo}();
${foo()};
Foo::$bar();
Foo::{$bar[0]}();
$foo->$bar;
$foo->{$bar[0]};
[1=>&$a, 2=>list($b)];
`
// the []byte conversion and the parser construction are repeated inside the timed loop
for n := 0; n < b.N; n++ {
php7parser := php7.NewParser([]byte(src), "7.4")
php7parser.Parse()
}
}

20023
internal/php7/php7_test.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,207 @@
package positionbuilder
import (
"github.com/z7zmey/php-parser/internal/scanner"
"github.com/z7zmey/php-parser/pkg/ast"
"github.com/z7zmey/php-parser/pkg/position"
)
// PositionBuilder provides functions to construct positions from tokens,
// nodes and node lists. It carries no state.
type PositionBuilder struct{}
// startPos is the start half of a position: the line and the offset where
// something begins. The helpers below use -1 in both fields for "unknown".
type startPos struct {
startLine int
startPos int
}
// endPos is the end half of a position: the line and the offset where
// something ends. The helpers below use -1 in both fields for "unknown".
type endPos struct {
endLine int
endPos int
}
// getListStartPos returns the start of the first vertex in l, or {-1, -1}
// when l is nil or empty.
func (b *PositionBuilder) getListStartPos(l []ast.Vertex) startPos {
	if len(l) > 0 {
		return b.getNodeStartPos(l[0])
	}

	return startPos{startLine: -1, startPos: -1}
}
// getNodeStartPos returns the start line and start offset recorded in n's
// position. It returns {-1, -1} when n is nil or carries no position.
func (b *PositionBuilder) getNodeStartPos(n ast.Vertex) startPos {
	unknown := startPos{startLine: -1, startPos: -1}
	if n == nil {
		return unknown
	}

	if p := n.GetNode().Position; p != nil {
		return startPos{startLine: p.StartLine, startPos: p.StartPos}
	}

	return unknown
}
// getListEndPos returns the end of the last vertex in l, or {-1, -1} when l
// is nil or empty.
func (b *PositionBuilder) getListEndPos(l []ast.Vertex) endPos {
	if count := len(l); count > 0 {
		return b.getNodeEndPos(l[count-1])
	}

	return endPos{endLine: -1, endPos: -1}
}
// getNodeEndPos returns the end line and end offset recorded in n's position.
// It returns {-1, -1} when n is nil or carries no position.
func (b *PositionBuilder) getNodeEndPos(n ast.Vertex) endPos {
	unknown := endPos{endLine: -1, endPos: -1}
	if n == nil {
		return unknown
	}

	if p := n.GetNode().Position; p != nil {
		return endPos{endLine: p.EndLine, endPos: p.EndPos}
	}

	return unknown
}
// NewNodeListPosition returns a new Position that starts where the first
// vertex of list starts and ends where its last vertex ends. All four fields
// are -1 for a nil or empty list.
func (b *PositionBuilder) NewNodeListPosition(list []ast.Vertex) *position.Position {
	start := b.getListStartPos(list)
	end := b.getListEndPos(list)

	return &position.Position{
		StartLine: start.startLine,
		EndLine:   end.endLine,
		StartPos:  start.startPos,
		EndPos:    end.endPos,
	}
}
// NewNodePosition returns a new Position holding the start and end recorded
// on n. All four fields are -1 when n is nil or has no position.
func (b *PositionBuilder) NewNodePosition(n ast.Vertex) *position.Position {
	start := b.getNodeStartPos(n)
	end := b.getNodeEndPos(n)

	return &position.Position{
		StartLine: start.startLine,
		EndLine:   end.endLine,
		StartPos:  start.startPos,
		EndPos:    end.endPos,
	}
}
// NewTokenPosition returns a new Position copied from the line and offset
// bounds of t.
func (b *PositionBuilder) NewTokenPosition(t *scanner.Token) *position.Position {
	pos := new(position.Position)
	pos.StartLine = t.StartLine
	pos.EndLine = t.EndLine
	pos.StartPos = t.StartPos
	pos.EndPos = t.EndPos

	return pos
}
// NewTokensPosition returns a new Position that starts at the start of
// startToken and ends at the end of endToken.
func (b *PositionBuilder) NewTokensPosition(startToken *scanner.Token, endToken *scanner.Token) *position.Position {
	pos := new(position.Position)
	pos.StartLine = startToken.StartLine
	pos.EndLine = endToken.EndLine
	pos.StartPos = startToken.StartPos
	pos.EndPos = endToken.EndPos

	return pos
}
// NewTokenNodePosition returns a new Position that starts at the start of
// token t and ends where node n ends. The end fields are -1 when n is nil or
// has no position.
func (b *PositionBuilder) NewTokenNodePosition(t *scanner.Token, n ast.Vertex) *position.Position {
	startLine, startOffset := t.StartLine, t.StartPos
	end := b.getNodeEndPos(n)

	return &position.Position{
		StartLine: startLine,
		EndLine:   end.endLine,
		StartPos:  startOffset,
		EndPos:    end.endPos,
	}
}
// NewNodeTokenPosition returns a new Position that starts where node n starts
// and ends at the end of token t. The start fields are -1 when n is nil or
// has no position.
func (b *PositionBuilder) NewNodeTokenPosition(n ast.Vertex, t *scanner.Token) *position.Position {
	start := b.getNodeStartPos(n)

	return &position.Position{
		StartLine: start.startLine,
		EndLine:   t.EndLine,
		StartPos:  start.startPos,
		EndPos:    t.EndPos,
	}
}
// NewNodesPosition returns a new Position that starts where startNode starts
// and ends where endNode ends. Each half falls back to -1 when its node is
// nil or has no position.
func (b *PositionBuilder) NewNodesPosition(startNode ast.Vertex, endNode ast.Vertex) *position.Position {
	start := b.getNodeStartPos(startNode)
	end := b.getNodeEndPos(endNode)

	return &position.Position{
		StartLine: start.startLine,
		EndLine:   end.endLine,
		StartPos:  start.startPos,
		EndPos:    end.endPos,
	}
}
// NewNodeListTokenPosition returns a new Position that starts where the first
// vertex of list starts and ends at the end of token t. The start fields are
// -1 for a nil or empty list.
func (b *PositionBuilder) NewNodeListTokenPosition(list []ast.Vertex, t *scanner.Token) *position.Position {
	start := b.getListStartPos(list)

	return &position.Position{
		StartLine: start.startLine,
		EndLine:   t.EndLine,
		StartPos:  start.startPos,
		EndPos:    t.EndPos,
	}
}
// NewTokenNodeListPosition returns a new Position that starts at the start of
// token t and ends where the last vertex of list ends. The end fields are -1
// for a nil or empty list.
func (b *PositionBuilder) NewTokenNodeListPosition(t *scanner.Token, list []ast.Vertex) *position.Position {
	startLine, startOffset := t.StartLine, t.StartPos
	end := b.getListEndPos(list)

	return &position.Position{
		StartLine: startLine,
		EndLine:   end.endLine,
		StartPos:  startOffset,
		EndPos:    end.endPos,
	}
}
// NewNodeNodeListPosition returns a new Position that starts where node n
// starts and ends where the last vertex of list ends. Each half falls back to
// -1 when its source is missing.
func (b *PositionBuilder) NewNodeNodeListPosition(n ast.Vertex, list []ast.Vertex) *position.Position {
	start := b.getNodeStartPos(n)
	end := b.getListEndPos(list)

	return &position.Position{
		StartLine: start.startLine,
		EndLine:   end.endLine,
		StartPos:  start.startPos,
		EndPos:    end.endPos,
	}
}
// NewNodeListNodePosition returns a new Position that starts where the first
// vertex of list starts and ends where node n ends. Each half falls back to
// -1 when its source is missing.
func (b *PositionBuilder) NewNodeListNodePosition(list []ast.Vertex, n ast.Vertex) *position.Position {
	start := b.getListStartPos(list)
	end := b.getNodeEndPos(n)

	return &position.Position{
		StartLine: start.startLine,
		EndLine:   end.endLine,
		StartPos:  start.startPos,
		EndPos:    end.endPos,
	}
}
// NewOptionalListTokensPosition returns a new Position that always ends at
// the end of endToken. It starts at token t when list is nil, and at the first
// vertex of list otherwise. Only a nil list selects t: an empty non-nil list
// yields -1 start fields.
func (b *PositionBuilder) NewOptionalListTokensPosition(list []ast.Vertex, t *scanner.Token, endToken *scanner.Token) *position.Position {
	var start startPos
	if list == nil {
		start = startPos{startLine: t.StartLine, startPos: t.StartPos}
	} else {
		start = b.getListStartPos(list)
	}

	return &position.Position{
		StartLine: start.startLine,
		EndLine:   endToken.EndLine,
		StartPos:  start.startPos,
		EndPos:    endToken.EndPos,
	}
}

View File

@@ -0,0 +1,485 @@
package positionbuilder_test
import (
"gotest.tools/assert"
"testing"
"github.com/z7zmey/php-parser/internal/positionbuilder"
"github.com/z7zmey/php-parser/internal/scanner"
"github.com/z7zmey/php-parser/pkg/ast"
"github.com/z7zmey/php-parser/pkg/position"
)
// TestNewTokenPosition checks that the line and offset bounds of a single
// token are copied into the resulting position.
func TestNewTokenPosition(t *testing.T) {
	builder := positionbuilder.PositionBuilder{}

	tkn := &scanner.Token{
		Value:     []byte(`foo`),
		StartLine: 1,
		EndLine:   1,
		StartPos:  0,
		EndPos:    3,
	}

	pos := builder.NewTokenPosition(tkn)

	assert.DeepEqual(t, &position.Position{1, 1, 0, 3}, pos)
}
func TestNewTokensPosition(t *testing.T) {
builder := positionbuilder.PositionBuilder{}
token1 := &scanner.Token{
Value: []byte(`foo`),
StartLine: 1,
EndLine: 1,
StartPos: 0,
EndPos: 3,
}
token2 := &scanner.Token{
Value: []byte(`foo`),
StartLine: 2,
EndLine: 2,
StartPos: 4,
EndPos: 6,
}
pos := builder.NewTokensPosition(token1, token2)
assert.DeepEqual(t, &position.Position{1, 2, 0, 6}, pos)
}
// TestNewNodePosition checks that a node's own position is reproduced.
func TestNewNodePosition(t *testing.T) {
	ident := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 3},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodePosition(ident)

	want := &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 3}
	assert.DeepEqual(t, want, got)
}
// TestNewTokenNodePosition checks that the position starts at the token and
// ends at the node.
func TestNewTokenNodePosition(t *testing.T) {
	tkn := &scanner.Token{Value: []byte(`foo`), StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 3}
	ident := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 4, EndPos: 12},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewTokenNodePosition(tkn, ident)

	want := &position.Position{StartLine: 1, EndLine: 2, StartPos: 0, EndPos: 12}
	assert.DeepEqual(t, want, got)
}
// TestNewNodeTokenPosition checks that the position starts at the node and
// ends at the token.
func TestNewNodeTokenPosition(t *testing.T) {
	ident := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 9},
		},
	}
	tkn := &scanner.Token{Value: []byte(`foo`), StartLine: 2, EndLine: 2, StartPos: 10, EndPos: 12}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeTokenPosition(ident, tkn)

	want := &position.Position{StartLine: 1, EndLine: 2, StartPos: 0, EndPos: 12}
	assert.DeepEqual(t, want, got)
}
// TestNewNodeListPosition checks that a list's position runs from its first
// element to its last.
func TestNewNodeListPosition(t *testing.T) {
	head := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 9},
		},
	}
	tail := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 10, EndPos: 19},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeListPosition([]ast.Vertex{head, tail})

	want := &position.Position{StartLine: 1, EndLine: 2, StartPos: 0, EndPos: 19}
	assert.DeepEqual(t, want, got)
}
// TestNewNodesPosition checks that the position starts at the first node and
// ends at the second.
func TestNewNodesPosition(t *testing.T) {
	head := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 9},
		},
	}
	tail := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 10, EndPos: 19},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodesPosition(head, tail)

	want := &position.Position{StartLine: 1, EndLine: 2, StartPos: 0, EndPos: 19}
	assert.DeepEqual(t, want, got)
}
// TestNewNodeListTokenPosition checks that the position starts at the first
// list element and ends at the token.
func TestNewNodeListTokenPosition(t *testing.T) {
	head := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 9},
		},
	}
	tail := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 10, EndPos: 19},
		},
	}
	tkn := &scanner.Token{Value: []byte(`foo`), StartLine: 3, EndLine: 3, StartPos: 20, EndPos: 22}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeListTokenPosition([]ast.Vertex{head, tail}, tkn)

	want := &position.Position{StartLine: 1, EndLine: 3, StartPos: 0, EndPos: 22}
	assert.DeepEqual(t, want, got)
}
// TestNewTokenNodeListPosition checks that the position starts at the token
// and ends at the last list element.
func TestNewTokenNodeListPosition(t *testing.T) {
	tkn := &scanner.Token{Value: []byte(`foo`), StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 2}
	head := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 3, EndPos: 10},
		},
	}
	tail := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 3, EndLine: 3, StartPos: 11, EndPos: 20},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewTokenNodeListPosition(tkn, []ast.Vertex{head, tail})

	want := &position.Position{StartLine: 1, EndLine: 3, StartPos: 0, EndPos: 20}
	assert.DeepEqual(t, want, got)
}
// TestNewNodeNodeListPosition checks that the position starts at the single
// node and ends at the last list element.
func TestNewNodeNodeListPosition(t *testing.T) {
	lead := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 8},
		},
	}
	middle := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 9, EndPos: 17},
		},
	}
	tail := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 3, EndLine: 3, StartPos: 18, EndPos: 26},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeNodeListPosition(lead, []ast.Vertex{middle, tail})

	want := &position.Position{StartLine: 1, EndLine: 3, StartPos: 0, EndPos: 26}
	assert.DeepEqual(t, want, got)
}
// TestNewNodeListNodePosition expects the position of a node list followed by
// a node to start at the first list element and end at the trailing node.
func TestNewNodeListNodePosition(t *testing.T) {
	leading := []ast.Vertex{
		&ast.Identifier{
			Node: ast.Node{
				Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 8},
			},
		},
		&ast.Identifier{
			Node: ast.Node{
				Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 9, EndPos: 17},
			},
		},
	}
	last := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 3, EndLine: 3, StartPos: 18, EndPos: 26},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeListNodePosition(leading, last)

	want := &position.Position{StartLine: 1, EndLine: 3, StartPos: 0, EndPos: 26}
	assert.DeepEqual(t, want, got)
}
func TestNewOptionalListTokensPosition(t *testing.T) {
builder := positionbuilder.PositionBuilder{}
token1 := &scanner.Token{
Value: []byte(`foo`),
StartLine: 1,
EndLine: 1,
StartPos: 0,
EndPos: 3,
}
token2 := &scanner.Token{
Value: []byte(`foo`),
StartLine: 2,
EndLine: 2,
StartPos: 4,
EndPos: 6,
}
pos := builder.NewOptionalListTokensPosition(nil, token1, token2)
assert.DeepEqual(t, &position.Position{1, 2, 0, 6}, pos)
}
// TestNewOptionalListTokensPosition2 covers the case where the optional node
// list is present: the expected position starts at the first node and ends at
// the second token.
func TestNewOptionalListTokensPosition2(t *testing.T) {
	nodes := []ast.Vertex{
		&ast.Identifier{
			Node: ast.Node{
				Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 9, EndPos: 17},
			},
		},
		&ast.Identifier{
			Node: ast.Node{
				Position: &position.Position{StartLine: 3, EndLine: 3, StartPos: 18, EndPos: 26},
			},
		},
	}
	startTkn := &scanner.Token{
		Value:     []byte(`foo`),
		StartLine: 4,
		EndLine:   4,
		StartPos:  27,
		EndPos:    29,
	}
	endTkn := &scanner.Token{
		Value:     []byte(`foo`),
		StartLine: 5,
		EndLine:   5,
		StartPos:  30,
		EndPos:    32,
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewOptionalListTokensPosition(nodes, startTkn, endTkn)

	want := &position.Position{StartLine: 2, EndLine: 5, StartPos: 9, EndPos: 32}
	assert.DeepEqual(t, want, got)
}
// TestNilNodePos expects every field to be -1 when both nodes are nil.
func TestNilNodePos(t *testing.T) {
	var builder positionbuilder.PositionBuilder
	got := builder.NewNodesPosition(nil, nil)

	want := &position.Position{StartLine: -1, EndLine: -1, StartPos: -1, EndPos: -1}
	assert.DeepEqual(t, want, got)
}
// TestNilNodeListPos expects the end fields to be -1 when the node list that
// should supply them is nil.
func TestNilNodeListPos(t *testing.T) {
	head := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 8},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeNodeListPosition(head, nil)

	want := &position.Position{StartLine: 1, EndLine: -1, StartPos: 0, EndPos: -1}
	assert.DeepEqual(t, want, got)
}
// TestNilNodeListTokenPos expects the start fields to be -1 when the node
// list that should supply them is nil.
func TestNilNodeListTokenPos(t *testing.T) {
	closing := &scanner.Token{
		Value:     []byte(`foo`),
		StartLine: 1,
		EndLine:   1,
		StartPos:  0,
		EndPos:    3,
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeListTokenPosition(nil, closing)

	want := &position.Position{StartLine: -1, EndLine: 1, StartPos: -1, EndPos: 3}
	assert.DeepEqual(t, want, got)
}
// TestEmptyNodeListPos expects the end fields to be -1 when the node list is
// empty but not nil.
func TestEmptyNodeListPos(t *testing.T) {
	head := &ast.Identifier{
		Node: ast.Node{
			Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 0, EndPos: 8},
		},
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeNodeListPosition(head, []ast.Vertex{})

	want := &position.Position{StartLine: 1, EndLine: -1, StartPos: 0, EndPos: -1}
	assert.DeepEqual(t, want, got)
}
// TestEmptyNodeListTokenPos expects the start fields to be -1 when the node
// list is empty but not nil.
func TestEmptyNodeListTokenPos(t *testing.T) {
	closing := &scanner.Token{
		Value:     []byte(`foo`),
		StartLine: 1,
		EndLine:   1,
		StartPos:  0,
		EndPos:    3,
	}

	var builder positionbuilder.PositionBuilder
	got := builder.NewNodeListTokenPosition([]ast.Vertex{}, closing)

	want := &position.Position{StartLine: -1, EndLine: 1, StartPos: -1, EndPos: 3}
	assert.DeepEqual(t, want, got)
}

267
internal/scanner/lexer.go Normal file
View File

@@ -0,0 +1,267 @@
package scanner
import (
"bytes"
"strings"
"github.com/z7zmey/php-parser/internal/version"
"github.com/z7zmey/php-parser/pkg/errors"
"github.com/z7zmey/php-parser/pkg/position"
"github.com/z7zmey/php-parser/pkg/token"
)
// Scanner describes the lexer operations exposed to its consumers. *Lexer in
// this package provides every method listed here.
type Scanner interface {
// Lex scans the next token, passes it to lval and returns its identifier.
Lex(lval Lval) int
// ReturnTokenToPool gives a token back so that a later Lex call can reuse it.
ReturnTokenToPool(t *Token)
// GetPhpDocComment returns the doc comment text currently remembered by the scanner.
GetPhpDocComment() string
// SetPhpDocComment overwrites the remembered doc comment text.
SetPhpDocComment(string)
// GetErrors returns the errors collected so far.
GetErrors() []*errors.Error
// GetWithFreeFloating reports whether free-floating tokens are being collected.
// NOTE(review): the matching setter is named SetWithTokens; the getter/setter
// names are asymmetric.
GetWithFreeFloating() bool
// SetWithTokens switches the collection of free-floating tokens on or off.
SetWithTokens(bool)
// AddError appends a single error to the collected errors.
AddError(e *errors.Error)
// SetErrors replaces the collected errors.
SetErrors(e []*errors.Error)
}
// Lval is the interface that a parser's yySymType must implement so that
// Lexer.Lex can hand over the scanned token.
type Lval interface {
// Token receives the token produced by the current Lex call.
Token(tkn *Token)
}
// Lexer holds the input and the scanner state used by Lex. The state machine
// itself is generated by Ragel from scanner.rl, which accesses these fields
// through the "lex." prefix.
type Lexer struct {
// data is the input being scanned.
data []byte
// p is the current read offset and pe the end offset (NewLexer sets pe to
// len(data)); cs is the current state of the generated machine.
p, pe, cs int
// ts and te delimit the current match (token value is data[ts:te]).
// act is presumably Ragel's scanner bookkeeping variable; confirm against
// the generated code.
ts, te, act int
// stack and top form the call stack of saved machine states used by call/ret.
stack []int
top int
// heredocLabel is the label of the heredoc/nowdoc being scanned.
heredocLabel []byte
// TokenPool supplies the *Token objects handed to the parser.
TokenPool *TokenPool
// Tokens collects the free-floating tokens recorded by addToken during the
// current Lex call.
Tokens []token.Token
// WithTokens enables the collection into Tokens.
WithTokens bool
// PhpDocComment is the text of the most recently scanned "/**" comment.
PhpDocComment string
// lastToken is not referenced by the code visible in this file.
lastToken *Token
// Errors holds the errors reported through Error and AddError.
Errors []*errors.Error
// NewLines records the offsets used to translate byte offsets into lines.
NewLines NewLines
// PHPVersion selects the heredoc closing rules (see isHeredocEnd).
PHPVersion string
}
// ReturnTokenToPool puts t back into the lexer's token pool for reuse.
func (lex *Lexer) ReturnTokenToPool(t *Token) {
	lex.TokenPool.Put(t)
}
// GetPhpDocComment returns the doc comment text currently stored on the lexer.
func (lex *Lexer) GetPhpDocComment() string {
	return lex.PhpDocComment
}
// SetPhpDocComment replaces the doc comment text stored on the lexer.
func (lex *Lexer) SetPhpDocComment(s string) {
	lex.PhpDocComment = s
}
// GetErrors returns the errors collected by the lexer so far.
func (lex *Lexer) GetErrors() []*errors.Error {
	return lex.Errors
}
// GetWithFreeFloating reports whether the lexer collects free-floating tokens
// (the WithTokens flag).
func (lex *Lexer) GetWithFreeFloating() bool {
	return lex.WithTokens
}
// SetWithTokens enables or disables the collection of free-floating tokens.
func (lex *Lexer) SetWithTokens(b bool) {
	lex.WithTokens = b
}
// AddError appends e to the errors collected by the lexer.
func (lex *Lexer) AddError(e *errors.Error) {
	lex.Errors = append(lex.Errors, e)
}
// SetErrors replaces the lexer's collected errors with e.
func (lex *Lexer) SetErrors(e []*errors.Error) {
	lex.Errors = e
}
// setTokenPosition copies the boundaries of the current match (lex.ts, lex.te)
// into token and resolves the lines of its first and last byte.
func (lex *Lexer) setTokenPosition(token *Token) {
	start, end := lex.ts, lex.te

	token.StartPos = start
	token.EndPos = end
	token.StartLine = lex.NewLines.GetLine(start)
	// te is exclusive, so the last byte of the match is at te-1.
	token.EndLine = lex.NewLines.GetLine(end - 1)
}
// addToken records data[ps:pe] as a free-floating token with the given id.
// It does nothing unless WithTokens is set.
func (lex *Lexer) addToken(id TokenID, ps, pe int) {
	if lex.WithTokens {
		freeFloating := token.Token{
			ID:    token.ID(id),
			Value: lex.data[ps:pe],
		}
		lex.Tokens = append(lex.Tokens, freeFloating)
	}
}
// isNotStringVar reports whether the byte at lex.p does NOT start a variable
// interpolation inside a string. It returns false only for "$" followed by
// "{" or a valid variable-name start, and for "{" followed by "$"; a byte
// preceded by a single backslash is treated as escaped and never starts one.
func (lex *Lexer) isNotStringVar() bool {
	p := lex.p
	data := lex.data

	// Escaped by a backslash that is not itself preceded by a backslash.
	// The p guards keep the look-behind inside the buffer.
	if p >= 1 && data[p-1] == '\\' && (p < 2 || data[p-2] != '\\') {
		return true
	}

	// Both data[p] and data[p+1] are inspected below, so at least two bytes
	// must remain. The previous check (len < p+1) let p be the last index and
	// then read one byte past the end.
	if p+1 >= len(data) {
		return true
	}

	switch data[p] {
	case '$':
		return !(data[p+1] == '{' || isValidVarNameStart(data[p+1]))
	case '{':
		return data[p+1] != '$'
	}

	return true
}
// isNotStringEnd reports whether the byte at lex.p is NOT the closing
// delimiter s. A byte preceded by a single backslash counts as escaped and is
// never treated as the delimiter.
func (lex *Lexer) isNotStringEnd(s byte) bool {
	pos := lex.p

	escaped := lex.data[pos-1] == '\\' && lex.data[pos-2] != '\\'
	if escaped {
		return true
	}

	return lex.data[pos] != s
}
// isHeredocEnd reports whether the heredoc closing label starts at offset p.
// The pre-7.3 rules are used only when PHPVersion compares lower than "7.3";
// a version that cannot be compared falls back to the 7.3+ rules.
func (lex *Lexer) isHeredocEnd(p int) bool {
	cmp, err := version.Compare(lex.PHPVersion, "7.3")
	if err == nil && cmp == -1 {
		return lex.isHeredocEndBefore73(p)
	}

	return lex.isHeredocEndSince73(p)
}
// isHeredocEndBefore73 applies the closing rules used for PHP versions below
// 7.3: the label must start directly after a line break and may only be
// followed by the end of input, a line break, or ";" plus (end of input or a
// line break).
func (lex *Lexer) isHeredocEndBefore73(p int) bool {
	if prev := lex.data[p-1]; prev != '\r' && prev != '\n' {
		return false
	}

	size := len(lex.data)
	end := p + len(lex.heredocLabel)
	if size < end {
		return false
	}

	isLineBreak := func(c byte) bool { return c == '\r' || c == '\n' }

	if size > end {
		next := lex.data[end]
		if next != ';' && !isLineBreak(next) {
			return false
		}
		if next == ';' && size > end+1 && !isLineBreak(lex.data[end+1]) {
			return false
		}
	}

	return bytes.Equal(lex.heredocLabel, lex.data[p:end])
}
// isHeredocEndSince73 applies the closing rules used for PHP 7.3 and later:
// the label must be on its own line start, may be indented with spaces or
// tabs, and must not be followed by a byte that could continue an identifier.
//
// On success lex.p is moved to the first byte of the label (past the
// indentation) and true is returned; otherwise the lexer is left untouched.
func (lex *Lexer) isHeredocEndSince73(p int) bool {
	data := lex.data

	if prev := data[p-1]; prev != '\r' && prev != '\n' {
		return false
	}

	// Skip the indentation. The bound matters: p can already equal len(data)
	// when the input ends right after a line break, and trailing blanks can
	// run up to the end of the input.
	for p < len(data) && (data[p] == ' ' || data[p] == '\t') {
		p++
	}

	end := p + len(lex.heredocLabel)
	if len(data) < end {
		return false
	}

	// "EOTX" is an ordinary identifier, not the closing label "EOT".
	if len(data) > end && isValidVarName(data[end]) {
		return false
	}

	if !bytes.Equal(lex.heredocLabel, data[p:end]) {
		return false
	}

	lex.p = p
	return true
}
// isNotHeredocEnd is the negation of isHeredocEnd, in the form needed by the
// scanner conditions.
func (lex *Lexer) isNotHeredocEnd(p int) bool {
	atEnd := lex.isHeredocEnd(p)
	return !atEnd
}
// growCallStack makes sure lex.stack has a slot at index lex.top, appending
// one element when top has reached the current length.
func (lex *Lexer) growCallStack() {
	if lex.top != len(lex.stack) {
		return
	}

	lex.stack = append(lex.stack, 0)
}
// isNotPhpCloseToken reports whether the input at lex.p does NOT start with
// "?>". When lex.p is the last byte there is no room for the pair and the
// result is true.
func (lex *Lexer) isNotPhpCloseToken() bool {
	next := lex.p + 1
	if next == len(lex.data) {
		return true
	}

	isCloseTag := lex.data[lex.p] == '?' && lex.data[next] == '>'
	return !isCloseTag
}
// isNotNewLine reports whether the byte before lex.p does NOT complete a line
// break. A "\r" directly followed by "\n" at lex.p is still inside the
// two-byte break and therefore yields true.
func (lex *Lexer) isNotNewLine() bool {
	cur, prev := lex.data[lex.p], lex.data[lex.p-1]

	if cur == '\n' && prev == '\r' {
		return true
	}

	switch prev {
	case '\n', '\r':
		return false
	}
	return true
}
// call saves state on the call stack, advances the read offset by one byte
// and switches the machine to fnext. ret undoes the push.
func (lex *Lexer) call(state int, fnext int) {
	// push the state to come back to
	lex.growCallStack()
	lex.stack[lex.top] = state
	lex.top++

	// enter the target state on the next byte
	lex.cs = fnext
	lex.p++
}
// ret pops n frames from the call stack, restores the machine state saved in
// the frame that becomes the top, and advances the read offset by one byte.
// Popping more frames than were pushed clamps at the bottom of the stack.
func (lex *Lexer) ret(n int) {
	lex.top -= n
	if lex.top < 0 {
		lex.top = 0
	}

	// The stack is empty until the first call, so an unbalanced return (for
	// example a "}" scanned before any "{") used to index an empty slice and
	// panic. In that case there is no saved state to restore.
	// NOTE(review): keeping the current state is assumed to be the right
	// fallback; confirm against the generated scanner.
	if lex.top < len(lex.stack) {
		lex.cs = lex.stack[lex.top]
	}

	lex.p++
}
// ungetStr gives len(s) bytes back to the input, but only when the current
// match data[ts:te] ends with s.
func (lex *Lexer) ungetStr(s string) {
	matched := string(lex.data[lex.ts:lex.te])
	if !strings.HasSuffix(matched, s) {
		return
	}

	lex.ungetCnt(len(s))
}
// ungetCnt gives the last n bytes of the current match back to the input by
// moving both the read offset and the match end n bytes to the left.
func (lex *Lexer) ungetCnt(n int) {
	lex.te -= n
	lex.p -= n
}
// Error records msg as a lexer error positioned at the current match
// (data[ts:te]).
func (lex *Lexer) Error(msg string) {
	startLine := lex.NewLines.GetLine(lex.ts)
	// te is exclusive, so the last byte of the match is at te-1.
	endLine := lex.NewLines.GetLine(lex.te - 1)

	pos := position.NewPosition(startLine, endLine, lex.ts, lex.te)
	lex.AddError(errors.NewError(msg, pos))
}
// isValidVarNameStart reports whether r may be the first byte of a name:
// an ASCII letter, an underscore, or any byte in 0x80..0xff.
func isValidVarNameStart(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z':
		return true
	case r == '_':
		return true
	}

	// A byte cannot exceed 0xff, so only the lower bound needs checking.
	return r >= 0x80
}
// isValidVarName reports whether r may appear inside a name after its first
// byte: an ASCII letter or digit, an underscore, or any byte in 0x80..0xff.
func isValidVarName(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	case r == '_':
		return true
	}

	// A byte cannot exceed 0xff, so only the lower bound needs checking.
	return r >= 0x80
}

View File

@@ -0,0 +1,145 @@
package scanner
// TokenID identifies the kind of a token produced by the lexer. Single
// character tokens are represented by the character code itself (the scanner
// converts the byte with TokenID(int(c))); named tokens use the constants
// below.
type TokenID int
//go:generate stringer -type=TokenID -output ./tokenid_string.go
// Named token identifiers, numbered consecutively starting at 57346.
// The order is significant: tokenid_string.go is generated from these values
// and must be regenerated whenever an entry is added, removed or moved.
// NOTE(review): 57346 presumably mirrors the first token number assigned by
// the parser generator; confirm against the generated parsers.
const (
T_INCLUDE TokenID = iota + 57346
T_INCLUDE_ONCE
T_EXIT
T_IF
T_LNUMBER
T_DNUMBER
T_STRING
T_STRING_VARNAME
T_VARIABLE
T_NUM_STRING
T_INLINE_HTML
T_CHARACTER
T_BAD_CHARACTER
T_ENCAPSED_AND_WHITESPACE
T_CONSTANT_ENCAPSED_STRING
T_ECHO
T_DO
T_WHILE
T_ENDWHILE
T_FOR
T_ENDFOR
T_FOREACH
T_ENDFOREACH
T_DECLARE
T_ENDDECLARE
T_AS
T_SWITCH
T_ENDSWITCH
T_CASE
T_DEFAULT
T_BREAK
T_CONTINUE
T_GOTO
T_FUNCTION
T_FN
T_CONST
T_RETURN
T_TRY
T_CATCH
T_FINALLY
T_THROW
T_USE
T_INSTEADOF
T_GLOBAL
T_VAR
T_UNSET
T_ISSET
T_EMPTY
T_HALT_COMPILER
T_CLASS
T_TRAIT
T_INTERFACE
T_EXTENDS
T_IMPLEMENTS
T_OBJECT_OPERATOR
T_DOUBLE_ARROW
T_LIST
T_ARRAY
T_CALLABLE
T_CLASS_C
T_TRAIT_C
T_METHOD_C
T_FUNC_C
T_LINE
T_FILE
T_COMMENT
T_DOC_COMMENT
T_OPEN_TAG
T_OPEN_TAG_WITH_ECHO
T_CLOSE_TAG
T_WHITESPACE
T_START_HEREDOC
T_END_HEREDOC
T_DOLLAR_OPEN_CURLY_BRACES
T_CURLY_OPEN
T_PAAMAYIM_NEKUDOTAYIM
T_NAMESPACE
T_NS_C
T_DIR
T_NS_SEPARATOR
T_ELLIPSIS
T_EVAL
T_REQUIRE
T_REQUIRE_ONCE
T_LOGICAL_OR
T_LOGICAL_XOR
T_LOGICAL_AND
T_INSTANCEOF
T_NEW
T_CLONE
T_ELSEIF
T_ELSE
T_ENDIF
T_PRINT
T_YIELD
T_STATIC
T_ABSTRACT
T_FINAL
T_PRIVATE
T_PROTECTED
T_PUBLIC
T_INC
T_DEC
T_YIELD_FROM
T_INT_CAST
T_DOUBLE_CAST
T_STRING_CAST
T_ARRAY_CAST
T_OBJECT_CAST
T_BOOL_CAST
T_UNSET_CAST
T_COALESCE
T_SPACESHIP
T_NOELSE
T_PLUS_EQUAL
T_MINUS_EQUAL
T_MUL_EQUAL
T_POW_EQUAL
T_DIV_EQUAL
T_CONCAT_EQUAL
T_MOD_EQUAL
T_AND_EQUAL
T_OR_EQUAL
T_XOR_EQUAL
T_SL_EQUAL
T_SR_EQUAL
T_COALESCE_EQUAL
T_BOOLEAN_OR
T_BOOLEAN_AND
T_POW
T_SL
T_SR
T_IS_IDENTICAL
T_IS_NOT_IDENTICAL
T_IS_EQUAL
T_IS_NOT_EQUAL
T_IS_SMALLER_OR_EQUAL
T_IS_GREATER_OR_EQUAL
)

View File

@@ -0,0 +1,25 @@
package scanner
// NewLines keeps a strictly increasing list of byte offsets and uses it to
// translate an offset into a 1-based line number.
type NewLines struct {
	data []int
}

// Append records offset p. Offsets that are not greater than the last
// recorded one are ignored, which keeps the list strictly increasing.
func (nl *NewLines) Append(p int) {
	if n := len(nl.data); n > 0 && nl.data[n-1] >= p {
		return
	}

	nl.data = append(nl.data, p)
}

// GetLine returns the line of offset p: one plus the number of recorded
// offsets that are less than or equal to p. The list is walked from the end,
// so lookups near the most recently recorded offsets finish quickly.
func (nl *NewLines) GetLine(p int) int {
	idx := len(nl.data)
	for idx > 0 && p < nl.data[idx-1] {
		idx--
	}

	return idx + 1
}

22874
internal/scanner/scanner.go Normal file

File diff suppressed because it is too large Load Diff

517
internal/scanner/scanner.rl Normal file
View File

@@ -0,0 +1,517 @@
package scanner
import (
"fmt"
"strconv"
"strings"
)
%%{
machine lexer;
write data;
access lex.;
variable p lex.p;
variable pe lex.pe;
}%%
// NewLexer returns a Lexer that scans data from its first byte. The call
// stack starts empty, a fresh TokenPool is attached, and NewLines is
// preallocated with room for 128 entries. PHPVersion and WithTokens are left
// at their zero values for the caller to set.
func NewLexer(data []byte) *Lexer {
lex := &Lexer{
data: data,
// the scanner stops at the end of the input
pe: len(data),
stack: make([]int, 0),
TokenPool: &TokenPool{},
NewLines: NewLines{make([]int, 0, 128)},
}
// Ragel emits the initialisation of the machine variables here.
%% write init;
return lex
}
// Lex scans the input until one significant token has been recognised. The
// token (value, position and the free-floating tokens collected before it) is
// handed to lval through lval.Token and its identifier is returned as an int.
// Whitespace, comments and plain open tags are only recorded through addToken
// and do not end the call. When no pattern sets a token identifier the zero
// value 0 is returned.
func (lex *Lexer) Lex(lval Lval) int {
// start a fresh list of free-floating tokens for this call
lex.Tokens = nil
eof := lex.pe
var tok TokenID
// take a token from the pool and reset the fields filled in below
token := lex.TokenPool.Get()
token.Tokens = lex.Tokens
token.Value = lex.data[0:0]
// byte offsets of the heredoc label, set by the heredoc_lbl_start and
// heredoc_lbl_end actions
lblStart := 0
lblEnd := 0
_, _ = lblStart, lblEnd
%%{
action heredoc_lbl_start {lblStart = lex.p}
action heredoc_lbl_end {lblEnd = lex.p}
action constant_string_new_line {
if lex.data[lex.p] == '\n' {
lex.NewLines.Append(lex.p)
}
// NOTE(review): lex.data[lex.p+1] is read without a bounds check; a CR as
// the final input byte would index past the end of the input. Confirm.
if lex.data[lex.p] == '\r' && lex.data[lex.p+1] != '\n' {
lex.NewLines.Append(lex.p)
}
}
action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) }
action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine() }
action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() }
action is_not_string_end_or_var { lex.isNotStringEnd('"') && lex.isNotStringVar() }
action is_not_backqoute_end_or_var { lex.isNotStringEnd('`') && lex.isNotStringVar() }
newline = ('\r\n' >(nl, 1) | '\r' >(nl, 0) | '\n' >(nl, 0)) %{lex.NewLines.Append(lex.p);};
any_line = any | newline;
whitespace = [\t\v\f ];
whitespace_line = [\t\v\f ] | newline;
lnum = [0-9]+('_'[0-9]+)*;
dnum = (lnum?"." lnum)|(lnum"."lnum?);
hnum = '0x'[0-9a-fA-F]+('_'[0-9a-fA-F]+)*;
bnum = '0b'[01]+('_'[01]+)*;
exponent_dnum = (lnum | dnum) ('e'|'E') ('+'|'-')? lnum;
varname_first = [a-zA-Z_] | (0x0080..0x00FF);
varname_second = varname_first | [0-9];
varname = varname_first (varname_second)*;
heredoc_label = varname >heredoc_lbl_start %heredoc_lbl_end;
operators = ';'|':'|','|'.'|'['|']'|'('|')'|'|'|'/'|'^'|'&'|'+'|'-'|'*'|'='|'%'|'!'|'~'|'$'|'<'|'>'|'?'|'@';
prepush { lex.growCallStack(); }
constant_string =
start: (
"'" -> qoute
| "b"i? '"' -> double_qoute
),
# single qoute string
qoute: (
(any - [\\'\r\n]) -> qoute
| "\r" @constant_string_new_line -> qoute
| "\n" @constant_string_new_line -> qoute
| "\\" -> qoute_any
| "'" -> final
),
qoute_any: (
(any - [\r\n]) -> qoute
| "\r" @constant_string_new_line -> qoute
| "\n" @constant_string_new_line -> qoute
),
# double qoute string
double_qoute: (
(any - [\\"${\r\n]) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
| "\\" -> double_qoute_any
| '"' -> final
| '$' -> double_qoute_nonvarname
| '{' -> double_qoute_nondollar
),
double_qoute_any: (
(any - [\r\n]) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
),
double_qoute_nondollar: (
(any - [\\$"\r\n]) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
| "\\" -> double_qoute_any
| '"' -> final
),
double_qoute_nonvarname: (
(any - [\\{"\r\n] - varname_first) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
| "\\" -> double_qoute_any
| '"' -> final
);
main := |*
"#!" any* :>> newline => {
lex.addToken(T_COMMENT, lex.ts, lex.te)
};
any => {
fnext html;
lex.ungetCnt(1)
};
*|;
html := |*
any_line+ -- '<?' => {
lex.ungetStr("<")
lex.setTokenPosition(token)
tok = T_INLINE_HTML;
fbreak;
};
'<?' => {
lex.addToken(T_OPEN_TAG, lex.ts, lex.te)
fnext php;
};
'<?php'i ( [ \t] | newline ) => {
lex.ungetCnt(lex.te - lex.ts - 5)
lex.addToken(T_OPEN_TAG, lex.ts, lex.ts+5)
fnext php;
};
'<?='i => {
lex.setTokenPosition(token);
tok = T_ECHO;
fnext php;
fbreak;
};
*|;
php := |*
whitespace_line* => {lex.addToken(T_WHITESPACE, lex.ts, lex.te)};
'?>' newline? => {lex.setTokenPosition(token); tok = TokenID(int(';')); fnext html; fbreak;};
';' whitespace_line* '?>' newline? => {lex.setTokenPosition(token); tok = TokenID(int(';')); fnext html; fbreak;};
(dnum | exponent_dnum) => {lex.setTokenPosition(token); tok = T_DNUMBER; fbreak;};
bnum => {
s := strings.Replace(string(lex.data[lex.ts+2:lex.te]), "_", "", -1)
_, err := strconv.ParseInt(s, 2, 0)
if err == nil {
lex.setTokenPosition(token); tok = T_LNUMBER; fbreak;
}
lex.setTokenPosition(token); tok = T_DNUMBER; fbreak;
};
lnum => {
base := 10
if lex.data[lex.ts] == '0' {
base = 8
}
s := strings.Replace(string(lex.data[lex.ts:lex.te]), "_", "", -1)
_, err := strconv.ParseInt(s, base, 0)
if err == nil {
lex.setTokenPosition(token); tok = T_LNUMBER; fbreak;
}
lex.setTokenPosition(token); tok = T_DNUMBER; fbreak;
};
hnum => {
s := strings.Replace(string(lex.data[lex.ts+2:lex.te]), "_", "", -1)
_, err := strconv.ParseInt(s, 16, 0)
if err == nil {
lex.setTokenPosition(token); tok = T_LNUMBER; fbreak;
}
lex.setTokenPosition(token); tok = T_DNUMBER; fbreak;
};
'abstract'i => {lex.setTokenPosition(token); tok = T_ABSTRACT; fbreak;};
'array'i => {lex.setTokenPosition(token); tok = T_ARRAY; fbreak;};
'as'i => {lex.setTokenPosition(token); tok = T_AS; fbreak;};
'break'i => {lex.setTokenPosition(token); tok = T_BREAK; fbreak;};
'callable'i => {lex.setTokenPosition(token); tok = T_CALLABLE; fbreak;};
'case'i => {lex.setTokenPosition(token); tok = T_CASE; fbreak;};
'catch'i => {lex.setTokenPosition(token); tok = T_CATCH; fbreak;};
'class'i => {lex.setTokenPosition(token); tok = T_CLASS; fbreak;};
'clone'i => {lex.setTokenPosition(token); tok = T_CLONE; fbreak;};
'const'i => {lex.setTokenPosition(token); tok = T_CONST; fbreak;};
'continue'i => {lex.setTokenPosition(token); tok = T_CONTINUE; fbreak;};
'declare'i => {lex.setTokenPosition(token); tok = T_DECLARE; fbreak;};
'default'i => {lex.setTokenPosition(token); tok = T_DEFAULT; fbreak;};
'do'i => {lex.setTokenPosition(token); tok = T_DO; fbreak;};
'echo'i => {lex.setTokenPosition(token); tok = T_ECHO; fbreak;};
'else'i => {lex.setTokenPosition(token); tok = T_ELSE; fbreak;};
'elseif'i => {lex.setTokenPosition(token); tok = T_ELSEIF; fbreak;};
'empty'i => {lex.setTokenPosition(token); tok = T_EMPTY; fbreak;};
'enddeclare'i => {lex.setTokenPosition(token); tok = T_ENDDECLARE; fbreak;};
'endfor'i => {lex.setTokenPosition(token); tok = T_ENDFOR; fbreak;};
'endforeach'i => {lex.setTokenPosition(token); tok = T_ENDFOREACH; fbreak;};
'endif'i => {lex.setTokenPosition(token); tok = T_ENDIF; fbreak;};
'endswitch'i => {lex.setTokenPosition(token); tok = T_ENDSWITCH; fbreak;};
'endwhile'i => {lex.setTokenPosition(token); tok = T_ENDWHILE; fbreak;};
'eval'i => {lex.setTokenPosition(token); tok = T_EVAL; fbreak;};
'exit'i | 'die'i => {lex.setTokenPosition(token); tok = T_EXIT; fbreak;};
'extends'i => {lex.setTokenPosition(token); tok = T_EXTENDS; fbreak;};
'final'i => {lex.setTokenPosition(token); tok = T_FINAL; fbreak;};
'finally'i => {lex.setTokenPosition(token); tok = T_FINALLY; fbreak;};
'for'i => {lex.setTokenPosition(token); tok = T_FOR; fbreak;};
'foreach'i => {lex.setTokenPosition(token); tok = T_FOREACH; fbreak;};
'function'i | 'cfunction'i => {lex.setTokenPosition(token); tok = T_FUNCTION; fbreak;};
'fn'i => {lex.setTokenPosition(token); tok = T_FN; fbreak;};
'global'i => {lex.setTokenPosition(token); tok = T_GLOBAL; fbreak;};
'goto'i => {lex.setTokenPosition(token); tok = T_GOTO; fbreak;};
'if'i => {lex.setTokenPosition(token); tok = T_IF; fbreak;};
'isset'i => {lex.setTokenPosition(token); tok = T_ISSET; fbreak;};
'implements'i => {lex.setTokenPosition(token); tok = T_IMPLEMENTS; fbreak;};
'instanceof'i => {lex.setTokenPosition(token); tok = T_INSTANCEOF; fbreak;};
'insteadof'i => {lex.setTokenPosition(token); tok = T_INSTEADOF; fbreak;};
'interface'i => {lex.setTokenPosition(token); tok = T_INTERFACE; fbreak;};
'list'i => {lex.setTokenPosition(token); tok = T_LIST; fbreak;};
'namespace'i => {lex.setTokenPosition(token); tok = T_NAMESPACE; fbreak;};
'private'i => {lex.setTokenPosition(token); tok = T_PRIVATE; fbreak;};
'public'i => {lex.setTokenPosition(token); tok = T_PUBLIC; fbreak;};
'print'i => {lex.setTokenPosition(token); tok = T_PRINT; fbreak;};
'protected'i => {lex.setTokenPosition(token); tok = T_PROTECTED; fbreak;};
'return'i => {lex.setTokenPosition(token); tok = T_RETURN; fbreak;};
'static'i => {lex.setTokenPosition(token); tok = T_STATIC; fbreak;};
'switch'i => {lex.setTokenPosition(token); tok = T_SWITCH; fbreak;};
'throw'i => {lex.setTokenPosition(token); tok = T_THROW; fbreak;};
'trait'i => {lex.setTokenPosition(token); tok = T_TRAIT; fbreak;};
'try'i => {lex.setTokenPosition(token); tok = T_TRY; fbreak;};
'unset'i => {lex.setTokenPosition(token); tok = T_UNSET; fbreak;};
'use'i => {lex.setTokenPosition(token); tok = T_USE; fbreak;};
'var'i => {lex.setTokenPosition(token); tok = T_VAR; fbreak;};
'while'i => {lex.setTokenPosition(token); tok = T_WHILE; fbreak;};
'yield'i whitespace_line* 'from'i => {lex.setTokenPosition(token); tok = T_YIELD_FROM; fbreak;};
'yield'i => {lex.setTokenPosition(token); tok = T_YIELD; fbreak;};
'include'i => {lex.setTokenPosition(token); tok = T_INCLUDE; fbreak;};
'include_once'i => {lex.setTokenPosition(token); tok = T_INCLUDE_ONCE; fbreak;};
'require'i => {lex.setTokenPosition(token); tok = T_REQUIRE; fbreak;};
'require_once'i => {lex.setTokenPosition(token); tok = T_REQUIRE_ONCE; fbreak;};
'__CLASS__'i => {lex.setTokenPosition(token); tok = T_CLASS_C; fbreak;};
'__DIR__'i => {lex.setTokenPosition(token); tok = T_DIR; fbreak;};
'__FILE__'i => {lex.setTokenPosition(token); tok = T_FILE; fbreak;};
'__FUNCTION__'i => {lex.setTokenPosition(token); tok = T_FUNC_C; fbreak;};
'__LINE__'i => {lex.setTokenPosition(token); tok = T_LINE; fbreak;};
'__NAMESPACE__'i => {lex.setTokenPosition(token); tok = T_NS_C; fbreak;};
'__METHOD__'i => {lex.setTokenPosition(token); tok = T_METHOD_C; fbreak;};
'__TRAIT__'i => {lex.setTokenPosition(token); tok = T_TRAIT_C; fbreak;};
'__halt_compiler'i => {lex.setTokenPosition(token); tok = T_HALT_COMPILER; fnext halt_compiller_open_parenthesis; fbreak;};
'new'i => {lex.setTokenPosition(token); tok = T_NEW; fbreak;};
'and'i => {lex.setTokenPosition(token); tok = T_LOGICAL_AND; fbreak;};
'or'i => {lex.setTokenPosition(token); tok = T_LOGICAL_OR; fbreak;};
'xor'i => {lex.setTokenPosition(token); tok = T_LOGICAL_XOR; fbreak;};
'\\' => {lex.setTokenPosition(token); tok = T_NS_SEPARATOR; fbreak;};
'...' => {lex.setTokenPosition(token); tok = T_ELLIPSIS; fbreak;};
'::' => {lex.setTokenPosition(token); tok = T_PAAMAYIM_NEKUDOTAYIM; fbreak;};
'&&' => {lex.setTokenPosition(token); tok = T_BOOLEAN_AND; fbreak;};
'||' => {lex.setTokenPosition(token); tok = T_BOOLEAN_OR; fbreak;};
'&=' => {lex.setTokenPosition(token); tok = T_AND_EQUAL; fbreak;};
'|=' => {lex.setTokenPosition(token); tok = T_OR_EQUAL; fbreak;};
'.=' => {lex.setTokenPosition(token); tok = T_CONCAT_EQUAL; fbreak;};
'*=' => {lex.setTokenPosition(token); tok = T_MUL_EQUAL; fbreak;};
'**=' => {lex.setTokenPosition(token); tok = T_POW_EQUAL; fbreak;};
'/=' => {lex.setTokenPosition(token); tok = T_DIV_EQUAL; fbreak;};
'+=' => {lex.setTokenPosition(token); tok = T_PLUS_EQUAL; fbreak;};
'-=' => {lex.setTokenPosition(token); tok = T_MINUS_EQUAL; fbreak;};
'^=' => {lex.setTokenPosition(token); tok = T_XOR_EQUAL; fbreak;};
'%=' => {lex.setTokenPosition(token); tok = T_MOD_EQUAL; fbreak;};
'--' => {lex.setTokenPosition(token); tok = T_DEC; fbreak;};
'++' => {lex.setTokenPosition(token); tok = T_INC; fbreak;};
'=>' => {lex.setTokenPosition(token); tok = T_DOUBLE_ARROW; fbreak;};
'<=>' => {lex.setTokenPosition(token); tok = T_SPACESHIP; fbreak;};
'!=' | '<>' => {lex.setTokenPosition(token); tok = T_IS_NOT_EQUAL; fbreak;};
'!==' => {lex.setTokenPosition(token); tok = T_IS_NOT_IDENTICAL; fbreak;};
'==' => {lex.setTokenPosition(token); tok = T_IS_EQUAL; fbreak;};
'===' => {lex.setTokenPosition(token); tok = T_IS_IDENTICAL; fbreak;};
'<<=' => {lex.setTokenPosition(token); tok = T_SL_EQUAL; fbreak;};
'>>=' => {lex.setTokenPosition(token); tok = T_SR_EQUAL; fbreak;};
'>=' => {lex.setTokenPosition(token); tok = T_IS_GREATER_OR_EQUAL; fbreak;};
'<=' => {lex.setTokenPosition(token); tok = T_IS_SMALLER_OR_EQUAL; fbreak;};
'**' => {lex.setTokenPosition(token); tok = T_POW; fbreak;};
'<<' => {lex.setTokenPosition(token); tok = T_SL; fbreak;};
'>>' => {lex.setTokenPosition(token); tok = T_SR; fbreak;};
'??' => {lex.setTokenPosition(token); tok = T_COALESCE; fbreak;};
'??=' => {lex.setTokenPosition(token); tok = T_COALESCE_EQUAL; fbreak;};
'(' whitespace* 'array'i whitespace* ')' => {lex.setTokenPosition(token); tok = T_ARRAY_CAST; fbreak;};
'(' whitespace* ('bool'i|'boolean'i) whitespace* ')' => {lex.setTokenPosition(token); tok = T_BOOL_CAST; fbreak;};
'(' whitespace* ('real'i|'double'i|'float'i) whitespace* ')' => {lex.setTokenPosition(token); tok = T_DOUBLE_CAST; fbreak;};
'(' whitespace* ('int'i|'integer'i) whitespace* ')' => {lex.setTokenPosition(token); tok = T_INT_CAST; fbreak;};
'(' whitespace* 'object'i whitespace* ')' => {lex.setTokenPosition(token); tok = T_OBJECT_CAST; fbreak;};
'(' whitespace* ('string'i|'binary'i) whitespace* ')' => {lex.setTokenPosition(token); tok = T_STRING_CAST; fbreak;};
'(' whitespace* 'unset'i whitespace* ')' => {lex.setTokenPosition(token); tok = T_UNSET_CAST; fbreak;};
('#' | '//') any_line* when is_not_comment_end => {
lex.ungetStr("?>")
lex.addToken(T_COMMENT, lex.ts, lex.te)
};
'/*' any_line* :>> '*/' {
isDocComment := false;
if lex.te - lex.ts > 4 && string(lex.data[lex.ts:lex.ts+3]) == "/**" {
isDocComment = true;
}
if isDocComment {
lex.PhpDocComment = string(lex.data[lex.ts:lex.te])
lex.addToken(T_DOC_COMMENT, lex.ts, lex.te)
} else {
lex.addToken(T_COMMENT, lex.ts, lex.te)
}
};
operators => {
// rune, _ := utf8.DecodeRune(lex.data[lex.ts:lex.te]);
// tok = TokenID(Rune2Class(rune));
lex.setTokenPosition(token);
tok = TokenID(int(lex.data[lex.ts]));
fbreak;
};
"{" => { lex.setTokenPosition(token); tok = TokenID(int('{')); lex.call(ftargs, fentry(php)); goto _out; };
"}" => { lex.setTokenPosition(token); tok = TokenID(int('}')); lex.ret(1); lex.PhpDocComment = ""; goto _out;};
"$" varname => { lex.setTokenPosition(token); tok = T_VARIABLE; fbreak; };
varname => { lex.setTokenPosition(token); tok = T_STRING; fbreak; };
"->" => { lex.setTokenPosition(token); tok = T_OBJECT_OPERATOR; fnext property; fbreak; };
constant_string => {
lex.setTokenPosition(token);
tok = T_CONSTANT_ENCAPSED_STRING;
fbreak;
};
"b"i? "<<<" [ \t]* ( heredoc_label | ("'" heredoc_label "'") | ('"' heredoc_label '"') ) newline => {
lex.heredocLabel = lex.data[lblStart:lblEnd]
lex.setTokenPosition(token);
tok = T_START_HEREDOC;
if lex.isHeredocEnd(lex.p+1) {
fnext heredoc_end;
} else if lex.data[lblStart-1] == '\'' {
fnext nowdoc;
} else {
fnext heredoc;
}
fbreak;
};
"`" => {lex.setTokenPosition(token); tok = TokenID(int('`')); fnext backqote; fbreak;};
'"' => {lex.setTokenPosition(token); tok = TokenID(int('"')); fnext template_string; fbreak;};
any_line => {
c := lex.data[lex.p]
lex.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", c, c));
};
*|;
property := |*
whitespace_line* => {lex.addToken(T_WHITESPACE, lex.ts, lex.te)};
"->" => {lex.setTokenPosition(token); tok = T_OBJECT_OPERATOR; fbreak;};
varname => {lex.setTokenPosition(token); tok = T_STRING; fnext php; fbreak;};
any => {lex.ungetCnt(1); fgoto php;};
*|;
nowdoc := |*
any_line* when is_not_heredoc_end => {
lex.setTokenPosition(token);
tok = T_ENCAPSED_AND_WHITESPACE;
fnext heredoc_end;
fbreak;
};
*|;
heredoc := |*
"{$" => {lex.ungetCnt(1); lex.setTokenPosition(token); tok = T_CURLY_OPEN; lex.call(ftargs, fentry(php)); goto _out;};
"${" => {lex.setTokenPosition(token); tok = T_DOLLAR_OPEN_CURLY_BRACES; lex.call(ftargs, fentry(string_var_name)); goto _out;};
"$" => {lex.ungetCnt(1); fcall string_var;};
any_line* when is_not_heredoc_end_or_var => {
lex.setTokenPosition(token);
tok = T_ENCAPSED_AND_WHITESPACE;
if len(lex.data) > lex.p+1 && lex.data[lex.p+1] != '$' && lex.data[lex.p+1] != '{' {
fnext heredoc_end;
}
fbreak;
};
*|;
backqote := |*
"{$" => {lex.ungetCnt(1); lex.setTokenPosition(token); tok = T_CURLY_OPEN; lex.call(ftargs, fentry(php)); goto _out;};
"${" => {lex.setTokenPosition(token); tok = T_DOLLAR_OPEN_CURLY_BRACES; lex.call(ftargs, fentry(string_var_name)); goto _out;};
"$" => {lex.ungetCnt(1); fcall string_var;};
'`' => {lex.setTokenPosition(token); tok = TokenID(int('`')); fnext php; fbreak;};
any_line* when is_not_backqoute_end_or_var => {
lex.setTokenPosition(token);
tok = T_ENCAPSED_AND_WHITESPACE;
fbreak;
};
*|;
template_string := |*
"{$" => {lex.ungetCnt(1); lex.setTokenPosition(token); tok = T_CURLY_OPEN; lex.call(ftargs, fentry(php)); goto _out;};
"${" => {lex.setTokenPosition(token); tok = T_DOLLAR_OPEN_CURLY_BRACES; lex.call(ftargs, fentry(string_var_name)); goto _out;};
"$" => {lex.ungetCnt(1); fcall string_var;};
'"' => {lex.setTokenPosition(token); tok = TokenID(int('"')); fnext php; fbreak;};
any_line* when is_not_string_end_or_var => {
lex.setTokenPosition(token);
tok = T_ENCAPSED_AND_WHITESPACE;
fbreak;
};
*|;
heredoc_end := |*
varname -- ";" => {
lex.setTokenPosition(token);
tok = T_END_HEREDOC;
fnext php;
fbreak;
};
varname => {
lex.setTokenPosition(token);
tok = T_END_HEREDOC;
fnext php;
fbreak;
};
*|;
string_var := |*
'$' varname => {lex.setTokenPosition(token); tok = T_VARIABLE; fbreak;};
'->' varname_first => {lex.ungetCnt(1); lex.setTokenPosition(token); tok = T_OBJECT_OPERATOR; fbreak;};
varname => {lex.setTokenPosition(token); tok = T_STRING; fbreak;};
'[' => {lex.setTokenPosition(token); tok = TokenID(int('[')); lex.call(ftargs, fentry(string_var_index)); goto _out;};
any => {lex.ungetCnt(1); fret;};
*|;
string_var_index := |*
lnum | hnum | bnum => {lex.setTokenPosition(token); tok = T_NUM_STRING; fbreak;};
'$' varname => {lex.setTokenPosition(token); tok = T_VARIABLE; fbreak;};
varname => {lex.setTokenPosition(token); tok = T_STRING; fbreak;};
whitespace_line | [\\'#] => {lex.setTokenPosition(token); tok = T_ENCAPSED_AND_WHITESPACE; lex.ret(2); goto _out;};
operators > (svi, 1) => {lex.setTokenPosition(token); tok = TokenID(int(lex.data[lex.ts])); fbreak;};
']' > (svi, 2) => {lex.setTokenPosition(token); tok = TokenID(int(']')); lex.ret(2); goto _out;};
any_line => {
c := lex.data[lex.p]
lex.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", c, c));
};
*|;
string_var_name := |*
varname ("[" | "}") => {lex.ungetCnt(1); lex.setTokenPosition(token); tok = T_STRING_VARNAME; fnext php; fbreak;};
any => {lex.ungetCnt(1); fnext php;};
*|;
halt_compiller_open_parenthesis := |*
whitespace_line* => {lex.addToken(T_WHITESPACE, lex.ts, lex.te)};
"(" => {lex.setTokenPosition(token); tok = TokenID(int('(')); fnext halt_compiller_close_parenthesis; fbreak;};
any => {lex.ungetCnt(1); fnext php;};
*|;
halt_compiller_close_parenthesis := |*
whitespace_line* => {lex.addToken(T_WHITESPACE, lex.ts, lex.te)};
")" => {lex.setTokenPosition(token); tok = TokenID(int(')')); fnext halt_compiller_close_semicolon; fbreak;};
any => {lex.ungetCnt(1); fnext php;};
*|;
halt_compiller_close_semicolon := |*
whitespace_line* => {lex.addToken(T_WHITESPACE, lex.ts, lex.te)};
";" => {lex.setTokenPosition(token); tok = TokenID(int(';')); fnext halt_compiller_end; fbreak;};
any => {lex.ungetCnt(1); fnext php;};
*|;
halt_compiller_end := |*
any_line* => { lex.addToken(T_HALT_COMPILER, lex.ts, lex.te); };
*|;
write exec;
}%%
// attach the free-floating tokens collected during this call and the matched
// bytes, then hand the token over to the caller
token.Tokens = lex.Tokens
token.Value = lex.data[lex.ts:lex.te]
lval.Token(token)
return int(tok);
}

File diff suppressed because it is too large Load Diff

15
internal/scanner/token.go Normal file
View File

@@ -0,0 +1,15 @@
package scanner
import (
"github.com/z7zmey/php-parser/pkg/token"
)
// Token is the value the lexer hands to the parser for every significant
// token it recognises.
type Token struct {
// Value is the matched input, a sub-slice of the lexer's data (not a copy).
Value []byte
// Tokens holds the free-floating tokens recorded while scanning up to this
// token; it stays nil unless the lexer collects them.
Tokens []token.Token
// StartLine and EndLine are the lines of the first and last matched byte.
StartLine int
EndLine int
// StartPos and EndPos are the byte offsets of the match; EndPos is exclusive.
StartPos int
EndPos int
}

View File

@@ -0,0 +1,22 @@
package scanner
// TokenPool is a light version of sync.Pool for Token objects: a plain stack
// of pointers that performs no locking.
type TokenPool struct {
	pool []*Token
}

// Get pops the most recently returned *Token from the pool. The token is
// handed out as it was put in, without resetting its fields. A new Token is
// allocated when the pool is empty.
func (tp *TokenPool) Get() *Token {
	last := len(tp.pool) - 1
	if last < 0 {
		return new(Token)
	}

	tok := tp.pool[last]
	tp.pool = tp.pool[:last]

	return tok
}

// Put pushes t onto the pool so that a later Get can hand it out again.
func (tp *TokenPool) Put(t *Token) {
	tp.pool = append(tp.pool, t)
}

View File

@@ -0,0 +1,34 @@
package scanner_test
import (
"reflect"
"testing"
"github.com/z7zmey/php-parser/internal/scanner"
)
// TestTokenPoolGetNew checks that Get on an empty pool still yields a token.
func TestTokenPoolGetNew(t *testing.T) {
	var pool scanner.TokenPool

	if got := pool.Get(); got == nil {
		t.Errorf("*TokenPool.Get() must return new *Token object\n")
	}
}
// TestTokenPoolGetFromPool checks that a token handed to Put is the one that
// the next Get returns.
func TestTokenPoolGetFromPool(t *testing.T) {
	tp := new(scanner.TokenPool)
	expectedToken := &scanner.Token{
		Value: []byte("test"),
	}

	tp.Put(expectedToken)
	actualToken := tp.Get()

	if !reflect.DeepEqual(expectedToken, actualToken) {
		// The value under test comes from Get; the old message blamed Put.
		t.Errorf("*TokenPool.Get() must return *Token object from pool\n")
	}
}

View File

@@ -0,0 +1,161 @@
// Code generated by "stringer -type=TokenID -output ./tokenid_string.go"; DO NOT EDIT.
package scanner
import "strconv"
// _ is a compile-time guard emitted by stringer; it is never called.
// Each statement indexes the one-element array x with
// (token constant - value it had when this file was generated). While the
// constant still has that value the index is 0 and the code compiles; if a
// constant is renumbered the index becomes a non-zero constant and the
// compiler rejects the file, signalling that the name tables in this file are
// stale.
func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[T_INCLUDE-57346]
	_ = x[T_INCLUDE_ONCE-57347]
	_ = x[T_EXIT-57348]
	_ = x[T_IF-57349]
	_ = x[T_LNUMBER-57350]
	_ = x[T_DNUMBER-57351]
	_ = x[T_STRING-57352]
	_ = x[T_STRING_VARNAME-57353]
	_ = x[T_VARIABLE-57354]
	_ = x[T_NUM_STRING-57355]
	_ = x[T_INLINE_HTML-57356]
	_ = x[T_CHARACTER-57357]
	_ = x[T_BAD_CHARACTER-57358]
	_ = x[T_ENCAPSED_AND_WHITESPACE-57359]
	_ = x[T_CONSTANT_ENCAPSED_STRING-57360]
	_ = x[T_ECHO-57361]
	_ = x[T_DO-57362]
	_ = x[T_WHILE-57363]
	_ = x[T_ENDWHILE-57364]
	_ = x[T_FOR-57365]
	_ = x[T_ENDFOR-57366]
	_ = x[T_FOREACH-57367]
	_ = x[T_ENDFOREACH-57368]
	_ = x[T_DECLARE-57369]
	_ = x[T_ENDDECLARE-57370]
	_ = x[T_AS-57371]
	_ = x[T_SWITCH-57372]
	_ = x[T_ENDSWITCH-57373]
	_ = x[T_CASE-57374]
	_ = x[T_DEFAULT-57375]
	_ = x[T_BREAK-57376]
	_ = x[T_CONTINUE-57377]
	_ = x[T_GOTO-57378]
	_ = x[T_FUNCTION-57379]
	_ = x[T_FN-57380]
	_ = x[T_CONST-57381]
	_ = x[T_RETURN-57382]
	_ = x[T_TRY-57383]
	_ = x[T_CATCH-57384]
	_ = x[T_FINALLY-57385]
	_ = x[T_THROW-57386]
	_ = x[T_USE-57387]
	_ = x[T_INSTEADOF-57388]
	_ = x[T_GLOBAL-57389]
	_ = x[T_VAR-57390]
	_ = x[T_UNSET-57391]
	_ = x[T_ISSET-57392]
	_ = x[T_EMPTY-57393]
	_ = x[T_HALT_COMPILER-57394]
	_ = x[T_CLASS-57395]
	_ = x[T_TRAIT-57396]
	_ = x[T_INTERFACE-57397]
	_ = x[T_EXTENDS-57398]
	_ = x[T_IMPLEMENTS-57399]
	_ = x[T_OBJECT_OPERATOR-57400]
	_ = x[T_DOUBLE_ARROW-57401]
	_ = x[T_LIST-57402]
	_ = x[T_ARRAY-57403]
	_ = x[T_CALLABLE-57404]
	_ = x[T_CLASS_C-57405]
	_ = x[T_TRAIT_C-57406]
	_ = x[T_METHOD_C-57407]
	_ = x[T_FUNC_C-57408]
	_ = x[T_LINE-57409]
	_ = x[T_FILE-57410]
	_ = x[T_COMMENT-57411]
	_ = x[T_DOC_COMMENT-57412]
	_ = x[T_OPEN_TAG-57413]
	_ = x[T_OPEN_TAG_WITH_ECHO-57414]
	_ = x[T_CLOSE_TAG-57415]
	_ = x[T_WHITESPACE-57416]
	_ = x[T_START_HEREDOC-57417]
	_ = x[T_END_HEREDOC-57418]
	_ = x[T_DOLLAR_OPEN_CURLY_BRACES-57419]
	_ = x[T_CURLY_OPEN-57420]
	_ = x[T_PAAMAYIM_NEKUDOTAYIM-57421]
	_ = x[T_NAMESPACE-57422]
	_ = x[T_NS_C-57423]
	_ = x[T_DIR-57424]
	_ = x[T_NS_SEPARATOR-57425]
	_ = x[T_ELLIPSIS-57426]
	_ = x[T_EVAL-57427]
	_ = x[T_REQUIRE-57428]
	_ = x[T_REQUIRE_ONCE-57429]
	_ = x[T_LOGICAL_OR-57430]
	_ = x[T_LOGICAL_XOR-57431]
	_ = x[T_LOGICAL_AND-57432]
	_ = x[T_INSTANCEOF-57433]
	_ = x[T_NEW-57434]
	_ = x[T_CLONE-57435]
	_ = x[T_ELSEIF-57436]
	_ = x[T_ELSE-57437]
	_ = x[T_ENDIF-57438]
	_ = x[T_PRINT-57439]
	_ = x[T_YIELD-57440]
	_ = x[T_STATIC-57441]
	_ = x[T_ABSTRACT-57442]
	_ = x[T_FINAL-57443]
	_ = x[T_PRIVATE-57444]
	_ = x[T_PROTECTED-57445]
	_ = x[T_PUBLIC-57446]
	_ = x[T_INC-57447]
	_ = x[T_DEC-57448]
	_ = x[T_YIELD_FROM-57449]
	_ = x[T_INT_CAST-57450]
	_ = x[T_DOUBLE_CAST-57451]
	_ = x[T_STRING_CAST-57452]
	_ = x[T_ARRAY_CAST-57453]
	_ = x[T_OBJECT_CAST-57454]
	_ = x[T_BOOL_CAST-57455]
	_ = x[T_UNSET_CAST-57456]
	_ = x[T_COALESCE-57457]
	_ = x[T_SPACESHIP-57458]
	_ = x[T_NOELSE-57459]
	_ = x[T_PLUS_EQUAL-57460]
	_ = x[T_MINUS_EQUAL-57461]
	_ = x[T_MUL_EQUAL-57462]
	_ = x[T_POW_EQUAL-57463]
	_ = x[T_DIV_EQUAL-57464]
	_ = x[T_CONCAT_EQUAL-57465]
	_ = x[T_MOD_EQUAL-57466]
	_ = x[T_AND_EQUAL-57467]
	_ = x[T_OR_EQUAL-57468]
	_ = x[T_XOR_EQUAL-57469]
	_ = x[T_SL_EQUAL-57470]
	_ = x[T_SR_EQUAL-57471]
	_ = x[T_COALESCE_EQUAL-57472]
	_ = x[T_BOOLEAN_OR-57473]
	_ = x[T_BOOLEAN_AND-57474]
	_ = x[T_POW-57475]
	_ = x[T_SL-57476]
	_ = x[T_SR-57477]
	_ = x[T_IS_IDENTICAL-57478]
	_ = x[T_IS_NOT_IDENTICAL-57479]
	_ = x[T_IS_EQUAL-57480]
	_ = x[T_IS_NOT_EQUAL-57481]
	_ = x[T_IS_SMALLER_OR_EQUAL-57482]
	_ = x[T_IS_GREATER_OR_EQUAL-57483]
}
// _TokenID_name holds the names of all TokenID constants concatenated into a
// single string, in the same order as the guard function above (T_INCLUDE
// first). Individual names are cut out of it using _TokenID_index.
const _TokenID_name = "T_INCLUDET_INCLUDE_ONCET_EXITT_IFT_LNUMBERT_DNUMBERT_STRINGT_STRING_VARNAMET_VARIABLET_NUM_STRINGT_INLINE_HTMLT_CHARACTERT_BAD_CHARACTERT_ENCAPSED_AND_WHITESPACET_CONSTANT_ENCAPSED_STRINGT_ECHOT_DOT_WHILET_ENDWHILET_FORT_ENDFORT_FOREACHT_ENDFOREACHT_DECLARET_ENDDECLARET_AST_SWITCHT_ENDSWITCHT_CASET_DEFAULTT_BREAKT_CONTINUET_GOTOT_FUNCTIONT_FNT_CONSTT_RETURNT_TRYT_CATCHT_FINALLYT_THROWT_USET_INSTEADOFT_GLOBALT_VART_UNSETT_ISSETT_EMPTYT_HALT_COMPILERT_CLASST_TRAITT_INTERFACET_EXTENDST_IMPLEMENTST_OBJECT_OPERATORT_DOUBLE_ARROWT_LISTT_ARRAYT_CALLABLET_CLASS_CT_TRAIT_CT_METHOD_CT_FUNC_CT_LINET_FILET_COMMENTT_DOC_COMMENTT_OPEN_TAGT_OPEN_TAG_WITH_ECHOT_CLOSE_TAGT_WHITESPACET_START_HEREDOCT_END_HEREDOCT_DOLLAR_OPEN_CURLY_BRACEST_CURLY_OPENT_PAAMAYIM_NEKUDOTAYIMT_NAMESPACET_NS_CT_DIRT_NS_SEPARATORT_ELLIPSIST_EVALT_REQUIRET_REQUIRE_ONCET_LOGICAL_ORT_LOGICAL_XORT_LOGICAL_ANDT_INSTANCEOFT_NEWT_CLONET_ELSEIFT_ELSET_ENDIFT_PRINTT_YIELDT_STATICT_ABSTRACTT_FINALT_PRIVATET_PROTECTEDT_PUBLICT_INCT_DECT_YIELD_FROMT_INT_CASTT_DOUBLE_CASTT_STRING_CASTT_ARRAY_CASTT_OBJECT_CASTT_BOOL_CASTT_UNSET_CASTT_COALESCET_SPACESHIPT_NOELSET_PLUS_EQUALT_MINUS_EQUALT_MUL_EQUALT_POW_EQUALT_DIV_EQUALT_CONCAT_EQUALT_MOD_EQUALT_AND_EQUALT_OR_EQUALT_XOR_EQUALT_SL_EQUALT_SR_EQUALT_COALESCE_EQUALT_BOOLEAN_ORT_BOOLEAN_ANDT_POWT_SLT_SRT_IS_IDENTICALT_IS_NOT_IDENTICALT_IS_EQUALT_IS_NOT_EQUALT_IS_SMALLER_OR_EQUALT_IS_GREATER_OR_EQUAL"

// _TokenID_index lists the byte offsets into _TokenID_name at which each name
// starts, plus one final entry marking the end of the last name, so that name
// k occupies _TokenID_name[_TokenID_index[k]:_TokenID_index[k+1]].
var _TokenID_index = [...]uint16{0, 9, 23, 29, 33, 42, 51, 59, 75, 85, 97, 110, 121, 136, 161, 187, 193, 197, 204, 214, 219, 227, 236, 248, 257, 269, 273, 281, 292, 298, 307, 314, 324, 330, 340, 344, 351, 359, 364, 371, 380, 387, 392, 403, 411, 416, 423, 430, 437, 452, 459, 466, 477, 486, 498, 515, 529, 535, 542, 552, 561, 570, 580, 588, 594, 600, 609, 622, 632, 652, 663, 675, 690, 703, 729, 741, 763, 774, 780, 785, 799, 809, 815, 824, 838, 850, 863, 876, 888, 893, 900, 908, 914, 921, 928, 935, 943, 953, 960, 969, 980, 988, 993, 998, 1010, 1020, 1033, 1046, 1058, 1071, 1082, 1094, 1104, 1115, 1123, 1135, 1148, 1159, 1170, 1181, 1195, 1206, 1217, 1227, 1238, 1248, 1258, 1274, 1286, 1299, 1304, 1308, 1312, 1326, 1344, 1354, 1368, 1389, 1410}
// String returns the constant name of the token (for example "T_INCLUDE").
// Values outside the range covered by the name table are rendered as
// "TokenID(N)", where N is the original numeric value.
func (i TokenID) String() string {
	// Rebase so that the first known token (57346) maps to table index 0.
	i -= 57346
	if i < 0 || i >= TokenID(len(_TokenID_index)-1) {
		// i+57346 undoes the rebasing so the original value is printed.
		return "TokenID(" + strconv.FormatInt(int64(i+57346), 10) + ")"
	}
	return _TokenID_name[_TokenID_index[i]:_TokenID_index[i+1]]
}

View File

@@ -0,0 +1,61 @@
package version
import (
"errors"
"strconv"
"strings"
)
// version is a parsed "major.minor" version number.
type version struct {
	major int
	minor int
}

// Compare parses a and b as "major.minor" version strings and orders them
// numerically, comparing the major part first and the minor part second
// (so "7.10" is greater than "7.9").
//
// It returns -1 if a is lower than b, 0 if they are equal and 1 if a is
// higher than b. If either string is not a valid version, the result is 0 and
// a non-nil error.
func Compare(a string, b string) (int, error) {
	first, err := parse(a)
	if err != nil {
		return 0, err
	}

	second, err := parse(b)
	if err != nil {
		return 0, err
	}

	if first.major != second.major {
		return compareInts(first.major, second.major), nil
	}

	return compareInts(first.minor, second.minor), nil
}

// compareInts returns -1, 0 or 1 depending on whether x is less than, equal
// to or greater than y.
func compareInts(x, y int) int {
	switch {
	case x < y:
		return -1
	case x > y:
		return 1
	default:
		return 0
	}
}

// parse splits v on "." and converts it into a version. The string must
// consist of exactly two unsigned decimal integers separated by a single dot;
// anything else yields an error and the zero version.
func parse(v string) (version, error) {
	parts := strings.Split(v, ".")
	if len(parts) != 2 {
		return version{}, errors.New("version must contain major and minor parts")
	}

	major, err := parsePart(parts[0])
	if err != nil {
		return version{}, err
	}

	minor, err := parsePart(parts[1])
	if err != nil {
		return version{}, err
	}

	return version{major: major, minor: minor}, nil
}

// parsePart converts a single version component into an int.
func parsePart(s string) (int, error) {
	// strconv.Atoi accepts a leading '+' or '-', which would let strings such
	// as "-7.4" pass as versions; a version component never carries a sign.
	if s != "" && (s[0] == '+' || s[0] == '-') {
		return 0, errors.New("version parts must be unsigned integers")
	}

	return strconv.Atoi(s)
}