Merge pull request #4 from z7zmey/issue-2

Issue #2 - fix template and backquoted strings scanning
This commit is contained in:
Vadym Slizov 2018-03-30 20:05:25 +03:00 committed by GitHub
commit 0c1ac1c4a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 2887 additions and 2579 deletions

View File

@ -4,11 +4,14 @@ import (
"bytes"
"testing"
"github.com/z7zmey/php-parser/node/scalar"
"github.com/z7zmey/php-parser/node/name"
"github.com/z7zmey/php-parser/node/expr"
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/expr/binary"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/php5"
"github.com/z7zmey/php-parser/php7"
@ -125,3 +128,37 @@ func TestFunctionCallVar(t *testing.T) {
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
func TestFunctionCallExprArg(t *testing.T) {
src := `<? ceil($foo/3);`
expected := &stmt.StmtList{
Stmts: []node.Node{
&stmt.Expression{
Expr: &expr.FunctionCall{
Function: &name.Name{
Parts: []node.Node{
&name.NamePart{Value: "ceil"},
},
},
Arguments: []node.Node{
&node.Argument{
Variadic: false,
IsReference: false,
Expr: &binary.Div{
Left: &expr.Variable{VarName: &node.Identifier{Value: "foo"}},
Right: &scalar.Lnumber{Value: "3"},
},
},
},
},
},
},
}
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}

View File

@ -36,6 +36,29 @@ func TestSimpleVar(t *testing.T) {
assertEqual(t, expected, actual)
}
func TestSimpleVarOneChar(t *testing.T) {
src := `<? "test $a";`
expected := &stmt.StmtList{
Stmts: []node.Node{
&stmt.Expression{
Expr: &scalar.Encapsed{
Parts: []node.Node{
&scalar.EncapsedStringPart{Value: "test "},
&expr.Variable{VarName: &node.Identifier{Value: "a"}},
},
},
},
},
}
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
func TestSimpleVarEndsEcapsed(t *testing.T) {
src := `<? "test $var\"";`
@ -60,6 +83,30 @@ func TestSimpleVarEndsEcapsed(t *testing.T) {
assertEqual(t, expected, actual)
}
func TestStringVarCurveOpen(t *testing.T) {
src := `<? "=$a{$b}";`
expected := &stmt.StmtList{
Stmts: []node.Node{
&stmt.Expression{
Expr: &scalar.Encapsed{
Parts: []node.Node{
&scalar.EncapsedStringPart{Value: "="},
&expr.Variable{VarName: &node.Identifier{Value: "a"}},
&expr.Variable{VarName: &node.Identifier{Value: "b"}},
},
},
},
},
}
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
func TestSimpleVarPropertyFetch(t *testing.T) {
src := `<? "test $foo->bar()";`

View File

@ -346,3 +346,17 @@ func TestPhp5ParameterNode(t *testing.T) {
actual, _, _ := php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
func TestCommentEndFile(t *testing.T) {
src := `<? //comment at the end)`
expected := &stmt.StmtList{
Stmts: []node.Node{},
}
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,10 @@ const (
var heredocLabel []lex.Char
// isValidFirstVarNameRune reports whether r is accepted as the first rune of
// a variable name: an ASCII letter, an underscore, or a rune in the inclusive
// range U+007F..U+00FF.
func isValidFirstVarNameRune(r rune) bool {
	switch {
	case 'A' <= r && r <= 'Z':
		return true
	case r == '_':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '\u007f' <= r && r <= 'ÿ':
		return true
	}
	return false
}
func (l *Lexer) Lex(lval Lval) int {
l.Comments = nil
c := l.Enter()
@ -50,7 +54,7 @@ DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
HNUM 0x[0-9a-fA-F]+
BNUM 0b[01]+
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
VAR_NAME [a-zA-Z_\x7f-\xff^0-9/][a-zA-Z0-9_\x7f-\xff]*
VAR_NAME [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
NEW_LINE (\r|\n|\r\n)
@ -247,7 +251,48 @@ NEW_LINE (\r|\n|\r\n)
<PHP>\<\< lval.Token(l.newToken(l.Token())); return T_SL
<PHP>\>\> lval.Token(l.newToken(l.Token())); return T_SR
<PHP>\?\? lval.Token(l.newToken(l.Token())); return T_COALESCE
<PHP>(#|[/][/]).*{NEW_LINE} lval.Token(l.newToken(l.Token())); l.addComment(comment.NewPlainComment(string(l.TokenBytes(nil))))// return T_COMMENT; // TODO: handle ?>
<PHP>(#|[/][/])
tb := []rune{}
for _, chr := range(l.Token()) {
tb = append(tb, chr.Rune)
}
for {
if c == -1 {
break
}
tb = append(tb, rune(c))
switch c {
case '\r':
c = l.Next()
if c == '\n' {
continue
}
case '\n':
case '?':
c = l.Next()
if c == '>' {
l.ungetChars(1)
tb = tb[:len(tb)-1]
break
}
continue
default:
c = l.Next()
continue
}
break;
}
l.addComment(comment.NewPlainComment(string(tb)))
<PHP>([/][*])|([/][*][*])
tb := l.Token()
is_doc_comment := false
@ -445,42 +490,39 @@ NEW_LINE (\r|\n|\r\n)
<STRING>\" l.popState(); lval.Token(l.newToken(l.Token())); return Rune2Class(l.Token()[0].Rune)
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.Token(l.newToken(l.ungetChars(1))); l.pushState(PHP); return T_CURLY_OPEN
<STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME); lval.Token(l.newToken(l.Token())); return T_DOLLAR_OPEN_CURLY_BRACES
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetChars(1);l.pushState(STRING_VAR)
<STRING,HEREDOC,BACKQUOTE>\${VAR_NAME} l.ungetChars(len(l.Token()));l.pushState(STRING_VAR)
<STRING>.|[ \t\n\r]
F1:for {
switch l.Prev.Rune {
currentChar := l.Prev
tb := []lex.Char{currentChar}
for {
switch currentChar.Rune {
case '$':
c = l.Next();
if l.Prev.Rune == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetChars(2)
tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-2]));
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
if c == '{' || isValidFirstVarNameRune(rune(c)) {
l.ungetChars(1)
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
}
case '{':
c = l.Next();
if l.Prev.Rune == '$' {
l.ungetChars(2)
tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-2]));
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
if rune(c) == '$' {
l.ungetChars(1)
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
}
case '\\':
currentChar := l.Last
tb = append(tb, currentChar)
c = l.Next();
}
if rune(c) == '"' {
lval.Token(l.newToken(l.Token()));
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
currentChar = l.Last
tb = append(tb, currentChar)
c = l.Next()
if c == -1 {
@ -489,44 +531,43 @@ NEW_LINE (\r|\n|\r\n)
}
<BACKQUOTE>.|[ \t\n\r]
F2:for {
currentChar := l.Prev
tb := []lex.Char{currentChar}
for {
switch currentChar.Rune {
case '$':
if c == '{' || isValidFirstVarNameRune(rune(c)) {
l.ungetChars(1)
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
}
case '{':
if rune(c) == '$' {
l.ungetChars(1)
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
}
case '\\':
currentChar := l.Last
tb = append(tb, currentChar)
c = l.Next();
}
if rune(c) == '`' {
lval.Token(l.newToken(l.Token()));
return T_ENCAPSED_AND_WHITESPACE
}
currentChar = l.Last
tb = append(tb, currentChar)
c = l.Next()
if c == -1 {
break;
}
switch c {
case '`' :
lval.Token(l.newToken(l.Token()));
return T_ENCAPSED_AND_WHITESPACE
break F2;
case '$':
c = l.Next();
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetChars(1)
tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetChars(0)
case '{':
c = l.Next();
if rune(c) == '$' {
l.ungetChars(1)
tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetChars(0)
case '\\':
c = l.Next();
}
c = l.Next()
}
<HEREDOC>.|[ \t\n\r]

View File

@ -5,6 +5,8 @@ import (
"reflect"
"testing"
"github.com/z7zmey/php-parser/comment"
"github.com/z7zmey/php-parser/scanner"
"github.com/z7zmey/php-parser/token"
@ -427,14 +429,120 @@ func TestTokens(t *testing.T) {
// TestTeplateStringTokens feeds a PHP source made of several template strings
// to the scanner and compares the sequence of token ids returned by Lex with
// the expected sequence.
//
// NOTE(review): the backquoted line concatenated into src has no counterpart
// in expected (which starts with '"'); it looks like a removed line left over
// from the diff view this text was taken from — confirm against the real file.
func TestTeplateStringTokens(t *testing.T) {
src := `<?php
` + "`test $var {$var} ${var_name} {s $ \\$a `" + `
"foo $a"
"foo $a{$b}"
"test $var {$var} ${var_name} {s $ \$a "
"{$var}"
"$foo/"
"$foo/100;"
"$/$foo"
"$0$foo"
`
// The groups below presumably follow the double-quoted strings in src, in
// order, each delimited by a pair of '"' tokens — TODO confirm.
expected := []int{
// "foo $a"
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('"'),
// "foo $a{$b}"
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.Rune2Class('"'),
// "test $var {$var} ${var_name} {s $ \$a "
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_DOLLAR_OPEN_CURLY_BRACES,
scanner.T_STRING_VARNAME,
scanner.Rune2Class('}'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'),
// "{$var}"
scanner.Rune2Class('"'),
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.Rune2Class('"'),
// "$foo/"
scanner.Rune2Class('"'),
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'),
// "$foo/100;"
scanner.Rune2Class('"'),
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'),
// "$/$foo"
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('"'),
// "$0$foo"
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('"'),
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
lv := &lval{}
actual := []int{}
// Collect token ids until Lex returns a negative value.
for {
token := lexer.Lex(lv)
if token < 0 {
break
}
actual = append(actual, token)
}
assertEqual(t, expected, actual)
}
func TestBackquoteStringTokens(t *testing.T) {
src := `<?php
` + "`foo $a`" + `
` + "`foo $a{$b}`" + `
` + "`test $var {$var} ${var_name} {s $ \\$a `" + `
` + "`{$var}`" + `
` + "`$foo/`" + `
` + "`$foo/100`" + `
` + "`$/$foo`" + `
` + "`$0$foo`" + `
`
expected := []int{
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
@ -449,25 +557,31 @@ func TestTeplateStringTokens(t *testing.T) {
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('`'),
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('`'),
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_DOLLAR_OPEN_CURLY_BRACES,
scanner.T_STRING_VARNAME,
scanner.Rune2Class('}'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'),
scanner.Rune2Class('`'),
scanner.Rune2Class('"'),
scanner.T_CURLY_OPEN,
scanner.Rune2Class('`'),
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('`'),
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@ -616,3 +730,123 @@ func TestStringTokensAfterVariable(t *testing.T) {
assertEqual(t, expected, actual)
assertEqual(t, expectedTokens, actualTokens)
}
// TestSlashAfterVariable checks that `$foo/3` is scanned as a variable, a
// slash and an integer literal, comparing both the token ids and the token
// text collected from the lexer.
func TestSlashAfterVariable(t *testing.T) {
	src := `<?php $foo/3`

	expected := []int{
		scanner.T_VARIABLE,
		scanner.Rune2Class('/'),
		scanner.T_LNUMBER,
	}
	expectedTokens := []string{"$foo", "/", "3"}

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lv := &lval{}

	// Kept as empty, non-nil slices, exactly as compared by assertEqual.
	actual := []int{}
	actualTokens := []string{}

	// Stop at the first negative token id.
	for id := lexer.Lex(lv); id >= 0; id = lexer.Lex(lv) {
		actualTokens = append(actualTokens, lv.Tkn.Value)
		actual = append(actual, id)
	}

	assertEqual(t, expected, actual)
	assertEqual(t, expectedTokens, actualTokens)
}
// TestCommentEnd checks that a line comment terminated by end of input
// (no newline) is recorded in lexer.Comments after a single Lex call.
func TestCommentEnd(t *testing.T) {
	src := `<?php //test`

	expected := []comment.Comment{comment.NewPlainComment("//test")}

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lexer.Lex(&lval{})

	assertEqual(t, expected, lexer.Comments)
}
// TestCommentNewLine checks that a line comment ended by "\n" is recorded in
// lexer.Comments with the newline included, after a single Lex call.
func TestCommentNewLine(t *testing.T) {
	src := "<?php //test\n$a"

	expected := []comment.Comment{comment.NewPlainComment("//test\n")}

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lexer.Lex(&lval{})

	assertEqual(t, expected, lexer.Comments)
}
// TestCommentNewLine1 checks that a line comment ended by a lone "\r" is
// recorded in lexer.Comments with the carriage return included, after a
// single Lex call.
func TestCommentNewLine1(t *testing.T) {
	src := "<?php //test\r$a"

	expected := []comment.Comment{comment.NewPlainComment("//test\r")}

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lexer.Lex(&lval{})

	assertEqual(t, expected, lexer.Comments)
}
// TestCommentNewLine2 checks that a `#` comment ended by "\r\n" is recorded in
// lexer.Comments with both line-ending characters included, after a single
// Lex call.
func TestCommentNewLine2(t *testing.T) {
	src := "<?php #test\r\n$a"

	expected := []comment.Comment{comment.NewPlainComment("#test\r\n")}

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lexer.Lex(&lval{})

	assertEqual(t, expected, lexer.Comments)
}
// TestCommentWithPhpEndTag checks that a line comment followed by `?>` is
// recorded in lexer.Comments without the closing tag, after a single Lex call.
func TestCommentWithPhpEndTag(t *testing.T) {
	src := `<?php
//test?> test`

	expected := []comment.Comment{comment.NewPlainComment("//test")}

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lexer.Lex(&lval{})

	assertEqual(t, expected, lexer.Comments)
}