Merge pull request #4 from z7zmey/issue-2

Issue #2 - fix template and backquoted strings scanning
This commit is contained in:
Vadym Slizov 2018-03-30 20:05:25 +03:00 committed by GitHub
commit 0c1ac1c4a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 2887 additions and 2579 deletions

View File

@ -4,11 +4,14 @@ import (
"bytes" "bytes"
"testing" "testing"
"github.com/z7zmey/php-parser/node/scalar"
"github.com/z7zmey/php-parser/node/name" "github.com/z7zmey/php-parser/node/name"
"github.com/z7zmey/php-parser/node/expr" "github.com/z7zmey/php-parser/node/expr"
"github.com/z7zmey/php-parser/node" "github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/expr/binary"
"github.com/z7zmey/php-parser/node/stmt" "github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/php5" "github.com/z7zmey/php-parser/php5"
"github.com/z7zmey/php-parser/php7" "github.com/z7zmey/php-parser/php7"
@ -125,3 +128,37 @@ func TestFunctionCallVar(t *testing.T) {
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php") actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
func TestFunctionCallExprArg(t *testing.T) {
src := `<? ceil($foo/3);`
expected := &stmt.StmtList{
Stmts: []node.Node{
&stmt.Expression{
Expr: &expr.FunctionCall{
Function: &name.Name{
Parts: []node.Node{
&name.NamePart{Value: "ceil"},
},
},
Arguments: []node.Node{
&node.Argument{
Variadic: false,
IsReference: false,
Expr: &binary.Div{
Left: &expr.Variable{VarName: &node.Identifier{Value: "foo"}},
Right: &scalar.Lnumber{Value: "3"},
},
},
},
},
},
},
}
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}

View File

@ -36,6 +36,29 @@ func TestSimpleVar(t *testing.T) {
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
func TestSimpleVarOneChar(t *testing.T) {
src := `<? "test $a";`
expected := &stmt.StmtList{
Stmts: []node.Node{
&stmt.Expression{
Expr: &scalar.Encapsed{
Parts: []node.Node{
&scalar.EncapsedStringPart{Value: "test "},
&expr.Variable{VarName: &node.Identifier{Value: "a"}},
},
},
},
},
}
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
func TestSimpleVarEndsEcapsed(t *testing.T) { func TestSimpleVarEndsEcapsed(t *testing.T) {
src := `<? "test $var\"";` src := `<? "test $var\"";`
@ -60,6 +83,30 @@ func TestSimpleVarEndsEcapsed(t *testing.T) {
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
func TestStringVarCurveOpen(t *testing.T) {
src := `<? "=$a{$b}";`
expected := &stmt.StmtList{
Stmts: []node.Node{
&stmt.Expression{
Expr: &scalar.Encapsed{
Parts: []node.Node{
&scalar.EncapsedStringPart{Value: "="},
&expr.Variable{VarName: &node.Identifier{Value: "a"}},
&expr.Variable{VarName: &node.Identifier{Value: "b"}},
},
},
},
},
}
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
func TestSimpleVarPropertyFetch(t *testing.T) { func TestSimpleVarPropertyFetch(t *testing.T) {
src := `<? "test $foo->bar()";` src := `<? "test $foo->bar()";`

View File

@ -346,3 +346,17 @@ func TestPhp5ParameterNode(t *testing.T) {
actual, _, _ := php5.Parse(bytes.NewBufferString(src), "test.php") actual, _, _ := php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
// TestCommentEndFile parses input that consists only of a `//` comment with
// no trailing newline and expects an empty statement list from both the php7
// and php5 parsers.
func TestCommentEndFile(t *testing.T) {
	const code = `<? //comment at the end)`

	// Stmts is an explicitly empty (non-nil) slice, as in the expectation
	// the parsers are compared against.
	want := &stmt.StmtList{Stmts: []node.Node{}}

	// Parse errors are deliberately not inspected here; only the tree is compared.
	got, _, _ := php7.Parse(bytes.NewBufferString(code), "test.php")
	assertEqual(t, want, got)

	got, _, _ = php5.Parse(bytes.NewBufferString(code), "test.php")
	assertEqual(t, want, got)
}

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,10 @@ const (
var heredocLabel []lex.Char var heredocLabel []lex.Char
// isValidFirstVarNameRune reports whether r is accepted as the first rune of
// a variable name: an ASCII letter, an underscore, or a rune in the inclusive
// range U+007F..U+00FF. Digits and every other rune are rejected.
func isValidFirstVarNameRune(r rune) bool {
	switch {
	case 'A' <= r && r <= 'Z':
		return true
	case r == '_':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '\u007f' <= r && r <= 'ÿ':
		return true
	default:
		return false
	}
}
func (l *Lexer) Lex(lval Lval) int { func (l *Lexer) Lex(lval Lval) int {
l.Comments = nil l.Comments = nil
c := l.Enter() c := l.Enter()
@ -50,7 +54,7 @@ DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
HNUM 0x[0-9a-fA-F]+ HNUM 0x[0-9a-fA-F]+
BNUM 0b[01]+ BNUM 0b[01]+
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM}) EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
VAR_NAME [a-zA-Z_\x7f-\xff^0-9/][a-zA-Z0-9_\x7f-\xff]* VAR_NAME [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@] OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
NEW_LINE (\r|\n|\r\n) NEW_LINE (\r|\n|\r\n)
@ -247,7 +251,48 @@ NEW_LINE (\r|\n|\r\n)
<PHP>\<\< lval.Token(l.newToken(l.Token())); return T_SL <PHP>\<\< lval.Token(l.newToken(l.Token())); return T_SL
<PHP>\>\> lval.Token(l.newToken(l.Token())); return T_SR <PHP>\>\> lval.Token(l.newToken(l.Token())); return T_SR
<PHP>\?\? lval.Token(l.newToken(l.Token())); return T_COALESCE <PHP>\?\? lval.Token(l.newToken(l.Token())); return T_COALESCE
<PHP>(#|[/][/]).*{NEW_LINE} lval.Token(l.newToken(l.Token())); l.addComment(comment.NewPlainComment(string(l.TokenBytes(nil))))// return T_COMMENT; // TODO: handle ?> <PHP>(#|[/][/])
tb := []rune{}
for _, chr := range(l.Token()) {
tb = append(tb, chr.Rune)
}
for {
if c == -1 {
break
}
tb = append(tb, rune(c))
switch c {
case '\r':
c = l.Next()
if c == '\n' {
continue
}
case '\n':
case '?':
c = l.Next()
if c == '>' {
l.ungetChars(1)
tb = tb[:len(tb)-1]
break
}
continue
default:
c = l.Next()
continue
}
break;
}
l.addComment(comment.NewPlainComment(string(tb)))
<PHP>([/][*])|([/][*][*]) <PHP>([/][*])|([/][*][*])
tb := l.Token() tb := l.Token()
is_doc_comment := false is_doc_comment := false
@ -445,42 +490,39 @@ NEW_LINE (\r|\n|\r\n)
<STRING>\" l.popState(); lval.Token(l.newToken(l.Token())); return Rune2Class(l.Token()[0].Rune) <STRING>\" l.popState(); lval.Token(l.newToken(l.Token())); return Rune2Class(l.Token()[0].Rune)
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.Token(l.newToken(l.ungetChars(1))); l.pushState(PHP); return T_CURLY_OPEN <STRING,HEREDOC,BACKQUOTE>\{\$ lval.Token(l.newToken(l.ungetChars(1))); l.pushState(PHP); return T_CURLY_OPEN
<STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME); lval.Token(l.newToken(l.Token())); return T_DOLLAR_OPEN_CURLY_BRACES <STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME); lval.Token(l.newToken(l.Token())); return T_DOLLAR_OPEN_CURLY_BRACES
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetChars(1);l.pushState(STRING_VAR) <STRING,HEREDOC,BACKQUOTE>\${VAR_NAME} l.ungetChars(len(l.Token()));l.pushState(STRING_VAR)
<STRING>.|[ \t\n\r] <STRING>.|[ \t\n\r]
F1:for { currentChar := l.Prev
switch l.Prev.Rune { tb := []lex.Char{currentChar}
for {
switch currentChar.Rune {
case '$': case '$':
c = l.Next(); if c == '{' || isValidFirstVarNameRune(rune(c)) {
if l.Prev.Rune == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetChars(2)
tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-2]));
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
l.ungetChars(1) l.ungetChars(1)
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
}
case '{': case '{':
c = l.Next(); if rune(c) == '$' {
if l.Prev.Rune == '$' {
l.ungetChars(2)
tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-2]));
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
l.ungetChars(1) l.ungetChars(1)
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
}
case '\\': case '\\':
currentChar := l.Last
tb = append(tb, currentChar)
c = l.Next(); c = l.Next();
} }
if rune(c) == '"' { if rune(c) == '"' {
lval.Token(l.newToken(l.Token())); lval.Token(l.newToken(l.Token()));
return T_ENCAPSED_AND_WHITESPACE return T_ENCAPSED_AND_WHITESPACE
break F1;
} }
currentChar = l.Last
tb = append(tb, currentChar)
c = l.Next() c = l.Next()
if c == -1 { if c == -1 {
@ -489,44 +531,43 @@ NEW_LINE (\r|\n|\r\n)
} }
<BACKQUOTE>.|[ \t\n\r] <BACKQUOTE>.|[ \t\n\r]
F2:for { currentChar := l.Prev
tb := []lex.Char{currentChar}
for {
switch currentChar.Rune {
case '$':
if c == '{' || isValidFirstVarNameRune(rune(c)) {
l.ungetChars(1)
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
}
case '{':
if rune(c) == '$' {
l.ungetChars(1)
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
}
case '\\':
currentChar := l.Last
tb = append(tb, currentChar)
c = l.Next();
}
if rune(c) == '`' {
lval.Token(l.newToken(l.Token()));
return T_ENCAPSED_AND_WHITESPACE
}
currentChar = l.Last
tb = append(tb, currentChar)
c = l.Next()
if c == -1 { if c == -1 {
break; break;
} }
switch c {
case '`' :
lval.Token(l.newToken(l.Token()));
return T_ENCAPSED_AND_WHITESPACE
break F2;
case '$':
c = l.Next();
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetChars(1)
tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetChars(0)
case '{':
c = l.Next();
if rune(c) == '$' {
l.ungetChars(1)
tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-1]));
return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetChars(0)
case '\\':
c = l.Next();
}
c = l.Next()
} }
<HEREDOC>.|[ \t\n\r] <HEREDOC>.|[ \t\n\r]

View File

@ -5,6 +5,8 @@ import (
"reflect" "reflect"
"testing" "testing"
"github.com/z7zmey/php-parser/comment"
"github.com/z7zmey/php-parser/scanner" "github.com/z7zmey/php-parser/scanner"
"github.com/z7zmey/php-parser/token" "github.com/z7zmey/php-parser/token"
@ -427,14 +429,120 @@ func TestTokens(t *testing.T) {
func TestTeplateStringTokens(t *testing.T) { func TestTeplateStringTokens(t *testing.T) {
src := `<?php src := `<?php
` + "`test $var {$var} ${var_name} {s $ \\$a `" + ` "foo $a"
"foo $a{$b}"
"test $var {$var} ${var_name} {s $ \$a " "test $var {$var} ${var_name} {s $ \$a "
"{$var}" "{$var}"
"$foo/"
"$foo/100;"
"$/$foo"
"$0$foo"
`
expected := []int{
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('"'),
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.Rune2Class('"'),
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_DOLLAR_OPEN_CURLY_BRACES,
scanner.T_STRING_VARNAME,
scanner.Rune2Class('}'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'),
scanner.Rune2Class('"'),
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.Rune2Class('"'),
scanner.Rune2Class('"'),
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'),
scanner.Rune2Class('"'),
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'),
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('"'),
scanner.Rune2Class('"'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('"'),
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
lv := &lval{}
actual := []int{}
for {
token := lexer.Lex(lv)
if token < 0 {
break
}
actual = append(actual, token)
}
assertEqual(t, expected, actual)
}
func TestBackquoteStringTokens(t *testing.T) {
src := `<?php
` + "`foo $a`" + `
` + "`foo $a{$b}`" + `
` + "`test $var {$var} ${var_name} {s $ \\$a `" + `
` + "`{$var}`" + `
` + "`$foo/`" + `
` + "`$foo/100`" + `
` + "`$/$foo`" + `
` + "`$0$foo`" + `
` `
expected := []int{ expected := []int{
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE,
scanner.Rune2Class('}'),
scanner.Rune2Class('`'),
scanner.Rune2Class('`'), scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE, scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE, scanner.T_VARIABLE,
@ -449,25 +557,31 @@ func TestTeplateStringTokens(t *testing.T) {
scanner.T_ENCAPSED_AND_WHITESPACE, scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('`'), scanner.Rune2Class('`'),
scanner.Rune2Class('"'), scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_CURLY_OPEN, scanner.T_CURLY_OPEN,
scanner.T_VARIABLE, scanner.T_VARIABLE,
scanner.Rune2Class('}'), scanner.Rune2Class('}'),
scanner.T_ENCAPSED_AND_WHITESPACE, scanner.Rune2Class('`'),
scanner.T_DOLLAR_OPEN_CURLY_BRACES,
scanner.T_STRING_VARNAME,
scanner.Rune2Class('}'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'),
scanner.Rune2Class('"'), scanner.Rune2Class('`'),
scanner.T_CURLY_OPEN,
scanner.T_VARIABLE, scanner.T_VARIABLE,
scanner.Rune2Class('}'), scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('"'), scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_VARIABLE,
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('`'),
scanner.Rune2Class('`'),
scanner.T_ENCAPSED_AND_WHITESPACE,
scanner.T_VARIABLE,
scanner.Rune2Class('`'),
} }
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php") lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@ -616,3 +730,123 @@ func TestStringTokensAfterVariable(t *testing.T) {
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
assertEqual(t, expectedTokens, actualTokens) assertEqual(t, expectedTokens, actualTokens)
} }
// TestSlashAfterVariable lexes `$foo/3` and checks that it produces exactly
// three tokens — T_VARIABLE, '/', T_LNUMBER — with the values "$foo", "/"
// and "3".
func TestSlashAfterVariable(t *testing.T) {
	const code = `<?php $foo/3`

	wantIDs := []int{
		scanner.T_VARIABLE,
		scanner.Rune2Class('/'),
		scanner.T_LNUMBER,
	}
	wantValues := []string{"$foo", "/", "3"}

	lx := scanner.NewLexer(bytes.NewBufferString(code), "test.php")
	state := &lval{}
	gotIDs := []int{}
	gotValues := []string{}

	// Keep pulling tokens until Lex returns a negative id.
	for id := lx.Lex(state); id >= 0; id = lx.Lex(state) {
		gotValues = append(gotValues, state.Tkn.Value)
		gotIDs = append(gotIDs, id)
	}

	assertEqual(t, wantIDs, gotIDs)
	assertEqual(t, wantValues, gotValues)
}
// TestCommentEnd runs a single Lex call over `<?php //test` (a line comment
// with no trailing newline) and expects the lexer to have collected one plain
// comment with the text "//test".
func TestCommentEnd(t *testing.T) {
	const code = `<?php //test`

	want := []comment.Comment{comment.NewPlainComment("//test")}

	lx := scanner.NewLexer(bytes.NewBufferString(code), "test.php")
	lx.Lex(&lval{})

	assertEqual(t, want, lx.Comments)
}
// TestCommentNewLine runs a single Lex call over a `//` comment terminated by
// "\n" and expects the collected comment text to include that newline.
func TestCommentNewLine(t *testing.T) {
	const code = "<?php //test\n$a"

	want := []comment.Comment{comment.NewPlainComment("//test\n")}

	lx := scanner.NewLexer(bytes.NewBufferString(code), "test.php")
	lx.Lex(&lval{})

	assertEqual(t, want, lx.Comments)
}
// TestCommentNewLine1 runs a single Lex call over a `//` comment terminated
// by a bare "\r" and expects the collected comment text to include that
// carriage return.
func TestCommentNewLine1(t *testing.T) {
	const code = "<?php //test\r$a"

	want := []comment.Comment{comment.NewPlainComment("//test\r")}

	lx := scanner.NewLexer(bytes.NewBufferString(code), "test.php")
	lx.Lex(&lval{})

	assertEqual(t, want, lx.Comments)
}
// TestCommentNewLine2 runs a single Lex call over a `#` comment terminated by
// "\r\n" and expects the collected comment text to include both line-ending
// characters.
func TestCommentNewLine2(t *testing.T) {
	const code = "<?php #test\r\n$a"

	want := []comment.Comment{comment.NewPlainComment("#test\r\n")}

	lx := scanner.NewLexer(bytes.NewBufferString(code), "test.php")
	lx.Lex(&lval{})

	assertEqual(t, want, lx.Comments)
}
// TestCommentWithPhpEndTag runs a single Lex call over a `//` comment that is
// followed on the same line by `?>` and expects the collected comment to stop
// before the closing tag, i.e. to be exactly "//test".
func TestCommentWithPhpEndTag(t *testing.T) {
	const code = `<?php
//test?> test`

	want := []comment.Comment{comment.NewPlainComment("//test")}

	lx := scanner.NewLexer(bytes.NewBufferString(code), "test.php")
	lx.Lex(&lval{})

	assertEqual(t, want, lx.Comments)
}