issue #2 - fix template string scanning

`l.Prev.Rune` is actually the current rune, and `c` is the next rune.
This commit is contained in:
z7zmey 2018-03-29 16:46:21 +03:00
parent 685b7b25bd
commit 83bb761062
4 changed files with 101 additions and 38 deletions

View File

@ -36,6 +36,30 @@ func TestSimpleVar(t *testing.T) {
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
func TestSimpleVarEndsEcapsed(t *testing.T) {
src := `<? "test $var\"";`
expected := &stmt.StmtList{
Stmts: []node.Node{
&stmt.Expression{
Expr: &scalar.Encapsed{
Parts: []node.Node{
&scalar.EncapsedStringPart{Value: "test "},
&expr.Variable{VarName: &node.Identifier{Value: "var"}},
&scalar.EncapsedStringPart{Value: "\\\""},
},
},
},
},
}
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
actual, _, _ = php5.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
func TestSimpleVarPropertyFetch(t *testing.T) { func TestSimpleVarPropertyFetch(t *testing.T) {
src := `<? "test $foo->bar()";` src := `<? "test $foo->bar()";`

View File

@ -8624,40 +8624,40 @@ yyrule149: // .|[ \t\n\r]
F1: F1:
for { for {
if c == -1 { switch l.Prev.Rune {
break
}
switch c {
case '"':
lval.Token(l.newToken(l.Token()))
return T_ENCAPSED_AND_WHITESPACE
break F1
case '$': case '$':
c = l.Next() c = l.Next()
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' { if l.Prev.Rune == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetChars(1) l.ungetChars(2)
tb := l.Token() tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-1])) lval.Token(l.newToken(tb[:len(tb)-2]))
return T_ENCAPSED_AND_WHITESPACE return T_ENCAPSED_AND_WHITESPACE
break F1 break F1
} }
l.ungetChars(0) l.ungetChars(1)
case '{': case '{':
c = l.Next() c = l.Next()
if rune(c) == '$' { if l.Prev.Rune == '$' {
l.ungetChars(1) l.ungetChars(2)
tb := l.Token() tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-1])) lval.Token(l.newToken(tb[:len(tb)-2]))
return T_ENCAPSED_AND_WHITESPACE return T_ENCAPSED_AND_WHITESPACE
break F1 break F1
} }
l.ungetChars(0) l.ungetChars(1)
case '\\': case '\\':
c = l.Next() c = l.Next()
} }
if rune(c) == '"' {
lval.Token(l.newToken(l.Token()))
return T_ENCAPSED_AND_WHITESPACE
break F1
}
c = l.Next() c = l.Next()
if c == -1 {
break
}
} }
goto yystate0 goto yystate0
} }

View File

@ -10,7 +10,7 @@ package scanner
import ( import (
"fmt" "fmt"
"bytes" "bytes"
"github.com/cznic/golex/lex" "github.com/cznic/golex/lex"
"github.com/z7zmey/php-parser/comment" "github.com/z7zmey/php-parser/comment"
) )
@ -448,43 +448,44 @@ NEW_LINE (\r|\n|\r\n)
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetChars(1);l.pushState(STRING_VAR) <STRING,HEREDOC,BACKQUOTE>\$ l.ungetChars(1);l.pushState(STRING_VAR)
<STRING>.|[ \t\n\r] <STRING>.|[ \t\n\r]
F1:for { F1:for {
if c == -1 { switch l.Prev.Rune {
break;
}
switch c {
case '"' :
lval.Token(l.newToken(l.Token()));
return T_ENCAPSED_AND_WHITESPACE
break F1;
case '$': case '$':
c = l.Next(); c = l.Next();
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' { if l.Prev.Rune == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetChars(1) l.ungetChars(2)
tb := l.Token() tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-1])); lval.Token(l.newToken(tb[:len(tb)-2]));
return T_ENCAPSED_AND_WHITESPACE return T_ENCAPSED_AND_WHITESPACE
break F1; break F1;
} }
l.ungetChars(0) l.ungetChars(1)
case '{': case '{':
c = l.Next(); c = l.Next();
if rune(c) == '$' { if l.Prev.Rune == '$' {
l.ungetChars(1) l.ungetChars(2)
tb := l.Token() tb := l.Token()
lval.Token(l.newToken(tb[:len(tb)-1])); lval.Token(l.newToken(tb[:len(tb)-2]));
return T_ENCAPSED_AND_WHITESPACE return T_ENCAPSED_AND_WHITESPACE
break F1; break F1;
} }
l.ungetChars(0) l.ungetChars(1)
case '\\': case '\\':
c = l.Next(); c = l.Next();
} }
if rune(c) == '"' {
lval.Token(l.newToken(l.Token()));
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
c = l.Next() c = l.Next()
if c == -1 {
break;
}
} }
<BACKQUOTE>.|[ \t\n\r] <BACKQUOTE>.|[ \t\n\r]

View File

@ -25,11 +25,11 @@ func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
} }
type lval struct { type lval struct {
token token.Token Tkn token.Token
} }
func (lv *lval) Token(t token.Token) { func (lv *lval) Token(t token.Token) {
lv.token = t lv.Tkn = t
} }
func TestIdentifier(t *testing.T) { func TestIdentifier(t *testing.T) {
@ -538,3 +538,41 @@ CAT;
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
// TestStringTokensAfterVariable lexes a double-quoted string that contains an
// escaped quote both before and after an interpolated variable. It checks the
// sequence of token ids returned by the lexer and the text recorded for each
// token.
func TestStringTokensAfterVariable(t *testing.T) {
	src := `<?php "test \"$var\""`

	// Token id used for the opening and closing double quote.
	quote := scanner.Rune2Class('"')

	expected := []int{
		quote,
		scanner.T_ENCAPSED_AND_WHITESPACE,
		scanner.T_VARIABLE,
		scanner.T_ENCAPSED_AND_WHITESPACE,
		quote,
	}
	expectedTokens := []string{
		"\"",
		"test \\\"",
		"$var",
		"\\\"",
		"\"",
	}

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lv := &lval{}

	actual := make([]int, 0)
	actualTokens := make([]string, 0)

	// Keep lexing until Lex reports a negative id; for every token record
	// the text captured in lv first, then the id.
	for id := lexer.Lex(lv); id >= 0; id = lexer.Lex(lv) {
		actualTokens = append(actualTokens, lv.Tkn.Value)
		actual = append(actual, id)
	}

	assertEqual(t, expected, actual)
	assertEqual(t, expectedTokens, actualTokens)
}