[#93] fix scanning constant strings

This commit is contained in:
z7zmey 2019-06-07 09:33:35 +03:00
parent fb2adacdd2
commit e4a208e2a9
3 changed files with 8188 additions and 8273 deletions

File diff suppressed because it is too large Load Diff

View File

@ -41,6 +41,16 @@ func (lex *Lexer) Lex(lval Lval) int {
# Stores the current scan position (lex.p) in lblStart.
action heredoc_lbl_start {lblStart = lex.p} action heredoc_lbl_start {lblStart = lex.p}
# Stores the current scan position (lex.p) in lblEnd.
action heredoc_lbl_end {lblEnd = lex.p} action heredoc_lbl_end {lblEnd = lex.p}
# Records the position of a line break met while scanning a constant string.
# A "\n" is always recorded. A "\r" is recorded only when another byte follows
# it and that byte is not "\n", so a "\r\n" pair is appended once (at the "\n").
action constant_string_new_line {
	if lex.data[lex.p] == '\n' {
		lex.NewLines.Append(lex.p)
	}

	// The length check keeps a "\r" that is the last byte of the input from
	// indexing past the end of lex.data; it restores the end-of-input guard
	// the inline transition code had before it was moved into this action.
	if lex.data[lex.p] == '\r' && lex.p+1 < len(lex.data) && lex.data[lex.p+1] != '\n' {
		lex.NewLines.Append(lex.p)
	}
}
# Evaluates lex.isNotHeredocEnd for the current scan position.
action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) } action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) }
# Holds only when both lex.isNotPhpCloseToken() and lex.isNotNewLine() hold.
action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine() } action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine() }
# Holds only when both lex.isNotHeredocEnd(lex.p) and lex.isNotStringVar() hold.
action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() } action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() }
@ -71,37 +81,50 @@ func (lex *Lexer) Lex(lval Lval) int {
"'" -> qoute "'" -> qoute
| "b"i? '"' -> double_qoute | "b"i? '"' -> double_qoute
), ),
# single quote string
qoute: ( qoute: (
(any - [\\'\r\n]) -> qoute (any - [\\'\r\n]) -> qoute
| "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> qoute | "\r" @constant_string_new_line -> qoute
| "\n" @{lex.NewLines.Append(lex.p)} -> qoute | "\n" @constant_string_new_line -> qoute
| "\\" -> qoute_any | "\\" -> qoute_any
| "'" -> final | "'" -> final
), ),
qoute_any: ( qoute_any: (
any_line -> qoute (any - [\r\n]) -> qoute
| "\r" @constant_string_new_line -> qoute
| "\n" @constant_string_new_line -> qoute
), ),
# double quote string
double_qoute: ( double_qoute: (
(any - [\\"${\r\n]) -> double_qoute (any - [\\"${\r\n]) -> double_qoute
| "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> double_qoute | "\r" @constant_string_new_line -> double_qoute
| "\n" @{lex.NewLines.Append(lex.p)} -> double_qoute | "\n" @constant_string_new_line -> double_qoute
| "\\" -> double_qoute_any | "\\" -> double_qoute_any
| '"' -> final | '"' -> final
| '$' -> double_qoute_nonvarname | '$' -> double_qoute_nonvarname
| '{' -> double_qoute_nondollar | '{' -> double_qoute_nondollar
), ),
double_qoute_any: ( double_qoute_any: (
any_line -> double_qoute (any - [\r\n]) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
), ),
double_qoute_nondollar: ( double_qoute_nondollar: (
'"' -> final (any - [$"\r\n]) -> double_qoute
| "\\" -> double_qoute_any | "\r" @constant_string_new_line -> double_qoute
| [^$\\"] -> double_qoute | "\n" @constant_string_new_line -> double_qoute
| '"' -> final
), ),
double_qoute_nonvarname: ( double_qoute_nonvarname: (
'"' -> final (any - [\\{"\r\n] - varname_first) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
| "\\" -> double_qoute_any | "\\" -> double_qoute_any
| /[^"\\{a-zA-Z_\x7f-\xff]/ -> double_qoute | '"' -> final
); );
main := |* main := |*

View File

@ -191,11 +191,6 @@ func TestTokens(t *testing.T) {
-> ` + "\t\r\n" + ` ->prop -> ` + "\t\r\n" + ` ->prop
'adsf\'adsf\''
"test"
b"\$var $4 {a"
( array ) ( array )
( bool ) ( bool )
( boolean ) ( boolean )
@ -379,10 +374,6 @@ func TestTokens(t *testing.T) {
T_OBJECT_OPERATOR.String(), T_OBJECT_OPERATOR.String(),
T_STRING.String(), T_STRING.String(),
T_CONSTANT_ENCAPSED_STRING.String(),
T_CONSTANT_ENCAPSED_STRING.String(),
T_CONSTANT_ENCAPSED_STRING.String(),
T_ARRAY_CAST.String(), T_ARRAY_CAST.String(),
T_BOOL_CAST.String(), T_BOOL_CAST.String(),
T_BOOL_CAST.String(), T_BOOL_CAST.String(),
@ -414,6 +405,62 @@ func TestTokens(t *testing.T) {
assert.DeepEqual(t, expected, actual) assert.DeepEqual(t, expected, actual)
} }
// TestConstantStrings feeds the lexer a list of single- and double-quoted
// literals that contain escapes, lone "$" and lone "{" characters, and expects
// every one of them to be reported as T_CONSTANT_ENCAPSED_STRING.
func TestConstantStrings(t *testing.T) {
	src := `<?
'str'
'\''
'\\'
b"str"
"\""
"\\"
"\$var"
"$4"
"$"
"$\\"
"{"
"{a"
"\{$"
`

	// One expected token per literal listed in src.
	const literalCount = 13
	expected := make([]string, 0, literalCount)
	for i := 0; i < literalCount; i++ {
		expected = append(expected, T_CONSTANT_ENCAPSED_STRING.String())
	}

	lexer := NewLexer([]byte(src))
	lexer.WithFreeFloating = true
	lv := &lval{}

	// Drain the lexer; a zero token id ends the stream.
	actual := []string{}
	for token := lexer.Lex(lv); token != 0; token = lexer.Lex(lv) {
		actual = append(actual, TokenID(token).String())
	}

	assert.DeepEqual(t, expected, actual)
}
func TestSingleQuoteStringTokens(t *testing.T) { func TestSingleQuoteStringTokens(t *testing.T) {
src := `<?php src := `<?php
'str $var str' 'str $var str'
@ -1388,6 +1435,28 @@ func TestVarNameByteChars(t *testing.T) {
assert.Equal(t, "$\xff", lv.Tkn.Value) assert.Equal(t, "$\xff", lv.Tkn.Value)
} }
// TestStringVarNameByteChars lexes a double-quoted string holding two
// variables whose names are the single bytes 0x80 and 0xff, and checks the
// value of each token in the order the lexer produces them.
func TestStringVarNameByteChars(t *testing.T) {
	src := "<?php \"$\x80 $\xff\""

	lexer := NewLexer([]byte(src))
	lv := &lval{}

	// Expected token values, in scan order.
	wantValues := []string{
		"\"",
		"$\x80",
		" ",
		"$\xff",
		"\"",
	}

	for _, want := range wantValues {
		lexer.Lex(lv)
		assert.Equal(t, want, lv.Tkn.Value)
	}
}
func TestIgnoreControllCharacters(t *testing.T) { func TestIgnoreControllCharacters(t *testing.T) {
src := "<?php \004 echo $b;" src := "<?php \004 echo $b;"