[#93] fix scanning constant strings

This commit is contained in:
z7zmey 2019-06-07 09:33:35 +03:00
parent fb2adacdd2
commit e4a208e2a9
3 changed files with 8188 additions and 8273 deletions

File diff suppressed because it is too large Load Diff

View File

@ -41,6 +41,16 @@ func (lex *Lexer) Lex(lval Lval) int {
action heredoc_lbl_start {lblStart = lex.p}
action heredoc_lbl_end {lblEnd = lex.p}
# Records the offset of a line break seen while scanning a constant string.
# "\n" is always recorded. "\r" is recorded only when the next byte is not
# "\n", so a "\r\n" pair is appended once (when the "\n" is reached).
action constant_string_new_line {
    if lex.data[lex.p] == '\n' {
        lex.NewLines.Append(lex.p)
    }

    // Bounds-check the look-ahead: when "\r" is the last byte of the input,
    // lex.data[lex.p+1] would index past the end of the buffer and panic.
    if lex.data[lex.p] == '\r' && lex.p+1 < len(lex.data) && lex.data[lex.p+1] != '\n' {
        lex.NewLines.Append(lex.p)
    }
}
action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) }
action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine() }
action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() }
@ -68,40 +78,53 @@ func (lex *Lexer) Lex(lval Lval) int {
constant_string =
start: (
"'" -> qoute
"'" -> qoute
| "b"i? '"' -> double_qoute
),
# single quote string
qoute: (
(any - [\\'\r\n]) -> qoute
| "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> qoute
| "\n" @{lex.NewLines.Append(lex.p)} -> qoute
| "\\" -> qoute_any
| "'" -> final
(any - [\\'\r\n]) -> qoute
| "\r" @constant_string_new_line -> qoute
| "\n" @constant_string_new_line -> qoute
| "\\" -> qoute_any
| "'" -> final
),
qoute_any: (
any_line -> qoute
(any - [\r\n]) -> qoute
| "\r" @constant_string_new_line -> qoute
| "\n" @constant_string_new_line -> qoute
),
# double quote string
double_qoute: (
(any - [\\"${\r\n]) -> double_qoute
| "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> double_qoute
| "\n" @{lex.NewLines.Append(lex.p)} -> double_qoute
| "\\" -> double_qoute_any
| '"' -> final
| '$' -> double_qoute_nonvarname
| '{' -> double_qoute_nondollar
(any - [\\"${\r\n]) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
| "\\" -> double_qoute_any
| '"' -> final
| '$' -> double_qoute_nonvarname
| '{' -> double_qoute_nondollar
),
double_qoute_any: (
any_line -> double_qoute
(any - [\r\n]) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
),
double_qoute_nondollar: (
'"' -> final
| "\\" -> double_qoute_any
| [^$\\"] -> double_qoute
(any - [$"\r\n]) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
| '"' -> final
),
double_qoute_nonvarname: (
'"' -> final
| "\\" -> double_qoute_any
| /[^"\\{a-zA-Z_\x7f-\xff]/ -> double_qoute
(any - [\\{"\r\n] - varname_first) -> double_qoute
| "\r" @constant_string_new_line -> double_qoute
| "\n" @constant_string_new_line -> double_qoute
| "\\" -> double_qoute_any
| '"' -> final
);
main := |*

View File

@ -191,11 +191,6 @@ func TestTokens(t *testing.T) {
-> ` + "\t\r\n" + ` ->prop
'adsf\'adsf\''
"test"
b"\$var $4 {a"
( array )
( bool )
( boolean )
@ -379,10 +374,6 @@ func TestTokens(t *testing.T) {
T_OBJECT_OPERATOR.String(),
T_STRING.String(),
T_CONSTANT_ENCAPSED_STRING.String(),
T_CONSTANT_ENCAPSED_STRING.String(),
T_CONSTANT_ENCAPSED_STRING.String(),
T_ARRAY_CAST.String(),
T_BOOL_CAST.String(),
T_BOOL_CAST.String(),
@ -414,6 +405,62 @@ func TestTokens(t *testing.T) {
assert.DeepEqual(t, expected, actual)
}
// TestConstantStrings lexes a fixture of thirteen single- and double-quoted
// literals (escaped quotes, escaped backslashes, a lone "$", a lone "{", ...)
// and expects the lexer to emit exactly thirteen
// T_CONSTANT_ENCAPSED_STRING tokens and nothing else.
func TestConstantStrings(t *testing.T) {
	src := `<?
'str'
'\''
'\\'
b"str"
"\""
"\\"
"\$var"
"$4"
"$"
"$\\"
"{"
"{a"
"\{$"
`

	// One expected token per literal line in the fixture above.
	const literalCount = 13
	expected := make([]string, 0, literalCount)
	for i := 0; i < literalCount; i++ {
		expected = append(expected, T_CONSTANT_ENCAPSED_STRING.String())
	}

	lexer := NewLexer([]byte(src))
	lexer.WithFreeFloating = true
	lv := &lval{}

	// Drain the lexer; a zero token id ends the stream.
	actual := []string{}
	for id := lexer.Lex(lv); id != 0; id = lexer.Lex(lv) {
		actual = append(actual, TokenID(id).String())
	}

	assert.DeepEqual(t, expected, actual)
}
func TestSingleQuoteStringTokens(t *testing.T) {
src := `<?php
'str $var str'
@ -1388,6 +1435,28 @@ func TestVarNameByteChars(t *testing.T) {
assert.Equal(t, "$\xff", lv.Tkn.Value)
}
// TestStringVarNameByteChars lexes a double-quoted string containing
// "$\x80" and "$\xff" and checks the value of each of the first five tokens:
// the opening quote, the two variables with the space between them, and the
// closing quote.
func TestStringVarNameByteChars(t *testing.T) {
	src := "<?php \"$\x80 $\xff\""

	lexer := NewLexer([]byte(src))
	lv := &lval{}

	// Expected token values, in the order the lexer should produce them.
	wantValues := []string{
		"\"",
		"$\x80",
		" ",
		"$\xff",
		"\"",
	}

	for _, want := range wantValues {
		lexer.Lex(lv)
		assert.Equal(t, want, lv.Tkn.Value)
	}
}
func TestIgnoreControllCharacters(t *testing.T) {
src := "<?php \004 echo $b;"