[#93] fix scanning constant strings
This commit is contained in:
parent
fb2adacdd2
commit
e4a208e2a9
16309
scanner/scanner.go
16309
scanner/scanner.go
File diff suppressed because it is too large
Load Diff
@ -41,6 +41,16 @@ func (lex *Lexer) Lex(lval Lval) int {
|
|||||||
action heredoc_lbl_start {lblStart = lex.p}
|
action heredoc_lbl_start {lblStart = lex.p}
|
||||||
action heredoc_lbl_end {lblEnd = lex.p}
|
action heredoc_lbl_end {lblEnd = lex.p}
|
||||||
|
|
||||||
|
# Records the offset of a line break met while scanning a constant string.
# Attached to the CR and LF transitions of the constant_string machine.
action constant_string_new_line {
    // LF always ends a line; for a CR LF pair this is the byte that gets recorded.
    if lex.data[lex.p] == '\n' {
        lex.NewLines.Append(lex.p)
    }

    // CR ends a line only when it is not followed by LF. The length check
    // keeps the look-ahead inside the buffer when CR is the last input byte,
    // where an unguarded lex.data[lex.p+1] would index out of range.
    if lex.data[lex.p] == '\r' && lex.p+1 < len(lex.data) && lex.data[lex.p+1] != '\n' {
        lex.NewLines.Append(lex.p)
    }
}
|
||||||
|
|
||||||
action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) }
|
action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) }
|
||||||
action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine() }
|
action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine() }
|
||||||
action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() }
|
action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() }
|
||||||
@ -68,40 +78,53 @@ func (lex *Lexer) Lex(lval Lval) int {
|
|||||||
|
|
||||||
constant_string =
|
constant_string =
|
||||||
start: (
|
start: (
|
||||||
"'" -> qoute
|
"'" -> qoute
|
||||||
| "b"i? '"' -> double_qoute
|
| "b"i? '"' -> double_qoute
|
||||||
),
|
),
|
||||||
|
|
||||||
|
# single quote string
|
||||||
|
|
||||||
qoute: (
|
qoute: (
|
||||||
(any - [\\'\r\n]) -> qoute
|
(any - [\\'\r\n]) -> qoute
|
||||||
| "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> qoute
|
| "\r" @constant_string_new_line -> qoute
|
||||||
| "\n" @{lex.NewLines.Append(lex.p)} -> qoute
|
| "\n" @constant_string_new_line -> qoute
|
||||||
| "\\" -> qoute_any
|
| "\\" -> qoute_any
|
||||||
| "'" -> final
|
| "'" -> final
|
||||||
),
|
),
|
||||||
qoute_any: (
|
qoute_any: (
|
||||||
any_line -> qoute
|
(any - [\r\n]) -> qoute
|
||||||
|
| "\r" @constant_string_new_line -> qoute
|
||||||
|
| "\n" @constant_string_new_line -> qoute
|
||||||
),
|
),
|
||||||
|
|
||||||
|
# double quote string
|
||||||
|
|
||||||
double_qoute: (
|
double_qoute: (
|
||||||
(any - [\\"${\r\n]) -> double_qoute
|
(any - [\\"${\r\n]) -> double_qoute
|
||||||
| "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> double_qoute
|
| "\r" @constant_string_new_line -> double_qoute
|
||||||
| "\n" @{lex.NewLines.Append(lex.p)} -> double_qoute
|
| "\n" @constant_string_new_line -> double_qoute
|
||||||
| "\\" -> double_qoute_any
|
| "\\" -> double_qoute_any
|
||||||
| '"' -> final
|
| '"' -> final
|
||||||
| '$' -> double_qoute_nonvarname
|
| '$' -> double_qoute_nonvarname
|
||||||
| '{' -> double_qoute_nondollar
|
| '{' -> double_qoute_nondollar
|
||||||
),
|
),
|
||||||
double_qoute_any: (
|
double_qoute_any: (
|
||||||
any_line -> double_qoute
|
(any - [\r\n]) -> double_qoute
|
||||||
|
| "\r" @constant_string_new_line -> double_qoute
|
||||||
|
| "\n" @constant_string_new_line -> double_qoute
|
||||||
),
|
),
|
||||||
double_qoute_nondollar: (
|
double_qoute_nondollar: (
|
||||||
'"' -> final
|
(any - [$"\r\n]) -> double_qoute
|
||||||
| "\\" -> double_qoute_any
|
| "\r" @constant_string_new_line -> double_qoute
|
||||||
| [^$\\"] -> double_qoute
|
| "\n" @constant_string_new_line -> double_qoute
|
||||||
|
| '"' -> final
|
||||||
),
|
),
|
||||||
double_qoute_nonvarname: (
|
double_qoute_nonvarname: (
|
||||||
'"' -> final
|
(any - [\\{"\r\n] - varname_first) -> double_qoute
|
||||||
| "\\" -> double_qoute_any
|
| "\r" @constant_string_new_line -> double_qoute
|
||||||
| /[^"\\{a-zA-Z_\x7f-\xff]/ -> double_qoute
|
| "\n" @constant_string_new_line -> double_qoute
|
||||||
|
| "\\" -> double_qoute_any
|
||||||
|
| '"' -> final
|
||||||
);
|
);
|
||||||
|
|
||||||
main := |*
|
main := |*
|
||||||
|
@ -191,11 +191,6 @@ func TestTokens(t *testing.T) {
|
|||||||
|
|
||||||
-> ` + "\t\r\n" + ` ->prop
|
-> ` + "\t\r\n" + ` ->prop
|
||||||
|
|
||||||
'adsf\'adsf\''
|
|
||||||
|
|
||||||
"test"
|
|
||||||
b"\$var $4 {a"
|
|
||||||
|
|
||||||
( array )
|
( array )
|
||||||
( bool )
|
( bool )
|
||||||
( boolean )
|
( boolean )
|
||||||
@ -379,10 +374,6 @@ func TestTokens(t *testing.T) {
|
|||||||
T_OBJECT_OPERATOR.String(),
|
T_OBJECT_OPERATOR.String(),
|
||||||
T_STRING.String(),
|
T_STRING.String(),
|
||||||
|
|
||||||
T_CONSTANT_ENCAPSED_STRING.String(),
|
|
||||||
T_CONSTANT_ENCAPSED_STRING.String(),
|
|
||||||
T_CONSTANT_ENCAPSED_STRING.String(),
|
|
||||||
|
|
||||||
T_ARRAY_CAST.String(),
|
T_ARRAY_CAST.String(),
|
||||||
T_BOOL_CAST.String(),
|
T_BOOL_CAST.String(),
|
||||||
T_BOOL_CAST.String(),
|
T_BOOL_CAST.String(),
|
||||||
@ -414,6 +405,62 @@ func TestTokens(t *testing.T) {
|
|||||||
assert.DeepEqual(t, expected, actual)
|
assert.DeepEqual(t, expected, actual)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConstantStrings(t *testing.T) {
|
||||||
|
src := `<?
|
||||||
|
'str'
|
||||||
|
'\''
|
||||||
|
'\\'
|
||||||
|
|
||||||
|
b"str"
|
||||||
|
"\""
|
||||||
|
"\\"
|
||||||
|
|
||||||
|
"\$var"
|
||||||
|
"$4"
|
||||||
|
"$"
|
||||||
|
"$\\"
|
||||||
|
|
||||||
|
"{"
|
||||||
|
"{a"
|
||||||
|
"\{$"
|
||||||
|
`
|
||||||
|
|
||||||
|
expected := []string{
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
T_CONSTANT_ENCAPSED_STRING.String(),
|
||||||
|
}
|
||||||
|
|
||||||
|
lexer := NewLexer([]byte(src))
|
||||||
|
lexer.WithFreeFloating = true
|
||||||
|
lv := &lval{}
|
||||||
|
actual := []string{}
|
||||||
|
|
||||||
|
for {
|
||||||
|
token := lexer.Lex(lv)
|
||||||
|
if token == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
actual = append(actual, TokenID(token).String())
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.DeepEqual(t, expected, actual)
|
||||||
|
}
|
||||||
|
|
||||||
func TestSingleQuoteStringTokens(t *testing.T) {
|
func TestSingleQuoteStringTokens(t *testing.T) {
|
||||||
src := `<?php
|
src := `<?php
|
||||||
'str $var str'
|
'str $var str'
|
||||||
@ -1388,6 +1435,28 @@ func TestVarNameByteChars(t *testing.T) {
|
|||||||
assert.Equal(t, "$\xff", lv.Tkn.Value)
|
assert.Equal(t, "$\xff", lv.Tkn.Value)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestStringVarNameByteChars(t *testing.T) {
|
||||||
|
src := "<?php \"$\x80 $\xff\""
|
||||||
|
|
||||||
|
lexer := NewLexer([]byte(src))
|
||||||
|
lv := &lval{}
|
||||||
|
|
||||||
|
lexer.Lex(lv)
|
||||||
|
assert.Equal(t, "\"", lv.Tkn.Value)
|
||||||
|
|
||||||
|
lexer.Lex(lv)
|
||||||
|
assert.Equal(t, "$\x80", lv.Tkn.Value)
|
||||||
|
|
||||||
|
lexer.Lex(lv)
|
||||||
|
assert.Equal(t, " ", lv.Tkn.Value)
|
||||||
|
|
||||||
|
lexer.Lex(lv)
|
||||||
|
assert.Equal(t, "$\xff", lv.Tkn.Value)
|
||||||
|
|
||||||
|
lexer.Lex(lv)
|
||||||
|
assert.Equal(t, "\"", lv.Tkn.Value)
|
||||||
|
}
|
||||||
|
|
||||||
func TestIgnoreControllCharacters(t *testing.T) {
|
func TestIgnoreControllCharacters(t *testing.T) {
|
||||||
src := "<?php \004 echo $b;"
|
src := "<?php \004 echo $b;"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user