Merge pull request #94 from z7zmey/issue-93
Fix scanning constant strings
This commit is contained in:
		
						commit
						498e93d178
					
				
							
								
								
									
										15905
									
								
								scanner/scanner.go
									
									
									
									
									
								
							
							
						
						
									
										15905
									
								
								scanner/scanner.go
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -41,6 +41,16 @@ func (lex *Lexer) Lex(lval Lval) int { | ||||
|         # Stores the scanner position lex.p, as it is when the action fires, in lblStart. | ||||
|         action heredoc_lbl_start {lblStart = lex.p} | ||||
|         # Stores the scanner position lex.p, as it is when the action fires, in lblEnd. | ||||
|         action heredoc_lbl_end   {lblEnd = lex.p} | ||||
| 
 | ||||
|         # Registers the current position lex.p as a line break while a constant | ||||
|         # string is being scanned. "\n" always counts. "\r" counts only when the | ||||
|         # next byte is not "\n", so a "\r\n" pair is registered once, at its "\n". | ||||
|         action constant_string_new_line   { | ||||
|             if lex.data[lex.p] == '\n' { | ||||
|                 lex.NewLines.Append(lex.p) | ||||
|             } | ||||
| 
 | ||||
|             // Guard the look-ahead: a "\r" that is the last byte of the input has | ||||
|             // no successor, and reading lex.data[lex.p+1] there would panic. | ||||
|             if lex.data[lex.p] == '\r' && lex.p+1 < len(lex.data) && lex.data[lex.p+1] != '\n' { | ||||
|                 lex.NewLines.Append(lex.p) | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         # Each of the three actions below is a single boolean expression with no | ||||
|         # statements; presumably they are attached as conditions (confirm at the use sites). | ||||
|         # is_not_heredoc_end: result of lex.isNotHeredocEnd for the current position lex.p. | ||||
|         action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) } | ||||
|         # is_not_comment_end: holds only when both lex.isNotPhpCloseToken() and lex.isNotNewLine() hold. | ||||
|         action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine()  } | ||||
|         # is_not_heredoc_end_or_var: holds only when both lex.isNotHeredocEnd(lex.p) and lex.isNotStringVar() hold. | ||||
|         action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() } | ||||
| @ -71,37 +81,50 @@ func (lex *Lexer) Lex(lval Lval) int { | ||||
|                 "'"         -> qoute | ||||
|                 | "b"i? '"' -> double_qoute | ||||
|             ), | ||||
| 
 | ||||
|             # single quote string | ||||
| 
 | ||||
|             qoute: ( | ||||
|                 (any - [\\'\r\n])                -> qoute | ||||
|                 | "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> qoute | ||||
|                 | "\n" @{lex.NewLines.Append(lex.p)} -> qoute | ||||
|                 | "\r" @constant_string_new_line -> qoute | ||||
|                 | "\n" @constant_string_new_line -> qoute | ||||
|                 | "\\"                           -> qoute_any | ||||
|                 | "'"                            -> final | ||||
|             ), | ||||
|             qoute_any: ( | ||||
|                 any_line -> qoute | ||||
|                 (any - [\r\n])                   -> qoute | ||||
|                 | "\r" @constant_string_new_line -> qoute | ||||
|                 | "\n" @constant_string_new_line -> qoute | ||||
|             ), | ||||
| 
 | ||||
|             # double quote string | ||||
| 
 | ||||
|             double_qoute: ( | ||||
|                 (any - [\\"${\r\n])                -> double_qoute | ||||
|                 | "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> double_qoute | ||||
|                 | "\n" @{lex.NewLines.Append(lex.p)} -> double_qoute | ||||
|                 | "\r" @constant_string_new_line   -> double_qoute | ||||
|                 | "\n" @constant_string_new_line   -> double_qoute | ||||
|                 | "\\"                             -> double_qoute_any | ||||
|                 | '"'                              -> final | ||||
|                 | '$'                              -> double_qoute_nonvarname | ||||
|                 | '{'                              -> double_qoute_nondollar | ||||
|             ), | ||||
|             double_qoute_any: ( | ||||
|                 any_line -> double_qoute | ||||
|                 (any - [\r\n])                     -> double_qoute | ||||
|                 | "\r" @constant_string_new_line   -> double_qoute | ||||
|                 | "\n" @constant_string_new_line   -> double_qoute | ||||
|             ), | ||||
|             double_qoute_nondollar: ( | ||||
|                 '"'    -> final | ||||
|                 | "\\"               -> double_qoute_any | ||||
|                 | [^$\\"] -> double_qoute | ||||
|                 (any - [$"\r\n])                   -> double_qoute | ||||
|                 | "\r" @constant_string_new_line   -> double_qoute | ||||
|                 | "\n" @constant_string_new_line   -> double_qoute | ||||
|                 | '"'                              -> final | ||||
|             ), | ||||
|             double_qoute_nonvarname: ( | ||||
|                 '"'                      -> final | ||||
|                 (any - [\\{"\r\n] - varname_first) -> double_qoute | ||||
|                 | "\r" @constant_string_new_line   -> double_qoute | ||||
|                 | "\n" @constant_string_new_line   -> double_qoute | ||||
|                 | "\\"                             -> double_qoute_any | ||||
|                 | /[^"\\{a-zA-Z_\x7f-\xff]/ -> double_qoute | ||||
|                 | '"'                              -> final | ||||
|             ); | ||||
| 
 | ||||
|         main := |* | ||||
|  | ||||
| @ -191,11 +191,6 @@ func TestTokens(t *testing.T) { | ||||
| 
 | ||||
| 		-> ` + "\t\r\n" + ` ->prop | ||||
| 
 | ||||
| 		'adsf\'adsf\'' | ||||
| 
 | ||||
| 		"test" | ||||
| 		b"\$var $4 {a" | ||||
| 
 | ||||
| 		( array ) | ||||
| 		( bool ) | ||||
| 		( boolean ) | ||||
| @ -379,10 +374,6 @@ func TestTokens(t *testing.T) { | ||||
| 		T_OBJECT_OPERATOR.String(), | ||||
| 		T_STRING.String(), | ||||
| 
 | ||||
| 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||
| 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||
| 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||
| 
 | ||||
| 		T_ARRAY_CAST.String(), | ||||
| 		T_BOOL_CAST.String(), | ||||
| 		T_BOOL_CAST.String(), | ||||
| @ -414,6 +405,62 @@ func TestTokens(t *testing.T) { | ||||
| 	assert.DeepEqual(t, expected, actual) | ||||
| } | ||||
| 
 | ||||
| // TestConstantStrings lexes single- and double-quoted literals that contain | ||||
| // escapes and lone "$" or "{" characters, and expects each of them to be | ||||
| // reported as T_CONSTANT_ENCAPSED_STRING. | ||||
| func TestConstantStrings(t *testing.T) { | ||||
| 	src := `<? | ||||
| 		'str' | ||||
| 		'\'' | ||||
| 		'\\' | ||||
| 
 | ||||
| 		b"str" | ||||
| 		"\"" | ||||
| 		"\\" | ||||
| 
 | ||||
| 		"\$var" | ||||
| 		"$4" | ||||
| 		"$" | ||||
| 		"$\\" | ||||
| 
 | ||||
| 		"{" | ||||
| 		"{a" | ||||
| 		"\{$" | ||||
| 	` | ||||
| 
 | ||||
| 	// One entry per literal in src, laid out in the same groups of 3, 3, 4 and 3. | ||||
| 	constStr := T_CONSTANT_ENCAPSED_STRING.String() | ||||
| 	want := []string{ | ||||
| 		constStr, constStr, constStr, | ||||
| 		constStr, constStr, constStr, | ||||
| 		constStr, constStr, constStr, constStr, | ||||
| 		constStr, constStr, constStr, | ||||
| 	} | ||||
| 
 | ||||
| 	lv := &lval{} | ||||
| 	lexer := NewLexer([]byte(src)) | ||||
| 	lexer.WithFreeFloating = true | ||||
| 
 | ||||
| 	// Collect token names until Lex hands back a zero token id. | ||||
| 	got := []string{} | ||||
| 	for token := lexer.Lex(lv); token != 0; token = lexer.Lex(lv) { | ||||
| 		got = append(got, TokenID(token).String()) | ||||
| 	} | ||||
| 
 | ||||
| 	assert.DeepEqual(t, want, got) | ||||
| } | ||||
| 
 | ||||
| func TestSingleQuoteStringTokens(t *testing.T) { | ||||
| 	src := `<?php | ||||
| 		'str $var str' | ||||
| @ -1388,6 +1435,28 @@ func TestVarNameByteChars(t *testing.T) { | ||||
| 	assert.Equal(t, "$\xff", lv.Tkn.Value) | ||||
| } | ||||
| 
 | ||||
| // TestStringVarNameByteChars lexes a double-quoted string holding the variables | ||||
| // "$\x80" and "$\xff" and checks the value of each token produced by successive | ||||
| // Lex calls: opening quote, "$\x80", a space, "$\xff", closing quote. | ||||
| func TestStringVarNameByteChars(t *testing.T) { | ||||
| 	src := "<?php \"$\x80 $\xff\"" | ||||
| 
 | ||||
| 	lexer := NewLexer([]byte(src)) | ||||
| 	lv := &lval{} | ||||
| 
 | ||||
| 	// Token values expected from consecutive Lex calls, in order. | ||||
| 	values := []string{"\"", "$\x80", " ", "$\xff", "\""} | ||||
| 	for _, want := range values { | ||||
| 		lexer.Lex(lv) | ||||
| 		assert.Equal(t, want, lv.Tkn.Value) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestIgnoreControllCharacters(t *testing.T) { | ||||
| 	src := "<?php \004 echo $b;" | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user