Merge pull request #94 from z7zmey/issue-93
Fix scanning constant strings
This commit is contained in:
		
						commit
						498e93d178
					
				
							
								
								
									
										15905
									
								
								scanner/scanner.go
									
									
									
									
									
								
							
							
						
						
									
										15905
									
								
								scanner/scanner.go
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -41,6 +41,16 @@ func (lex *Lexer) Lex(lval Lval) int { | |||||||
|         action heredoc_lbl_start {lblStart = lex.p} |         action heredoc_lbl_start {lblStart = lex.p} | ||||||
|         action heredoc_lbl_end   {lblEnd = lex.p} |         action heredoc_lbl_end   {lblEnd = lex.p} | ||||||
| 
 | 
 | ||||||
|  |         # Records the offset of a line break met inside a constant string. | ||||||
|  |         # LF always counts. CR counts only when the next byte is not LF, so a | ||||||
|  |         # CR LF pair is registered once, by its LF. | ||||||
|  |         action constant_string_new_line   { | ||||||
|  |             if lex.data[lex.p] == '\n' { | ||||||
|  |                 lex.NewLines.Append(lex.p) | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             // Peek at the following byte only when one exists: a CR that is the | ||||||
|  |             // last byte of lex.data would otherwise index past the end and panic. | ||||||
|  |             if lex.data[lex.p] == '\r' && lex.p+1 < len(lex.data) && lex.data[lex.p+1] != '\n' { | ||||||
|  |                 lex.NewLines.Append(lex.p) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) } |         action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) } | ||||||
|         action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine()  } |         action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine()  } | ||||||
|         action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() } |         action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() } | ||||||
| @ -71,37 +81,50 @@ func (lex *Lexer) Lex(lval Lval) int { | |||||||
|                 "'"         -> qoute |                 "'"         -> qoute | ||||||
|                 | "b"i? '"' -> double_qoute |                 | "b"i? '"' -> double_qoute | ||||||
|             ), |             ), | ||||||
|  | 
 | ||||||
|  |             # single qoute string | ||||||
|  | 
 | ||||||
|             qoute: ( |             qoute: ( | ||||||
|                 (any - [\\'\r\n])                -> qoute |                 (any - [\\'\r\n])                -> qoute | ||||||
|                 | "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> qoute |                 | "\r" @constant_string_new_line -> qoute | ||||||
|                 | "\n" @{lex.NewLines.Append(lex.p)} -> qoute |                 | "\n" @constant_string_new_line -> qoute | ||||||
|                 | "\\"                           -> qoute_any |                 | "\\"                           -> qoute_any | ||||||
|                 | "'"                            -> final |                 | "'"                            -> final | ||||||
|             ), |             ), | ||||||
|             qoute_any: ( |             qoute_any: ( | ||||||
|                 any_line -> qoute |                 (any - [\r\n])                   -> qoute | ||||||
|  |                 | "\r" @constant_string_new_line -> qoute | ||||||
|  |                 | "\n" @constant_string_new_line -> qoute | ||||||
|             ), |             ), | ||||||
|  | 
 | ||||||
|  |             # double qoute string | ||||||
|  | 
 | ||||||
|             double_qoute: ( |             double_qoute: ( | ||||||
|                 (any - [\\"${\r\n])                -> double_qoute |                 (any - [\\"${\r\n])                -> double_qoute | ||||||
|                 | "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> double_qoute |                 | "\r" @constant_string_new_line   -> double_qoute | ||||||
|                 | "\n" @{lex.NewLines.Append(lex.p)} -> double_qoute |                 | "\n" @constant_string_new_line   -> double_qoute | ||||||
|                 | "\\"                             -> double_qoute_any |                 | "\\"                             -> double_qoute_any | ||||||
|                 | '"'                              -> final |                 | '"'                              -> final | ||||||
|                 | '$'                              -> double_qoute_nonvarname |                 | '$'                              -> double_qoute_nonvarname | ||||||
|                 | '{'                              -> double_qoute_nondollar |                 | '{'                              -> double_qoute_nondollar | ||||||
|             ), |             ), | ||||||
|             double_qoute_any: ( |             double_qoute_any: ( | ||||||
|                 any_line -> double_qoute |                 (any - [\r\n])                     -> double_qoute | ||||||
|  |                 | "\r" @constant_string_new_line   -> double_qoute | ||||||
|  |                 | "\n" @constant_string_new_line   -> double_qoute | ||||||
|             ), |             ), | ||||||
|             double_qoute_nondollar: ( |             double_qoute_nondollar: ( | ||||||
|                 '"'    -> final |                 (any - [$"\r\n])                   -> double_qoute | ||||||
|                 | "\\"               -> double_qoute_any |                 | "\r" @constant_string_new_line   -> double_qoute | ||||||
|                 | [^$\\"] -> double_qoute |                 | "\n" @constant_string_new_line   -> double_qoute | ||||||
|  |                 | '"'                              -> final | ||||||
|             ), |             ), | ||||||
|             double_qoute_nonvarname: ( |             double_qoute_nonvarname: ( | ||||||
|                 '"'                      -> final |                 (any - [\\{"\r\n] - varname_first) -> double_qoute | ||||||
|  |                 | "\r" @constant_string_new_line   -> double_qoute | ||||||
|  |                 | "\n" @constant_string_new_line   -> double_qoute | ||||||
|                 | "\\"                             -> double_qoute_any |                 | "\\"                             -> double_qoute_any | ||||||
|                 | /[^"\\{a-zA-Z_\x7f-\xff]/ -> double_qoute |                 | '"'                              -> final | ||||||
|             ); |             ); | ||||||
| 
 | 
 | ||||||
|         main := |* |         main := |* | ||||||
|  | |||||||
| @ -191,11 +191,6 @@ func TestTokens(t *testing.T) { | |||||||
| 
 | 
 | ||||||
| 		-> ` + "\t\r\n" + ` ->prop | 		-> ` + "\t\r\n" + ` ->prop | ||||||
| 
 | 
 | ||||||
| 		'adsf\'adsf\'' |  | ||||||
| 
 |  | ||||||
| 		"test" |  | ||||||
| 		b"\$var $4 {a" |  | ||||||
| 
 |  | ||||||
| 		( array ) | 		( array ) | ||||||
| 		( bool ) | 		( bool ) | ||||||
| 		( boolean ) | 		( boolean ) | ||||||
| @ -379,10 +374,6 @@ func TestTokens(t *testing.T) { | |||||||
| 		T_OBJECT_OPERATOR.String(), | 		T_OBJECT_OPERATOR.String(), | ||||||
| 		T_STRING.String(), | 		T_STRING.String(), | ||||||
| 
 | 
 | ||||||
| 		T_CONSTANT_ENCAPSED_STRING.String(), |  | ||||||
| 		T_CONSTANT_ENCAPSED_STRING.String(), |  | ||||||
| 		T_CONSTANT_ENCAPSED_STRING.String(), |  | ||||||
| 
 |  | ||||||
| 		T_ARRAY_CAST.String(), | 		T_ARRAY_CAST.String(), | ||||||
| 		T_BOOL_CAST.String(), | 		T_BOOL_CAST.String(), | ||||||
| 		T_BOOL_CAST.String(), | 		T_BOOL_CAST.String(), | ||||||
| @ -414,6 +405,62 @@ func TestTokens(t *testing.T) { | |||||||
| 	assert.DeepEqual(t, expected, actual) | 	assert.DeepEqual(t, expected, actual) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | func TestConstantStrings(t *testing.T) { | ||||||
|  | 	src := `<? | ||||||
|  | 		'str' | ||||||
|  | 		'\'' | ||||||
|  | 		'\\' | ||||||
|  | 
 | ||||||
|  | 		b"str" | ||||||
|  | 		"\"" | ||||||
|  | 		"\\" | ||||||
|  | 
 | ||||||
|  | 		"\$var" | ||||||
|  | 		"$4" | ||||||
|  | 		"$" | ||||||
|  | 		"$\\" | ||||||
|  | 
 | ||||||
|  | 		"{" | ||||||
|  | 		"{a" | ||||||
|  | 		"\{$" | ||||||
|  | 	` | ||||||
|  | 
 | ||||||
|  | 	expected := []string{ | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 
 | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 
 | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 
 | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 		T_CONSTANT_ENCAPSED_STRING.String(), | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	lexer := NewLexer([]byte(src)) | ||||||
|  | 	lexer.WithFreeFloating = true | ||||||
|  | 	lv := &lval{} | ||||||
|  | 	actual := []string{} | ||||||
|  | 
 | ||||||
|  | 	for { | ||||||
|  | 		token := lexer.Lex(lv) | ||||||
|  | 		if token == 0 { | ||||||
|  | 			break | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		actual = append(actual, TokenID(token).String()) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	assert.DeepEqual(t, expected, actual) | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func TestSingleQuoteStringTokens(t *testing.T) { | func TestSingleQuoteStringTokens(t *testing.T) { | ||||||
| 	src := `<?php | 	src := `<?php | ||||||
| 		'str $var str' | 		'str $var str' | ||||||
| @ -1388,6 +1435,28 @@ func TestVarNameByteChars(t *testing.T) { | |||||||
| 	assert.Equal(t, "$\xff", lv.Tkn.Value) | 	assert.Equal(t, "$\xff", lv.Tkn.Value) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // TestStringVarNameByteChars lexes a double-quoted string holding the variables | ||||||
|  | // $\x80 and $\xff and checks the value of each of the first five tokens. | ||||||
|  | func TestStringVarNameByteChars(t *testing.T) { | ||||||
|  | 	src := "<?php \"$\x80 $\xff\"" | ||||||
|  | 
 | ||||||
|  | 	lexer := NewLexer([]byte(src)) | ||||||
|  | 	lv := &lval{} | ||||||
|  | 
 | ||||||
|  | 	// Token values in the order the lexer is expected to produce them. | ||||||
|  | 	wantValues := []string{"\"", "$\x80", " ", "$\xff", "\""} | ||||||
|  | 	for _, want := range wantValues { | ||||||
|  | 		lexer.Lex(lv) | ||||||
|  | 		assert.Equal(t, want, lv.Tkn.Value) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func TestIgnoreControllCharacters(t *testing.T) { | func TestIgnoreControllCharacters(t *testing.T) { | ||||||
| 	src := "<?php \004 echo $b;" | 	src := "<?php \004 echo $b;" | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user