#67: skip unexpected character in input

This commit is contained in:
z7zmey 2018-11-05 16:56:27 +02:00
parent 4133a65afe
commit 69e3111221
12 changed files with 2633 additions and 2433 deletions

View File

@ -4,7 +4,6 @@ import (
"fmt" "fmt"
"github.com/z7zmey/php-parser/position" "github.com/z7zmey/php-parser/position"
"github.com/z7zmey/php-parser/scanner"
) )
// Error parsing error // Error parsing error
@ -14,13 +13,18 @@ type Error struct {
} }
// NewError creates and returns new Error // NewError creates and returns new Error
func NewError(msg string, t *scanner.Token) *Error { func NewError(msg string, p *position.Position) *Error {
return &Error{ return &Error{
Msg: msg, Msg: msg,
Pos: t.Position, Pos: p,
} }
} }
func (e *Error) String() string { func (e *Error) String() string {
return fmt.Sprintf("%s at line %d", e.Msg, e.Pos.StartLine) atLine := ""
if e.Pos != nil {
atLine = fmt.Sprintf(" at line %d", e.Pos.StartLine)
}
return fmt.Sprintf("%s%s", e.Msg, atLine)
} }

View File

@ -7,7 +7,6 @@ import (
"github.com/z7zmey/php-parser/position" "github.com/z7zmey/php-parser/position"
"github.com/z7zmey/php-parser/errors" "github.com/z7zmey/php-parser/errors"
"github.com/z7zmey/php-parser/scanner"
"github.com/kylelemons/godebug/pretty" "github.com/kylelemons/godebug/pretty"
) )
@ -27,12 +26,8 @@ func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
func TestConstructor(t *testing.T) { func TestConstructor(t *testing.T) {
pos := position.NewPosition(1, 2, 3, 4) pos := position.NewPosition(1, 2, 3, 4)
token := &scanner.Token{
Value: `test`,
Position: pos,
}
actual := errors.NewError("message", token) actual := errors.NewError("message", pos)
expected := &errors.Error{ expected := &errors.Error{
Msg: "message", Msg: "message",
@ -44,12 +39,8 @@ func TestConstructor(t *testing.T) {
func TestPrint(t *testing.T) { func TestPrint(t *testing.T) {
pos := position.NewPosition(1, 2, 3, 4) pos := position.NewPosition(1, 2, 3, 4)
token := &scanner.Token{
Value: `test`,
Position: pos,
}
Error := errors.NewError("message", token) Error := errors.NewError("message", pos)
actual := Error.String() actual := Error.String()
@ -57,3 +48,13 @@ func TestPrint(t *testing.T) {
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
// TestPrintWithotPos verifies that String() returns just the message, with no
// " at line N" suffix, when the error was created with a nil position.
func TestPrintWithotPos(t *testing.T) {
	noPosErr := errors.NewError("message", nil)

	assertEqual(t, "message", noPosErr.String())
}

View File

@ -20,7 +20,6 @@ type Parser struct {
path string path string
currentToken *scanner.Token currentToken *scanner.Token
positionBuilder *parser.PositionBuilder positionBuilder *parser.PositionBuilder
errors []*errors.Error
rootNode node.Node rootNode node.Node
comments parser.Comments comments parser.Comments
positions parser.Positions positions parser.Positions
@ -38,7 +37,6 @@ func NewParser(src io.Reader, path string) *Parser {
nil, nil,
nil, nil,
nil, nil,
nil,
} }
} }
@ -50,13 +48,13 @@ func (l *Parser) Lex(lval *yySymType) int {
} }
func (l *Parser) Error(msg string) { func (l *Parser) Error(msg string) {
l.errors = append(l.errors, errors.NewError(msg, l.currentToken)) l.Lexer.Errors = append(l.Lexer.Errors, errors.NewError(msg, l.currentToken.Position))
} }
// Parse the php7 Parser entrypoint // Parse the php7 Parser entrypoint
func (l *Parser) Parse() int { func (l *Parser) Parse() int {
// init // init
l.errors = nil l.Lexer.Errors = nil
l.rootNode = nil l.rootNode = nil
l.comments = parser.Comments{} l.comments = parser.Comments{}
l.positions = parser.Positions{} l.positions = parser.Positions{}
@ -92,7 +90,7 @@ func (l *Parser) GetRootNode() node.Node {
// GetErrors returns errors list // GetErrors returns errors list
func (l *Parser) GetErrors() []*errors.Error { func (l *Parser) GetErrors() []*errors.Error {
return l.errors return l.Lexer.Errors
} }
// GetComments returns comments list // GetComments returns comments list

View File

@ -1,3 +1,5 @@
// Code generated by goyacc -o php5/php5.go php5/php5.y. DO NOT EDIT.
//line php5/php5.y:2 //line php5/php5.y:2
package php5 package php5

View File

@ -6,6 +6,7 @@ import (
"testing" "testing"
"github.com/kylelemons/godebug/pretty" "github.com/kylelemons/godebug/pretty"
"github.com/z7zmey/php-parser/errors"
"github.com/z7zmey/php-parser/node/expr" "github.com/z7zmey/php-parser/node/expr"
"github.com/z7zmey/php-parser/node/expr/assign" "github.com/z7zmey/php-parser/node/expr/assign"
"github.com/z7zmey/php-parser/node/expr/binary" "github.com/z7zmey/php-parser/node/expr/binary"
@ -13,6 +14,7 @@ import (
"github.com/z7zmey/php-parser/node/name" "github.com/z7zmey/php-parser/node/name"
"github.com/z7zmey/php-parser/node/scalar" "github.com/z7zmey/php-parser/node/scalar"
"github.com/z7zmey/php-parser/php5" "github.com/z7zmey/php-parser/php5"
"github.com/z7zmey/php-parser/position"
"github.com/z7zmey/php-parser/node" "github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/stmt" "github.com/z7zmey/php-parser/node/stmt"
@ -3750,3 +3752,23 @@ CAD;
actual := php5parser.GetRootNode() actual := php5parser.GetRootNode()
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
// TestPhp5ControlCharsErrors checks that control characters in the source are
// reported through GetErrors as "Unexpected character in input" warnings: one
// in plain PHP code (\004) and one inside a string variable index (\005).
func TestPhp5ControlCharsErrors(t *testing.T) {
	src := "<?php \004 echo $b; \"$a[\005test]\";"

	// Positions are built with NewPosition instead of unkeyed composite
	// literals so the test does not depend on the field order of the imported
	// struct. Arguments: start line, end line, start pos, end pos.
	expected := []*errors.Error{
		{
			Msg: "WARNING: Unexpected character in input: '\004' (ASCII=4)",
			Pos: position.NewPosition(1, 1, 7, 7),
		},
		{
			Msg: "WARNING: Unexpected character in input: '\005' (ASCII=5)",
			Pos: position.NewPosition(1, 1, 22, 22),
		},
	}

	php5parser := php5.NewParser(bytes.NewBufferString(src), "test.php")
	php5parser.Parse()

	actual := php5parser.GetErrors()
	assertEqual(t, expected, actual)
}

View File

@ -19,7 +19,6 @@ type Parser struct {
path string path string
currentToken *scanner.Token currentToken *scanner.Token
positionBuilder *parser.PositionBuilder positionBuilder *parser.PositionBuilder
errors []*errors.Error
rootNode node.Node rootNode node.Node
comments parser.Comments comments parser.Comments
positions parser.Positions positions parser.Positions
@ -37,7 +36,6 @@ func NewParser(src io.Reader, path string) *Parser {
nil, nil,
nil, nil,
nil, nil,
nil,
} }
} }
@ -49,13 +47,13 @@ func (l *Parser) Lex(lval *yySymType) int {
} }
func (l *Parser) Error(msg string) { func (l *Parser) Error(msg string) {
l.errors = append(l.errors, errors.NewError(msg, l.currentToken)) l.Lexer.Errors = append(l.Lexer.Errors, errors.NewError(msg, l.currentToken.Position))
} }
// Parse the php7 Parser entrypoint // Parse the php7 Parser entrypoint
func (l *Parser) Parse() int { func (l *Parser) Parse() int {
// init // init
l.errors = nil l.Lexer.Errors = nil
l.rootNode = nil l.rootNode = nil
l.comments = parser.Comments{} l.comments = parser.Comments{}
l.positions = parser.Positions{} l.positions = parser.Positions{}
@ -81,7 +79,7 @@ func (l *Parser) GetRootNode() node.Node {
// GetErrors returns errors list // GetErrors returns errors list
func (l *Parser) GetErrors() []*errors.Error { func (l *Parser) GetErrors() []*errors.Error {
return l.errors return l.Lexer.Errors
} }
// GetComments returns comments list // GetComments returns comments list

View File

@ -1,3 +1,5 @@
// Code generated by goyacc -o php7/php7.go php7/php7.y. DO NOT EDIT.
//line php7/php7.y:2 //line php7/php7.y:2
package php7 package php7

View File

@ -6,12 +6,14 @@ import (
"testing" "testing"
"github.com/kylelemons/godebug/pretty" "github.com/kylelemons/godebug/pretty"
"github.com/z7zmey/php-parser/errors"
"github.com/z7zmey/php-parser/node/expr" "github.com/z7zmey/php-parser/node/expr"
"github.com/z7zmey/php-parser/node/expr/assign" "github.com/z7zmey/php-parser/node/expr/assign"
"github.com/z7zmey/php-parser/node/expr/binary" "github.com/z7zmey/php-parser/node/expr/binary"
"github.com/z7zmey/php-parser/node/expr/cast" "github.com/z7zmey/php-parser/node/expr/cast"
"github.com/z7zmey/php-parser/node/name" "github.com/z7zmey/php-parser/node/name"
"github.com/z7zmey/php-parser/node/scalar" "github.com/z7zmey/php-parser/node/scalar"
"github.com/z7zmey/php-parser/position"
"github.com/z7zmey/php-parser/node" "github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/stmt" "github.com/z7zmey/php-parser/node/stmt"
@ -3336,3 +3338,23 @@ CAD;
actual := php7parser.GetRootNode() actual := php7parser.GetRootNode()
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
// TestPhp7ControlCharsErrors checks that control characters in the source are
// reported through GetErrors as "Unexpected character in input" warnings: one
// in plain PHP code (\004) and one inside a string variable index (\005).
func TestPhp7ControlCharsErrors(t *testing.T) {
	src := "<?php \004 echo $b; \"$a[\005test]\";"

	// Positions are built with NewPosition instead of unkeyed composite
	// literals so the test does not depend on the field order of the imported
	// struct. Arguments: start line, end line, start pos, end pos.
	expected := []*errors.Error{
		{
			Msg: "WARNING: Unexpected character in input: '\004' (ASCII=4)",
			Pos: position.NewPosition(1, 1, 7, 7),
		},
		{
			Msg: "WARNING: Unexpected character in input: '\005' (ASCII=5)",
			Pos: position.NewPosition(1, 1, 22, 22),
		},
	}

	php7parser := php7.NewParser(bytes.NewBufferString(src), "test.php")
	php7parser.Parse()

	actual := php7parser.GetErrors()
	assertEqual(t, expected, actual)
}

View File

@ -9,6 +9,7 @@ import (
"sync" "sync"
"unicode" "unicode"
"github.com/z7zmey/php-parser/errors"
"github.com/z7zmey/php-parser/position" "github.com/z7zmey/php-parser/position"
"github.com/cznic/golex/lex" "github.com/cznic/golex/lex"
@ -446,6 +447,7 @@ type Lexer struct {
tokenBytesBuf *bytes.Buffer tokenBytesBuf *bytes.Buffer
TokenPool sync.Pool TokenPool sync.Pool
PositionPool sync.Pool PositionPool sync.Pool
Errors []*errors.Error
} }
// Rune2Class returns the rune integer id // Rune2Class returns the rune integer id
@ -492,6 +494,21 @@ func NewLexer(src io.Reader, fName string) *Lexer {
} }
} }
// Error appends an error with the given message to l.Errors.
//
// The error position spans the current token: its start/end lines and
// start/end positions are taken from the first and last characters returned
// by l.Token(). If the current token has no characters, the error is recorded
// with a nil position instead of panicking on an out-of-range index.
func (l *Lexer) Error(msg string) {
	chars := l.Token()
	if len(chars) == 0 {
		// Nothing to derive a position from; keep the message anyway.
		l.Errors = append(l.Errors, errors.NewError(msg, nil))
		return
	}

	firstChar := chars[0]
	lastChar := chars[len(chars)-1]

	pos := position.NewPosition(
		l.File.Line(firstChar.Pos()),
		l.File.Line(lastChar.Pos()),
		int(firstChar.Pos()),
		int(lastChar.Pos()),
	)

	l.Errors = append(l.Errors, errors.NewError(msg, pos))
}
func (l *Lexer) ungetChars(n int) []lex.Char { func (l *Lexer) ungetChars(n int) []lex.Char {
l.Unget(l.Lookahead()) l.Unget(l.Lookahead())

File diff suppressed because it is too large Load Diff

View File

@ -54,6 +54,7 @@ EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
VAR_NAME [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]* VAR_NAME [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@] OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
NEW_LINE (\r|\n|\r\n) NEW_LINE (\r|\n|\r\n)
ANY_CHAR .
%% %%
c = l.Rule0() c = l.Rule0()
@ -648,13 +649,15 @@ NEW_LINE (\r|\n|\r\n)
<STRING_VAR_INDEX>\] l.popState(); l.popState();lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) <STRING_VAR_INDEX>\] l.popState(); l.popState();lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.Token(l.createToken(l.Token())); return T_ENCAPSED_AND_WHITESPACE <STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.Token(l.createToken(l.Token())); return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR_INDEX>{OPERATORS} lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) <STRING_VAR_INDEX>{OPERATORS} lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>. lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) <STRING_VAR_INDEX>{ANY_CHAR} l.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", l.TokenBytes(nil)[0], l.TokenBytes(nil)[0]));l.Abort();
<STRING_VAR_NAME>{VAR_NAME}[\[\}] l.popState();l.pushState(PHP);lval.Token(l.createToken(l.ungetChars(1))); return T_STRING_VARNAME <STRING_VAR_NAME>{VAR_NAME}[\[\}] l.popState();l.pushState(PHP);lval.Token(l.createToken(l.ungetChars(1))); return T_STRING_VARNAME
<STRING_VAR_NAME>. l.ungetChars(1);l.popState();l.pushState(PHP) <STRING_VAR_NAME>. l.ungetChars(1);l.popState();l.pushState(PHP)
<HALT_COMPILER>.|[ \t\n\r] // do nothing <HALT_COMPILER>.|[ \t\n\r] // do nothing
<PHP>{ANY_CHAR} l.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", l.TokenBytes(nil)[0], l.TokenBytes(nil)[0]));l.Abort();
%% %%
if c, ok := l.Abort(); ok { return int(c) } if c, ok := l.Abort(); ok { return int(c) }
goto yyAction goto yyAction

View File

@ -1077,3 +1077,52 @@ func TestEmptyInlineComment2(t *testing.T) {
assertEqual(t, expected, actual) assertEqual(t, expected, actual)
} }
// TestIgnoreControllCharacters feeds the lexer PHP code containing a \004
// control character and expects the first two tokens returned by Lex to be
// "echo" and "$b", i.e. the control character yields no token of its own.
func TestIgnoreControllCharacters(t *testing.T) {
	src := "<?php \004 echo $b;"

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lv := &lval{}

	// Token values expected from consecutive Lex calls, in order.
	for _, want := range []string{"echo", "$b"} {
		lexer.Lex(lv)
		assertEqual(t, want, lv.Tkn.Value)
	}
}
// TestIgnoreControllCharactersAtStringVarOffset feeds the lexer a
// double-quoted string whose variable index contains a \004 control character
// and expects the token sequence `"`, `$a`, `[`, `test`, `]`, i.e. the control
// character yields no token of its own.
func TestIgnoreControllCharactersAtStringVarOffset(t *testing.T) {
	src := "<?php \"$a[test\004]\";"

	lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
	lv := &lval{}

	// Token values expected from consecutive Lex calls, in order.
	wantValues := []string{"\"", "$a", "[", "test", "]"}
	for _, want := range wantValues {
		lexer.Lex(lv)
		assertEqual(t, want, lv.Tkn.Value)
	}
}