#67: skip unexpected character in input

This commit is contained in:
z7zmey
2018-11-05 16:56:27 +02:00
parent 4133a65afe
commit 69e3111221
12 changed files with 2633 additions and 2433 deletions

View File

@@ -9,6 +9,7 @@ import (
"sync"
"unicode"
"github.com/z7zmey/php-parser/errors"
"github.com/z7zmey/php-parser/position"
"github.com/cznic/golex/lex"
@@ -446,6 +447,7 @@ type Lexer struct {
tokenBytesBuf *bytes.Buffer
TokenPool sync.Pool
PositionPool sync.Pool
Errors []*errors.Error
}
// Rune2Class returns the rune integer id
@@ -492,6 +494,21 @@ func NewLexer(src io.Reader, fName string) *Lexer {
}
}
// Error appends a new error carrying msg to l.Errors. The error's position
// is built from the first and last characters of the token returned by
// l.Token(): their line numbers (looked up through l.File) and their raw
// Pos() values.
//
// NOTE(review): the token is indexed without a length check, so this
// presumably must only be called while a non-empty token is being scanned —
// confirm against callers.
func (l *Lexer) Error(msg string) {
	tok := l.Token()
	head, tail := tok[0], tok[len(tok)-1]
	startPos, endPos := head.Pos(), tail.Pos()

	span := position.NewPosition(
		l.File.Line(startPos),
		l.File.Line(endPos),
		int(startPos),
		int(endPos),
	)

	l.Errors = append(l.Errors, errors.NewError(msg, span))
}
func (l *Lexer) ungetChars(n int) []lex.Char {
l.Unget(l.Lookahead())

File diff suppressed because it is too large Load Diff

View File

@@ -54,6 +54,7 @@ EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
VAR_NAME [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
NEW_LINE (\r|\n|\r\n)
ANY_CHAR .
%%
c = l.Rule0()
@@ -648,13 +649,15 @@ NEW_LINE (\r|\n|\r\n)
<STRING_VAR_INDEX>\] l.popState(); l.popState();lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.Token(l.createToken(l.Token())); return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR_INDEX>{OPERATORS} lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>. lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>{ANY_CHAR} l.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", l.TokenBytes(nil)[0], l.TokenBytes(nil)[0]));l.Abort();
<STRING_VAR_NAME>{VAR_NAME}[\[\}] l.popState();l.pushState(PHP);lval.Token(l.createToken(l.ungetChars(1))); return T_STRING_VARNAME
<STRING_VAR_NAME>. l.ungetChars(1);l.popState();l.pushState(PHP)
<HALT_COMPILER>.|[ \t\n\r] // do nothing
<PHP>{ANY_CHAR} l.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", l.TokenBytes(nil)[0], l.TokenBytes(nil)[0]));l.Abort();
%%
if c, ok := l.Abort(); ok { return int(c) }
goto yyAction

View File

@@ -1077,3 +1077,52 @@ func TestEmptyInlineComment2(t *testing.T) {
assertEqual(t, expected, actual)
}
// TestIgnoreControllCharacters lexes PHP source that contains a \004 control
// byte between the open tag and an echo statement, and checks that the first
// two tokens produced by Lex carry the values "echo" and "$b".
func TestIgnoreControllCharacters(t *testing.T) {
	input := "<?php \004 echo $b;"

	lexer := scanner.NewLexer(bytes.NewBufferString(input), "test.php")
	lv := &lval{}

	// Each Lex call fills lv with the next token; compare them in order.
	for _, want := range []string{"echo", "$b"} {
		lexer.Lex(lv)
		assertEqual(t, want, lv.Tkn.Value)
	}
}
// TestIgnoreControllCharactersAtStringVarOffset lexes a double-quoted string
// whose variable index contains a \004 control byte ("$a[test\004]") and
// checks that successive Lex calls yield the token values `"`, `$a`, `[`,
// `test` and `]`, in that order.
func TestIgnoreControllCharactersAtStringVarOffset(t *testing.T) {
	input := "<?php \"$a[test\004]\";"

	lexer := scanner.NewLexer(bytes.NewBufferString(input), "test.php")
	lv := &lval{}

	wantValues := []string{
		"\"",
		"$a",
		"[",
		"test",
		"]",
	}

	// Each Lex call fills lv with the next token; compare them in order.
	for _, want := range wantValues {
		lexer.Lex(lv)
		assertEqual(t, want, lv.Tkn.Value)
	}
}