php-parser/parser/lexer.go

123 lines
2.2 KiB
Go
Raw Normal View History

2017-12-04 10:40:36 +00:00
package parser
2017-11-07 06:21:38 +00:00
import (
"bufio"
2017-12-03 18:49:18 +00:00
"go/token"
2017-11-07 06:21:38 +00:00
"io"
2017-12-03 07:17:05 +00:00
"unicode"
2017-11-07 06:21:38 +00:00
"github.com/cznic/golex/lex"
2018-01-02 22:12:28 +00:00
"github.com/z7zmey/php-parser/comment"
2018-01-05 15:03:59 +00:00
t "github.com/z7zmey/php-parser/token"
2017-11-07 06:21:38 +00:00
)
// Character classes handed to the generated scanner for non-ASCII runes.
// They may be allocated anywhere in [0x80, 0xFF]; ASCII runes keep their own
// value as their class (see rune2Class).
const (
	// classUnicodeLeter is the class of runes for which unicode.IsLetter holds.
	classUnicodeLeter = 0x80 + iota
	// classUnicodeDigit is the class of runes for which unicode.IsDigit holds.
	classUnicodeDigit
	// classOther is reserved for every remaining rune; rune2Class currently
	// returns -1 instead of this value.
	classOther
)
type lexer struct {
*lex.Lexer
2018-01-02 11:53:55 +00:00
stateStack []int
lineNumber int
phpDocComment string
2018-01-06 12:04:02 +00:00
comments *[]comment.Comment
2017-12-03 07:17:05 +00:00
}
func rune2Class(r rune) int {
if r >= 0 && r < 0x80 { // Keep ASCII as it is.
return int(r)
}
if unicode.IsLetter(r) {
return classUnicodeLeter
}
if unicode.IsDigit(r) {
return classUnicodeDigit
}
// return classOther
return -1
2017-11-07 06:21:38 +00:00
}
2017-12-01 16:04:31 +00:00
func newLexer(src io.Reader, fName string) *lexer {
2017-12-03 18:49:18 +00:00
file := token.NewFileSet().AddFile(fName, -1, 1<<31-1)
2017-11-07 06:21:38 +00:00
lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(rune2Class))
if err != nil {
panic(err)
}
2018-01-06 12:04:02 +00:00
return &lexer{lx, []int{0}, 1, "", nil}
2017-11-07 06:21:38 +00:00
}
2017-12-01 14:04:53 +00:00
func (l *lexer) ungetN(n int) []byte {
2017-11-07 06:21:38 +00:00
l.Unget(l.Lookahead())
chars := l.Token()
2017-12-01 14:04:53 +00:00
for i := 1; i <= n; i++ {
char := chars[len(chars)-i]
l.Unget(char)
2017-11-07 06:21:38 +00:00
}
buf := l.TokenBytes(nil)
2017-12-01 14:04:53 +00:00
buf = buf[:len(buf)-n]
2017-11-07 06:21:38 +00:00
return buf
}
2017-12-01 14:04:53 +00:00
// pushState makes state the current scanner state by placing it on top of the
// state stack; the previous states stay underneath it.
func (l *lexer) pushState(state int) {
	grown := append(l.stateStack, state)
	l.stateStack = grown
}
2017-11-12 11:13:31 +00:00
2017-12-01 14:04:53 +00:00
func (l *lexer) popState() {
len := len(l.stateStack)
if len <= 1 {
return
2017-11-12 11:13:31 +00:00
}
2017-12-01 14:04:53 +00:00
l.stateStack = l.stateStack[:len-1]
}
2017-11-12 11:13:31 +00:00
2017-12-01 14:04:53 +00:00
// begin switches the scanner to state by overwriting the top of the state
// stack; the stack depth does not change.
//
// The stack must hold at least one entry, otherwise the call panics.
func (l *lexer) begin(state int) {
	top := len(l.stateStack) - 1
	l.stateStack[top] = state
}
func (l *lexer) getCurrentState() int {
return l.stateStack[len(l.stateStack)-1]
2017-11-12 11:13:31 +00:00
}
2017-12-03 07:17:05 +00:00
2018-01-05 11:01:14 +00:00
func (l *lexer) handleNewLine(tokenBytes []byte) ([]byte, int, int, int, int) {
2017-12-03 07:17:05 +00:00
startln := l.lineNumber
var prev byte
2017-12-03 16:36:34 +00:00
for _, b := range tokenBytes {
2017-12-03 07:17:05 +00:00
if b == '\n' || prev == '\r' {
l.lineNumber++
}
prev = b
}
// handle last \r
if prev == '\r' {
l.lineNumber++
}
2018-01-05 11:01:14 +00:00
return tokenBytes, startln, l.lineNumber, int(l.First.Pos()), int(l.Prev.Pos())
2017-12-03 07:17:05 +00:00
}
2018-01-05 15:03:59 +00:00
// newToken builds a token from the text currently matched by the scanner.
//
// The token bytes are run through handleNewLine, which also advances the line
// counter, and the comments pointer held by the lexer is attached to the
// resulting token.
func (l *lexer) newToken() t.Token {
	value, startLine, endLine, startPos, endPos := l.handleNewLine(l.TokenBytes(nil))
	tok := t.NewToken(value, startLine, endLine, startPos, endPos)
	return tok.SetComments(l.comments)
}
2018-01-06 12:04:02 +00:00
// addComment records c among the comments held by the lexer.
//
// When a comment slice already exists, c is appended through the existing
// pointer, so every holder of that pointer sees it; otherwise a new
// one-element slice is allocated.
func (l *lexer) addComment(c comment.Comment) {
	if l.comments != nil {
		*l.comments = append(*l.comments, c)
		return
	}

	first := []comment.Comment{c}
	l.comments = &first
}