2017-12-04 10:40:36 +00:00
|
|
|
package parser
|
2017-11-07 06:21:38 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
2018-01-10 12:35:08 +00:00
|
|
|
"bytes"
|
2017-12-03 18:49:18 +00:00
|
|
|
"go/token"
|
2017-11-07 06:21:38 +00:00
|
|
|
"io"
|
2017-12-03 07:17:05 +00:00
|
|
|
"unicode"
|
2017-11-07 06:21:38 +00:00
|
|
|
|
|
|
|
"github.com/cznic/golex/lex"
|
2018-01-02 22:12:28 +00:00
|
|
|
"github.com/z7zmey/php-parser/comment"
|
2018-01-05 15:03:59 +00:00
|
|
|
t "github.com/z7zmey/php-parser/token"
|
2017-11-07 06:21:38 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Character classes for non-ASCII runes, as returned by rune2Class.
//
// They are numbered consecutively from 0x80, i.e. directly above the ASCII
// range that rune2Class passes through unchanged, and all of them fit in
// [0x80, 0xFF].
const (
	// classUnicodeLeter is the class of runes for which unicode.IsLetter
	// reports true.
	classUnicodeLeter = 0x80 + iota
	// classUnicodeDigit is the class of runes for which unicode.IsDigit
	// reports true.
	classUnicodeDigit
	// classUnicodeGraphic is the class of the remaining runes for which
	// unicode.IsGraphic reports true.
	classUnicodeGraphic
	// classOther is declared for everything else, but rune2Class returns -1
	// rather than this value.
	classOther
)
|
|
|
|
|
|
|
|
type lexer struct {
|
|
|
|
*lex.Lexer
|
2018-01-02 11:53:55 +00:00
|
|
|
stateStack []int
|
|
|
|
phpDocComment string
|
2018-01-08 19:50:39 +00:00
|
|
|
comments []comment.Comment
|
2017-12-03 07:17:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func rune2Class(r rune) int {
|
|
|
|
if r >= 0 && r < 0x80 { // Keep ASCII as it is.
|
|
|
|
return int(r)
|
|
|
|
}
|
|
|
|
if unicode.IsLetter(r) {
|
|
|
|
return classUnicodeLeter
|
|
|
|
}
|
|
|
|
if unicode.IsDigit(r) {
|
|
|
|
return classUnicodeDigit
|
|
|
|
}
|
2018-01-10 12:35:08 +00:00
|
|
|
if unicode.IsGraphic(r) {
|
|
|
|
return classUnicodeGraphic
|
|
|
|
}
|
2017-12-03 07:17:05 +00:00
|
|
|
// return classOther
|
|
|
|
return -1
|
2017-11-07 06:21:38 +00:00
|
|
|
}
|
|
|
|
|
2017-12-01 16:04:31 +00:00
|
|
|
func newLexer(src io.Reader, fName string) *lexer {
|
2017-12-03 18:49:18 +00:00
|
|
|
file := token.NewFileSet().AddFile(fName, -1, 1<<31-1)
|
2017-11-07 06:21:38 +00:00
|
|
|
lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(rune2Class))
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2018-01-08 15:02:56 +00:00
|
|
|
return &lexer{lx, []int{0}, "", nil}
|
2017-11-07 06:21:38 +00:00
|
|
|
}
|
|
|
|
|
2018-01-10 12:35:08 +00:00
|
|
|
func (l *lexer) ungetChars(n int) []lex.Char {
|
2017-11-07 06:21:38 +00:00
|
|
|
l.Unget(l.Lookahead())
|
|
|
|
|
|
|
|
chars := l.Token()
|
|
|
|
|
2017-12-01 14:04:53 +00:00
|
|
|
for i := 1; i <= n; i++ {
|
|
|
|
char := chars[len(chars)-i]
|
|
|
|
l.Unget(char)
|
2017-11-07 06:21:38 +00:00
|
|
|
}
|
|
|
|
|
2018-01-10 12:35:08 +00:00
|
|
|
buf := l.Token()
|
2017-12-01 14:04:53 +00:00
|
|
|
buf = buf[:len(buf)-n]
|
2017-11-07 06:21:38 +00:00
|
|
|
|
|
|
|
return buf
|
|
|
|
}
|
|
|
|
|
2017-12-01 14:04:53 +00:00
|
|
|
func (l *lexer) pushState(state int) {
|
|
|
|
l.stateStack = append(l.stateStack, state)
|
|
|
|
}
|
2017-11-12 11:13:31 +00:00
|
|
|
|
2017-12-01 14:04:53 +00:00
|
|
|
func (l *lexer) popState() {
|
|
|
|
len := len(l.stateStack)
|
|
|
|
if len <= 1 {
|
|
|
|
return
|
2017-11-12 11:13:31 +00:00
|
|
|
}
|
|
|
|
|
2017-12-01 14:04:53 +00:00
|
|
|
l.stateStack = l.stateStack[:len-1]
|
|
|
|
}
|
2017-11-12 11:13:31 +00:00
|
|
|
|
2017-12-01 14:04:53 +00:00
|
|
|
func (l *lexer) begin(state int) {
|
|
|
|
len := len(l.stateStack)
|
|
|
|
l.stateStack = l.stateStack[:len-1]
|
|
|
|
l.stateStack = append(l.stateStack, state)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *lexer) getCurrentState() int {
|
|
|
|
return l.stateStack[len(l.stateStack)-1]
|
2017-11-12 11:13:31 +00:00
|
|
|
}
|
2017-12-03 07:17:05 +00:00
|
|
|
|
2018-01-10 12:35:08 +00:00
|
|
|
func (l *lexer) newToken(chars []lex.Char) t.Token {
|
|
|
|
firstChar := chars[0]
|
|
|
|
lastChar := chars[len(chars)-1]
|
2017-12-03 07:17:05 +00:00
|
|
|
|
2018-01-10 12:35:08 +00:00
|
|
|
startLine := l.File.Line(firstChar.Pos())
|
2018-01-08 15:02:56 +00:00
|
|
|
endLine := l.File.Line(lastChar.Pos())
|
2018-01-10 12:35:08 +00:00
|
|
|
startPos := int(firstChar.Pos())
|
|
|
|
endPos := int(lastChar.Pos())
|
2017-12-03 07:17:05 +00:00
|
|
|
|
2018-01-10 12:35:08 +00:00
|
|
|
return t.NewToken(l.charsToBytes(chars), startLine, endLine, startPos, endPos).SetComments(l.comments)
|
2018-01-05 15:03:59 +00:00
|
|
|
}
|
2018-01-06 12:04:02 +00:00
|
|
|
|
|
|
|
func (l *lexer) addComment(c comment.Comment) {
|
2018-01-08 19:50:39 +00:00
|
|
|
l.comments = append(l.comments, c)
|
2018-01-06 12:04:02 +00:00
|
|
|
}
|
2018-01-10 12:35:08 +00:00
|
|
|
|
|
|
|
func (l *lexer) charsToBytes(chars []lex.Char) []byte {
|
|
|
|
bytesBuf := bytes.Buffer{}
|
|
|
|
|
|
|
|
for _, c := range chars {
|
|
|
|
bytesBuf.WriteRune(c.Rune)
|
|
|
|
}
|
|
|
|
|
|
|
|
return bytesBuf.Bytes()
|
|
|
|
}
|