php-parser/scanner/lexer.go

// Package scanner transforms an input string into a stream of PHP tokens.
package scanner

import (
	"bufio"
	"bytes"
	t "go/token"
	"io"
	"unicode"

	"github.com/cznic/golex/lex"
	"github.com/z7zmey/php-parser/meta"
	"github.com/z7zmey/php-parser/position"
)

// Allocate character classes anywhere in [0x80, 0xFF].
const (
	classUnicodeLeter = iota + 0x80
	classUnicodeDigit
	classUnicodeGraphic
	classOther
)

// Lval is the interface that the parser's yySymType must implement;
// the lexer delivers scanned tokens through it.
type Lval interface {
	Token(tkn *Token)
}

// Lexer is the PHP lexer. It wraps a golex lexer and keeps the scanner
// state stack, collected meta (comments and whitespace), and a reusable
// buffer for building token values.
type Lexer struct {
	*lex.Lexer
	StateStack    []int
	PhpDocComment string
	Meta          []meta.Meta
	heredocLabel  string
	tokenBytesBuf *bytes.Buffer
	TokenPool     *TokenPool
	WithMeta      bool
	lastToken     *Token
}

// Rune2Class maps a rune to the character-class id used by the lexer
// tables: ASCII runes map to themselves, and all other runes collapse
// into one of the coarse classes defined above.
func Rune2Class(r rune) int {
	if r >= 0 && r < 0x80 { // Keep ASCII as it is.
		return int(r)
	}
	if unicode.IsLetter(r) {
		return classUnicodeLeter
	}
	if unicode.IsDigit(r) {
		return classUnicodeDigit
	}
	if unicode.IsGraphic(r) {
		return classUnicodeGraphic
	}
	if r == lex.RuneEOF {
		return int(r)
	}
	return classOther
}
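
// For illustration, how a few runes are classified:
//
//	Rune2Class('a') // 0x61: ASCII runes are their own class
//	Rune2Class('я') // classUnicodeLeter: non-ASCII letter
//	Rune2Class('٣') // classUnicodeDigit: non-ASCII digit
//	Rune2Class('€') // classUnicodeGraphic: other printable rune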

// NewLexer is the Lexer constructor. It registers fName in a fresh
// token.FileSet (base -1 lets the set choose the base; the size is the
// largest the set accepts) so byte offsets can later be resolved to lines.
func NewLexer(src io.Reader, fName string) *Lexer {
	file := t.NewFileSet().AddFile(fName, -1, 1<<31-3)
	lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(Rune2Class))
	if err != nil {
		panic(err)
	}

	return &Lexer{
		Lexer:         lx,
		StateStack:    []int{0},
		PhpDocComment: "",
		Meta:          nil,
		heredocLabel:  "",
		tokenBytesBuf: &bytes.Buffer{},
		TokenPool:     &TokenPool{},
	}
}
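
// A minimal usage sketch (assumes the generated Lex method and a parser
// type that implements Lval, neither of which is defined in this file):
//
//	src := strings.NewReader(`<?php echo "hello";`)
//	lexer := NewLexer(src, "example.php")
//	lexer.WithMeta = true // also collect comments and whitespace as meta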

// ungetChars pushes the lookahead and the last n scanned characters
// back onto the input and returns the token characters without them.
func (l *Lexer) ungetChars(n int) []lex.Char {
	l.Unget(l.Lookahead())

	chars := l.Token()
	for i := 1; i <= n; i++ {
		char := chars[len(chars)-i]
		l.Unget(char)
	}

	buf := l.Token()
	buf = buf[:len(buf)-n]

	return buf
}
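
// A sketch of the backtracking this enables: a rule that over-reads by
// one character can trim the match and re-feed the extra input:
//
//	chars := l.ungetChars(1) // lookahead and last char go back; chars is the trimmed token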

// pushState enters a new scanner state, keeping the previous one on the stack.
func (l *Lexer) pushState(state int) {
	l.StateStack = append(l.StateStack, state)
}

// popState returns to the previous scanner state; the bottom state is
// never popped.
func (l *Lexer) popState() {
	n := len(l.StateStack)
	if n <= 1 {
		return
	}

	l.StateStack = l.StateStack[:n-1]
}

// Begin replaces the current scanner state without growing the stack.
func (l *Lexer) Begin(state int) {
	n := len(l.StateStack)
	l.StateStack = l.StateStack[:n-1]
	l.StateStack = append(l.StateStack, state)
}

// getCurrentState returns the state on top of the stack.
func (l *Lexer) getCurrentState() int {
	return l.StateStack[len(l.StateStack)-1]
}
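
// The stack mirrors flex-style start conditions: pushState enters a
// nested condition (for example on entering a heredoc), popState returns
// to the enclosing one, and Begin switches conditions in place. A sketch,
// where phpState and stringState stand in for the generated
// start-condition constants:
//
//	l.pushState(stringState) // enter a nested condition
//	_ = l.getCurrentState()  // stringState
//	l.popState()             // back to the previous condition
//	l.Begin(phpState)        // replace the current condition in place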

// createToken builds a token for the scanned characters, attaching the
// collected meta and the start/end line and byte positions.
func (l *Lexer) createToken(chars []lex.Char) *Token {
	firstChar := chars[0]
	lastChar := chars[len(chars)-1]

	token := l.TokenPool.Get()
	token.Meta = l.Meta
	token.Value = l.tokenString(chars)

	token.StartLine = l.File.Line(firstChar.Pos())
	token.EndLine = l.File.Line(lastChar.Pos())
	token.StartPos = int(firstChar.Pos())
	token.EndPos = int(lastChar.Pos())

	return token
}

// addComments records a comment with its position in l.Meta when meta
// collection is enabled.
func (l *Lexer) addComments(chars []lex.Char) {
	if !l.WithMeta {
		return
	}

	firstChar := chars[0]
	lastChar := chars[len(chars)-1]

	pos := position.NewPosition(
		l.File.Line(firstChar.Pos()),
		l.File.Line(lastChar.Pos()),
		int(firstChar.Pos()),
		int(lastChar.Pos()),
	)

	c := meta.NewComment(l.tokenString(chars), pos)
	l.Meta = append(l.Meta, c)
}

// addWhiteSpace records a whitespace run with its position in l.Meta
// when meta collection is enabled.
func (l *Lexer) addWhiteSpace(chars []lex.Char) {
	if !l.WithMeta {
		return
	}

	firstChar := chars[0]
	lastChar := chars[len(chars)-1]

	pos := position.NewPosition(
		l.File.Line(firstChar.Pos()),
		l.File.Line(lastChar.Pos()),
		int(firstChar.Pos()),
		int(lastChar.Pos()),
	)

	c := meta.NewWhiteSpace(l.tokenString(chars), pos)
	l.Meta = append(l.Meta, c)
}

// tokenString concatenates the runes of a token into a string, reusing
// the lexer's byte buffer to avoid an allocation per token. The returned
// string copies the buffer's bytes, so resetting the buffer for the next
// token cannot corrupt previously returned values.
func (l *Lexer) tokenString(chars []lex.Char) string {
	l.tokenBytesBuf.Reset()

	for _, c := range chars {
		l.tokenBytesBuf.WriteRune(c.Rune)
	}

	return l.tokenBytesBuf.String()
}
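
// A sketch of the whole flow for one token (assumes a generated Lex
// method, not in this file, drives these helpers):
//
//	chars := l.Token()          // characters matched by the active rule
//	tkn := l.createToken(chars) // value, meta, and positions filled in
//	lval.Token(tkn)             // hand the token to the parser's yySymType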