// Package scanner transforms an input string into a stream of PHP tokens.
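//
// A minimal usage sketch, assuming the generated Lex method that accompanies
// this file and a hypothetical parserSymType implementing Lval:
//
//	lexer := scanner.NewLexer(strings.NewReader("<?php echo 1;"), "example.php")
//	lval := &parserSymType{}
//	for tokenID := lexer.Lex(lval); tokenID > 0; tokenID = lexer.Lex(lval) {
//		// each call fills lval with the next scanned *Token
//	}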
package scanner

import (
	"bufio"
	"bytes"
	t "go/token"
	"io"
	"unicode"

	"github.com/cznic/golex/lex"
	"github.com/z7zmey/php-parser/meta"
	"github.com/z7zmey/php-parser/position"
)

// Character classes for non-ASCII runes are allocated in the range
// [0x80, 0xFF], just above the values used for plain ASCII bytes.
const (
	classUnicodeLeter = iota + 0x80
	classUnicodeDigit
	classUnicodeGraphic
	classOther
)

// Lval is the interface that the parser's yySymType must implement so the
// lexer can hand scanned tokens back to it.
type Lval interface {
	Token(tkn *Token)
}

// Lexer is the PHP lexer.
type Lexer struct {
	*lex.Lexer
	StateStack    []int // stack of lexer states; the top entry is the active state
	PhpDocComment string
	Meta          meta.Collection
	heredocLabel  string        // label of the heredoc currently being scanned
	tokenBytesBuf *bytes.Buffer // reusable buffer for assembling token values
	TokenPool     *TokenPool
	WithMeta      bool // when false, addMeta is a no-op
	lastToken     *Token
}

// Rune2Class maps a rune to the character-class id consumed by the lexer:
// ASCII runes map to themselves, and all other runes collapse into a small
// set of classes allocated above 0x7F.
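// For example, 'a' maps to 0x61 (its ASCII value), 'я' to classUnicodeLeter,
// the Arabic-Indic digit '٣' to classUnicodeDigit, and the graphic symbol
// '∑' to classUnicodeGraphic.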
func Rune2Class(r rune) int {
	if r >= 0 && r < 0x80 { // Keep ASCII as it is.
		return int(r)
	}
	if unicode.IsLetter(r) {
		return classUnicodeLeter
	}
	if unicode.IsDigit(r) {
		return classUnicodeDigit
	}
	if unicode.IsGraphic(r) {
		return classUnicodeGraphic
	}
	if r == lex.RuneEOF {
		// Let the EOF marker through unchanged so the lexer can detect
		// end of input.
		return int(r)
	}

	return classOther
}

// NewLexer creates a Lexer that reads PHP source from src, using fName for
// position reporting. It panics if the underlying golex lexer cannot be
// constructed.
func NewLexer(src io.Reader, fName string) *Lexer {
	// Register the whole source as a single pseudo-file of (near-)maximal
	// size, so every byte offset in the input fits inside it.
	file := t.NewFileSet().AddFile(fName, -1, 1<<31-3)
	lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(Rune2Class))
	if err != nil {
		panic(err)
	}

	return &Lexer{
		Lexer:         lx,
		StateStack:    []int{0},
		PhpDocComment: "",
		Meta:          nil,
		heredocLabel:  "",
		tokenBytesBuf: &bytes.Buffer{},
		TokenPool:     &TokenPool{},
	}
}

// ungetChars returns the last n scanned characters (plus the lookahead) to
// the input and reports the current token's characters without them.
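// For example, if the token scanned so far is "abc", ungetChars(1) pushes
// 'c' back onto the input and returns the characters of "ab".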
func (l *Lexer) ungetChars(n int) []lex.Char {
	// Return the lookahead character to the input before pushing back the
	// token characters.
	l.Unget(l.Lookahead())

	chars := l.Token()

	for i := 1; i <= n; i++ {
		char := chars[len(chars)-i]
		l.Unget(char)
	}

	buf := l.Token()
	buf = buf[:len(buf)-n]

	return buf
}

// pushState pushes a new state onto the state stack, making it current.
func (l *Lexer) pushState(state int) {
	l.StateStack = append(l.StateStack, state)
}

// popState drops the current state, restoring the previous one; the bottom
// state is never popped.
func (l *Lexer) popState() {
	depth := len(l.StateStack)
	if depth <= 1 {
		return
	}

	l.StateStack = l.StateStack[:depth-1]
}

// Begin replaces the current lexer state with the given one.
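// A sketch of how the three state operations combine (the state ids are
// hypothetical; the real ones come from the lexer rules):
//
//	l.Begin(stateInString)   // replace the current state
//	l.pushState(stateInVar)  // enter a nested state
//	l.popState()             // return to the enclosing state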
func (l *Lexer) Begin(state int) {
	depth := len(l.StateStack)
	l.StateStack = l.StateStack[:depth-1]
	l.StateStack = append(l.StateStack, state)
}

// getCurrentState returns the state on top of the stack.
func (l *Lexer) getCurrentState() int {
	return l.StateStack[len(l.StateStack)-1]
}

// createToken takes a token from the pool and fills it with the scanned
// characters' value, the collected meta, and the source position.
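// For example, scanning the three characters of "foo" yields a token with
// Value "foo" whose StartPos and EndPos are the byte offsets of the first
// and last character within the file registered by NewLexer.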
func (l *Lexer) createToken(chars []lex.Char) *Token {
	firstChar := chars[0]
	lastChar := chars[len(chars)-1]

	token := l.TokenPool.Get()
	token.Meta = l.Meta
	token.Value = l.tokenString(chars)

	token.StartLine = l.File.Line(firstChar.Pos())
	token.EndLine = l.File.Line(lastChar.Pos())
	token.StartPos = int(firstChar.Pos())
	token.EndPos = int(lastChar.Pos())

	return token
}

// tokenString concatenates the runes of the scanned characters into a
// string, reusing the lexer's byte buffer between calls.
func (l *Lexer) tokenString(chars []lex.Char) string {
	l.tokenBytesBuf.Reset()

	for _, c := range chars {
		l.tokenBytesBuf.WriteRune(c.Rune)
	}

	return l.tokenBytesBuf.String()
}

// addMeta records the scanned characters as a meta entry of the given type,
// together with their source position; it is a no-op unless WithMeta is set.
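// A usage sketch (meta.CommentType is illustrative; see the meta package
// for the actual type constants):
//
//	l.addMeta(meta.CommentType, chars) // record a comment and its span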
func (l *Lexer) addMeta(mt meta.Type, chars []lex.Char) {
	if !l.WithMeta {
		return
	}

	firstChar := chars[0]
	lastChar := chars[len(chars)-1]

	pos := position.NewPosition(
		l.File.Line(firstChar.Pos()),
		l.File.Line(lastChar.Pos()),
		int(firstChar.Pos()),
		int(lastChar.Pos()),
	)

	l.Meta.Push(&meta.Data{
		Value:    l.tokenString(chars),
		Type:     mt,
		Position: pos,
	})
}