// php-parser/internal/php8/lexer.go

package php8

import (
	"bytes"
	"strings"

	pos "git.maride.cc/maride/php-parser/internal/position"
	"git.maride.cc/maride/php-parser/pkg/conf"
	"git.maride.cc/maride/php-parser/pkg/errors"
	"git.maride.cc/maride/php-parser/pkg/position"
	"git.maride.cc/maride/php-parser/pkg/token"
	"git.maride.cc/maride/php-parser/pkg/version"
)
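
// Lexer scans PHP 8 source code into tokens. It carries the raw input, the
// scanner state-machine registers (p, pe, cs, ts, te, act, stack, top),
// pools for token and position values, and a newline index used to turn
// byte offsets into line/column positions.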
type Lexer struct {
	data           []byte
	phpVersion     *version.Version
	errHandlerFunc func(*errors.Error)

	// p: current position being lexed/checked.
	// pe: length in bytes of input.
	p, pe, cs int
	// ts: start position of the current token.
	// te: end position of the current token.
	ts, te, act int
	stack       []int
	top         int

	heredocLabel []byte
	tokenPool    *token.Pool
	positionPool *position.Pool
	newLines     pos.NewLines
}
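
// NewLexer creates a Lexer over data, configured with the PHP version and
// error handler from config, and initializes the scanner state machine.
//
// A minimal construction sketch (the Version and ErrorHandlerFunc fields are
// the ones read by this file; whether other Config fields are required is an
// assumption not checked here):
//
//	lex := NewLexer([]byte("<?php echo 1;"), conf.Config{
//		ErrorHandlerFunc: func(e *errors.Error) { /* collect or log */ },
//	})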
func NewLexer(data []byte, config conf.Config) *Lexer {
	lex := &Lexer{
		data:           data,
		phpVersion:     config.Version,
		errHandlerFunc: config.ErrorHandlerFunc,
		pe:             len(data),

		stack: make([]int, 0),

		tokenPool:    token.NewPool(token.DefaultBlockSize),
		positionPool: position.NewPool(position.DefaultBlockSize),
		newLines:     pos.NewNewLines(),
	}

	initLexer(lex)

	return lex
}
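
// setTokenPosition fills token.Position with the line/column span of the
// current token, computed from the ts..te byte offsets and the newline index.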
func (lex *Lexer) setTokenPosition(token *token.Token) {
	pos := lex.positionPool.Get()

	sl, slb := lex.newLines.GetLine(lex.ts)
	el, elb := lex.newLines.GetLine(lex.te - 1)

	pos.StartLine = sl
	pos.EndLine = el
	pos.StartPos = lex.ts
	pos.EndPos = lex.te
	pos.StartCol = lex.ts - slb
	pos.EndCol = lex.te - elb

	token.Position = pos
}
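
// setTokenPrefixPosition is like setTokenPosition but covers only the first
// n bytes of the current token, starting at ts.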
func (lex *Lexer) setTokenPrefixPosition(token *token.Token, n int) {
	pos := lex.positionPool.Get()
	endPos := lex.ts + n

	sl, slb := lex.newLines.GetLine(lex.ts)
	el, elb := lex.newLines.GetLine(endPos)

	pos.StartLine = sl
	pos.EndLine = el
	pos.StartPos = lex.ts
	pos.EndPos = endPos
	pos.StartCol = lex.ts - slb
	pos.EndCol = endPos - elb

	token.Position = pos
}
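
// addFreeFloatingToken attaches the skipped bytes data[ps:pe] (typically
// whitespace or comments) to t as a free-floating token with the given id.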
func (lex *Lexer) addFreeFloatingToken(t *token.Token, id token.ID, ps, pe int) {
	skippedTkn := lex.tokenPool.Get()
	skippedTkn.ID = id
	skippedTkn.Value = lex.data[ps:pe]

	lex.setTokenPosition(skippedTkn)

	if t.FreeFloating == nil {
		t.FreeFloating = make([]*token.Token, 0, 2)
	}

	t.FreeFloating = append(t.FreeFloating, skippedTkn)
}
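
// isNotStringVar reports whether the bytes at the current position do not
// start a variable interpolation: an unescaped "$" followed by "{" or a valid
// name start, or a "{$" sequence.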
func (lex *Lexer) isNotStringVar() bool {
	p := lex.p
	if lex.data[p-1] == '\\' && lex.data[p-2] != '\\' {
		return true
	}

	if len(lex.data) < p+1 {
		return true
	}

	if lex.data[p] == '$' && (lex.data[p+1] == '{' || isValidVarNameStart(lex.data[p+1])) {
		return false
	}

	if lex.data[p] == '{' && lex.data[p+1] == '$' {
		return false
	}

	return true
}
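
// isNotStringEnd reports whether the byte at the current position is not an
// unescaped occurrence of the closing quote s.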
func (lex *Lexer) isNotStringEnd(s byte) bool {
	p := lex.p
	if lex.data[p-1] == '\\' && lex.data[p-2] != '\\' {
		return true
	}

	return lex.data[p] != s
}
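
// isHeredocEnd reports whether the heredoc closing label starts at offset p:
// it must directly follow a line break, may be preceded by spaces or tabs
// (indented closing labels), and must not be followed by another valid name
// character. On a match, lex.p is moved to the start of the label.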
func (lex *Lexer) isHeredocEnd(p int) bool {
	if lex.data[p-1] != '\r' && lex.data[p-1] != '\n' {
		return false
	}

	if p == len(lex.data) {
		return false
	}

	// Skip optional indentation before the closing label.
	for lex.data[p] == ' ' || lex.data[p] == '\t' {
		p++
	}

	l := len(lex.heredocLabel)
	if len(lex.data) < p+l {
		return false
	}

	// The label must not be followed by another valid name character.
	if len(lex.data) > p+l && isValidVarName(lex.data[p+l]) {
		return false
	}

	if bytes.Equal(lex.heredocLabel, lex.data[p:p+l]) {
		lex.p = p
		return true
	}

	return false
}
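
// isNotHeredocEnd is the negation of isHeredocEnd.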
func (lex *Lexer) isNotHeredocEnd(p int) bool {
	return !lex.isHeredocEnd(p)
}
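
// growCallStack makes sure the scanner call stack has room for one more
// state before call pushes onto it.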
func (lex *Lexer) growCallStack() {
	if lex.top == len(lex.stack) {
		lex.stack = append(lex.stack, 0)
	}
}
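
// isNotPhpCloseToken reports whether the current position is not a "?>"
// closing tag.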
func (lex *Lexer) isNotPhpCloseToken() bool {
	if lex.p+1 == len(lex.data) {
		return true
	}

	return lex.data[lex.p] != '?' || lex.data[lex.p+1] != '>'
}
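
// isNotNewLine reports whether the previous byte did not end a line; the
// '\n' of a "\r\n" pair is not counted as starting another new line.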
func (lex *Lexer) isNotNewLine() bool {
	if lex.data[lex.p] == '\n' && lex.data[lex.p-1] == '\r' {
		return true
	}

	return lex.data[lex.p-1] != '\n' && lex.data[lex.p-1] != '\r'
}
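
// call pushes state onto the scanner call stack, advances the read position
// by one byte, and switches the state machine to fnext.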
func (lex *Lexer) call(state int, fnext int) {
	lex.growCallStack()

	lex.stack[lex.top] = state
	lex.top++
	lex.p++
	lex.cs = fnext
}
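
// ret pops n entries from the scanner call stack (clamping at the bottom),
// resumes the state found at the new top, and advances the read position.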
func (lex *Lexer) ret(n int) {
	lex.top = lex.top - n
	if lex.top < 0 {
		lex.top = 0
	}

	lex.cs = lex.stack[lex.top]
	lex.p++
}
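
// ungetStr pushes the suffix s of the current token back into the input so
// it is scanned again, but only if the token actually ends with s.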
func (lex *Lexer) ungetStr(s string) {
	tokenStr := string(lex.data[lex.ts:lex.te])
	if strings.HasSuffix(tokenStr, s) {
		lex.ungetCnt(len(s))
	}
}
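
// ungetFromStart shrinks the current token to its first n bytes and pushes
// the rest back into the input.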
func (lex *Lexer) ungetFromStart(n int) {
	tokenLength := lex.te - lex.ts
	lex.ungetCnt(tokenLength - n)
}
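
// ungetCnt moves the read position and the current token end back by n bytes.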
func (lex *Lexer) ungetCnt(n int) {
	lex.p = lex.p - n
	lex.te = lex.te - n
}
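
// ungetWhile moves the token end and read position backwards until the byte
// s is found, bounded at 100 steps, then steps one byte forward so the token
// ends just past the matched byte.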
func (lex *Lexer) ungetWhile(s byte) {
	for i := 0; i < 100; i++ {
		v := lex.data[lex.te]
		if v == s {
			break
		}
		lex.te--
		lex.p--
	}

	lex.te++
	lex.p++
}
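
// error passes msg together with the current token's position to the
// configured error handler; it does nothing when no handler is set.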
func (lex *Lexer) error(msg string) {
	if lex.errHandlerFunc == nil {
		return
	}

	sl, slb := lex.newLines.GetLine(lex.ts)
	el, elb := lex.newLines.GetLine(lex.te - 1)

	pos := position.NewPosition(
		sl,
		el,
		lex.ts,
		lex.te,
		lex.ts-slb,
		lex.te-elb,
	)

	lex.errHandlerFunc(errors.NewError(msg, pos))
}
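
// isValidVarNameStart reports whether r may start a variable or label name:
// an ASCII letter, an underscore, or any byte >= 0x80.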
func isValidVarNameStart(r byte) bool {
	return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || r == '_' || r >= 0x80
}
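
// isValidVarName reports whether r may appear after the first character of a
// variable or label name: a letter, digit, underscore, or byte >= 0x80.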
func isValidVarName(r byte) bool {
	return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' ||
		r >= 0x80
}