#80 implement Ragel based lexer
This commit is contained in:
312
scanner/lexer.go
312
scanner/lexer.go
@@ -1,188 +1,232 @@
|
||||
// Package scanner transforms an input string into a stream of PHP tokens.
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
t "go/token"
|
||||
"io"
|
||||
"unicode"
|
||||
"strings"
|
||||
|
||||
"github.com/z7zmey/php-parser/errors"
|
||||
"github.com/z7zmey/php-parser/freefloating"
|
||||
"github.com/z7zmey/php-parser/position"
|
||||
|
||||
"github.com/cznic/golex/lex"
|
||||
)
|
||||
|
||||
// Character classes for non-ASCII runes. They are allocated inside
// [0x80, 0xFF] so they never collide with ASCII runes, which Rune2Class
// returns unchanged.
const (
	// classUnicodeLeter is returned for runes where unicode.IsLetter is true.
	classUnicodeLeter = iota + 0x80
	// classUnicodeDigit is returned for runes where unicode.IsDigit is true.
	classUnicodeDigit
	// classUnicodeGraphic is returned for runes where unicode.IsGraphic is true.
	classUnicodeGraphic
	// classOther is the fallback for every remaining non-ASCII rune.
	classOther
)
|
||||
type Scanner interface {
|
||||
Lex(lval Lval) int
|
||||
ReturnTokenToPool(t *Token)
|
||||
GetPhpDocComment() string
|
||||
SetPhpDocComment(string)
|
||||
GetErrors() []*errors.Error
|
||||
GetWithFreeFloating() bool
|
||||
SetWithFreeFloating(bool)
|
||||
AddError(e *errors.Error)
|
||||
SetErrors(e []*errors.Error)
|
||||
}
|
||||
|
||||
// Lval parsers yySymType must implement this interface
|
||||
type Lval interface {
|
||||
Token(tkn *Token)
|
||||
}
|
||||
|
||||
// Lexer php lexer
|
||||
type Lexer struct {
|
||||
*lex.Lexer
|
||||
StateStack []int
|
||||
PhpDocComment string
|
||||
FreeFloating []freefloating.String
|
||||
heredocLabel string
|
||||
tokenBytesBuf *bytes.Buffer
|
||||
data []byte
|
||||
p, pe, cs int
|
||||
ts, te, act int
|
||||
stack []int
|
||||
top int
|
||||
heredocLabel []byte
|
||||
|
||||
TokenPool *TokenPool
|
||||
FreeFloating []freefloating.String
|
||||
WithFreeFloating bool
|
||||
PhpDocComment string
|
||||
lastToken *Token
|
||||
Errors []*errors.Error
|
||||
NewLines NewLines
|
||||
}
|
||||
|
||||
// Rune2Class returns the rune integer id
|
||||
func Rune2Class(r rune) int {
|
||||
if r >= 0 && r < 0x80 { // Keep ASCII as it is.
|
||||
return int(r)
|
||||
}
|
||||
if unicode.IsLetter(r) {
|
||||
return classUnicodeLeter
|
||||
}
|
||||
if unicode.IsDigit(r) {
|
||||
return classUnicodeDigit
|
||||
}
|
||||
if unicode.IsGraphic(r) {
|
||||
return classUnicodeGraphic
|
||||
}
|
||||
if r == lex.RuneEOF {
|
||||
return int(r)
|
||||
}
|
||||
return classOther
|
||||
func (l *Lexer) ReturnTokenToPool(t *Token) {
|
||||
l.TokenPool.Put(t)
|
||||
}
|
||||
|
||||
// NewLexer the Lexer constructor
|
||||
func NewLexer(src io.Reader, fName string) *Lexer {
|
||||
file := t.NewFileSet().AddFile(fName, -1, 1<<31-3)
|
||||
lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(Rune2Class))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return &Lexer{
|
||||
Lexer: lx,
|
||||
StateStack: []int{0},
|
||||
PhpDocComment: "",
|
||||
FreeFloating: nil,
|
||||
heredocLabel: "",
|
||||
tokenBytesBuf: &bytes.Buffer{},
|
||||
TokenPool: &TokenPool{},
|
||||
}
|
||||
func (l *Lexer) GetPhpDocComment() string {
|
||||
return l.PhpDocComment
|
||||
}
|
||||
|
||||
func (l *Lexer) Error(msg string) {
|
||||
chars := l.Token()
|
||||
firstChar := chars[0]
|
||||
lastChar := chars[len(chars)-1]
|
||||
|
||||
pos := position.NewPosition(
|
||||
l.File.Line(firstChar.Pos()),
|
||||
l.File.Line(lastChar.Pos()),
|
||||
int(firstChar.Pos()),
|
||||
int(lastChar.Pos()),
|
||||
)
|
||||
|
||||
l.Errors = append(l.Errors, errors.NewError(msg, pos))
|
||||
func (l *Lexer) SetPhpDocComment(s string) {
|
||||
l.PhpDocComment = s
|
||||
}
|
||||
|
||||
func (l *Lexer) ungetChars(n int) []lex.Char {
|
||||
l.Unget(l.Lookahead())
|
||||
|
||||
chars := l.Token()
|
||||
|
||||
for i := 1; i <= n; i++ {
|
||||
char := chars[len(chars)-i]
|
||||
l.Unget(char)
|
||||
}
|
||||
|
||||
buf := l.Token()
|
||||
buf = buf[:len(buf)-n]
|
||||
|
||||
return buf
|
||||
func (l *Lexer) GetErrors() []*errors.Error {
|
||||
return l.Errors
|
||||
}
|
||||
|
||||
func (l *Lexer) pushState(state int) {
|
||||
l.StateStack = append(l.StateStack, state)
|
||||
func (l *Lexer) GetWithFreeFloating() bool {
|
||||
return l.WithFreeFloating
|
||||
}
|
||||
|
||||
func (l *Lexer) popState() {
|
||||
len := len(l.StateStack)
|
||||
if len <= 1 {
|
||||
return
|
||||
}
|
||||
|
||||
l.StateStack = l.StateStack[:len-1]
|
||||
func (l *Lexer) SetWithFreeFloating(b bool) {
|
||||
l.WithFreeFloating = b
|
||||
}
|
||||
|
||||
func (l *Lexer) Begin(state int) {
|
||||
len := len(l.StateStack)
|
||||
l.StateStack = l.StateStack[:len-1]
|
||||
l.StateStack = append(l.StateStack, state)
|
||||
func (l *Lexer) AddError(e *errors.Error) {
|
||||
l.Errors = append(l.Errors, e)
|
||||
}
|
||||
|
||||
func (l *Lexer) getCurrentState() int {
|
||||
return l.StateStack[len(l.StateStack)-1]
|
||||
func (l *Lexer) SetErrors(e []*errors.Error) {
|
||||
l.Errors = e
|
||||
}
|
||||
|
||||
func (l *Lexer) createToken(chars []lex.Char) *Token {
|
||||
firstChar := chars[0]
|
||||
lastChar := chars[len(chars)-1]
|
||||
func (lex *Lexer) createToken(lval Lval) *Token {
|
||||
token := lex.TokenPool.Get()
|
||||
token.FreeFloating = lex.FreeFloating
|
||||
token.Value = string(lex.data[lex.ts:lex.te])
|
||||
|
||||
token := l.TokenPool.Get()
|
||||
token.FreeFloating = l.FreeFloating
|
||||
token.Value = l.tokenString(chars)
|
||||
|
||||
// fmt.Println(l.tokenString(chars))
|
||||
|
||||
token.StartLine = l.File.Line(firstChar.Pos())
|
||||
token.EndLine = l.File.Line(lastChar.Pos())
|
||||
token.StartPos = int(firstChar.Pos())
|
||||
token.EndPos = int(lastChar.Pos())
|
||||
token.StartLine = lex.NewLines.GetLine(lex.ts)
|
||||
token.EndLine = lex.NewLines.GetLine(lex.te - 1)
|
||||
token.StartPos = lex.ts
|
||||
token.EndPos = lex.te
|
||||
|
||||
lval.Token(token)
|
||||
return token
|
||||
}
|
||||
|
||||
func (l *Lexer) tokenString(chars []lex.Char) string {
|
||||
l.tokenBytesBuf.Reset()
|
||||
|
||||
for _, c := range chars {
|
||||
l.tokenBytesBuf.WriteRune(c.Rune)
|
||||
}
|
||||
|
||||
return string(l.tokenBytesBuf.Bytes())
|
||||
}
|
||||
|
||||
// free-floating
|
||||
|
||||
func (l *Lexer) addFreeFloating(t freefloating.StringType, chars []lex.Char) {
|
||||
if !l.WithFreeFloating {
|
||||
func (lex *Lexer) addFreeFloating(t freefloating.StringType, ps, pe int) {
|
||||
if !lex.WithFreeFloating {
|
||||
return
|
||||
}
|
||||
|
||||
firstChar := chars[0]
|
||||
lastChar := chars[len(chars)-1]
|
||||
|
||||
pos := position.NewPosition(
|
||||
l.File.Line(firstChar.Pos()),
|
||||
l.File.Line(lastChar.Pos()),
|
||||
int(firstChar.Pos()),
|
||||
int(lastChar.Pos()),
|
||||
lex.NewLines.GetLine(lex.ts),
|
||||
lex.NewLines.GetLine(lex.te-1),
|
||||
lex.ts,
|
||||
lex.te,
|
||||
)
|
||||
|
||||
l.FreeFloating = append(l.FreeFloating, freefloating.String{
|
||||
lex.FreeFloating = append(lex.FreeFloating, freefloating.String{
|
||||
StringType: t,
|
||||
Value: l.tokenString(chars),
|
||||
Value: string(lex.data[ps:pe]),
|
||||
Position: pos,
|
||||
})
|
||||
}
|
||||
|
||||
func (lex *Lexer) isNotStringVar() bool {
|
||||
p := lex.p
|
||||
if lex.data[p-1] == '\\' && lex.data[p-2] != '\\' {
|
||||
return true
|
||||
}
|
||||
|
||||
if len(lex.data) < p+1 {
|
||||
return true
|
||||
}
|
||||
|
||||
if lex.data[p] == '$' && (lex.data[p+1] == '{' || isValidVarNameStart(lex.data[p+1])) {
|
||||
return false
|
||||
}
|
||||
|
||||
if lex.data[p] == '{' && lex.data[p+1] == '$' {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (lex *Lexer) isNotStringEnd(s byte) bool {
|
||||
p := lex.p
|
||||
if lex.data[p-1] == '\\' && lex.data[p-2] != '\\' {
|
||||
return true
|
||||
}
|
||||
|
||||
return !(lex.data[p] == s)
|
||||
}
|
||||
|
||||
func (lex *Lexer) isHeredocEnd(p int) bool {
|
||||
if lex.data[p-1] != '\r' && lex.data[p-1] != '\n' {
|
||||
return false
|
||||
}
|
||||
|
||||
l := len(lex.heredocLabel)
|
||||
if len(lex.data) < p+l {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(lex.data) > p+l && lex.data[p+l] != ';' && lex.data[p+l] != '\r' && lex.data[p+l] != '\n' {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(lex.data) > p+l+1 && lex.data[p+l] == ';' && lex.data[p+l+1] != '\r' && lex.data[p+l+1] != '\n' {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Equal(lex.heredocLabel, lex.data[p:p+l])
|
||||
}
|
||||
|
||||
func (lex *Lexer) isNotHeredocEnd(p int) bool {
|
||||
return !lex.isHeredocEnd(p)
|
||||
}
|
||||
|
||||
func (lex *Lexer) growCallStack() {
|
||||
if lex.top == len(lex.stack) {
|
||||
lex.stack = append(lex.stack, 0)
|
||||
}
|
||||
}
|
||||
|
||||
func (lex *Lexer) isNotPhpCloseToken() bool {
|
||||
if lex.p+1 == len(lex.data) {
|
||||
return true
|
||||
}
|
||||
|
||||
return lex.data[lex.p] != '?' || lex.data[lex.p+1] != '>'
|
||||
}
|
||||
|
||||
func (lex *Lexer) isNotNewLine() bool {
|
||||
if lex.data[lex.p] == '\n' && lex.data[lex.p-1] == '\r' {
|
||||
return true
|
||||
}
|
||||
|
||||
return lex.data[lex.p-1] != '\n' && lex.data[lex.p-1] != '\r'
|
||||
}
|
||||
|
||||
func (lex *Lexer) call(state int, fnext int) {
|
||||
lex.growCallStack()
|
||||
|
||||
lex.stack[lex.top] = state
|
||||
lex.top++
|
||||
|
||||
lex.p++
|
||||
lex.cs = fnext
|
||||
}
|
||||
|
||||
func (lex *Lexer) ret(n int) {
|
||||
lex.top = lex.top - n
|
||||
if lex.top < 0 {
|
||||
lex.top = 0
|
||||
}
|
||||
lex.cs = lex.stack[lex.top]
|
||||
lex.p++
|
||||
}
|
||||
|
||||
func (lex *Lexer) ungetStr(s string) {
|
||||
tokenStr := string(lex.data[lex.ts:lex.te])
|
||||
if strings.HasSuffix(tokenStr, s) {
|
||||
lex.ungetCnt(len(s))
|
||||
}
|
||||
}
|
||||
|
||||
func (lex *Lexer) ungetCnt(n int) {
|
||||
lex.p = lex.p - n
|
||||
lex.te = lex.te - n
|
||||
}
|
||||
|
||||
func (lex *Lexer) Error(msg string) {
|
||||
pos := position.NewPosition(
|
||||
lex.NewLines.GetLine(lex.ts),
|
||||
lex.NewLines.GetLine(lex.te-1),
|
||||
lex.ts,
|
||||
lex.te,
|
||||
)
|
||||
|
||||
lex.Errors = append(lex.Errors, errors.NewError(msg, pos))
|
||||
}
|
||||
|
||||
// isValidVarNameStart reports whether r may start a variable name: an ASCII
// letter, an underscore, or any byte from 0x7f upwards.
func isValidVarNameStart(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z':
		return true
	case r == '_':
		return true
	}
	// A byte can never exceed 0xff, so only the lower bound needs checking.
	return r >= 0x7f
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
package scanner
|
||||
|
||||
type LexerToken int
|
||||
type TokenID int
|
||||
|
||||
//go:generate stringer -type=LexerToken -output ./lexer_tokens_string.go
|
||||
//go:generate stringer -type=TokenID -output ./tokenid_string.go
|
||||
const (
|
||||
T_INCLUDE LexerToken = iota + 57346
|
||||
T_INCLUDE TokenID = iota + 57346
|
||||
T_INCLUDE_ONCE
|
||||
T_EXIT
|
||||
T_IF
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
// Code generated by "stringer -type=LexerToken -output ./lexer_tokens_string.go"; DO NOT EDIT.
|
||||
|
||||
package scanner
|
||||
|
||||
import "strconv"
|
||||
|
||||
const _LexerToken_name = "T_INCLUDET_INCLUDE_ONCET_EXITT_IFT_LNUMBERT_DNUMBERT_STRINGT_STRING_VARNAMET_VARIABLET_NUM_STRINGT_INLINE_HTMLT_CHARACTERT_BAD_CHARACTERT_ENCAPSED_AND_WHITESPACET_CONSTANT_ENCAPSED_STRINGT_ECHOT_DOT_WHILET_ENDWHILET_FORT_ENDFORT_FOREACHT_ENDFOREACHT_DECLARET_ENDDECLARET_AST_SWITCHT_ENDSWITCHT_CASET_DEFAULTT_BREAKT_CONTINUET_GOTOT_FUNCTIONT_CONSTT_RETURNT_TRYT_CATCHT_FINALLYT_THROWT_USET_INSTEADOFT_GLOBALT_VART_UNSETT_ISSETT_EMPTYT_HALT_COMPILERT_CLASST_TRAITT_INTERFACET_EXTENDST_IMPLEMENTST_OBJECT_OPERATORT_DOUBLE_ARROWT_LISTT_ARRAYT_CALLABLET_CLASS_CT_TRAIT_CT_METHOD_CT_FUNC_CT_LINET_FILET_COMMENTT_DOC_COMMENTT_OPEN_TAGT_OPEN_TAG_WITH_ECHOT_CLOSE_TAGT_WHITESPACET_START_HEREDOCT_END_HEREDOCT_DOLLAR_OPEN_CURLY_BRACEST_CURLY_OPENT_PAAMAYIM_NEKUDOTAYIMT_NAMESPACET_NS_CT_DIRT_NS_SEPARATORT_ELLIPSIST_EVALT_REQUIRET_REQUIRE_ONCET_LOGICAL_ORT_LOGICAL_XORT_LOGICAL_ANDT_INSTANCEOFT_NEWT_CLONET_ELSEIFT_ELSET_ENDIFT_PRINTT_YIELDT_STATICT_ABSTRACTT_FINALT_PRIVATET_PROTECTEDT_PUBLICT_INCT_DECT_YIELD_FROMT_INT_CASTT_DOUBLE_CASTT_STRING_CASTT_ARRAY_CASTT_OBJECT_CASTT_BOOL_CASTT_UNSET_CASTT_COALESCET_SPACESHIPT_NOELSET_PLUS_EQUALT_MINUS_EQUALT_MUL_EQUALT_POW_EQUALT_DIV_EQUALT_CONCAT_EQUALT_MOD_EQUALT_AND_EQUALT_OR_EQUALT_XOR_EQUALT_SL_EQUALT_SR_EQUALT_BOOLEAN_ORT_BOOLEAN_ANDT_POWT_SLT_SRT_IS_IDENTICALT_IS_NOT_IDENTICALT_IS_EQUALT_IS_NOT_EQUALT_IS_SMALLER_OR_EQUALT_IS_GREATER_OR_EQUAL"
|
||||
|
||||
var _LexerToken_index = [...]uint16{0, 9, 23, 29, 33, 42, 51, 59, 75, 85, 97, 110, 121, 136, 161, 187, 193, 197, 204, 214, 219, 227, 236, 248, 257, 269, 273, 281, 292, 298, 307, 314, 324, 330, 340, 347, 355, 360, 367, 376, 383, 388, 399, 407, 412, 419, 426, 433, 448, 455, 462, 473, 482, 494, 511, 525, 531, 538, 548, 557, 566, 576, 584, 590, 596, 605, 618, 628, 648, 659, 671, 686, 699, 725, 737, 759, 770, 776, 781, 795, 805, 811, 820, 834, 846, 859, 872, 884, 889, 896, 904, 910, 917, 924, 931, 939, 949, 956, 965, 976, 984, 989, 994, 1006, 1016, 1029, 1042, 1054, 1067, 1078, 1090, 1100, 1111, 1119, 1131, 1144, 1155, 1166, 1177, 1191, 1202, 1213, 1223, 1234, 1244, 1254, 1266, 1279, 1284, 1288, 1292, 1306, 1324, 1334, 1348, 1369, 1390}
|
||||
|
||||
func (i LexerToken) String() string {
|
||||
i -= 57346
|
||||
if i < 0 || i >= LexerToken(len(_LexerToken_index)-1) {
|
||||
return "LexerToken(" + strconv.FormatInt(int64(i+57346), 10) + ")"
|
||||
}
|
||||
return _LexerToken_name[_LexerToken_index[i]:_LexerToken_index[i+1]]
|
||||
}
|
||||
25
scanner/newline.go
Normal file
25
scanner/newline.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package scanner
|
||||
|
||||
// NewLines keeps a strictly increasing list of offsets and maps an offset to
// a 1-based line number.
type NewLines struct {
	data []int
}

// Append records offset p, unless it is not greater than the last recorded
// offset, in which case it is ignored.
func (nl *NewLines) Append(p int) {
	if n := len(nl.data); n > 0 && nl.data[n-1] >= p {
		return
	}
	nl.data = append(nl.data, p)
}

// GetLine returns the 1-based line of offset p: one plus the number of
// recorded offsets that are not greater than p. The scan starts from the most
// recently recorded offset.
func (nl *NewLines) GetLine(p int) int {
	idx := len(nl.data)
	for idx > 0 && p < nl.data[idx-1] {
		idx--
	}
	return idx + 1
}
|
||||
30147
scanner/scanner.go
30147
scanner/scanner.go
File diff suppressed because it is too large
Load Diff
@@ -1,690 +0,0 @@
|
||||
%{
|
||||
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// blame: jnml, labs.nic.cz
|
||||
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/z7zmey/php-parser/freefloating"
|
||||
"github.com/cznic/golex/lex"
|
||||
)
|
||||
|
||||
// Lexer start conditions. INITIAL is zero; the remaining names match the
// start conditions declared with %s in this grammar, and the current one is
// read through l.getCurrentState().
const (
	INITIAL = iota
	PHP
	STRING
	STRING_VAR
	STRING_VAR_INDEX
	STRING_VAR_NAME
	PROPERTY
	HEREDOC_END
	NOWDOC
	HEREDOC
	BACKQUOTE
	HALT_COMPILER
)
|
||||
|
||||
// isValidFirstVarNameRune reports whether r may start a variable name: an
// ASCII letter, an underscore, or a rune in the range U+007F..U+00FF.
func isValidFirstVarNameRune(r rune) bool {
	switch {
	case r == '_':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '\u007f' <= r && r <= 'ÿ':
		return true
	}
	return false
}
|
||||
|
||||
func (l *Lexer) Lex(lval Lval) int {
|
||||
l.FreeFloating = nil
|
||||
c := l.Enter()
|
||||
%}
|
||||
|
||||
%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY HEREDOC_END NOWDOC HEREDOC BACKQUOTE HALT_COMPILER
|
||||
|
||||
%yyb last == '\n' || last = '\0'
|
||||
%yyt l.getCurrentState()
|
||||
%yyc c
|
||||
%yyn c = l.Next()
|
||||
%yym l.Mark()
|
||||
%optioncase-insensitive
|
||||
|
||||
LNUM [0-9]+
|
||||
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
|
||||
HNUM 0x[0-9a-fA-F]+
|
||||
BNUM 0b[01]+
|
||||
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
|
||||
VAR_NAME [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
|
||||
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
|
||||
NEW_LINE (\r|\n|\r\n)
|
||||
ANY_CHAR .
|
||||
|
||||
%%
|
||||
c = l.Rule0()
|
||||
|
||||
<INITIAL>[ \t\n\r]+ l.addFreeFloating(freefloating.WhiteSpaceType, l.Token())
|
||||
<INITIAL>.
|
||||
tb := []lex.Char{}
|
||||
|
||||
for {
|
||||
if c == -1 {
|
||||
tb = l.Token();
|
||||
break;
|
||||
}
|
||||
|
||||
if '?' == rune(c) {
|
||||
tb = l.Token();
|
||||
if (len(tb) < 2 || tb[len(tb)-1].Rune != '<') {
|
||||
c = l.Next()
|
||||
continue;
|
||||
}
|
||||
|
||||
tb = l.ungetChars(1)
|
||||
break;
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
lval.Token(l.createToken(tb))
|
||||
return int(T_INLINE_HTML)
|
||||
|
||||
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.addFreeFloating(freefloating.TokenType, l.Token()[:5]);l.Begin(PHP);l.ungetChars(len(l.Token())-5)
|
||||
<INITIAL>\<\? l.addFreeFloating(freefloating.TokenType, l.Token());l.Begin(PHP);
|
||||
<INITIAL>\<\?= l.Begin(PHP);lval.Token(l.createToken(l.Token())); return int(T_ECHO);
|
||||
|
||||
|
||||
<PHP>[ \t\n\r]+ l.addFreeFloating(freefloating.WhiteSpaceType, l.Token())
|
||||
<PHP>[;][ \t\n\r]*\?\>{NEW_LINE}? l.Begin(INITIAL);lval.Token(l.createToken(l.Token())); return Rune2Class(';');
|
||||
<PHP>\?\>{NEW_LINE}? l.Begin(INITIAL);lval.Token(l.createToken(l.Token())); return Rune2Class(';');
|
||||
|
||||
<PHP>{DNUM}|{EXPONENT_DNUM} lval.Token(l.createToken(l.Token())); return int(T_DNUMBER)
|
||||
<PHP>{BNUM}
|
||||
tb := l.Token()
|
||||
i:=2
|
||||
BNUMFOR:for {
|
||||
if i > len(tb)-1 {
|
||||
break BNUMFOR;
|
||||
}
|
||||
switch tb[i].Rune {
|
||||
case '0': i++;
|
||||
default: break BNUMFOR;
|
||||
}
|
||||
}
|
||||
if len(tb) - i < 64 {
|
||||
lval.Token(l.createToken(l.Token())); return int(T_LNUMBER)
|
||||
} else {
|
||||
lval.Token(l.createToken(l.Token())); return int(T_DNUMBER)
|
||||
}
|
||||
<PHP>{LNUM}
|
||||
if len(l.Token()) < 20 {
|
||||
lval.Token(l.createToken(l.Token())); return int(T_LNUMBER)
|
||||
} else {
|
||||
lval.Token(l.createToken(l.Token())); return int(T_DNUMBER)
|
||||
}
|
||||
<PHP>{HNUM}
|
||||
tb := l.Token()
|
||||
i:=2
|
||||
HNUMFOR:for {
|
||||
if i > len(tb)-1 {
|
||||
break HNUMFOR;
|
||||
}
|
||||
switch tb[i].Rune {
|
||||
case '0': i++;
|
||||
default: break HNUMFOR;
|
||||
}
|
||||
}
|
||||
length := len(tb) - i
|
||||
if length < 16 || (length == 16 && tb[i].Rune <= '7') {
|
||||
lval.Token(l.createToken(l.Token())); return int(T_LNUMBER)
|
||||
} else {
|
||||
lval.Token(l.createToken(l.Token())); return int(T_DNUMBER)
|
||||
}
|
||||
|
||||
<PHP>abstract lval.Token(l.createToken(l.Token())); return int(T_ABSTRACT)
|
||||
<PHP>array lval.Token(l.createToken(l.Token())); return int(T_ARRAY)
|
||||
<PHP>as lval.Token(l.createToken(l.Token())); return int(T_AS)
|
||||
<PHP>break lval.Token(l.createToken(l.Token())); return int(T_BREAK)
|
||||
<PHP>callable lval.Token(l.createToken(l.Token())); return int(T_CALLABLE)
|
||||
<PHP>case lval.Token(l.createToken(l.Token())); return int(T_CASE)
|
||||
<PHP>catch lval.Token(l.createToken(l.Token())); return int(T_CATCH)
|
||||
<PHP>class lval.Token(l.createToken(l.Token())); return int(T_CLASS)
|
||||
<PHP>clone lval.Token(l.createToken(l.Token())); return int(T_CLONE)
|
||||
<PHP>const lval.Token(l.createToken(l.Token())); return int(T_CONST)
|
||||
<PHP>continue lval.Token(l.createToken(l.Token())); return int(T_CONTINUE)
|
||||
<PHP>declare lval.Token(l.createToken(l.Token())); return int(T_DECLARE)
|
||||
<PHP>default lval.Token(l.createToken(l.Token())); return int(T_DEFAULT)
|
||||
<PHP>do lval.Token(l.createToken(l.Token())); return int(T_DO)
|
||||
<PHP>echo lval.Token(l.createToken(l.Token())); return int(T_ECHO)
|
||||
<PHP>else lval.Token(l.createToken(l.Token())); return int(T_ELSE)
|
||||
<PHP>elseif lval.Token(l.createToken(l.Token())); return int(T_ELSEIF)
|
||||
<PHP>empty lval.Token(l.createToken(l.Token())); return int(T_EMPTY)
|
||||
<PHP>enddeclare lval.Token(l.createToken(l.Token())); return int(T_ENDDECLARE)
|
||||
<PHP>endfor lval.Token(l.createToken(l.Token())); return int(T_ENDFOR)
|
||||
<PHP>endforeach lval.Token(l.createToken(l.Token())); return int(T_ENDFOREACH)
|
||||
<PHP>endif lval.Token(l.createToken(l.Token())); return int(T_ENDIF)
|
||||
<PHP>endswitch lval.Token(l.createToken(l.Token())); return int(T_ENDSWITCH)
|
||||
<PHP>endwhile lval.Token(l.createToken(l.Token())); return int(T_ENDWHILE)
|
||||
<PHP>eval lval.Token(l.createToken(l.Token())); return int(T_EVAL)
|
||||
<PHP>exit|die lval.Token(l.createToken(l.Token())); return int(T_EXIT)
|
||||
<PHP>extends lval.Token(l.createToken(l.Token())); return int(T_EXTENDS)
|
||||
<PHP>final lval.Token(l.createToken(l.Token())); return int(T_FINAL)
|
||||
<PHP>finally lval.Token(l.createToken(l.Token())); return int(T_FINALLY)
|
||||
<PHP>for lval.Token(l.createToken(l.Token())); return int(T_FOR)
|
||||
<PHP>foreach lval.Token(l.createToken(l.Token())); return int(T_FOREACH)
|
||||
<PHP>function|cfunction lval.Token(l.createToken(l.Token())); return int(T_FUNCTION)
|
||||
<PHP>global lval.Token(l.createToken(l.Token())); return int(T_GLOBAL)
|
||||
<PHP>goto lval.Token(l.createToken(l.Token())); return int(T_GOTO)
|
||||
<PHP>if lval.Token(l.createToken(l.Token())); return int(T_IF)
|
||||
<PHP>isset lval.Token(l.createToken(l.Token())); return int(T_ISSET)
|
||||
<PHP>implements lval.Token(l.createToken(l.Token())); return int(T_IMPLEMENTS)
|
||||
<PHP>instanceof lval.Token(l.createToken(l.Token())); return int(T_INSTANCEOF)
|
||||
<PHP>insteadof lval.Token(l.createToken(l.Token())); return int(T_INSTEADOF)
|
||||
<PHP>interface lval.Token(l.createToken(l.Token())); return int(T_INTERFACE)
|
||||
<PHP>list lval.Token(l.createToken(l.Token())); return int(T_LIST)
|
||||
<PHP>namespace lval.Token(l.createToken(l.Token())); return int(T_NAMESPACE)
|
||||
<PHP>private lval.Token(l.createToken(l.Token())); return int(T_PRIVATE)
|
||||
<PHP>public lval.Token(l.createToken(l.Token())); return int(T_PUBLIC)
|
||||
<PHP>print lval.Token(l.createToken(l.Token())); return int(T_PRINT)
|
||||
<PHP>protected lval.Token(l.createToken(l.Token())); return int(T_PROTECTED)
|
||||
<PHP>return lval.Token(l.createToken(l.Token())); return int(T_RETURN)
|
||||
<PHP>static lval.Token(l.createToken(l.Token())); return int(T_STATIC)
|
||||
<PHP>switch lval.Token(l.createToken(l.Token())); return int(T_SWITCH)
|
||||
<PHP>throw lval.Token(l.createToken(l.Token())); return int(T_THROW)
|
||||
<PHP>trait lval.Token(l.createToken(l.Token())); return int(T_TRAIT)
|
||||
<PHP>try lval.Token(l.createToken(l.Token())); return int(T_TRY)
|
||||
<PHP>unset lval.Token(l.createToken(l.Token())); return int(T_UNSET)
|
||||
<PHP>use lval.Token(l.createToken(l.Token())); return int(T_USE)
|
||||
<PHP>var lval.Token(l.createToken(l.Token())); return int(T_VAR)
|
||||
<PHP>while lval.Token(l.createToken(l.Token())); return int(T_WHILE)
|
||||
<PHP>yield[ \t\n\r]+from lval.Token(l.createToken(l.Token())); return int(T_YIELD_FROM)
|
||||
<PHP>yield lval.Token(l.createToken(l.Token())); return int(T_YIELD)
|
||||
<PHP>include lval.Token(l.createToken(l.Token())); return int(T_INCLUDE)
|
||||
<PHP>include_once lval.Token(l.createToken(l.Token())); return int(T_INCLUDE_ONCE)
|
||||
<PHP>require lval.Token(l.createToken(l.Token())); return int(T_REQUIRE)
|
||||
<PHP>require_once lval.Token(l.createToken(l.Token())); return int(T_REQUIRE_ONCE)
|
||||
<PHP>__CLASS__ lval.Token(l.createToken(l.Token())); return int(T_CLASS_C)
|
||||
<PHP>__DIR__ lval.Token(l.createToken(l.Token())); return int(T_DIR)
|
||||
<PHP>__FILE__ lval.Token(l.createToken(l.Token())); return int(T_FILE)
|
||||
<PHP>__FUNCTION__ lval.Token(l.createToken(l.Token())); return int(T_FUNC_C)
|
||||
<PHP>__LINE__ lval.Token(l.createToken(l.Token())); return int(T_LINE)
|
||||
<PHP>__NAMESPACE__ lval.Token(l.createToken(l.Token())); return int(T_NS_C)
|
||||
<PHP>__METHOD__ lval.Token(l.createToken(l.Token())); return int(T_METHOD_C)
|
||||
<PHP>__TRAIT__ lval.Token(l.createToken(l.Token())); return int(T_TRAIT_C)
|
||||
<PHP>__halt_compiler lval.Token(l.createToken(l.Token())); return int(T_HALT_COMPILER)
|
||||
<PHP>\([ \t]*array[ \t]*\) lval.Token(l.createToken(l.Token())); return int(T_ARRAY_CAST)
|
||||
<PHP>\([ \t]*(bool|boolean)[ \t]*\) lval.Token(l.createToken(l.Token())); return int(T_BOOL_CAST)
|
||||
<PHP>\([ \t]*(real|double|float)[ \t]*\) lval.Token(l.createToken(l.Token())); return int(T_DOUBLE_CAST)
|
||||
<PHP>\([ \t]*(int|integer)[ \t]*\) lval.Token(l.createToken(l.Token())); return int(T_INT_CAST)
|
||||
<PHP>\([ \t]*object[ \t]*\) lval.Token(l.createToken(l.Token())); return int(T_OBJECT_CAST)
|
||||
<PHP>\([ \t]*(string|binary)[ \t]*\) lval.Token(l.createToken(l.Token())); return int(T_STRING_CAST)
|
||||
<PHP>\([ \t]*unset[ \t]*\) lval.Token(l.createToken(l.Token())); return int(T_UNSET_CAST)
|
||||
<PHP>new lval.Token(l.createToken(l.Token())); return int(T_NEW)
|
||||
<PHP>and lval.Token(l.createToken(l.Token())); return int(T_LOGICAL_AND)
|
||||
<PHP>or lval.Token(l.createToken(l.Token())); return int(T_LOGICAL_OR)
|
||||
<PHP>xor lval.Token(l.createToken(l.Token())); return int(T_LOGICAL_XOR)
|
||||
<PHP>\\ lval.Token(l.createToken(l.Token())); return int(T_NS_SEPARATOR)
|
||||
<PHP>\.\.\. lval.Token(l.createToken(l.Token())); return int(T_ELLIPSIS)
|
||||
<PHP>:: lval.Token(l.createToken(l.Token())); return int(T_PAAMAYIM_NEKUDOTAYIM) // T_DOUBLE_COLON
|
||||
<PHP>&& lval.Token(l.createToken(l.Token())); return int(T_BOOLEAN_AND)
|
||||
<PHP>\|\| lval.Token(l.createToken(l.Token())); return int(T_BOOLEAN_OR)
|
||||
<PHP>&= lval.Token(l.createToken(l.Token())); return int(T_AND_EQUAL)
|
||||
<PHP>\|= lval.Token(l.createToken(l.Token())); return int(T_OR_EQUAL)
|
||||
<PHP>\.= lval.Token(l.createToken(l.Token())); return int(T_CONCAT_EQUAL)
|
||||
<PHP>\*= lval.Token(l.createToken(l.Token())); return int(T_MUL_EQUAL)
|
||||
<PHP>\*\*= lval.Token(l.createToken(l.Token())); return int(T_POW_EQUAL)
|
||||
<PHP>[/]= lval.Token(l.createToken(l.Token())); return int(T_DIV_EQUAL)
|
||||
<PHP>\+= lval.Token(l.createToken(l.Token())); return int(T_PLUS_EQUAL)
|
||||
<PHP>-= lval.Token(l.createToken(l.Token())); return int(T_MINUS_EQUAL)
|
||||
<PHP>\^= lval.Token(l.createToken(l.Token())); return int(T_XOR_EQUAL)
|
||||
<PHP>%= lval.Token(l.createToken(l.Token())); return int(T_MOD_EQUAL)
|
||||
<PHP>-- lval.Token(l.createToken(l.Token())); return int(T_DEC)
|
||||
<PHP>\+\+ lval.Token(l.createToken(l.Token())); return int(T_INC)
|
||||
<PHP>=> lval.Token(l.createToken(l.Token())); return int(T_DOUBLE_ARROW)
|
||||
<PHP>\<=\> lval.Token(l.createToken(l.Token())); return int(T_SPACESHIP)
|
||||
<PHP>\!=|\<\> lval.Token(l.createToken(l.Token())); return int(T_IS_NOT_EQUAL)
|
||||
<PHP>\!== lval.Token(l.createToken(l.Token())); return int(T_IS_NOT_IDENTICAL)
|
||||
<PHP>== lval.Token(l.createToken(l.Token())); return int(T_IS_EQUAL)
|
||||
<PHP>=== lval.Token(l.createToken(l.Token())); return int(T_IS_IDENTICAL)
|
||||
<PHP>\<\<= lval.Token(l.createToken(l.Token())); return int(T_SL_EQUAL)
|
||||
<PHP>\>\>= lval.Token(l.createToken(l.Token())); return int(T_SR_EQUAL)
|
||||
<PHP>\>= lval.Token(l.createToken(l.Token())); return int(T_IS_GREATER_OR_EQUAL)
|
||||
<PHP>\<= lval.Token(l.createToken(l.Token())); return int(T_IS_SMALLER_OR_EQUAL)
|
||||
<PHP>\*\* lval.Token(l.createToken(l.Token())); return int(T_POW)
|
||||
<PHP>\<\< lval.Token(l.createToken(l.Token())); return int(T_SL)
|
||||
<PHP>\>\> lval.Token(l.createToken(l.Token())); return int(T_SR)
|
||||
<PHP>\?\? lval.Token(l.createToken(l.Token())); return int(T_COALESCE)
|
||||
<PHP>(#|[/][/])
|
||||
tb := l.Token()
|
||||
|
||||
for {
|
||||
if c == -1 {
|
||||
break
|
||||
}
|
||||
|
||||
tb = append(tb, l.Last)
|
||||
|
||||
switch c {
|
||||
case '\r':
|
||||
c = l.Next()
|
||||
if c == '\n' {
|
||||
continue
|
||||
}
|
||||
|
||||
case '\n':
|
||||
c = l.Next()
|
||||
|
||||
case '?':
|
||||
c = l.Next()
|
||||
if c == '>' {
|
||||
l.ungetChars(1)
|
||||
tb = tb[:len(tb)-1]
|
||||
break
|
||||
}
|
||||
continue
|
||||
|
||||
default:
|
||||
c = l.Next()
|
||||
continue
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
l.addFreeFloating(freefloating.CommentType, tb)
|
||||
|
||||
<PHP>[/][*][*][/]
|
||||
l.addFreeFloating(freefloating.CommentType, l.Token())
|
||||
<PHP>([/][*])|([/][*][*])
|
||||
tb := l.Token()
|
||||
is_doc_comment := false
|
||||
if len(tb) > 2 {
|
||||
is_doc_comment = true
|
||||
l.PhpDocComment = ""
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
for {
|
||||
if c == -1 {
|
||||
break; // TODO: Unterminated comment starting line %d
|
||||
}
|
||||
|
||||
if l.Prev.Rune == '*' && l.Last.Rune == '/' {
|
||||
c = l.Next()
|
||||
break;
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
if is_doc_comment {
|
||||
l.PhpDocComment = string(l.TokenBytes(nil))
|
||||
l.addFreeFloating(freefloating.CommentType, l.Token())
|
||||
} else {
|
||||
l.addFreeFloating(freefloating.CommentType, l.Token())
|
||||
}
|
||||
|
||||
<PHP>{OPERATORS} lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
|
||||
<PHP>\{ l.pushState(PHP); lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<PHP>\} l.popState(); lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])); l.PhpDocComment = ""
|
||||
<PHP>\${VAR_NAME} lval.Token(l.createToken(l.Token())); return int(T_VARIABLE)
|
||||
<PHP>{VAR_NAME} lval.Token(l.createToken(l.Token())); return int(T_STRING)
|
||||
|
||||
<PHP>-> l.Begin(PROPERTY);lval.Token(l.createToken(l.Token())); return int(T_OBJECT_OPERATOR);
|
||||
<PROPERTY>[ \t\n\r]+ l.addFreeFloating(freefloating.WhiteSpaceType, l.Token())
|
||||
<PROPERTY>-> lval.Token(l.createToken(l.Token())); return int(T_OBJECT_OPERATOR);
|
||||
<PROPERTY>{VAR_NAME} l.Begin(PHP);lval.Token(l.createToken(l.Token())); return int(T_STRING);
|
||||
<PROPERTY>. l.ungetChars(1);l.Begin(PHP)
|
||||
|
||||
<PHP>[\']([^\\\']*(\\(.|\n))*)*[\'] lval.Token(l.createToken(l.Token())); return int(T_CONSTANT_ENCAPSED_STRING);
|
||||
|
||||
<PHP>` l.Begin(BACKQUOTE); lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<BACKQUOTE>` l.Begin(PHP); lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
|
||||
<PHP>[b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE}
|
||||
tb := l.Token()
|
||||
binPrefix := 0
|
||||
if tb[0].Rune == 'b' {
|
||||
binPrefix = 1
|
||||
}
|
||||
|
||||
lblFirst := 3 + binPrefix
|
||||
lblLast := len(tb)-2
|
||||
if tb[lblLast].Rune == '\r' {
|
||||
lblLast--
|
||||
}
|
||||
|
||||
for {
|
||||
if tb[lblFirst].Rune == ' ' || tb[lblFirst].Rune == '\t' {
|
||||
lblFirst++
|
||||
continue
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
heredocToken := make([]lex.Char, lblLast - lblFirst + 1)
|
||||
copy(heredocToken, tb[lblFirst:lblLast+1])
|
||||
|
||||
switch tb[lblFirst].Rune {
|
||||
case '\'' :
|
||||
lblFirst++
|
||||
lblLast--
|
||||
l.Begin(NOWDOC)
|
||||
case '"' :
|
||||
lblFirst++
|
||||
lblLast--
|
||||
l.Begin(HEREDOC)
|
||||
default:
|
||||
l.Begin(HEREDOC)
|
||||
}
|
||||
|
||||
l.heredocLabel = l.tokenString(tb[lblFirst:lblLast+1])
|
||||
|
||||
ungetCnt := len(l.heredocLabel)
|
||||
searchLabelAhead := []byte{}
|
||||
for i := 0; i < len(l.heredocLabel); i++ {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
searchLabelAhead = append(searchLabelAhead, byte(rune(c)))
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
if l.heredocLabel == string(searchLabelAhead) && ';' == rune(c) {
|
||||
ungetCnt++
|
||||
c = l.Next()
|
||||
if '\n' == rune(c) || '\r' == rune(c) {
|
||||
l.Begin(HEREDOC_END)
|
||||
}
|
||||
}
|
||||
|
||||
l.ungetChars(ungetCnt)
|
||||
|
||||
lval.Token(l.createToken(heredocToken));
|
||||
return int(T_START_HEREDOC)
|
||||
|
||||
<NOWDOC>.|[ \t\n\r]
|
||||
searchLabel := []byte{}
|
||||
tb := []lex.Char{}
|
||||
|
||||
for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
if '\n' == rune(c) || '\r' == rune(c) {
|
||||
if l.heredocLabel + ";" == string(searchLabel) {
|
||||
l.Begin(HEREDOC_END)
|
||||
tb = l.ungetChars(len(l.heredocLabel)+1)
|
||||
tb = tb[:len(tb)-1]
|
||||
break;
|
||||
}
|
||||
|
||||
if l.heredocLabel == string(searchLabel) {
|
||||
l.Begin(HEREDOC_END)
|
||||
tb = l.ungetChars(len(l.heredocLabel))
|
||||
tb = tb[:len(tb)-1]
|
||||
break;
|
||||
}
|
||||
|
||||
searchLabel = []byte{}
|
||||
} else {
|
||||
searchLabel = append(searchLabel, byte(rune(c)))
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
lval.Token(l.createToken(tb) )
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
|
||||
<HEREDOC_END>{VAR_NAME}\; l.Begin(PHP);lval.Token(l.createToken(l.ungetChars(1))); return int(T_END_HEREDOC)
|
||||
<HEREDOC_END>{VAR_NAME} l.Begin(PHP);lval.Token(l.createToken(l.Token())); return int(T_END_HEREDOC)
|
||||
|
||||
<PHP>[b]?[\"]
|
||||
binPrefix := l.Token()[0].Rune == 'b'
|
||||
|
||||
beginString := func() int {
|
||||
cnt := 1; if (binPrefix) {cnt = 2}
|
||||
|
||||
l.ungetChars(len(l.Token())-cnt)
|
||||
chars := l.Token()[:cnt]
|
||||
l.pushState(STRING)
|
||||
|
||||
lval.Token(l.createToken(chars)); return Rune2Class('"')
|
||||
}
|
||||
|
||||
F:for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '"' :
|
||||
c = l.Next();
|
||||
lval.Token(l.createToken(l.Token())); return int(T_CONSTANT_ENCAPSED_STRING)
|
||||
break F;
|
||||
|
||||
case '$':
|
||||
c = l.Next();
|
||||
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
|
||||
return beginString()
|
||||
break F;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '{':
|
||||
c = l.Next();
|
||||
if rune(c) == '$' {
|
||||
return beginString()
|
||||
break F;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '\\':
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
<STRING>\" l.popState(); lval.Token(l.createToken(l.Token())); return Rune2Class(l.Token()[0].Rune)
|
||||
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.Token(l.createToken(l.ungetChars(1))); l.pushState(PHP); return int(T_CURLY_OPEN)
|
||||
<STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME); lval.Token(l.createToken(l.Token())); return int(T_DOLLAR_OPEN_CURLY_BRACES)
|
||||
<STRING,HEREDOC,BACKQUOTE>\${VAR_NAME} l.ungetChars(len(l.Token()));l.pushState(STRING_VAR)
|
||||
<STRING>.|[ \t\n\r]
|
||||
currentChar := l.Prev
|
||||
tb := []lex.Char{currentChar}
|
||||
for {
|
||||
switch currentChar.Rune {
|
||||
case '$':
|
||||
if c == '{' || isValidFirstVarNameRune(rune(c)) {
|
||||
l.ungetChars(1)
|
||||
lval.Token(l.createToken(tb[:len(tb)-1]));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
case '{':
|
||||
if rune(c) == '$' {
|
||||
l.ungetChars(1)
|
||||
lval.Token(l.createToken(tb[:len(tb)-1]));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
case '\\':
|
||||
currentChar := l.Last
|
||||
tb = append(tb, currentChar)
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
if rune(c) == '"' {
|
||||
lval.Token(l.createToken(l.Token()));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
currentChar = l.Last
|
||||
tb = append(tb, currentChar)
|
||||
c = l.Next()
|
||||
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
<BACKQUOTE>.|[ \t\n\r]
|
||||
currentChar := l.Prev
|
||||
tb := []lex.Char{currentChar}
|
||||
|
||||
for {
|
||||
switch currentChar.Rune {
|
||||
case '$':
|
||||
if c == '{' || isValidFirstVarNameRune(rune(c)) {
|
||||
l.ungetChars(1)
|
||||
lval.Token(l.createToken(tb[:len(tb)-1]));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
case '{':
|
||||
if rune(c) == '$' {
|
||||
l.ungetChars(1)
|
||||
lval.Token(l.createToken(tb[:len(tb)-1]));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
case '\\':
|
||||
currentChar := l.Last
|
||||
tb = append(tb, currentChar)
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
if rune(c) == '`' {
|
||||
lval.Token(l.createToken(l.Token()));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
currentChar = l.Last
|
||||
tb = append(tb, currentChar)
|
||||
c = l.Next()
|
||||
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
<HEREDOC>.|[ \t\n\r]
|
||||
searchLabel := []byte{}
|
||||
currentChar := l.Prev
|
||||
tb := []lex.Char{currentChar}
|
||||
|
||||
HEREDOC_FOR:for {
|
||||
nls := 0
|
||||
|
||||
switch currentChar.Rune {
|
||||
case '\r':
|
||||
|
||||
if c == '\n' {
|
||||
nls = 1
|
||||
currentChar := l.Last
|
||||
tb = append(tb, currentChar)
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
fallthrough
|
||||
|
||||
case '\n':
|
||||
if l.heredocLabel + ";" == string(searchLabel) {
|
||||
l.Begin(HEREDOC_END)
|
||||
l.ungetChars(len(l.heredocLabel)+1+nls)
|
||||
|
||||
i := len(tb) - len(l.heredocLabel) - 3 - nls
|
||||
if i < 1 {
|
||||
break HEREDOC_FOR;
|
||||
}
|
||||
tb = tb[:i]
|
||||
|
||||
lval.Token(l.createToken(tb));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
if l.heredocLabel == string(searchLabel) {
|
||||
l.Begin(HEREDOC_END)
|
||||
l.ungetChars(len(l.heredocLabel)+nls)
|
||||
|
||||
i := len(tb) - len(l.heredocLabel) - 2 - nls
|
||||
if i < 1 {
|
||||
break HEREDOC_FOR;
|
||||
}
|
||||
tb = tb[:i]
|
||||
|
||||
lval.Token(l.createToken(tb));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
searchLabel = []byte{}
|
||||
|
||||
case '$':
|
||||
if c == '{' || isValidFirstVarNameRune(rune(c)) {
|
||||
l.ungetChars(1)
|
||||
lval.Token(l.createToken(tb[:len(tb)-1]));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
case '{':
|
||||
if rune(c) == '$' {
|
||||
l.ungetChars(1)
|
||||
lval.Token(l.createToken(tb[:len(tb)-1]));
|
||||
return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
}
|
||||
|
||||
case '\\':
|
||||
if c != '\n' && c != '\r' {
|
||||
currentChar := l.Last
|
||||
tb = append(tb, currentChar)
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
default:
|
||||
searchLabel = append(searchLabel, byte(rune(currentChar.Rune)))
|
||||
}
|
||||
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
currentChar = l.Last
|
||||
tb = append(tb, currentChar)
|
||||
c = l.Next()
|
||||
|
||||
}
|
||||
|
||||
<STRING_VAR>\${VAR_NAME} lval.Token(l.createToken(l.Token())); return int(T_VARIABLE)
|
||||
<STRING_VAR>->{VAR_NAME} lval.Token(l.createToken(l.ungetChars(len(l.Token())-2))); return int(T_OBJECT_OPERATOR)
|
||||
<STRING_VAR>{VAR_NAME} l.popState();lval.Token(l.createToken(l.Token())); return int(T_STRING)
|
||||
<STRING_VAR>\[ l.pushState(STRING_VAR_INDEX);lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR>.|[ \t\n\r] l.ungetChars(1);l.popState()
|
||||
|
||||
<STRING_VAR_INDEX>{LNUM}|{HNUM}|{BNUM} lval.Token(l.createToken(l.Token())); return int(T_NUM_STRING)
|
||||
<STRING_VAR_INDEX>\${VAR_NAME} lval.Token(l.createToken(l.Token())); return int(T_VARIABLE)
|
||||
<STRING_VAR_INDEX>{VAR_NAME} lval.Token(l.createToken(l.Token())); return int(T_STRING)
|
||||
<STRING_VAR_INDEX>\] l.popState(); l.popState();lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.Token(l.createToken(l.Token())); return int(T_ENCAPSED_AND_WHITESPACE)
|
||||
<STRING_VAR_INDEX>{OPERATORS} lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR_INDEX>{ANY_CHAR} l.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", l.TokenBytes(nil)[0], l.TokenBytes(nil)[0]));l.Abort();
|
||||
|
||||
<STRING_VAR_NAME>{VAR_NAME}[\[\}] l.popState();l.pushState(PHP);lval.Token(l.createToken(l.ungetChars(1))); return int(T_STRING_VARNAME)
|
||||
<STRING_VAR_NAME>. l.ungetChars(1);l.popState();l.pushState(PHP)
|
||||
|
||||
<HALT_COMPILER>.|[ \t\n\r] l.addFreeFloating(freefloating.TokenType, l.Token())
|
||||
|
||||
<PHP>{ANY_CHAR} l.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", l.TokenBytes(nil)[0], l.TokenBytes(nil)[0]));l.Abort();
|
||||
|
||||
%%
|
||||
if _, ok := l.Abort(); ok {
|
||||
// always return same $end token
|
||||
if l.lastToken == nil {
|
||||
l.lastToken = l.createToken(l.Token())
|
||||
}
|
||||
lval.Token(l.lastToken);
|
||||
return -1
|
||||
}
|
||||
goto yyAction
|
||||
}
|
||||
476
scanner/scanner.rl
Normal file
476
scanner/scanner.rl
Normal file
@@ -0,0 +1,476 @@
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/z7zmey/php-parser/freefloating"
|
||||
)
|
||||
|
||||
%%{
|
||||
machine lexer;
|
||||
write data;
|
||||
access lex.;
|
||||
variable p lex.p;
|
||||
variable pe lex.pe;
|
||||
}%%
|
||||
|
||||
func NewLexer(data []byte) *Lexer {
|
||||
lex := &Lexer{
|
||||
data: data,
|
||||
pe: len(data),
|
||||
stack: make([]int, 0),
|
||||
|
||||
TokenPool: &TokenPool{},
|
||||
NewLines: NewLines{make([]int, 0, 128)},
|
||||
}
|
||||
%% write init;
|
||||
return lex
|
||||
}
|
||||
|
||||
func (lex *Lexer) Lex(lval Lval) int {
|
||||
lex.FreeFloating = nil
|
||||
eof := lex.pe
|
||||
var tok TokenID
|
||||
|
||||
lblStart := 0
|
||||
lblEnd := 0
|
||||
|
||||
_, _ = lblStart, lblEnd
|
||||
|
||||
%%{
|
||||
action heredoc_lbl_start {lblStart = lex.p}
|
||||
action heredoc_lbl_end {lblEnd = lex.p}
|
||||
|
||||
action is_not_heredoc_end { lex.isNotHeredocEnd(lex.p) }
|
||||
action is_not_comment_end { lex.isNotPhpCloseToken() && lex.isNotNewLine() }
|
||||
action is_not_heredoc_end_or_var { lex.isNotHeredocEnd(lex.p) && lex.isNotStringVar() }
|
||||
action is_not_string_end_or_var { lex.isNotStringEnd('"') && lex.isNotStringVar() }
|
||||
action is_not_backqoute_end_or_var { lex.isNotStringEnd('`') && lex.isNotStringVar() }
|
||||
|
||||
newline = ('\r\n' >(nl, 1) | '\r' >(nl, 0) | '\n' >(nl, 0)) %{lex.NewLines.Append(lex.p);};
|
||||
any_line = any | newline;
|
||||
whitespace = [\t\v\f ];
|
||||
whitespace_line = [\t\v\f ] | newline;
|
||||
|
||||
lnum = [0-9]+;
|
||||
dnum = ( [0-9]* "." [0-9]+ ) | ( [0-9]+ "." [0-9]* );
|
||||
hnum = '0x' [0-9a-fA-F]+;
|
||||
bnum = '0b' [01]+;
|
||||
|
||||
exponent_dnum = (lnum | dnum) ('e'|'E') ('+'|'-')? lnum;
|
||||
varname = /[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/;
|
||||
varname_first = /[a-zA-Z_\x7f-\xff]/;
|
||||
heredoc_label = varname >heredoc_lbl_start %heredoc_lbl_end;
|
||||
operators = ';'|':'|','|'.'|'['|']'|'('|')'|'|'|'/'|'^'|'&'|'+'|'-'|'*'|'='|'%'|'!'|'~'|'$'|'<'|'>'|'?'|'@';
|
||||
|
||||
prepush { lex.growCallStack(); }
|
||||
|
||||
constant_string =
|
||||
start: (
|
||||
"'" -> qoute
|
||||
| "b"i? '"' -> double_qoute
|
||||
),
|
||||
qoute: (
|
||||
(any - [\\'\r\n]) -> qoute
|
||||
| "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> qoute
|
||||
| "\n" @{lex.NewLines.Append(lex.p)} -> qoute
|
||||
| "\\" -> qoute_any
|
||||
| "'" -> final
|
||||
),
|
||||
qoute_any: (
|
||||
any_line -> qoute
|
||||
),
|
||||
double_qoute: (
|
||||
(any - [\\"${\r\n]) -> double_qoute
|
||||
| "\r" @{if lex.p+1 != eof && lex.data[lex.p+1] != '\n' {lex.NewLines.Append(lex.p)}} -> double_qoute
|
||||
| "\n" @{lex.NewLines.Append(lex.p)} -> double_qoute
|
||||
| "\\" -> double_qoute_any
|
||||
| '"' -> final
|
||||
| '$' -> double_qoute_nonvarname
|
||||
| '{' -> double_qoute_nondollar
|
||||
),
|
||||
double_qoute_any: (
|
||||
any_line -> double_qoute
|
||||
),
|
||||
double_qoute_nondollar: (
|
||||
'"' -> final
|
||||
| "\\" -> double_qoute_any
|
||||
| [^$\\"] -> double_qoute
|
||||
),
|
||||
double_qoute_nonvarname: (
|
||||
'"' -> final
|
||||
| "\\" -> double_qoute_any
|
||||
| /[^"\\{a-zA-Z_\x7f-\xff]/ -> double_qoute
|
||||
);
|
||||
|
||||
main := |*
|
||||
any_line+ -- '<?' => {
|
||||
lex.ungetStr("<")
|
||||
lex.createToken(lval)
|
||||
tok = T_INLINE_HTML;
|
||||
fbreak;
|
||||
};
|
||||
'<?' => {
|
||||
lex.addFreeFloating(freefloating.TokenType, lex.ts, lex.te)
|
||||
fnext php;
|
||||
};
|
||||
'<?php'i ( [ \t] | newline ) => {
|
||||
lex.ungetCnt(lex.te - lex.ts - 5)
|
||||
lex.addFreeFloating(freefloating.TokenType, lex.ts, lex.ts+5)
|
||||
fnext php;
|
||||
};
|
||||
'<?='i => {
|
||||
lex.createToken(lval);
|
||||
tok = T_ECHO;
|
||||
fnext php;
|
||||
fbreak;
|
||||
};
|
||||
*|;
|
||||
|
||||
php := |*
|
||||
whitespace_line* => {lex.addFreeFloating(freefloating.WhiteSpaceType, lex.ts, lex.te)};
|
||||
'?>' newline? => {lex.createToken(lval); tok = TokenID(int(';')); fnext main; fbreak;};
|
||||
';' whitespace_line* '?>' newline? => {lex.createToken(lval); tok = TokenID(int(';')); fnext main; fbreak;};
|
||||
|
||||
(dnum | exponent_dnum) => {lex.createToken(lval); tok = T_DNUMBER; fbreak;};
|
||||
# Binary integer literal ("0b" followed by binary digits).
# Emits T_LNUMBER when the value fits in a signed 64-bit integer (fewer than
# 64 significant bits), otherwise T_DNUMBER.
bnum => {
    // firstNum is the offset (relative to lex.ts) of the first significant
    // digit: skip the two-byte "0b" prefix and then LEADING zeros only.
    // Zeros that follow a '1' are significant and must not be skipped,
    // otherwise large values would be misclassified as T_LNUMBER.
    firstNum := 2
    for i := lex.ts + 2; i < lex.te && lex.data[i] == '0'; i++ {
        firstNum++
    }

    // Number of significant bits is the token length minus firstNum.
    if lex.te - lex.ts - firstNum < 64 {
        lex.createToken(lval); tok = T_LNUMBER; fbreak;
    }
    lex.createToken(lval); tok = T_DNUMBER; fbreak;
};
|
||||
# Integer literal made of decimal digits.
# Emits T_LNUMBER when the literal is short enough to fit in a signed 64-bit
# integer, otherwise T_DNUMBER.
lnum => {
    // Literals with fewer than 19 digits always fit. A 19-digit literal
    // fits only up to 9223372036854775807 (max int64); because both strings
    // have the same length and contain only digits, a byte-wise comparison
    // orders them numerically. 20 or more digits are reported as T_DNUMBER,
    // exactly as before.
    digits := lex.te - lex.ts
    if digits < 19 || (digits == 19 && string(lex.data[lex.ts:lex.te]) <= "9223372036854775807") {
        lex.createToken(lval); tok = T_LNUMBER; fbreak;
    }
    lex.createToken(lval); tok = T_DNUMBER; fbreak;
};
|
||||
# Hexadecimal integer literal ("0x" followed by hex digits).
# Emits T_LNUMBER when the value fits in a signed 64-bit integer, otherwise
# T_DNUMBER.
hnum => {
    // firstNum is the absolute offset of the first significant hex digit:
    // skip the two-byte "0x" prefix and then LEADING zeros only. Stopping at
    // the first non-zero digit keeps embedded zeros significant and makes
    // lex.data[firstNum] below really be the most significant digit.
    firstNum := lex.ts + 2
    for firstNum < lex.te && lex.data[firstNum] == '0' {
        firstNum++
    }

    // Up to 15 significant digits always fit; with exactly 16 the top digit
    // must be at most '7' so that the sign bit stays clear.
    length := lex.te - firstNum
    if length < 16 || (length == 16 && lex.data[firstNum] <= '7') {
        lex.createToken(lval); tok = T_LNUMBER; fbreak;
    }
    lex.createToken(lval); tok = T_DNUMBER; fbreak;
};
|
||||
|
||||
'abstract'i => {lex.createToken(lval); tok = T_ABSTRACT; fbreak;};
|
||||
'array'i => {lex.createToken(lval); tok = T_ARRAY; fbreak;};
|
||||
'as'i => {lex.createToken(lval); tok = T_AS; fbreak;};
|
||||
'break'i => {lex.createToken(lval); tok = T_BREAK; fbreak;};
|
||||
'callable'i => {lex.createToken(lval); tok = T_CALLABLE; fbreak;};
|
||||
'case'i => {lex.createToken(lval); tok = T_CASE; fbreak;};
|
||||
'catch'i => {lex.createToken(lval); tok = T_CATCH; fbreak;};
|
||||
'class'i => {lex.createToken(lval); tok = T_CLASS; fbreak;};
|
||||
'clone'i => {lex.createToken(lval); tok = T_CLONE; fbreak;};
|
||||
'const'i => {lex.createToken(lval); tok = T_CONST; fbreak;};
|
||||
'continue'i => {lex.createToken(lval); tok = T_CONTINUE; fbreak;};
|
||||
'declare'i => {lex.createToken(lval); tok = T_DECLARE; fbreak;};
|
||||
'default'i => {lex.createToken(lval); tok = T_DEFAULT; fbreak;};
|
||||
'do'i => {lex.createToken(lval); tok = T_DO; fbreak;};
|
||||
'echo'i => {lex.createToken(lval); tok = T_ECHO; fbreak;};
|
||||
'else'i => {lex.createToken(lval); tok = T_ELSE; fbreak;};
|
||||
'elseif'i => {lex.createToken(lval); tok = T_ELSEIF; fbreak;};
|
||||
'empty'i => {lex.createToken(lval); tok = T_EMPTY; fbreak;};
|
||||
'enddeclare'i => {lex.createToken(lval); tok = T_ENDDECLARE; fbreak;};
|
||||
'endfor'i => {lex.createToken(lval); tok = T_ENDFOR; fbreak;};
|
||||
'endforeach'i => {lex.createToken(lval); tok = T_ENDFOREACH; fbreak;};
|
||||
'endif'i => {lex.createToken(lval); tok = T_ENDIF; fbreak;};
|
||||
'endswitch'i => {lex.createToken(lval); tok = T_ENDSWITCH; fbreak;};
|
||||
'endwhile'i => {lex.createToken(lval); tok = T_ENDWHILE; fbreak;};
|
||||
'eval'i => {lex.createToken(lval); tok = T_EVAL; fbreak;};
|
||||
'exit'i | 'die'i => {lex.createToken(lval); tok = T_EXIT; fbreak;};
|
||||
'extends'i => {lex.createToken(lval); tok = T_EXTENDS; fbreak;};
|
||||
'final'i => {lex.createToken(lval); tok = T_FINAL; fbreak;};
|
||||
'finally'i => {lex.createToken(lval); tok = T_FINALLY; fbreak;};
|
||||
'for'i => {lex.createToken(lval); tok = T_FOR; fbreak;};
|
||||
'foreach'i => {lex.createToken(lval); tok = T_FOREACH; fbreak;};
|
||||
'function'i | 'cfunction'i => {lex.createToken(lval); tok = T_FUNCTION; fbreak;};
|
||||
'global'i => {lex.createToken(lval); tok = T_GLOBAL; fbreak;};
|
||||
'goto'i => {lex.createToken(lval); tok = T_GOTO; fbreak;};
|
||||
'if'i => {lex.createToken(lval); tok = T_IF; fbreak;};
|
||||
'isset'i => {lex.createToken(lval); tok = T_ISSET; fbreak;};
|
||||
'implements'i => {lex.createToken(lval); tok = T_IMPLEMENTS; fbreak;};
|
||||
'instanceof'i => {lex.createToken(lval); tok = T_INSTANCEOF; fbreak;};
|
||||
'insteadof'i => {lex.createToken(lval); tok = T_INSTEADOF; fbreak;};
|
||||
'interface'i => {lex.createToken(lval); tok = T_INTERFACE; fbreak;};
|
||||
'list'i => {lex.createToken(lval); tok = T_LIST; fbreak;};
|
||||
'namespace'i => {lex.createToken(lval); tok = T_NAMESPACE; fbreak;};
|
||||
'private'i => {lex.createToken(lval); tok = T_PRIVATE; fbreak;};
|
||||
'public'i => {lex.createToken(lval); tok = T_PUBLIC; fbreak;};
|
||||
'print'i => {lex.createToken(lval); tok = T_PRINT; fbreak;};
|
||||
'protected'i => {lex.createToken(lval); tok = T_PROTECTED; fbreak;};
|
||||
'return'i => {lex.createToken(lval); tok = T_RETURN; fbreak;};
|
||||
'static'i => {lex.createToken(lval); tok = T_STATIC; fbreak;};
|
||||
'switch'i => {lex.createToken(lval); tok = T_SWITCH; fbreak;};
|
||||
'throw'i => {lex.createToken(lval); tok = T_THROW; fbreak;};
|
||||
'trait'i => {lex.createToken(lval); tok = T_TRAIT; fbreak;};
|
||||
'try'i => {lex.createToken(lval); tok = T_TRY; fbreak;};
|
||||
'unset'i => {lex.createToken(lval); tok = T_UNSET; fbreak;};
|
||||
'use'i => {lex.createToken(lval); tok = T_USE; fbreak;};
|
||||
'var'i => {lex.createToken(lval); tok = T_VAR; fbreak;};
|
||||
'while'i => {lex.createToken(lval); tok = T_WHILE; fbreak;};
|
||||
'yield'i whitespace_line* 'from'i => {lex.createToken(lval); tok = T_YIELD_FROM; fbreak;};
|
||||
'yield'i => {lex.createToken(lval); tok = T_YIELD; fbreak;};
|
||||
'include'i => {lex.createToken(lval); tok = T_INCLUDE; fbreak;};
|
||||
'include_once'i => {lex.createToken(lval); tok = T_INCLUDE_ONCE; fbreak;};
|
||||
'require'i => {lex.createToken(lval); tok = T_REQUIRE; fbreak;};
|
||||
'require_once'i => {lex.createToken(lval); tok = T_REQUIRE_ONCE; fbreak;};
|
||||
'__CLASS__'i => {lex.createToken(lval); tok = T_CLASS_C; fbreak;};
|
||||
'__DIR__'i => {lex.createToken(lval); tok = T_DIR; fbreak;};
|
||||
'__FILE__'i => {lex.createToken(lval); tok = T_FILE; fbreak;};
|
||||
'__FUNCTION__'i => {lex.createToken(lval); tok = T_FUNC_C; fbreak;};
|
||||
'__LINE__'i => {lex.createToken(lval); tok = T_LINE; fbreak;};
|
||||
'__NAMESPACE__'i => {lex.createToken(lval); tok = T_NS_C; fbreak;};
|
||||
'__METHOD__'i => {lex.createToken(lval); tok = T_METHOD_C; fbreak;};
|
||||
'__TRAIT__'i => {lex.createToken(lval); tok = T_TRAIT_C; fbreak;};
|
||||
'__halt_compiler'i => {lex.createToken(lval); tok = T_HALT_COMPILER; fnext halt_compiller_open_parenthesis; fbreak;};
|
||||
'new'i => {lex.createToken(lval); tok = T_NEW; fbreak;};
|
||||
'and'i => {lex.createToken(lval); tok = T_LOGICAL_AND; fbreak;};
|
||||
'or'i => {lex.createToken(lval); tok = T_LOGICAL_OR; fbreak;};
|
||||
'xor'i => {lex.createToken(lval); tok = T_LOGICAL_XOR; fbreak;};
|
||||
'\\' => {lex.createToken(lval); tok = T_NS_SEPARATOR; fbreak;};
|
||||
'...' => {lex.createToken(lval); tok = T_ELLIPSIS; fbreak;};
|
||||
'::' => {lex.createToken(lval); tok = T_PAAMAYIM_NEKUDOTAYIM; fbreak;};
|
||||
'&&' => {lex.createToken(lval); tok = T_BOOLEAN_AND; fbreak;};
|
||||
'||' => {lex.createToken(lval); tok = T_BOOLEAN_OR; fbreak;};
|
||||
'&=' => {lex.createToken(lval); tok = T_AND_EQUAL; fbreak;};
|
||||
'|=' => {lex.createToken(lval); tok = T_OR_EQUAL; fbreak;};
|
||||
'.=' => {lex.createToken(lval); tok = T_CONCAT_EQUAL; fbreak;};
|
||||
'*=' => {lex.createToken(lval); tok = T_MUL_EQUAL; fbreak;};
|
||||
'**=' => {lex.createToken(lval); tok = T_POW_EQUAL; fbreak;};
|
||||
'/=' => {lex.createToken(lval); tok = T_DIV_EQUAL; fbreak;};
|
||||
'+=' => {lex.createToken(lval); tok = T_PLUS_EQUAL; fbreak;};
|
||||
'-=' => {lex.createToken(lval); tok = T_MINUS_EQUAL; fbreak;};
|
||||
'^=' => {lex.createToken(lval); tok = T_XOR_EQUAL; fbreak;};
|
||||
'%=' => {lex.createToken(lval); tok = T_MOD_EQUAL; fbreak;};
|
||||
'--' => {lex.createToken(lval); tok = T_DEC; fbreak;};
|
||||
'++' => {lex.createToken(lval); tok = T_INC; fbreak;};
|
||||
'=>' => {lex.createToken(lval); tok = T_DOUBLE_ARROW; fbreak;};
|
||||
'<=>' => {lex.createToken(lval); tok = T_SPACESHIP; fbreak;};
|
||||
'!=' | '<>' => {lex.createToken(lval); tok = T_IS_NOT_EQUAL; fbreak;};
|
||||
'!==' => {lex.createToken(lval); tok = T_IS_NOT_IDENTICAL; fbreak;};
|
||||
'==' => {lex.createToken(lval); tok = T_IS_EQUAL; fbreak;};
|
||||
'===' => {lex.createToken(lval); tok = T_IS_IDENTICAL; fbreak;};
|
||||
'<<=' => {lex.createToken(lval); tok = T_SL_EQUAL; fbreak;};
|
||||
'>>=' => {lex.createToken(lval); tok = T_SR_EQUAL; fbreak;};
|
||||
'>=' => {lex.createToken(lval); tok = T_IS_GREATER_OR_EQUAL; fbreak;};
|
||||
'<=' => {lex.createToken(lval); tok = T_IS_SMALLER_OR_EQUAL; fbreak;};
|
||||
'**' => {lex.createToken(lval); tok = T_POW; fbreak;};
|
||||
'<<' => {lex.createToken(lval); tok = T_SL; fbreak;};
|
||||
'>>' => {lex.createToken(lval); tok = T_SR; fbreak;};
|
||||
'??' => {lex.createToken(lval); tok = T_COALESCE; fbreak;};
|
||||
|
||||
'(' whitespace* 'array'i whitespace* ')' => {lex.createToken(lval); tok = T_ARRAY_CAST; fbreak;};
|
||||
'(' whitespace* ('bool'i|'boolean'i) whitespace* ')' => {lex.createToken(lval); tok = T_BOOL_CAST; fbreak;};
|
||||
'(' whitespace* ('real'i|'double'i|'float'i) whitespace* ')' => {lex.createToken(lval); tok = T_DOUBLE_CAST; fbreak;};
|
||||
'(' whitespace* ('int'i|'integer'i) whitespace* ')' => {lex.createToken(lval); tok = T_INT_CAST; fbreak;};
|
||||
'(' whitespace* 'object'i whitespace* ')' => {lex.createToken(lval); tok = T_OBJECT_CAST; fbreak;};
|
||||
'(' whitespace* ('string'i|'binary'i) whitespace* ')' => {lex.createToken(lval); tok = T_STRING_CAST; fbreak;};
|
||||
'(' whitespace* 'unset'i whitespace* ')' => {lex.createToken(lval); tok = T_UNSET_CAST; fbreak;};
|
||||
|
||||
('#' | '//') any_line* when is_not_comment_end => {
|
||||
lex.ungetStr("?>")
|
||||
lex.addFreeFloating(freefloating.CommentType, lex.ts, lex.te)
|
||||
};
|
||||
'/*' any_line* :>> '*/' {
|
||||
isDocComment := false;
|
||||
if lex.te - lex.ts > 4 && string(lex.data[lex.ts:lex.ts+3]) == "/**" {
|
||||
isDocComment = true;
|
||||
}
|
||||
lex.addFreeFloating(freefloating.CommentType, lex.ts, lex.te)
|
||||
|
||||
if isDocComment {
|
||||
lex.PhpDocComment = string(lex.data[lex.ts:lex.te])
|
||||
}
|
||||
};
|
||||
|
||||
operators => {
|
||||
// rune, _ := utf8.DecodeRune(lex.data[lex.ts:lex.te]);
|
||||
// tok = TokenID(Rune2Class(rune));
|
||||
lex.createToken(lval);
|
||||
tok = TokenID(int(lex.data[lex.ts]));
|
||||
fbreak;
|
||||
};
|
||||
|
||||
"{" => { lex.createToken(lval); tok = TokenID(int('{')); lex.call(ftargs, fentry(php)); goto _out; };
|
||||
"}" => { lex.createToken(lval); tok = TokenID(int('}')); lex.ret(1); lex.PhpDocComment = ""; goto _out;};
|
||||
"$" varname => { lex.createToken(lval); tok = T_VARIABLE; fbreak; };
|
||||
varname => { lex.createToken(lval); tok = T_STRING; fbreak; };
|
||||
|
||||
"->" => { lex.createToken(lval); tok = T_OBJECT_OPERATOR; fnext property; fbreak; };
|
||||
|
||||
constant_string => {
|
||||
lex.createToken(lval);
|
||||
tok = T_CONSTANT_ENCAPSED_STRING;
|
||||
fbreak;
|
||||
};
|
||||
|
||||
"b"i? "<<<" [ \t]* ( heredoc_label | ("'" heredoc_label "'") | ('"' heredoc_label '"') ) newline => {
|
||||
lex.heredocLabel = lex.data[lblStart:lblEnd]
|
||||
lex.createToken(lval);
|
||||
tok = T_START_HEREDOC;
|
||||
|
||||
if lex.isHeredocEnd(lex.p+1) {
|
||||
fnext heredoc_end;
|
||||
} else if lex.data[lblStart-1] == '\'' {
|
||||
fnext nowdoc;
|
||||
} else {
|
||||
fnext heredoc;
|
||||
}
|
||||
fbreak;
|
||||
};
|
||||
"`" => {lex.createToken(lval); tok = TokenID(int('`')); fnext backqote; fbreak;};
|
||||
'"' => {lex.createToken(lval); tok = TokenID(int('"')); fnext template_string; fbreak;};
|
||||
|
||||
any_line => {
|
||||
c := lex.data[lex.p]
|
||||
lex.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", c, c));
|
||||
};
|
||||
*|;
|
||||
|
||||
property := |*
|
||||
whitespace_line* => {lex.addFreeFloating(freefloating.WhiteSpaceType, lex.ts, lex.te)};
|
||||
"->" => {lex.createToken(lval); tok = T_OBJECT_OPERATOR; fbreak;};
|
||||
varname => {lex.createToken(lval); tok = T_STRING; fnext php; fbreak;};
|
||||
any => {lex.ungetCnt(1); fgoto php;};
|
||||
*|;
|
||||
|
||||
nowdoc := |*
|
||||
any_line* when is_not_heredoc_end => {
|
||||
lex.createToken(lval);
|
||||
tok = T_ENCAPSED_AND_WHITESPACE;
|
||||
fnext heredoc_end;
|
||||
fbreak;
|
||||
};
|
||||
*|;
|
||||
|
||||
# Scanner for the body of a heredoc (interpolating) string.
heredoc := |*
    # "{$" opens a complex expression: give back the '$', emit T_CURLY_OPEN
    # for the '{' and call into the php scanner.
    "{$" => {lex.ungetCnt(1); lex.createToken(lval); tok = T_CURLY_OPEN; lex.call(ftargs, fentry(php)); goto _out;};

    # "${" opens a variable-name expression handled by string_var_name.
    "${" => {lex.createToken(lval); tok = T_DOLLAR_OPEN_CURLY_BRACES; lex.call(ftargs, fentry(string_var_name)); goto _out;};

    # A lone '$' is handed back and re-scanned by string_var.
    "$" => {lex.ungetCnt(1); fcall string_var;};

    # Literal text, consumed while the is_not_heredoc_end_or_var condition holds.
    any_line* when is_not_heredoc_end_or_var => {
        lex.createToken(lval);
        tok = T_ENCAPSED_AND_WHITESPACE;

        // Switch to heredoc_end unless the text stopped in front of an
        // interpolation ('$' or '{'). The length check prevents an
        // index-out-of-range panic when the text runs up to the end of the
        // input (e.g. an unterminated heredoc); in that case the scanner
        // stays in this state.
        if len(lex.data) > lex.p+1 && lex.data[lex.p+1] != '$' && lex.data[lex.p+1] != '{' {
            fnext heredoc_end;
        }
        fbreak;
    };
*|;
|
||||
|
||||
backqote := |*
|
||||
"{$" => {lex.ungetCnt(1); lex.createToken(lval); tok = T_CURLY_OPEN; lex.call(ftargs, fentry(php)); goto _out;};
|
||||
"${" => {lex.createToken(lval); tok = T_DOLLAR_OPEN_CURLY_BRACES; lex.call(ftargs, fentry(string_var_name)); goto _out;};
|
||||
"$" => {lex.ungetCnt(1); fcall string_var;};
|
||||
'`' => {lex.createToken(lval); tok = TokenID(int('`')); fnext php; fbreak;};
|
||||
any_line* when is_not_backqoute_end_or_var => {
|
||||
lex.createToken(lval);
|
||||
tok = T_ENCAPSED_AND_WHITESPACE;
|
||||
fbreak;
|
||||
};
|
||||
*|;
|
||||
|
||||
template_string := |*
|
||||
"{$" => {lex.ungetCnt(1); lex.createToken(lval); tok = T_CURLY_OPEN; lex.call(ftargs, fentry(php)); goto _out;};
|
||||
"${" => {lex.createToken(lval); tok = T_DOLLAR_OPEN_CURLY_BRACES; lex.call(ftargs, fentry(string_var_name)); goto _out;};
|
||||
"$" => {lex.ungetCnt(1); fcall string_var;};
|
||||
'"' => {lex.createToken(lval); tok = TokenID(int('"')); fnext php; fbreak;};
|
||||
any_line* when is_not_string_end_or_var => {
|
||||
lex.createToken(lval);
|
||||
tok = T_ENCAPSED_AND_WHITESPACE;
|
||||
fbreak;
|
||||
};
|
||||
*|;
|
||||
|
||||
heredoc_end := |*
|
||||
varname -- ";" => {
|
||||
lex.createToken(lval);
|
||||
tok = T_END_HEREDOC;
|
||||
fnext php;
|
||||
fbreak;
|
||||
};
|
||||
varname => {
|
||||
lex.createToken(lval);
|
||||
tok = T_END_HEREDOC;
|
||||
fnext php;
|
||||
fbreak;
|
||||
};
|
||||
*|;
|
||||
|
||||
string_var := |*
|
||||
'$' varname => {lex.createToken(lval); tok = T_VARIABLE; fbreak;};
|
||||
'->' varname_first => {lex.ungetCnt(1); lex.createToken(lval); tok = T_OBJECT_OPERATOR; fbreak;};
|
||||
varname => {lex.createToken(lval); tok = T_STRING; fbreak;};
|
||||
'[' => {lex.createToken(lval); tok = TokenID(int('[')); lex.call(ftargs, fentry(string_var_index)); goto _out;};
|
||||
any => {lex.ungetCnt(1); fret;};
|
||||
*|;
|
||||
|
||||
string_var_index := |*
|
||||
lnum | hnum | bnum => {lex.createToken(lval); tok = T_NUM_STRING; fbreak;};
|
||||
'$' varname => {lex.createToken(lval); tok = T_VARIABLE; fbreak;};
|
||||
varname => {lex.createToken(lval); tok = T_STRING; fbreak;};
|
||||
whitespace_line | [\\'#] => {lex.createToken(lval); tok = T_ENCAPSED_AND_WHITESPACE; lex.ret(2); goto _out;};
|
||||
operators > (svi, 1) => {lex.createToken(lval); tok = TokenID(int(lex.data[lex.ts])); fbreak;};
|
||||
']' > (svi, 2) => {lex.createToken(lval); tok = TokenID(int(']')); lex.ret(2); goto _out;};
|
||||
any_line => {
|
||||
c := lex.data[lex.p]
|
||||
lex.Error(fmt.Sprintf("WARNING: Unexpected character in input: '%c' (ASCII=%d)", c, c));
|
||||
};
|
||||
*|;
|
||||
|
||||
string_var_name := |*
|
||||
varname ("[" | "}") => {lex.ungetCnt(1); lex.createToken(lval); tok = T_STRING_VARNAME; fnext php; fbreak;};
|
||||
any => {lex.ungetCnt(1); fnext php;};
|
||||
*|;
|
||||
|
||||
halt_compiller_open_parenthesis := |*
|
||||
whitespace_line* => {lex.addFreeFloating(freefloating.WhiteSpaceType, lex.ts, lex.te)};
|
||||
"(" => {lex.createToken(lval); tok = TokenID(int('(')); fnext halt_compiller_close_parenthesis; fbreak;};
|
||||
any => {lex.ungetCnt(1); fnext php;};
|
||||
*|;
|
||||
|
||||
halt_compiller_close_parenthesis := |*
|
||||
whitespace_line* => {lex.addFreeFloating(freefloating.WhiteSpaceType, lex.ts, lex.te)};
|
||||
")" => {lex.createToken(lval); tok = TokenID(int(')')); fnext halt_compiller_close_semicolon; fbreak;};
|
||||
any => {lex.ungetCnt(1); fnext php;};
|
||||
*|;
|
||||
|
||||
halt_compiller_close_semicolon := |*
|
||||
whitespace_line* => {lex.addFreeFloating(freefloating.WhiteSpaceType, lex.ts, lex.te)};
|
||||
";" => {lex.createToken(lval); tok = TokenID(int(';')); fnext halt_compiller_end; fbreak;};
|
||||
any => {lex.ungetCnt(1); fnext php;};
|
||||
*|;
|
||||
|
||||
halt_compiller_end := |*
|
||||
any_line* => { lex.addFreeFloating(freefloating.TokenType, lex.ts, lex.te); };
|
||||
*|;
|
||||
|
||||
write exec;
|
||||
}%%
|
||||
|
||||
// always return same $end token
|
||||
if tok == 0 {
|
||||
if lex.lastToken == nil {
|
||||
lex.ts, lex.te = 0, 0
|
||||
lex.lastToken = lex.createToken(lval)
|
||||
}
|
||||
lval.Token(lex.lastToken);
|
||||
}
|
||||
|
||||
return int(tok);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
17
scanner/tokenid_string.go
Normal file
17
scanner/tokenid_string.go
Normal file
@@ -0,0 +1,17 @@
|
||||
// Code generated by "stringer -type=TokenID -output ./tokenid_string.go"; DO NOT EDIT.
|
||||
|
||||
package scanner
|
||||
|
||||
import "strconv"
|
||||
|
||||
const _TokenID_name = "T_INCLUDET_INCLUDE_ONCET_EXITT_IFT_LNUMBERT_DNUMBERT_STRINGT_STRING_VARNAMET_VARIABLET_NUM_STRINGT_INLINE_HTMLT_CHARACTERT_BAD_CHARACTERT_ENCAPSED_AND_WHITESPACET_CONSTANT_ENCAPSED_STRINGT_ECHOT_DOT_WHILET_ENDWHILET_FORT_ENDFORT_FOREACHT_ENDFOREACHT_DECLARET_ENDDECLARET_AST_SWITCHT_ENDSWITCHT_CASET_DEFAULTT_BREAKT_CONTINUET_GOTOT_FUNCTIONT_CONSTT_RETURNT_TRYT_CATCHT_FINALLYT_THROWT_USET_INSTEADOFT_GLOBALT_VART_UNSETT_ISSETT_EMPTYT_HALT_COMPILERT_CLASST_TRAITT_INTERFACET_EXTENDST_IMPLEMENTST_OBJECT_OPERATORT_DOUBLE_ARROWT_LISTT_ARRAYT_CALLABLET_CLASS_CT_TRAIT_CT_METHOD_CT_FUNC_CT_LINET_FILET_COMMENTT_DOC_COMMENTT_OPEN_TAGT_OPEN_TAG_WITH_ECHOT_CLOSE_TAGT_WHITESPACET_START_HEREDOCT_END_HEREDOCT_DOLLAR_OPEN_CURLY_BRACEST_CURLY_OPENT_PAAMAYIM_NEKUDOTAYIMT_NAMESPACET_NS_CT_DIRT_NS_SEPARATORT_ELLIPSIST_EVALT_REQUIRET_REQUIRE_ONCET_LOGICAL_ORT_LOGICAL_XORT_LOGICAL_ANDT_INSTANCEOFT_NEWT_CLONET_ELSEIFT_ELSET_ENDIFT_PRINTT_YIELDT_STATICT_ABSTRACTT_FINALT_PRIVATET_PROTECTEDT_PUBLICT_INCT_DECT_YIELD_FROMT_INT_CASTT_DOUBLE_CASTT_STRING_CASTT_ARRAY_CASTT_OBJECT_CASTT_BOOL_CASTT_UNSET_CASTT_COALESCET_SPACESHIPT_NOELSET_PLUS_EQUALT_MINUS_EQUALT_MUL_EQUALT_POW_EQUALT_DIV_EQUALT_CONCAT_EQUALT_MOD_EQUALT_AND_EQUALT_OR_EQUALT_XOR_EQUALT_SL_EQUALT_SR_EQUALT_BOOLEAN_ORT_BOOLEAN_ANDT_POWT_SLT_SRT_IS_IDENTICALT_IS_NOT_IDENTICALT_IS_EQUALT_IS_NOT_EQUALT_IS_SMALLER_OR_EQUALT_IS_GREATER_OR_EQUAL"
|
||||
|
||||
var _TokenID_index = [...]uint16{0, 9, 23, 29, 33, 42, 51, 59, 75, 85, 97, 110, 121, 136, 161, 187, 193, 197, 204, 214, 219, 227, 236, 248, 257, 269, 273, 281, 292, 298, 307, 314, 324, 330, 340, 347, 355, 360, 367, 376, 383, 388, 399, 407, 412, 419, 426, 433, 448, 455, 462, 473, 482, 494, 511, 525, 531, 538, 548, 557, 566, 576, 584, 590, 596, 605, 618, 628, 648, 659, 671, 686, 699, 725, 737, 759, 770, 776, 781, 795, 805, 811, 820, 834, 846, 859, 872, 884, 889, 896, 904, 910, 917, 924, 931, 939, 949, 956, 965, 976, 984, 989, 994, 1006, 1016, 1029, 1042, 1054, 1067, 1078, 1090, 1100, 1111, 1119, 1131, 1144, 1155, 1166, 1177, 1191, 1202, 1213, 1223, 1234, 1244, 1254, 1266, 1279, 1284, 1288, 1292, 1306, 1324, 1334, 1348, 1369, 1390}
|
||||
|
||||
func (i TokenID) String() string {
|
||||
i -= 57346
|
||||
if i < 0 || i >= TokenID(len(_TokenID_index)-1) {
|
||||
return "TokenID(" + strconv.FormatInt(int64(i+57346), 10) + ")"
|
||||
}
|
||||
return _TokenID_name[_TokenID_index[i]:_TokenID_index[i+1]]
|
||||
}
|
||||
Reference in New Issue
Block a user