split scanner and parser
This commit is contained in:
parent
c6f73cf618
commit
b93d24f95f
4
Makefile
4
Makefile
@ -1,6 +1,6 @@
|
||||
PHPFILE=example.php
|
||||
|
||||
all: ./parser/php7.go ./parser/scanner.go
|
||||
all: ./parser/php7.go ./scanner/scanner.go
|
||||
rm -f y.output
|
||||
gofmt -l -s -w *.go
|
||||
go build
|
||||
@ -11,7 +11,7 @@ run: all
|
||||
test: all
|
||||
go test ./... --cover
|
||||
|
||||
./parser/scanner.go: ./parser/scanner.l
|
||||
./scanner/scanner.go: ./scanner/scanner.l
|
||||
golex -o $@ $<
|
||||
|
||||
./parser/php7.go: ./parser/php7.y
|
||||
|
119
parser/lexer.go
119
parser/lexer.go
@ -1,119 +0,0 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"go/token"
|
||||
"io"
|
||||
"unicode"
|
||||
|
||||
"github.com/cznic/golex/lex"
|
||||
"github.com/z7zmey/php-parser/comment"
|
||||
t "github.com/z7zmey/php-parser/token"
|
||||
)
|
||||
|
||||
// Allocate Character classes anywhere in [0x80, 0xFF].
|
||||
// Character classes that rune2Class reports for non-ASCII runes. They start
// at 0x80 so they cannot collide with ASCII runes, which rune2Class returns
// unchanged.
const (
|
||||
// Reported for runes where unicode.IsLetter is true (identifier spelling "Leter" is sic).
classUnicodeLeter = iota + 0x80
|
||||
// Reported for runes where unicode.IsDigit is true.
classUnicodeDigit
|
||||
// Reported for runes where unicode.IsGraphic is true.
classUnicodeGraphic
|
||||
// Declared but not returned by rune2Class, which returns -1 in the fallback case.
classOther
|
||||
)
|
||||
|
||||
// lexer wraps the golex runtime lexer with the extra state used by the PHP
// scanner rules.
type lexer struct {
|
||||
// Embedded golex lexer (Next, Token, Unget, Lookahead, File, ... are called through it).
*lex.Lexer
|
||||
// Stack of scanner start conditions; the last element is the current one.
stateStack []int
|
||||
// Text of the last doc comment seen by the scanner; read and cleared by the
// grammar's backup_doc_comment rule.
phpDocComment string
|
||||
// Comments collected via addComment; newToken attaches them to the token it builds.
comments []comment.Comment
|
||||
}
|
||||
|
||||
// rune2Class maps a rune to the character class number used by the generated
// scanner: ASCII runes map to themselves, other runes map to one of the
// classUnicode* constants, and anything else yields -1.
func rune2Class(r rune) int {
|
||||
if r >= 0 && r < 0x80 { // Keep ASCII as it is.
|
||||
return int(r)
|
||||
}
|
||||
if unicode.IsLetter(r) {
|
||||
return classUnicodeLeter
|
||||
}
|
||||
if unicode.IsDigit(r) {
|
||||
return classUnicodeDigit
|
||||
}
|
||||
if unicode.IsGraphic(r) {
|
||||
return classUnicodeGraphic
|
||||
}
|
||||
// classOther is deliberately not returned here; negative runes and
// non-letter/digit/graphic runes fall through to -1.
// return classOther
|
||||
return -1
|
||||
}
|
||||
|
||||
// newLexer builds a lexer that reads from src and reports positions against a
// file named fName. It panics if lex.New returns an error.
func newLexer(src io.Reader, fName string) *lexer {
|
||||
// Register the file with automatic base (-1) and the maximum size (1<<31-1),
// since the input length is not known up front.
file := token.NewFileSet().AddFile(fName, -1, 1<<31-1)
|
||||
lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(rune2Class))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// Start with a one-element state stack holding state 0, no doc comment and no comments.
return &lexer{lx, []int{0}, "", nil}
|
||||
}
|
||||
|
||||
// ungetChars pushes the current lookahead and then the last n characters of
// the current token back onto the lexer, and returns l.Token() with its last
// n elements sliced off.
// NOTE(review): correctness depends on how lex.Lexer.Unget and Token interact
// (order of ungets, whether Token still includes ungot chars) — confirm
// against the golex/lex documentation before changing statement order.
func (l *lexer) ungetChars(n int) []lex.Char {
|
||||
// The lookahead is pushed back first, before any token characters.
l.Unget(l.Lookahead())
|
||||
|
||||
chars := l.Token()
|
||||
|
||||
// Push back token characters from the end of the token towards the front.
for i := 1; i <= n; i++ {
|
||||
char := chars[len(chars)-i]
|
||||
l.Unget(char)
|
||||
}
|
||||
|
||||
// Re-read the token and drop its last n characters for the caller.
buf := l.Token()
|
||||
buf = buf[:len(buf)-n]
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
// pushState makes state the current scanner state by appending it to the
// state stack; the previous state stays underneath.
func (l *lexer) pushState(state int) {
|
||||
l.stateStack = append(l.stateStack, state)
|
||||
}
|
||||
|
||||
// popState removes the top scanner state. It is a no-op when the stack holds
// one element or fewer, so the bottom state is never removed.
func (l *lexer) popState() {
|
||||
// Note: the local name len shadows the builtin for the rest of this function.
len := len(l.stateStack)
|
||||
if len <= 1 {
|
||||
return
|
||||
}
|
||||
|
||||
l.stateStack = l.stateStack[:len-1]
|
||||
}
|
||||
|
||||
// begin replaces the current (top) scanner state with state, leaving the
// stack depth unchanged. It slices off the last element unconditionally, so
// it requires a non-empty stack.
func (l *lexer) begin(state int) {
|
||||
// Note: the local name len shadows the builtin for the rest of this function.
len := len(l.stateStack)
|
||||
l.stateStack = l.stateStack[:len-1]
|
||||
l.stateStack = append(l.stateStack, state)
|
||||
}
|
||||
|
||||
// getCurrentState returns the top of the state stack. It indexes the last
// element directly, so it requires a non-empty stack.
func (l *lexer) getCurrentState() int {
|
||||
return l.stateStack[len(l.stateStack)-1]
|
||||
}
|
||||
|
||||
// newToken builds a token from chars: its value is the UTF-8 text of chars,
// its line range and position range come from the first and last character,
// and the comments currently held in l.comments are attached to it.
// chars must be non-empty (chars[0] is read unconditionally).
func (l *lexer) newToken(chars []lex.Char) t.Token {
|
||||
firstChar := chars[0]
|
||||
lastChar := chars[len(chars)-1]
|
||||
|
||||
startLine := l.File.Line(firstChar.Pos())
|
||||
endLine := l.File.Line(lastChar.Pos())
|
||||
// Positions are the raw Pos values of the first/last character converted to int.
startPos := int(firstChar.Pos())
|
||||
endPos := int(lastChar.Pos())
|
||||
|
||||
return t.NewToken(l.charsToBytes(chars), startLine, endLine, startPos, endPos).SetComments(l.comments)
|
||||
}
|
||||
|
||||
// addComment appends c to the comments that newToken attaches to the tokens
// it builds.
func (l *lexer) addComment(c comment.Comment) {
|
||||
l.comments = append(l.comments, c)
|
||||
}
|
||||
|
||||
// charsToBytes returns the UTF-8 encoding of the runes in chars, in order.
// It does not use any lexer state.
func (l *lexer) charsToBytes(chars []lex.Char) []byte {
|
||||
bytesBuf := bytes.Buffer{}
|
||||
|
||||
for _, c := range chars {
|
||||
// WriteRune appends the UTF-8 encoding of the rune.
bytesBuf.WriteRune(c.Rune)
|
||||
}
|
||||
|
||||
return bytesBuf.Bytes()
|
||||
}
|
1007
parser/php7.go
1007
parser/php7.go
File diff suppressed because it is too large
Load Diff
@ -5,6 +5,11 @@ import (
|
||||
"io"
|
||||
"strings"
|
||||
"strconv"
|
||||
"bufio"
|
||||
goToken "go/token"
|
||||
|
||||
"github.com/cznic/golex/lex"
|
||||
|
||||
"github.com/z7zmey/php-parser/token"
|
||||
"github.com/z7zmey/php-parser/node"
|
||||
"github.com/z7zmey/php-parser/node/scalar"
|
||||
@ -16,6 +21,7 @@ import (
|
||||
"github.com/z7zmey/php-parser/node/expr/cast"
|
||||
"github.com/z7zmey/php-parser/comment"
|
||||
"github.com/z7zmey/php-parser/position"
|
||||
"github.com/z7zmey/php-parser/scanner"
|
||||
)
|
||||
|
||||
var rootnode node.Node
|
||||
@ -23,6 +29,27 @@ var comments comment.Comments
|
||||
var positions position.Positions
|
||||
var positionBuilder position.Builder
|
||||
|
||||
// lexer is the parser-side lexer type; it embeds scanner.Lexer so the grammar
// actions can reach its exported fields (e.g. PhpDocComment) through
// yylex.(*lexer).
type lexer struct {
|
||||
scanner.Lexer
|
||||
}
|
||||
|
||||
// Lex forwards to the embedded scanner.Lexer's Lex, passing the parser's
// *yySymType as the value receiver, and returns its result unchanged.
// NOTE(review): presumably scanner.Lexer.Lex accepts a scanner.Lval — its
// signature is not visible here; confirm.
func (l *lexer) Lex(lval *yySymType) int {
|
||||
return l.Lexer.Lex(lval)
|
||||
}
|
||||
|
||||
// Token stores t in the symbol value's token field.
// NOTE(review): this method has the same shape as scanner.Lval's Token
// method, presumably so *yySymType can be handed to the scanner — confirm.
func (lval *yySymType) Token(t token.Token) {
|
||||
lval.token = t
|
||||
}
|
||||
|
||||
// newLexer builds a parser lexer that reads from src and reports positions
// against a file named fName. It panics if lex.New returns an error.
func newLexer(src io.Reader, fName string) *lexer {
|
||||
// Register the file with automatic base (-1) and the maximum size (1<<31-1),
// since the input length is not known up front.
file := goToken.NewFileSet().AddFile(fName, -1, 1<<31-1)
|
||||
lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(scanner.Rune2Class))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// Positional scanner.Lexer literal: golex lexer, a state stack holding state 0,
// empty doc comment, no comments.
return &lexer{scanner.Lexer{lx, []int{0}, "", nil}}
|
||||
}
|
||||
|
||||
func ParsePhp7(src io.Reader, fName string) (node.Node, comment.Comments, position.Positions) {
|
||||
yyDebug = 0
|
||||
yyErrorVerbose = true
|
||||
@ -1952,7 +1979,7 @@ expr_without_variable:
|
||||
;
|
||||
|
||||
backup_doc_comment:
|
||||
/* empty */ { $$ = yylex.(*lexer).phpDocComment; yylex.(*lexer).phpDocComment = "" }
|
||||
/* empty */ { $$ = yylex.(*lexer).PhpDocComment; yylex.(*lexer).PhpDocComment = "" }
|
||||
;
|
||||
|
||||
returns_ref:
|
||||
|
609
parser/scanner.l
609
parser/scanner.l
@ -1,609 +0,0 @@
|
||||
%{
|
||||
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// blame: jnml, labs.nic.cz
|
||||
|
||||
package parser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"bytes"
|
||||
"github.com/cznic/golex/lex"
|
||||
"github.com/z7zmey/php-parser/comment"
|
||||
)
|
||||
|
||||
// Scanner start conditions. INITIAL is 0, the value newLexer seeds the state
// stack with; the remaining names match the %s declaration below and are
// selected at run time through l.getCurrentState().
const (
|
||||
// Text outside PHP tags (rules here emit inline HTML and recognise open tags).
INITIAL = iota
|
||||
// Regular PHP code.
PHP
|
||||
// Inside a double-quoted string that contains interpolation.
STRING
|
||||
// Scanning a $variable embedded in a string/heredoc/backquote.
STRING_VAR
|
||||
// Scanning the [...] index that follows an embedded variable.
STRING_VAR_INDEX
|
||||
// Scanning the name that follows "${" inside a string.
STRING_VAR_NAME
|
||||
// After "->", expecting a property name.
PROPERTY
|
||||
// Positioned at the closing heredoc/nowdoc label.
HEREDOC_END
|
||||
// Inside a nowdoc body.
NOWDOC
|
||||
// Inside a heredoc body.
HEREDOC
|
||||
// Inside a backquoted string.
BACKQUOTE
|
||||
)
|
||||
|
||||
var heredocLabel []lex.Char
|
||||
|
||||
func (l *lexer) Lex(lval *yySymType) int {
|
||||
l.comments = nil
|
||||
c := l.Enter()
|
||||
|
||||
%}
|
||||
|
||||
%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY HEREDOC_END NOWDOC HEREDOC BACKQUOTE
|
||||
|
||||
%yyb last == '\n' || last = '\0'
|
||||
%yyt l.getCurrentState()
|
||||
%yyc c
|
||||
%yyn c = l.Next()
|
||||
%yym l.Mark()
|
||||
%optioncase-insensitive
|
||||
|
||||
LNUM [0-9]+
|
||||
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
|
||||
HNUM 0x[0-9a-fA-F]+
|
||||
BNUM 0b[01]+
|
||||
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
|
||||
VAR_NAME [a-zA-Z_\x7f-\xff^0-9/][a-zA-Z0-9_\x7f-\xff]*
|
||||
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
|
||||
NEW_LINE (\r|\n|\r\n)
|
||||
|
||||
%%
|
||||
c = l.Rule0()
|
||||
|
||||
<INITIAL>[ \t\n\r]+ lval.token = l.newToken(l.Token());
|
||||
<INITIAL>.
|
||||
tb := []lex.Char{}
|
||||
|
||||
for {
|
||||
if c == -1 {
|
||||
tb = l.Token();
|
||||
break;
|
||||
}
|
||||
|
||||
if '?' == rune(c) {
|
||||
tb = l.Token();
|
||||
if (len(tb) < 2 || tb[len(tb)-1].Rune != '<') {
|
||||
c = l.Next()
|
||||
continue;
|
||||
}
|
||||
|
||||
tb = l.ungetChars(1)
|
||||
break;
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
lval.token = l.newToken(tb)
|
||||
return T_INLINE_HTML
|
||||
|
||||
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.begin(PHP);lval.token = l.newToken(l.Token());// return T_OPEN_TAG;
|
||||
<INITIAL>\<\? l.begin(PHP);lval.token = l.newToken(l.Token());// return T_OPEN_TAG;
|
||||
<INITIAL>\<\?= l.begin(PHP);lval.token = l.newToken(l.Token()); return T_ECHO;
|
||||
|
||||
<PHP>[ \t\n\r]+ lval.token = l.newToken(l.Token());// return T_WHITESPACE
|
||||
<PHP>\?\>{NEW_LINE}? l.begin(INITIAL);lval.token = l.newToken(l.Token()); return rune2Class(';');
|
||||
|
||||
<PHP>{DNUM}|{EXPONENT_DNUM} lval.token = l.newToken(l.Token()); return T_DNUMBER
|
||||
<PHP>{BNUM}
|
||||
tb := l.Token()
|
||||
i:=2
|
||||
BNUMFOR:for {
|
||||
if i > len(tb)-1 {
|
||||
break BNUMFOR;
|
||||
}
|
||||
switch tb[i].Rune {
|
||||
case '0': i++;
|
||||
default: break BNUMFOR;
|
||||
}
|
||||
}
|
||||
if len(tb) - i < 64 {
|
||||
lval.token = l.newToken(l.Token()); return T_LNUMBER
|
||||
} else {
|
||||
lval.token = l.newToken(l.Token()); return T_DNUMBER
|
||||
}
|
||||
<PHP>{LNUM}
|
||||
if len(l.Token()) < 20 {
|
||||
lval.token = l.newToken(l.Token()); return T_LNUMBER
|
||||
} else {
|
||||
lval.token = l.newToken(l.Token()); return T_DNUMBER
|
||||
}
|
||||
<PHP>{HNUM}
|
||||
tb := l.Token()
|
||||
i:=2
|
||||
HNUMFOR:for {
|
||||
if i > len(tb)-1 {
|
||||
break HNUMFOR;
|
||||
}
|
||||
switch tb[i].Rune {
|
||||
case '0': i++;
|
||||
default: break HNUMFOR;
|
||||
}
|
||||
}
|
||||
length := len(tb) - i
|
||||
if length < 16 || (length == 16 && tb[i].Rune <= '7') {
|
||||
lval.token = l.newToken(l.Token()); return T_LNUMBER
|
||||
} else {
|
||||
lval.token = l.newToken(l.Token()); return T_DNUMBER
|
||||
}
|
||||
|
||||
<PHP>abstract lval.token = l.newToken(l.Token()); return T_ABSTRACT
|
||||
<PHP>array lval.token = l.newToken(l.Token()); return T_ARRAY
|
||||
<PHP>as lval.token = l.newToken(l.Token()); return T_AS
|
||||
<PHP>break lval.token = l.newToken(l.Token()); return T_BREAK
|
||||
<PHP>callable lval.token = l.newToken(l.Token()); return T_CALLABLE
|
||||
<PHP>case lval.token = l.newToken(l.Token()); return T_CASE
|
||||
<PHP>catch lval.token = l.newToken(l.Token()); return T_CATCH
|
||||
<PHP>class lval.token = l.newToken(l.Token()); return T_CLASS
|
||||
<PHP>clone lval.token = l.newToken(l.Token()); return T_CLONE
|
||||
<PHP>const lval.token = l.newToken(l.Token()); return T_CONST;
|
||||
<PHP>continue lval.token = l.newToken(l.Token()); return T_CONTINUE;
|
||||
<PHP>declare lval.token = l.newToken(l.Token()); return T_DECLARE;
|
||||
<PHP>default lval.token = l.newToken(l.Token()); return T_DEFAULT;
|
||||
<PHP>do lval.token = l.newToken(l.Token()); return T_DO;
|
||||
<PHP>echo lval.token = l.newToken(l.Token()); return T_ECHO;
|
||||
<PHP>else lval.token = l.newToken(l.Token()); return T_ELSE;
|
||||
<PHP>elseif lval.token = l.newToken(l.Token()); return T_ELSEIF;
|
||||
<PHP>empty lval.token = l.newToken(l.Token()); return T_EMPTY;
|
||||
<PHP>enddeclare lval.token = l.newToken(l.Token()); return T_ENDDECLARE
|
||||
<PHP>endfor lval.token = l.newToken(l.Token()); return T_ENDFOR
|
||||
<PHP>endforeach lval.token = l.newToken(l.Token()); return T_ENDFOREACH
|
||||
<PHP>endif lval.token = l.newToken(l.Token()); return T_ENDIF
|
||||
<PHP>endswitch lval.token = l.newToken(l.Token()); return T_ENDSWITCH
|
||||
<PHP>endwhile lval.token = l.newToken(l.Token()); return T_ENDWHILE
|
||||
<PHP>eval lval.token = l.newToken(l.Token()); return T_EVAL
|
||||
<PHP>exit|die lval.token = l.newToken(l.Token()); return T_EXIT
|
||||
<PHP>extends lval.token = l.newToken(l.Token()); return T_EXTENDS
|
||||
<PHP>final lval.token = l.newToken(l.Token()); return T_FINAL
|
||||
<PHP>finally lval.token = l.newToken(l.Token()); return T_FINALLY
|
||||
<PHP>for lval.token = l.newToken(l.Token()); return T_FOR
|
||||
<PHP>foreach lval.token = l.newToken(l.Token()); return T_FOREACH
|
||||
<PHP>function|cfunction lval.token = l.newToken(l.Token()); return T_FUNCTION
|
||||
<PHP>global lval.token = l.newToken(l.Token()); return T_GLOBAL
|
||||
<PHP>goto lval.token = l.newToken(l.Token()); return T_GOTO
|
||||
<PHP>if lval.token = l.newToken(l.Token()); return T_IF
|
||||
<PHP>isset lval.token = l.newToken(l.Token()); return T_ISSET
|
||||
<PHP>implements lval.token = l.newToken(l.Token()); return T_IMPLEMENTS
|
||||
<PHP>instanceof lval.token = l.newToken(l.Token()); return T_INSTANCEOF
|
||||
<PHP>insteadof lval.token = l.newToken(l.Token()); return T_INSTEADOF
|
||||
<PHP>interface lval.token = l.newToken(l.Token()); return T_INTERFACE
|
||||
<PHP>list lval.token = l.newToken(l.Token()); return T_LIST
|
||||
<PHP>namespace lval.token = l.newToken(l.Token()); return T_NAMESPACE
|
||||
<PHP>private lval.token = l.newToken(l.Token()); return T_PRIVATE
|
||||
<PHP>public lval.token = l.newToken(l.Token()); return T_PUBLIC
|
||||
<PHP>print lval.token = l.newToken(l.Token()); return T_PRINT
|
||||
<PHP>protected lval.token = l.newToken(l.Token()); return T_PROTECTED
|
||||
<PHP>return lval.token = l.newToken(l.Token()); return T_RETURN
|
||||
<PHP>static lval.token = l.newToken(l.Token()); return T_STATIC
|
||||
<PHP>switch lval.token = l.newToken(l.Token()); return T_SWITCH
|
||||
<PHP>throw lval.token = l.newToken(l.Token()); return T_THROW
|
||||
<PHP>trait lval.token = l.newToken(l.Token()); return T_TRAIT
|
||||
<PHP>try lval.token = l.newToken(l.Token()); return T_TRY
|
||||
<PHP>unset lval.token = l.newToken(l.Token()); return T_UNSET
|
||||
<PHP>use lval.token = l.newToken(l.Token()); return T_USE
|
||||
<PHP>var lval.token = l.newToken(l.Token()); return T_VAR
|
||||
<PHP>while lval.token = l.newToken(l.Token()); return T_WHILE
|
||||
<PHP>yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] lval.token = l.newToken(l.Token()); return T_YIELD_FROM
|
||||
<PHP>yield lval.token = l.newToken(l.Token()); return T_YIELD
|
||||
<PHP>include lval.token = l.newToken(l.Token()); return T_INCLUDE
|
||||
<PHP>include_once lval.token = l.newToken(l.Token()); return T_INCLUDE_ONCE
|
||||
<PHP>require lval.token = l.newToken(l.Token()); return T_REQUIRE
|
||||
<PHP>require_once lval.token = l.newToken(l.Token()); return T_REQUIRE_ONCE
|
||||
<PHP>__CLASS__ lval.token = l.newToken(l.Token()); return T_CLASS_C
|
||||
<PHP>__DIR__ lval.token = l.newToken(l.Token()); return T_DIR
|
||||
<PHP>__FILE__ lval.token = l.newToken(l.Token()); return T_FILE
|
||||
<PHP>__FUNCTION__ lval.token = l.newToken(l.Token()); return T_FUNC_C
|
||||
<PHP>__LINE__ lval.token = l.newToken(l.Token()); return T_LINE
|
||||
<PHP>__NAMESPACE__ lval.token = l.newToken(l.Token()); return T_NS_C
|
||||
<PHP>__METHOD__ lval.token = l.newToken(l.Token()); return T_METHOD_C
|
||||
<PHP>__TRAIT__ lval.token = l.newToken(l.Token()); return T_TRAIT_C
|
||||
<PHP>__halt_compiler lval.token = l.newToken(l.Token()); return T_HALT_COMPILER
|
||||
<PHP>\([ \t]*array[ \t]*\) lval.token = l.newToken(l.Token()); return T_ARRAY_CAST
|
||||
<PHP>\([ \t]*(bool|boolean)[ \t]*\) lval.token = l.newToken(l.Token()); return T_BOOL_CAST
|
||||
<PHP>\([ \t]*(real|double|float)[ \t]*\) lval.token = l.newToken(l.Token()); return T_DOUBLE_CAST
|
||||
<PHP>\([ \t]*(int|integer)[ \t]*\) lval.token = l.newToken(l.Token()); return T_INT_CAST
|
||||
<PHP>\([ \t]*object[ \t]*\) lval.token = l.newToken(l.Token()); return T_OBJECT_CAST
|
||||
<PHP>\([ \t]*string[ \t]*\) lval.token = l.newToken(l.Token()); return T_STRING_CAST
|
||||
<PHP>\([ \t]*unset[ \t]*\) lval.token = l.newToken(l.Token()); return T_UNSET_CAST
|
||||
<PHP>new lval.token = l.newToken(l.Token()); return T_NEW
|
||||
<PHP>and lval.token = l.newToken(l.Token()); return T_LOGICAL_AND
|
||||
<PHP>or lval.token = l.newToken(l.Token()); return T_LOGICAL_OR
|
||||
<PHP>xor lval.token = l.newToken(l.Token()); return T_LOGICAL_XOR
|
||||
<PHP>\\ lval.token = l.newToken(l.Token()); return T_NS_SEPARATOR
|
||||
<PHP>\.\.\. lval.token = l.newToken(l.Token()); return T_ELLIPSIS;
|
||||
<PHP>:: lval.token = l.newToken(l.Token()); return T_PAAMAYIM_NEKUDOTAYIM; // T_DOUBLE_COLON
|
||||
<PHP>&& lval.token = l.newToken(l.Token()); return T_BOOLEAN_AND
|
||||
<PHP>\|\| lval.token = l.newToken(l.Token()); return T_BOOLEAN_OR
|
||||
<PHP>&= lval.token = l.newToken(l.Token()); return T_AND_EQUAL
|
||||
<PHP>\|= lval.token = l.newToken(l.Token()); return T_OR_EQUAL
|
||||
<PHP>\.= lval.token = l.newToken(l.Token()); return T_CONCAT_EQUAL;
|
||||
<PHP>\*= lval.token = l.newToken(l.Token()); return T_MUL_EQUAL
|
||||
<PHP>\*\*= lval.token = l.newToken(l.Token()); return T_POW_EQUAL
|
||||
<PHP>[/]= lval.token = l.newToken(l.Token()); return T_DIV_EQUAL;
|
||||
<PHP>\+= lval.token = l.newToken(l.Token()); return T_PLUS_EQUAL
|
||||
<PHP>-= lval.token = l.newToken(l.Token()); return T_MINUS_EQUAL
|
||||
<PHP>\^= lval.token = l.newToken(l.Token()); return T_XOR_EQUAL
|
||||
<PHP>%= lval.token = l.newToken(l.Token()); return T_MOD_EQUAL
|
||||
<PHP>-- lval.token = l.newToken(l.Token()); return T_DEC;
|
||||
<PHP>\+\+ lval.token = l.newToken(l.Token()); return T_INC
|
||||
<PHP>=> lval.token = l.newToken(l.Token()); return T_DOUBLE_ARROW;
|
||||
<PHP>\<=\> lval.token = l.newToken(l.Token()); return T_SPACESHIP
|
||||
<PHP>\!=|\<\> lval.token = l.newToken(l.Token()); return T_IS_NOT_EQUAL
|
||||
<PHP>\!== lval.token = l.newToken(l.Token()); return T_IS_NOT_IDENTICAL
|
||||
<PHP>== lval.token = l.newToken(l.Token()); return T_IS_EQUAL
|
||||
<PHP>=== lval.token = l.newToken(l.Token()); return T_IS_IDENTICAL
|
||||
<PHP>\<\<= lval.token = l.newToken(l.Token()); return T_SL_EQUAL
|
||||
<PHP>\>\>= lval.token = l.newToken(l.Token()); return T_SR_EQUAL
|
||||
<PHP>\>= lval.token = l.newToken(l.Token()); return T_IS_GREATER_OR_EQUAL
|
||||
<PHP>\<= lval.token = l.newToken(l.Token()); return T_IS_SMALLER_OR_EQUAL
|
||||
<PHP>\*\* lval.token = l.newToken(l.Token()); return T_POW
|
||||
<PHP>\<\< lval.token = l.newToken(l.Token()); return T_SL
|
||||
<PHP>\>\> lval.token = l.newToken(l.Token()); return T_SR
|
||||
<PHP>\?\? lval.token = l.newToken(l.Token()); return T_COALESCE
|
||||
<PHP>(#|[/][/]).*{NEW_LINE} lval.token = l.newToken(l.Token());// return T_COMMENT; // TODO: handle ?>
|
||||
<PHP>([/][*])|([/][*][*])
|
||||
tb := l.Token()
|
||||
is_doc_comment := false
|
||||
if len(tb) > 2 {
|
||||
is_doc_comment = true
|
||||
l.phpDocComment = ""
|
||||
}
|
||||
|
||||
for {
|
||||
if c == -1 {
|
||||
break; // TODO: Unterminated comment starting line %d
|
||||
}
|
||||
|
||||
p := c
|
||||
c = l.Next()
|
||||
|
||||
if rune(p) == '*' && rune(c) == '/' {
|
||||
c = l.Next()
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lval.token = l.newToken(l.Token())
|
||||
if is_doc_comment {
|
||||
l.phpDocComment = string(l.TokenBytes(nil))
|
||||
l.addComment(comment.NewDocComment(string(l.TokenBytes(nil))))
|
||||
// return T_DOC_COMMENT
|
||||
} else {
|
||||
l.addComment(comment.NewPlainComment(string(l.TokenBytes(nil))))
|
||||
// return T_COMMENT
|
||||
}
|
||||
|
||||
<PHP>{OPERATORS} lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
|
||||
<PHP>\{ l.pushState(PHP); lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<PHP>\} l.popState(); l.phpDocComment = ""; lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0])) // doc comment is reset before returning; it was unreachable after the return
|
||||
<PHP>\${VAR_NAME} lval.token = l.newToken(l.Token()); return T_VARIABLE
|
||||
<PHP>{VAR_NAME} lval.token = l.newToken(l.Token()); return T_STRING
|
||||
|
||||
<PHP>-> l.begin(PROPERTY);lval.token = l.newToken(l.Token()); return T_OBJECT_OPERATOR;
|
||||
<PROPERTY>[ \t\n\r]+ lval.token = l.newToken(l.Token()); return T_WHITESPACE;
|
||||
<PROPERTY>-> lval.token = l.newToken(l.Token()); return T_OBJECT_OPERATOR;
|
||||
<PROPERTY>{VAR_NAME} l.begin(PHP);lval.token = l.newToken(l.Token()); return T_STRING;
|
||||
<PROPERTY>. l.ungetChars(1);l.begin(PHP)
|
||||
|
||||
<PHP>[\']([^\\\']*([\\].)*)*[\'] lval.token = l.newToken(l.Token()); return T_CONSTANT_ENCAPSED_STRING;
|
||||
|
||||
<PHP>` l.begin(BACKQUOTE); lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<BACKQUOTE>` l.begin(PHP); lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
|
||||
<PHP>[b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE}
|
||||
tb := l.Token()
|
||||
binPrefix := 0
|
||||
if tb[0].Rune == 'b' {
|
||||
binPrefix = 1
|
||||
}
|
||||
|
||||
lblFirst := 3 + binPrefix
|
||||
lblLast := len(tb)-2
|
||||
if tb[lblLast].Rune == '\r' {
|
||||
lblLast--
|
||||
}
|
||||
|
||||
for {
|
||||
if tb[lblFirst].Rune == ' ' || tb[lblFirst].Rune == '\t' {
|
||||
lblFirst++
|
||||
continue
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
switch tb[lblFirst].Rune {
|
||||
case '\'' :
|
||||
lblFirst++
|
||||
lblLast--
|
||||
l.begin(NOWDOC)
|
||||
case '"' :
|
||||
lblFirst++
|
||||
lblLast--
|
||||
l.begin(HEREDOC)
|
||||
default:
|
||||
l.begin(HEREDOC)
|
||||
}
|
||||
|
||||
heredocLabel = make([]lex.Char, lblLast - lblFirst + 1)
|
||||
copy(heredocLabel, tb[lblFirst:lblLast+1])
|
||||
|
||||
ungetCnt := len(heredocLabel)
|
||||
searchLabelAhead := []lex.Char{}
|
||||
for i := 0; i < len(heredocLabel); i++ {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
searchLabelAhead = append(searchLabelAhead, l.Lookahead())
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
if bytes.Equal(l.charsToBytes(heredocLabel), l.charsToBytes(searchLabelAhead)) && ';' == rune(c) {
|
||||
ungetCnt++
|
||||
c = l.Next()
|
||||
if '\n' == rune(c) || '\r' == rune(c) {
|
||||
l.begin(HEREDOC_END)
|
||||
}
|
||||
}
|
||||
|
||||
l.ungetChars(ungetCnt)
|
||||
|
||||
lval.token = l.newToken(tb);
|
||||
return T_START_HEREDOC
|
||||
|
||||
<NOWDOC>.|[ \t\n\r]
|
||||
searchLabel := []byte{}
|
||||
tb := []lex.Char{}
|
||||
|
||||
for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
if '\n' == rune(c) || '\r' == rune(c) {
|
||||
if bytes.Equal(append(l.charsToBytes(heredocLabel), ';'), searchLabel) {
|
||||
l.begin(HEREDOC_END)
|
||||
tb = l.ungetChars(len(heredocLabel)+1)
|
||||
break;
|
||||
}
|
||||
|
||||
if bytes.Equal(l.charsToBytes(heredocLabel), searchLabel) {
|
||||
l.begin(HEREDOC_END)
|
||||
tb = l.ungetChars(len(heredocLabel))
|
||||
break;
|
||||
}
|
||||
|
||||
searchLabel = []byte{}
|
||||
} else {
|
||||
searchLabel = append(searchLabel, byte(rune(c)))
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
lval.token = l.newToken(tb)
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
|
||||
<HEREDOC_END>{VAR_NAME}\; l.begin(PHP);lval.token = l.newToken(l.ungetChars(1)); return T_END_HEREDOC
|
||||
<HEREDOC_END>{VAR_NAME} l.begin(PHP);lval.token = l.newToken(l.Token()); return T_END_HEREDOC
|
||||
|
||||
<PHP>[b]?[\"]
|
||||
binPrefix := l.Token()[0].Rune == 'b'
|
||||
|
||||
beginString := func() int {
|
||||
cnt := 1; if (binPrefix) {cnt = 2}
|
||||
|
||||
l.ungetChars(len(l.Token())-cnt)
|
||||
chars := l.Token()[:cnt]
|
||||
l.pushState(STRING)
|
||||
|
||||
lval.token = l.newToken(chars); return rune2Class('"')
|
||||
}
|
||||
|
||||
F:for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '"' :
|
||||
c = l.Next();
|
||||
lval.token = l.newToken(l.Token()); return T_CONSTANT_ENCAPSED_STRING
|
||||
break F;
|
||||
|
||||
case '$':
|
||||
c = l.Next();
|
||||
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
|
||||
return beginString()
|
||||
break F;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '{':
|
||||
c = l.Next();
|
||||
if rune(c) == '$' {
|
||||
return beginString()
|
||||
break F;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '\\':
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
<STRING>\" l.popState(); lval.token = l.newToken(l.Token()); return rune2Class(l.Token()[0].Rune)
|
||||
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.token = l.newToken(l.ungetChars(1)); l.pushState(PHP); return T_CURLY_OPEN
|
||||
<STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME); lval.token = l.newToken(l.Token()); return T_DOLLAR_OPEN_CURLY_BRACES
|
||||
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetChars(1);l.pushState(STRING_VAR)
|
||||
<STRING>.|[ \t\n\r]
|
||||
F1:for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '"' :
|
||||
lval.token = l.newToken(l.Token());
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F1;
|
||||
|
||||
case '$':
|
||||
c = l.Next();
|
||||
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
|
||||
l.ungetChars(1)
|
||||
tb := l.Token()
|
||||
lval.token = l.newToken(tb[:len(tb)-1]);
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F1;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '{':
|
||||
c = l.Next();
|
||||
if rune(c) == '$' {
|
||||
l.ungetChars(1)
|
||||
tb := l.Token()
|
||||
lval.token = l.newToken(tb[:len(tb)-1]);
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F1;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '\\':
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
<BACKQUOTE>.|[ \t\n\r]
|
||||
F2:for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '`' :
|
||||
lval.token = l.newToken(l.Token());
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F2;
|
||||
|
||||
case '$':
|
||||
c = l.Next();
|
||||
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
|
||||
l.ungetChars(1)
|
||||
tb := l.Token()
|
||||
lval.token = l.newToken(tb[:len(tb)-1]);
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F2;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '{':
|
||||
c = l.Next();
|
||||
if rune(c) == '$' {
|
||||
l.ungetChars(1)
|
||||
tb := l.Token()
|
||||
lval.token = l.newToken(tb[:len(tb)-1]);
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F2;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '\\':
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
<HEREDOC>.|[ \t\n\r]
|
||||
searchLabel := []byte{}
|
||||
tb := []lex.Char{}
|
||||
|
||||
HEREDOCFOR:for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '\n': fallthrough
|
||||
case '\r':
|
||||
if bytes.Equal(append(l.charsToBytes(heredocLabel), ';'), searchLabel) {
|
||||
l.begin(HEREDOC_END)
|
||||
tb = l.ungetChars(len(heredocLabel)+1)
|
||||
break HEREDOCFOR;
|
||||
}
|
||||
|
||||
if bytes.Equal(l.charsToBytes(heredocLabel), searchLabel) {
|
||||
l.begin(HEREDOC_END)
|
||||
tb = l.ungetChars(len(heredocLabel))
|
||||
break HEREDOCFOR;
|
||||
}
|
||||
|
||||
searchLabel = []byte{}
|
||||
|
||||
case '$':
|
||||
c = l.Next();
|
||||
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
|
||||
tb = l.ungetChars(1)
|
||||
break HEREDOCFOR;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
searchLabel = []byte{}
|
||||
|
||||
case '{':
|
||||
c = l.Next();
|
||||
if rune(c) == '$' {
|
||||
tb = l.ungetChars(1)
|
||||
break HEREDOCFOR;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
searchLabel = []byte{}
|
||||
|
||||
case '\\':
|
||||
c = l.Next();
|
||||
searchLabel = []byte{}
|
||||
|
||||
default:
|
||||
searchLabel = append(searchLabel, byte(rune(c)))
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
lval.token = l.newToken(tb);
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
|
||||
<STRING_VAR>\${VAR_NAME} lval.token = l.newToken(l.Token()); return T_VARIABLE
|
||||
<STRING_VAR>->{VAR_NAME} lval.token = l.newToken(l.ungetChars(len(l.Token())-2)); return T_OBJECT_OPERATOR
|
||||
<STRING_VAR>{VAR_NAME} l.popState();lval.token = l.newToken(l.Token()); return T_STRING
|
||||
<STRING_VAR>\[ l.pushState(STRING_VAR_INDEX);lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR>.|[ \t\n\r] l.ungetChars(1);l.popState()
|
||||
|
||||
<STRING_VAR_INDEX>{LNUM}|{HNUM}|{BNUM} lval.token = l.newToken(l.Token()); return T_NUM_STRING
|
||||
<STRING_VAR_INDEX>\${VAR_NAME} lval.token = l.newToken(l.Token()); return T_VARIABLE
|
||||
<STRING_VAR_INDEX>{VAR_NAME} lval.token = l.newToken(l.Token()); return T_STRING
|
||||
<STRING_VAR_INDEX>\] l.popState(); l.popState();lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.token = l.newToken(l.Token()); return T_ENCAPSED_AND_WHITESPACE
|
||||
<STRING_VAR_INDEX>{OPERATORS} lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR_INDEX>. lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
|
||||
<STRING_VAR_NAME>{VAR_NAME}[\[\}] l.popState();l.pushState(PHP);lval.token = l.newToken(l.ungetChars(1)); return T_STRING_VARNAME
|
||||
<STRING_VAR_NAME>. l.ungetChars(1);l.popState();l.pushState(PHP)
|
||||
|
||||
%%
|
||||
if c, ok := l.Abort(); ok { return int(c) }
|
||||
goto yyAction
|
||||
}
|
260
scanner/lexer.go
Normal file
260
scanner/lexer.go
Normal file
@ -0,0 +1,260 @@
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"go/token"
|
||||
"io"
|
||||
"unicode"
|
||||
|
||||
"github.com/cznic/golex/lex"
|
||||
"github.com/z7zmey/php-parser/comment"
|
||||
t "github.com/z7zmey/php-parser/token"
|
||||
)
|
||||
|
||||
// Allocate Character classes anywhere in [0x80, 0xFF].
|
||||
// Character classes used for non-ASCII runes. They start at 0x80 so they
// cannot collide with ASCII runes, which Rune2Class returns unchanged.
const (
|
||||
// Reported for runes where unicode.IsLetter is true (identifier spelling "Leter" is sic).
classUnicodeLeter = iota + 0x80
|
||||
// Reported for runes where unicode.IsDigit is true.
classUnicodeDigit
|
||||
// NOTE(review): presumably reported for unicode.IsGraphic runes, as in the
// previous parser/lexer.go — the rest of Rune2Class is not visible here.
classUnicodeGraphic
|
||||
// NOTE(review): presumably the catch-all class; confirm whether Rune2Class returns it.
classOther
|
||||
)
|
||||
|
||||
// Token codes returned by the scanner, one constant per token, numbered
// consecutively from 57346.
// NOTE(review): these presumably duplicate the goyacc-generated token
// constants of the parser package — confirm they are kept in sync whenever
// the grammar's token list changes.
const T_INCLUDE = 57346
|
||||
const T_INCLUDE_ONCE = 57347
|
||||
const T_EVAL = 57348
|
||||
const T_REQUIRE = 57349
|
||||
const T_REQUIRE_ONCE = 57350
|
||||
const T_LOGICAL_OR = 57351
|
||||
const T_LOGICAL_XOR = 57352
|
||||
const T_LOGICAL_AND = 57353
|
||||
const T_PRINT = 57354
|
||||
const T_YIELD = 57355
|
||||
const T_DOUBLE_ARROW = 57356
|
||||
const T_YIELD_FROM = 57357
|
||||
const T_PLUS_EQUAL = 57358
|
||||
const T_MINUS_EQUAL = 57359
|
||||
const T_MUL_EQUAL = 57360
|
||||
const T_DIV_EQUAL = 57361
|
||||
const T_CONCAT_EQUAL = 57362
|
||||
const T_MOD_EQUAL = 57363
|
||||
const T_AND_EQUAL = 57364
|
||||
const T_OR_EQUAL = 57365
|
||||
const T_XOR_EQUAL = 57366
|
||||
const T_SL_EQUAL = 57367
|
||||
const T_SR_EQUAL = 57368
|
||||
const T_POW_EQUAL = 57369
|
||||
const T_COALESCE = 57370
|
||||
// Token identifiers returned by the scanner's Lex method. Every value is
// written out literally (no iota), so entries can be compared one-to-one
// against another table.
// NOTE(review): presumably these mirror the token numbers generated for the
// parser package — confirm before renumbering anything.
const (
	T_BOOLEAN_OR               = 57371
	T_BOOLEAN_AND              = 57372
	T_IS_EQUAL                 = 57373
	T_IS_NOT_EQUAL             = 57374
	T_IS_IDENTICAL             = 57375
	T_IS_NOT_IDENTICAL         = 57376
	T_SPACESHIP                = 57377
	T_IS_SMALLER_OR_EQUAL      = 57378
	T_IS_GREATER_OR_EQUAL      = 57379
	T_SL                       = 57380
	T_SR                       = 57381
	T_INSTANCEOF               = 57382
	T_INC                      = 57383
	T_DEC                      = 57384
	T_INT_CAST                 = 57385
	T_DOUBLE_CAST              = 57386
	T_STRING_CAST              = 57387
	T_ARRAY_CAST               = 57388
	T_OBJECT_CAST              = 57389
	T_BOOL_CAST                = 57390
	T_UNSET_CAST               = 57391
	T_POW                      = 57392
	T_NEW                      = 57393
	T_CLONE                    = 57394
	T_NOELSE                   = 57395
	T_ELSEIF                   = 57396
	T_ELSE                     = 57397
	T_ENDIF                    = 57398
	T_STATIC                   = 57399
	T_ABSTRACT                 = 57400
	T_FINAL                    = 57401
	T_PRIVATE                  = 57402
	T_PROTECTED                = 57403
	T_PUBLIC                   = 57404
	T_EXIT                     = 57405
	T_IF                       = 57406
	T_LNUMBER                  = 57407
	T_DNUMBER                  = 57408
	T_STRING                   = 57409
	T_STRING_VARNAME           = 57410
	T_VARIABLE                 = 57411
	T_NUM_STRING               = 57412
	T_INLINE_HTML              = 57413
	T_CHARACTER                = 57414
	T_BAD_CHARACTER            = 57415
	T_ENCAPSED_AND_WHITESPACE  = 57416
	T_CONSTANT_ENCAPSED_STRING = 57417
	T_ECHO                     = 57418
	T_DO                       = 57419
	T_WHILE                    = 57420
	T_ENDWHILE                 = 57421
	T_FOR                      = 57422
	T_ENDFOR                   = 57423
	T_FOREACH                  = 57424
	T_ENDFOREACH               = 57425
	T_DECLARE                  = 57426
	T_ENDDECLARE               = 57427
	T_AS                       = 57428
	T_SWITCH                   = 57429
	T_ENDSWITCH                = 57430
	T_CASE                     = 57431
	T_DEFAULT                  = 57432
	T_BREAK                    = 57433
	T_CONTINUE                 = 57434
	T_GOTO                     = 57435
	T_FUNCTION                 = 57436
	T_CONST                    = 57437
	T_RETURN                   = 57438
	T_TRY                      = 57439
	T_CATCH                    = 57440
	T_FINALLY                  = 57441
	T_THROW                    = 57442
	T_USE                      = 57443
	T_INSTEADOF                = 57444
	T_GLOBAL                   = 57445
	T_VAR                      = 57446
	T_UNSET                    = 57447
	T_ISSET                    = 57448
	T_EMPTY                    = 57449
	T_HALT_COMPILER            = 57450
	T_CLASS                    = 57451
	T_TRAIT                    = 57452
	T_INTERFACE                = 57453
	T_EXTENDS                  = 57454
	T_IMPLEMENTS               = 57455
	T_OBJECT_OPERATOR          = 57456
	T_LIST                     = 57457
	T_ARRAY                    = 57458
	T_CALLABLE                 = 57459
	T_CLASS_C                  = 57460
	T_TRAIT_C                  = 57461
	T_METHOD_C                 = 57462
	T_FUNC_C                   = 57463
	T_LINE                     = 57464
	T_FILE                     = 57465
	T_COMMENT                  = 57466
	T_DOC_COMMENT              = 57467
	T_OPEN_TAG                 = 57468
	T_OPEN_TAG_WITH_ECHO       = 57469
	T_CLOSE_TAG                = 57470
	T_WHITESPACE               = 57471
	T_START_HEREDOC            = 57472
	T_END_HEREDOC              = 57473
	T_DOLLAR_OPEN_CURLY_BRACES = 57474
	T_CURLY_OPEN               = 57475
	T_PAAMAYIM_NEKUDOTAYIM     = 57476
	T_NAMESPACE                = 57477
	T_NS_C                     = 57478
	T_DIR                      = 57479
	T_NS_SEPARATOR             = 57480
	T_ELLIPSIS                 = 57481
)
||||
|
||||
type Lval interface {
|
||||
Token(tkn t.Token)
|
||||
}
|
||||
|
||||
type Lexer struct {
|
||||
*lex.Lexer
|
||||
StateStack []int
|
||||
PhpDocComment string
|
||||
Comments []comment.Comment
|
||||
}
|
||||
|
||||
func Rune2Class(r rune) int {
|
||||
if r >= 0 && r < 0x80 { // Keep ASCII as it is.
|
||||
return int(r)
|
||||
}
|
||||
if unicode.IsLetter(r) {
|
||||
return classUnicodeLeter
|
||||
}
|
||||
if unicode.IsDigit(r) {
|
||||
return classUnicodeDigit
|
||||
}
|
||||
if unicode.IsGraphic(r) {
|
||||
return classUnicodeGraphic
|
||||
}
|
||||
// return classOther
|
||||
return -1
|
||||
}
|
||||
|
||||
func NewLexer(src io.Reader, fName string) *Lexer {
|
||||
file := token.NewFileSet().AddFile(fName, -1, 1<<31-1)
|
||||
lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(Rune2Class))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return &Lexer{lx, []int{0}, "", nil}
|
||||
}
|
||||
|
||||
func (l *Lexer) ungetChars(n int) []lex.Char {
|
||||
l.Unget(l.Lookahead())
|
||||
|
||||
chars := l.Token()
|
||||
|
||||
for i := 1; i <= n; i++ {
|
||||
char := chars[len(chars)-i]
|
||||
l.Unget(char)
|
||||
}
|
||||
|
||||
buf := l.Token()
|
||||
buf = buf[:len(buf)-n]
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
func (l *Lexer) pushState(state int) {
|
||||
l.StateStack = append(l.StateStack, state)
|
||||
}
|
||||
|
||||
func (l *Lexer) popState() {
|
||||
len := len(l.StateStack)
|
||||
if len <= 1 {
|
||||
return
|
||||
}
|
||||
|
||||
l.StateStack = l.StateStack[:len-1]
|
||||
}
|
||||
|
||||
func (l *Lexer) begin(state int) {
|
||||
len := len(l.StateStack)
|
||||
l.StateStack = l.StateStack[:len-1]
|
||||
l.StateStack = append(l.StateStack, state)
|
||||
}
|
||||
|
||||
func (l *Lexer) getCurrentState() int {
|
||||
return l.StateStack[len(l.StateStack)-1]
|
||||
}
|
||||
|
||||
func (l *Lexer) newToken(chars []lex.Char) t.Token {
|
||||
firstChar := chars[0]
|
||||
lastChar := chars[len(chars)-1]
|
||||
|
||||
startLine := l.File.Line(firstChar.Pos())
|
||||
endLine := l.File.Line(lastChar.Pos())
|
||||
startPos := int(firstChar.Pos())
|
||||
endPos := int(lastChar.Pos())
|
||||
|
||||
return t.NewToken(l.charsToBytes(chars), startLine, endLine, startPos, endPos).SetComments(l.Comments)
|
||||
}
|
||||
|
||||
func (l *Lexer) addComment(c comment.Comment) {
|
||||
l.Comments = append(l.Comments, c)
|
||||
}
|
||||
|
||||
func (l *Lexer) charsToBytes(chars []lex.Char) []byte {
|
||||
bytesBuf := bytes.Buffer{}
|
||||
|
||||
for _, c := range chars {
|
||||
bytesBuf.WriteRune(c.Rune)
|
||||
}
|
||||
|
||||
return bytesBuf.Bytes()
|
||||
}
|
File diff suppressed because it is too large
Load Diff
609
scanner/scanner.l
Normal file
609
scanner/scanner.l
Normal file
@ -0,0 +1,609 @@
|
||||
%{
|
||||
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// blame: jnml, labs.nic.cz
|
||||
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"bytes"
|
||||
"github.com/cznic/golex/lex"
|
||||
"github.com/z7zmey/php-parser/comment"
|
||||
)
|
||||
|
||||
const (
|
||||
INITIAL = iota
|
||||
PHP
|
||||
STRING
|
||||
STRING_VAR
|
||||
STRING_VAR_INDEX
|
||||
STRING_VAR_NAME
|
||||
PROPERTY
|
||||
HEREDOC_END
|
||||
NOWDOC
|
||||
HEREDOC
|
||||
BACKQUOTE
|
||||
)
|
||||
|
||||
var heredocLabel []lex.Char
|
||||
|
||||
func (l *Lexer) Lex(lval Lval) int {
|
||||
l.Comments = nil
|
||||
c := l.Enter()
|
||||
|
||||
%}
|
||||
|
||||
%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY HEREDOC_END NOWDOC HEREDOC BACKQUOTE
|
||||
|
||||
%yyb last == '\n' || last == '\0'
|
||||
%yyt l.getCurrentState()
|
||||
%yyc c
|
||||
%yyn c = l.Next()
|
||||
%yym l.Mark()
|
||||
%optioncase-insensitive
|
||||
|
||||
LNUM [0-9]+
|
||||
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
|
||||
HNUM 0x[0-9a-fA-F]+
|
||||
BNUM 0b[01]+
|
||||
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
|
||||
VAR_NAME [a-zA-Z_\x7f-\xff^0-9/][a-zA-Z0-9_\x7f-\xff]*
|
||||
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
|
||||
NEW_LINE (\r|\n|\r\n)
|
||||
|
||||
%%
|
||||
c = l.Rule0()
|
||||
|
||||
<INITIAL>[ \t\n\r]+ lval.Token(l.newToken(l.Token()));
|
||||
<INITIAL>.
|
||||
tb := []lex.Char{}
|
||||
|
||||
for {
|
||||
if c == -1 {
|
||||
tb = l.Token();
|
||||
break;
|
||||
}
|
||||
|
||||
if '?' == rune(c) {
|
||||
tb = l.Token();
|
||||
if (len(tb) < 2 || tb[len(tb)-1].Rune != '<') {
|
||||
c = l.Next()
|
||||
continue;
|
||||
}
|
||||
|
||||
tb = l.ungetChars(1)
|
||||
break;
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
lval.Token(l.newToken(tb))
|
||||
return T_INLINE_HTML
|
||||
|
||||
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.begin(PHP);lval.Token(l.newToken(l.Token()));// return T_OPEN_TAG;
|
||||
<INITIAL>\<\? l.begin(PHP);lval.Token(l.newToken(l.Token()));// return T_OPEN_TAG;
|
||||
<INITIAL>\<\?= l.begin(PHP);lval.Token(l.newToken(l.Token())); return T_ECHO;
|
||||
|
||||
<PHP>[ \t\n\r]+ lval.Token(l.newToken(l.Token()));// return T_WHITESPACE
|
||||
<PHP>\?\>{NEW_LINE}? l.begin(INITIAL);lval.Token(l.newToken(l.Token())); return Rune2Class(';');
|
||||
|
||||
<PHP>{DNUM}|{EXPONENT_DNUM} lval.Token(l.newToken(l.Token())); return T_DNUMBER
|
||||
<PHP>{BNUM}
|
||||
tb := l.Token()
|
||||
i:=2
|
||||
BNUMFOR:for {
|
||||
if i > len(tb)-1 {
|
||||
break BNUMFOR;
|
||||
}
|
||||
switch tb[i].Rune {
|
||||
case '0': i++;
|
||||
default: break BNUMFOR;
|
||||
}
|
||||
}
|
||||
if len(tb) - i < 64 {
|
||||
lval.Token(l.newToken(l.Token())); return T_LNUMBER
|
||||
} else {
|
||||
lval.Token(l.newToken(l.Token())); return T_DNUMBER
|
||||
}
|
||||
<PHP>{LNUM}
|
||||
if len(l.Token()) < 20 {
|
||||
lval.Token(l.newToken(l.Token())); return T_LNUMBER
|
||||
} else {
|
||||
lval.Token(l.newToken(l.Token())); return T_DNUMBER
|
||||
}
|
||||
<PHP>{HNUM}
|
||||
tb := l.Token()
|
||||
i:=2
|
||||
HNUMFOR:for {
|
||||
if i > len(tb)-1 {
|
||||
break HNUMFOR;
|
||||
}
|
||||
switch tb[i].Rune {
|
||||
case '0': i++;
|
||||
default: break HNUMFOR;
|
||||
}
|
||||
}
|
||||
length := len(tb) - i
|
||||
if length < 16 || (length == 16 && tb[i].Rune <= '7') {
|
||||
lval.Token(l.newToken(l.Token())); return T_LNUMBER
|
||||
} else {
|
||||
lval.Token(l.newToken(l.Token())); return T_DNUMBER
|
||||
}
|
||||
|
||||
<PHP>abstract lval.Token(l.newToken(l.Token())); return T_ABSTRACT
|
||||
<PHP>array lval.Token(l.newToken(l.Token())); return T_ARRAY
|
||||
<PHP>as lval.Token(l.newToken(l.Token())); return T_AS
|
||||
<PHP>break lval.Token(l.newToken(l.Token())); return T_BREAK
|
||||
<PHP>callable lval.Token(l.newToken(l.Token())); return T_CALLABLE
|
||||
<PHP>case lval.Token(l.newToken(l.Token())); return T_CASE
|
||||
<PHP>catch lval.Token(l.newToken(l.Token())); return T_CATCH
|
||||
<PHP>class lval.Token(l.newToken(l.Token())); return T_CLASS
|
||||
<PHP>clone lval.Token(l.newToken(l.Token())); return T_CLONE
|
||||
<PHP>const lval.Token(l.newToken(l.Token())); return T_CONST;
|
||||
<PHP>continue lval.Token(l.newToken(l.Token())); return T_CONTINUE;
|
||||
<PHP>declare lval.Token(l.newToken(l.Token())); return T_DECLARE;
|
||||
<PHP>default lval.Token(l.newToken(l.Token())); return T_DEFAULT;
|
||||
<PHP>do lval.Token(l.newToken(l.Token())); return T_DO;
|
||||
<PHP>echo lval.Token(l.newToken(l.Token())); return T_ECHO;
|
||||
<PHP>else lval.Token(l.newToken(l.Token())); return T_ELSE;
|
||||
<PHP>elseif lval.Token(l.newToken(l.Token())); return T_ELSEIF;
|
||||
<PHP>empty lval.Token(l.newToken(l.Token())); return T_EMPTY;
|
||||
<PHP>enddeclare lval.Token(l.newToken(l.Token())); return T_ENDDECLARE
|
||||
<PHP>endfor lval.Token(l.newToken(l.Token())); return T_ENDFOR
|
||||
<PHP>endforeach lval.Token(l.newToken(l.Token())); return T_ENDFOREACH
|
||||
<PHP>endif lval.Token(l.newToken(l.Token())); return T_ENDIF
|
||||
<PHP>endswitch lval.Token(l.newToken(l.Token())); return T_ENDSWITCH
|
||||
<PHP>endwhile lval.Token(l.newToken(l.Token())); return T_ENDWHILE
|
||||
<PHP>eval lval.Token(l.newToken(l.Token())); return T_EVAL
|
||||
<PHP>exit|die lval.Token(l.newToken(l.Token())); return T_EXIT
|
||||
<PHP>extends lval.Token(l.newToken(l.Token())); return T_EXTENDS
|
||||
<PHP>final lval.Token(l.newToken(l.Token())); return T_FINAL
|
||||
<PHP>finally lval.Token(l.newToken(l.Token())); return T_FINALLY
|
||||
<PHP>for lval.Token(l.newToken(l.Token())); return T_FOR
|
||||
<PHP>foreach lval.Token(l.newToken(l.Token())); return T_FOREACH
|
||||
<PHP>function|cfunction lval.Token(l.newToken(l.Token())); return T_FUNCTION
|
||||
<PHP>global lval.Token(l.newToken(l.Token())); return T_GLOBAL
|
||||
<PHP>goto lval.Token(l.newToken(l.Token())); return T_GOTO
|
||||
<PHP>if lval.Token(l.newToken(l.Token())); return T_IF
|
||||
<PHP>isset lval.Token(l.newToken(l.Token())); return T_ISSET
|
||||
<PHP>implements lval.Token(l.newToken(l.Token())); return T_IMPLEMENTS
|
||||
<PHP>instanceof lval.Token(l.newToken(l.Token())); return T_INSTANCEOF
|
||||
<PHP>insteadof lval.Token(l.newToken(l.Token())); return T_INSTEADOF
|
||||
<PHP>interface lval.Token(l.newToken(l.Token())); return T_INTERFACE
|
||||
<PHP>list lval.Token(l.newToken(l.Token())); return T_LIST
|
||||
<PHP>namespace lval.Token(l.newToken(l.Token())); return T_NAMESPACE
|
||||
<PHP>private lval.Token(l.newToken(l.Token())); return T_PRIVATE
|
||||
<PHP>public lval.Token(l.newToken(l.Token())); return T_PUBLIC
|
||||
<PHP>print lval.Token(l.newToken(l.Token())); return T_PRINT
|
||||
<PHP>protected lval.Token(l.newToken(l.Token())); return T_PROTECTED
|
||||
<PHP>return lval.Token(l.newToken(l.Token())); return T_RETURN
|
||||
<PHP>static lval.Token(l.newToken(l.Token())); return T_STATIC
|
||||
<PHP>switch lval.Token(l.newToken(l.Token())); return T_SWITCH
|
||||
<PHP>throw lval.Token(l.newToken(l.Token())); return T_THROW
|
||||
<PHP>trait lval.Token(l.newToken(l.Token())); return T_TRAIT
|
||||
<PHP>try lval.Token(l.newToken(l.Token())); return T_TRY
|
||||
<PHP>unset lval.Token(l.newToken(l.Token())); return T_UNSET
|
||||
<PHP>use lval.Token(l.newToken(l.Token())); return T_USE
|
||||
<PHP>var lval.Token(l.newToken(l.Token())); return T_VAR
|
||||
<PHP>while lval.Token(l.newToken(l.Token())); return T_WHILE
|
||||
<PHP>yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] lval.Token(l.newToken(l.Token())); return T_YIELD_FROM
|
||||
<PHP>yield lval.Token(l.newToken(l.Token())); return T_YIELD
|
||||
<PHP>include lval.Token(l.newToken(l.Token())); return T_INCLUDE
|
||||
<PHP>include_once lval.Token(l.newToken(l.Token())); return T_INCLUDE_ONCE
|
||||
<PHP>require lval.Token(l.newToken(l.Token())); return T_REQUIRE
|
||||
<PHP>require_once lval.Token(l.newToken(l.Token())); return T_REQUIRE_ONCE
|
||||
<PHP>__CLASS__ lval.Token(l.newToken(l.Token())); return T_CLASS_C
|
||||
<PHP>__DIR__ lval.Token(l.newToken(l.Token())); return T_DIR
|
||||
<PHP>__FILE__ lval.Token(l.newToken(l.Token())); return T_FILE
|
||||
<PHP>__FUNCTION__ lval.Token(l.newToken(l.Token())); return T_FUNC_C
|
||||
<PHP>__LINE__ lval.Token(l.newToken(l.Token())); return T_LINE
|
||||
<PHP>__NAMESPACE__ lval.Token(l.newToken(l.Token())); return T_NS_C
|
||||
<PHP>__METHOD__ lval.Token(l.newToken(l.Token())); return T_METHOD_C
|
||||
<PHP>__TRAIT__ lval.Token(l.newToken(l.Token())); return T_TRAIT_C
|
||||
<PHP>__halt_compiler lval.Token(l.newToken(l.Token())); return T_HALT_COMPILER
|
||||
<PHP>\([ \t]*array[ \t]*\) lval.Token(l.newToken(l.Token())); return T_ARRAY_CAST
|
||||
<PHP>\([ \t]*(bool|boolean)[ \t]*\) lval.Token(l.newToken(l.Token())); return T_BOOL_CAST
|
||||
<PHP>\([ \t]*(real|double|float)[ \t]*\) lval.Token(l.newToken(l.Token())); return T_DOUBLE_CAST
|
||||
<PHP>\([ \t]*(int|integer)[ \t]*\) lval.Token(l.newToken(l.Token())); return T_INT_CAST
|
||||
<PHP>\([ \t]*object[ \t]*\) lval.Token(l.newToken(l.Token())); return T_OBJECT_CAST
|
||||
<PHP>\([ \t]*string[ \t]*\) lval.Token(l.newToken(l.Token())); return T_STRING_CAST
|
||||
<PHP>\([ \t]*unset[ \t]*\) lval.Token(l.newToken(l.Token())); return T_UNSET_CAST
|
||||
<PHP>new lval.Token(l.newToken(l.Token())); return T_NEW
|
||||
<PHP>and lval.Token(l.newToken(l.Token())); return T_LOGICAL_AND
|
||||
<PHP>or lval.Token(l.newToken(l.Token())); return T_LOGICAL_OR
|
||||
<PHP>xor lval.Token(l.newToken(l.Token())); return T_LOGICAL_XOR
|
||||
<PHP>\\ lval.Token(l.newToken(l.Token())); return T_NS_SEPARATOR
|
||||
<PHP>\.\.\. lval.Token(l.newToken(l.Token())); return T_ELLIPSIS;
|
||||
<PHP>:: lval.Token(l.newToken(l.Token())); return T_PAAMAYIM_NEKUDOTAYIM; // T_DOUBLE_COLON
|
||||
<PHP>&& lval.Token(l.newToken(l.Token())); return T_BOOLEAN_AND
|
||||
<PHP>\|\| lval.Token(l.newToken(l.Token())); return T_BOOLEAN_OR
|
||||
<PHP>&= lval.Token(l.newToken(l.Token())); return T_AND_EQUAL
|
||||
<PHP>\|= lval.Token(l.newToken(l.Token())); return T_OR_EQUAL
|
||||
<PHP>\.= lval.Token(l.newToken(l.Token())); return T_CONCAT_EQUAL;
|
||||
<PHP>\*= lval.Token(l.newToken(l.Token())); return T_MUL_EQUAL
|
||||
<PHP>\*\*= lval.Token(l.newToken(l.Token())); return T_POW_EQUAL
|
||||
<PHP>[/]= lval.Token(l.newToken(l.Token())); return T_DIV_EQUAL;
|
||||
<PHP>\+= lval.Token(l.newToken(l.Token())); return T_PLUS_EQUAL
|
||||
<PHP>-= lval.Token(l.newToken(l.Token())); return T_MINUS_EQUAL
|
||||
<PHP>\^= lval.Token(l.newToken(l.Token())); return T_XOR_EQUAL
|
||||
<PHP>%= lval.Token(l.newToken(l.Token())); return T_MOD_EQUAL
|
||||
<PHP>-- lval.Token(l.newToken(l.Token())); return T_DEC;
|
||||
<PHP>\+\+ lval.Token(l.newToken(l.Token())); return T_INC
|
||||
<PHP>=> lval.Token(l.newToken(l.Token())); return T_DOUBLE_ARROW;
|
||||
<PHP>\<=\> lval.Token(l.newToken(l.Token())); return T_SPACESHIP
|
||||
<PHP>\!=|\<\> lval.Token(l.newToken(l.Token())); return T_IS_NOT_EQUAL
|
||||
<PHP>\!== lval.Token(l.newToken(l.Token())); return T_IS_NOT_IDENTICAL
|
||||
<PHP>== lval.Token(l.newToken(l.Token())); return T_IS_EQUAL
|
||||
<PHP>=== lval.Token(l.newToken(l.Token())); return T_IS_IDENTICAL
|
||||
<PHP>\<\<= lval.Token(l.newToken(l.Token())); return T_SL_EQUAL
|
||||
<PHP>\>\>= lval.Token(l.newToken(l.Token())); return T_SR_EQUAL
|
||||
<PHP>\>= lval.Token(l.newToken(l.Token())); return T_IS_GREATER_OR_EQUAL
|
||||
<PHP>\<= lval.Token(l.newToken(l.Token())); return T_IS_SMALLER_OR_EQUAL
|
||||
<PHP>\*\* lval.Token(l.newToken(l.Token())); return T_POW
|
||||
<PHP>\<\< lval.Token(l.newToken(l.Token())); return T_SL
|
||||
<PHP>\>\> lval.Token(l.newToken(l.Token())); return T_SR
|
||||
<PHP>\?\? lval.Token(l.newToken(l.Token())); return T_COALESCE
|
||||
<PHP>(#|[/][/]).*{NEW_LINE} lval.Token(l.newToken(l.Token()));// return T_COMMENT; // TODO: handle ?>
|
||||
<PHP>([/][*])|([/][*][*])
    // Block comment. The pattern only matches the opener ("/*" or "/**");
    // the body is consumed by hand below, up to and including the closing
    // "*/" or the end of input. The comment is passed to lval and recorded
    // on the lexer, but no token id is returned for it.
    tb := l.Token()
    isDocComment := len(tb) > 2
    terminated := false

    if isDocComment && rune(c) == '/' {
        // Longest match took "/**" out of "/**/". The pending '/' closes an
        // empty plain comment. Without this check the loop below would never
        // pair that '/' with a preceding '*' and would swallow input up to
        // the next "*/".
        isDocComment = false
        terminated = true
        c = l.Next()
    }

    if isDocComment {
        l.PhpDocComment = ""
    }

    for !terminated {
        if c == -1 {
            break; // TODO: Unterminated comment starting line %d
        }

        // p is the character just consumed, c the new lookahead.
        p := c
        c = l.Next()

        if rune(p) == '*' && rune(c) == '/' {
            c = l.Next()
            break;
        }
    }

    // Build the token before registering the comment so the token carries
    // only the comments collected ahead of it.
    lval.Token(l.newToken(l.Token()))
    text := string(l.TokenBytes(nil))
    if isDocComment {
        l.PhpDocComment = text
        l.addComment(comment.NewDocComment(text))
        // return T_DOC_COMMENT
    } else {
        l.addComment(comment.NewPlainComment(text))
        // return T_COMMENT
    }
|
||||
|
||||
<PHP>{OPERATORS} lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
|
||||
<PHP>\{ l.pushState(PHP); lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<PHP>\} l.popState(); lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])); l.PhpDocComment = "" // NOTE(review): this reset sits after the return and never runs; hoisting it would clear PhpDocComment at every '}' — confirm the intent before changing it
|
||||
<PHP>\${VAR_NAME} lval.Token(l.newToken(l.Token())); return T_VARIABLE
|
||||
<PHP>{VAR_NAME} lval.Token(l.newToken(l.Token())); return T_STRING
|
||||
|
||||
<PHP>-> l.begin(PROPERTY);lval.Token(l.newToken(l.Token())); return T_OBJECT_OPERATOR;
|
||||
<PROPERTY>[ \t\n\r]+ lval.Token(l.newToken(l.Token())); return T_WHITESPACE;
|
||||
<PROPERTY>-> lval.Token(l.newToken(l.Token())); return T_OBJECT_OPERATOR;
|
||||
<PROPERTY>{VAR_NAME} l.begin(PHP);lval.Token(l.newToken(l.Token())); return T_STRING;
|
||||
<PROPERTY>. l.ungetChars(1);l.begin(PHP)
|
||||
|
||||
<PHP>[\']([^\\\']*([\\].)*)*[\'] lval.Token(l.newToken(l.Token())); return T_CONSTANT_ENCAPSED_STRING;
|
||||
|
||||
<PHP>` l.begin(BACKQUOTE); lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<BACKQUOTE>` l.begin(PHP); lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
|
||||
<PHP>[b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE}
|
||||
tb := l.Token()
|
||||
binPrefix := 0
|
||||
if tb[0].Rune == 'b' {
|
||||
binPrefix = 1
|
||||
}
|
||||
|
||||
lblFirst := 3 + binPrefix
|
||||
lblLast := len(tb)-2
|
||||
if tb[lblLast].Rune == '\r' {
|
||||
lblLast--
|
||||
}
|
||||
|
||||
for {
|
||||
if tb[lblFirst].Rune == ' ' || tb[lblFirst].Rune == '\t' {
|
||||
lblFirst++
|
||||
continue
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
switch tb[lblFirst].Rune {
|
||||
case '\'' :
|
||||
lblFirst++
|
||||
lblLast--
|
||||
l.begin(NOWDOC)
|
||||
case '"' :
|
||||
lblFirst++
|
||||
lblLast--
|
||||
l.begin(HEREDOC)
|
||||
default:
|
||||
l.begin(HEREDOC)
|
||||
}
|
||||
|
||||
heredocLabel = make([]lex.Char, lblLast - lblFirst + 1)
|
||||
copy(heredocLabel, tb[lblFirst:lblLast+1])
|
||||
|
||||
ungetCnt := len(heredocLabel)
|
||||
searchLabelAhead := []lex.Char{}
|
||||
for i := 0; i < len(heredocLabel); i++ {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
searchLabelAhead = append(searchLabelAhead, l.Lookahead())
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
if bytes.Equal(l.charsToBytes(heredocLabel), l.charsToBytes(searchLabelAhead)) && ';' == rune(c) {
|
||||
ungetCnt++
|
||||
c = l.Next()
|
||||
if '\n' == rune(c) || '\r' == rune(c) {
|
||||
l.begin(HEREDOC_END)
|
||||
}
|
||||
}
|
||||
|
||||
l.ungetChars(ungetCnt)
|
||||
|
||||
lval.Token(l.newToken(tb));
|
||||
return T_START_HEREDOC
|
||||
|
||||
<NOWDOC>.|[ \t\n\r]
    // Nowdoc body. Characters are consumed until a line consisting of the
    // opening label (optionally followed by ';') is complete. The label is
    // then pushed back for the HEREDOC_END rules, and everything before it
    // is emitted as one T_ENCAPSED_AND_WHITESPACE token.
    searchLabel := []byte{}
    tb := []lex.Char{}

    for {
        if c == -1 {
            // Input ended before the closing label. Emit what was consumed
            // so far; leaving tb empty would make newToken index an empty
            // slice and panic.
            tb = l.Token()
            break;
        }

        if '\n' == rune(c) || '\r' == rune(c) {
            if bytes.Equal(append(l.charsToBytes(heredocLabel), ';'), searchLabel) {
                l.begin(HEREDOC_END)
                tb = l.ungetChars(len(heredocLabel)+1)
                break;
            }

            if bytes.Equal(l.charsToBytes(heredocLabel), searchLabel) {
                l.begin(HEREDOC_END)
                tb = l.ungetChars(len(heredocLabel))
                break;
            }

            // Not the closing label: start collecting the next line.
            searchLabel = []byte{}
        } else {
            searchLabel = append(searchLabel, byte(rune(c)))
        }

        c = l.Next()
    }

    lval.Token(l.newToken(tb) )
    return T_ENCAPSED_AND_WHITESPACE
|
||||
|
||||
<HEREDOC_END>{VAR_NAME}\; l.begin(PHP);lval.Token(l.newToken(l.ungetChars(1))); return T_END_HEREDOC
|
||||
<HEREDOC_END>{VAR_NAME} l.begin(PHP);lval.Token(l.newToken(l.Token())); return T_END_HEREDOC
|
||||
|
||||
<PHP>[b]?[\"]
|
||||
binPrefix := l.Token()[0].Rune == 'b'
|
||||
|
||||
beginString := func() int {
|
||||
cnt := 1; if (binPrefix) {cnt = 2}
|
||||
|
||||
l.ungetChars(len(l.Token())-cnt)
|
||||
chars := l.Token()[:cnt]
|
||||
l.pushState(STRING)
|
||||
|
||||
lval.Token(l.newToken(chars)); return Rune2Class('"')
|
||||
}
|
||||
|
||||
F:for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '"' :
|
||||
c = l.Next();
|
||||
lval.Token(l.newToken(l.Token())); return T_CONSTANT_ENCAPSED_STRING
|
||||
break F;
|
||||
|
||||
case '$':
|
||||
c = l.Next();
|
||||
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
|
||||
return beginString()
|
||||
break F;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '{':
|
||||
c = l.Next();
|
||||
if rune(c) == '$' {
|
||||
return beginString()
|
||||
break F;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '\\':
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
<STRING>\" l.popState(); lval.Token(l.newToken(l.Token())); return Rune2Class(l.Token()[0].Rune)
|
||||
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.Token(l.newToken(l.ungetChars(1))); l.pushState(PHP); return T_CURLY_OPEN
|
||||
<STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME); lval.Token(l.newToken(l.Token())); return T_DOLLAR_OPEN_CURLY_BRACES
|
||||
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetChars(1);l.pushState(STRING_VAR)
|
||||
<STRING>.|[ \t\n\r]
|
||||
F1:for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '"' :
|
||||
lval.Token(l.newToken(l.Token()));
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F1;
|
||||
|
||||
case '$':
|
||||
c = l.Next();
|
||||
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
|
||||
l.ungetChars(1)
|
||||
tb := l.Token()
|
||||
lval.Token(l.newToken(tb[:len(tb)-1]));
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F1;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '{':
|
||||
c = l.Next();
|
||||
if rune(c) == '$' {
|
||||
l.ungetChars(1)
|
||||
tb := l.Token()
|
||||
lval.Token(l.newToken(tb[:len(tb)-1]));
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F1;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '\\':
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
<BACKQUOTE>.|[ \t\n\r]
|
||||
F2:for {
|
||||
if c == -1 {
|
||||
break;
|
||||
}
|
||||
|
||||
switch c {
|
||||
case '`' :
|
||||
lval.Token(l.newToken(l.Token()));
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F2;
|
||||
|
||||
case '$':
|
||||
c = l.Next();
|
||||
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
|
||||
l.ungetChars(1)
|
||||
tb := l.Token()
|
||||
lval.Token(l.newToken(tb[:len(tb)-1]));
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F2;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '{':
|
||||
c = l.Next();
|
||||
if rune(c) == '$' {
|
||||
l.ungetChars(1)
|
||||
tb := l.Token()
|
||||
lval.Token(l.newToken(tb[:len(tb)-1]));
|
||||
return T_ENCAPSED_AND_WHITESPACE
|
||||
break F2;
|
||||
}
|
||||
l.ungetChars(0)
|
||||
|
||||
case '\\':
|
||||
c = l.Next();
|
||||
}
|
||||
|
||||
c = l.Next()
|
||||
}
|
||||
|
||||
<HEREDOC>.|[ \t\n\r]
    // Heredoc body. Characters are consumed until the closing label line is
    // complete or an interpolation starts ("$name", "${" or "{$"). The
    // text before that point is emitted as T_ENCAPSED_AND_WHITESPACE.
    searchLabel := []byte{}
    tb := []lex.Char{}

    HEREDOCFOR:for {
        if c == -1 {
            // Input ended before the closing label. Emit what was consumed
            // so far; leaving tb empty would make newToken index an empty
            // slice and panic.
            tb = l.Token()
            break;
        }

        switch c {
            case '\n': fallthrough
            case '\r':
                if bytes.Equal(append(l.charsToBytes(heredocLabel), ';'), searchLabel) {
                    l.begin(HEREDOC_END)
                    tb = l.ungetChars(len(heredocLabel)+1)
                    break HEREDOCFOR;
                }

                if bytes.Equal(l.charsToBytes(heredocLabel), searchLabel) {
                    l.begin(HEREDOC_END)
                    tb = l.ungetChars(len(heredocLabel))
                    break HEREDOCFOR;
                }

                // Not the closing label: start collecting the next line.
                searchLabel = []byte{}

            case '$':
                c = l.Next();
                if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
                    // Start of an interpolated variable: give the '$' back
                    // and stop the literal text just before it.
                    tb = l.ungetChars(1)
                    break HEREDOCFOR;
                }
                l.ungetChars(0)
                searchLabel = []byte{}

            case '{':
                c = l.Next();
                if rune(c) == '$' {
                    // "{$": give the '{' back and stop before it.
                    tb = l.ungetChars(1)
                    break HEREDOCFOR;
                }
                l.ungetChars(0)
                searchLabel = []byte{}

            case '\\':
                // Skip the escaped character so it is not inspected above.
                c = l.Next();
                searchLabel = []byte{}

            default:
                searchLabel = append(searchLabel, byte(rune(c)))
        }

        c = l.Next()
    }

    lval.Token(l.newToken(tb));
    return T_ENCAPSED_AND_WHITESPACE
|
||||
|
||||
<STRING_VAR>\${VAR_NAME} lval.Token(l.newToken(l.Token())); return T_VARIABLE
|
||||
<STRING_VAR>->{VAR_NAME} lval.Token(l.newToken(l.ungetChars(len(l.Token())-2))); return T_OBJECT_OPERATOR
|
||||
<STRING_VAR>{VAR_NAME} l.popState();lval.Token(l.newToken(l.Token())); return T_STRING
|
||||
<STRING_VAR>\[ l.pushState(STRING_VAR_INDEX);lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR>.|[ \t\n\r] l.ungetChars(1);l.popState()
|
||||
|
||||
<STRING_VAR_INDEX>{LNUM}|{HNUM}|{BNUM} lval.Token(l.newToken(l.Token())); return T_NUM_STRING
|
||||
<STRING_VAR_INDEX>\${VAR_NAME} lval.Token(l.newToken(l.Token())); return T_VARIABLE
|
||||
<STRING_VAR_INDEX>{VAR_NAME} lval.Token(l.newToken(l.Token())); return T_STRING
|
||||
<STRING_VAR_INDEX>\] l.popState(); l.popState();lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.Token(l.newToken(l.Token())); return T_ENCAPSED_AND_WHITESPACE
|
||||
<STRING_VAR_INDEX>{OPERATORS} lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
<STRING_VAR_INDEX>. lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
|
||||
|
||||
<STRING_VAR_NAME>{VAR_NAME}[\[\}] l.popState();l.pushState(PHP);lval.Token(l.newToken(l.ungetChars(1))); return T_STRING_VARNAME
|
||||
<STRING_VAR_NAME>. l.ungetChars(1);l.popState();l.pushState(PHP)
|
||||
|
||||
%%
|
||||
if c, ok := l.Abort(); ok { return int(c) }
|
||||
goto yyAction
|
||||
}
|
Loading…
Reference in New Issue
Block a user