Merge branch 'php5syntax'

This commit is contained in:
z7zmey 2018-02-04 21:48:38 +02:00
commit 048e281914
29 changed files with 13434 additions and 2801 deletions

View File

@ -1,6 +1,6 @@
PHPFILE=example.php
all: ./parser/php7.go ./parser/scanner.go
all: ./php5/php5.go ./php7/php7.go ./scanner/scanner.go
rm -f y.output
gofmt -l -s -w *.go
go build
@ -11,8 +11,11 @@ run: all
test: all
go test ./... --cover
./parser/scanner.go: ./parser/scanner.l
./scanner/scanner.go: ./scanner/scanner.l
golex -o $@ $<
./parser/php7.go: ./parser/php7.y
./php5/php5.go: ./php5/php5.y
goyacc -o $@ $<
./php7/php7.go: ./php7/php7.y
goyacc -o $@ $<

View File

@ -14,7 +14,7 @@
A Parser for PHP written in Go inspired by [Nikic PHP Parser](https://github.com/nikic/PHP-Parser)
## Features:
- Fully support PHP7 syntax (PHP5 in progress)
- Full support for PHP5 and PHP7 syntax
- Abstract syntax tree representation
- Traversing AST
@ -25,18 +25,21 @@ package main
import (
"bytes"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php5"
"github.com/z7zmey/php-parser/visitor"
)
func main() {
src := bytes.NewBufferString(`<? echo "Hello world";`)
nodes, comments, positions := parser.ParsePhp7(src, "example.php")
nodes, comments, positions := php5.Parse(src, "example.php")
visitor := Dumper{"", comments, positions}
visitor := visitor.Dumper{
Indent: "",
Comments: comments,
Positions: positions,
}
nodes.Walk(visitor)
}
```
## Roadmap
@ -47,10 +50,10 @@ func main() {
- [x] AST dumper
- [x] node position
- [x] handling comments
- [x] PHP 5 syntax analyzer
- [ ] Tests
- [ ] Error handling
- [ ] Stabilize api
- [ ] Documentation
- [ ] PHP 5 syntax analyzer
- [ ] Code flow graph
- [ ] Pretty printer
- [ ] Code flow graph

View File

@ -8,7 +8,8 @@ import (
"path/filepath"
"github.com/yookoala/realpath"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php5"
"github.com/z7zmey/php-parser/visitor"
)
func main() {
@ -23,9 +24,9 @@ func main() {
fmt.Printf("==> %s\n", path)
src, _ := os.Open(string(path))
nodes, comments, positions := parser.ParsePhp7(src, path)
nodes, comments, positions := php5.Parse(src, path)
visitor := Dumper{
visitor := visitor.Dumper{
Indent: " | ",
Comments: comments,
Positions: positions,

View File

@ -4,14 +4,16 @@ import "github.com/z7zmey/php-parser/walker"
// Argument node
type Argument struct {
Variadic bool // if ... before variable
Expr Node // Exression
Variadic bool // if ... before variable
IsReference bool // if & before variable
Expr Node // Exression
}
// NewArgument node constructor
func NewArgument(Expression Node, Variadic bool) *Argument {
func NewArgument(Expression Node, Variadic bool, IsReference bool) *Argument {
return &Argument{
Variadic,
IsReference,
Expression,
}
}
@ -19,7 +21,8 @@ func NewArgument(Expression Node, Variadic bool) *Argument {
// Attributes returns node attributes as map
func (n *Argument) Attributes() map[string]interface{} {
return map[string]interface{}{
"Variadic": n.Variadic,
"Variadic": n.Variadic,
"IsReference": n.IsReference,
}
}

View File

@ -22,6 +22,11 @@ func (n *Variable) Attributes() map[string]interface{} {
return nil
}
// SetVarName replaces the node stored in VarName with the given node.
func (n *Variable) SetVarName(VarName node.Node) {
n.VarName = VarName
}
// Walk traverses nodes
// Walk is invoked recursively until v.EnterNode returns true
func (n *Variable) Walk(v walker.Visitor) {

View File

@ -12,7 +12,7 @@ import (
"github.com/kylelemons/godebug/pretty"
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php7"
)
func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
@ -44,7 +44,7 @@ func TestName(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -65,7 +65,7 @@ func TestFullyQualified(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -86,7 +86,7 @@ func TestRelative(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}

View File

@ -12,7 +12,7 @@ import (
"github.com/kylelemons/godebug/pretty"
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php7"
)
func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
@ -41,7 +41,7 @@ func TestIdentifier(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -132,7 +132,7 @@ func TestArgumentNode(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -198,7 +198,7 @@ func TestParameterNode(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}

View File

@ -10,7 +10,7 @@ import (
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/scalar"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php7"
)
func TestSimpleVar(t *testing.T) {
@ -29,7 +29,7 @@ func TestSimpleVar(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -56,7 +56,7 @@ func TestSimpleVarPropertyFetch(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -79,7 +79,7 @@ func TestDollarOpenCurlyBraces(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -105,7 +105,7 @@ func TestDollarOpenCurlyBracesDimNumber(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -131,7 +131,7 @@ func TestCurlyOpenMethodCall(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)

View File

@ -8,7 +8,7 @@ import (
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/scalar"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php7"
)
func TestMagicConstant(t *testing.T) {
@ -22,7 +22,7 @@ func TestMagicConstant(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)

View File

@ -9,7 +9,7 @@ import (
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/scalar"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php7"
)
func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
@ -36,7 +36,7 @@ func TestLNumber(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -52,7 +52,7 @@ func TestDNumber(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -68,7 +68,7 @@ func TestFloat(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -84,7 +84,7 @@ func TestBinaryLNumber(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -100,7 +100,7 @@ func TestBinaryDNumber(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -116,7 +116,7 @@ func TestHLNumber(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -132,7 +132,7 @@ func TestHDNumber(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}

View File

@ -8,7 +8,7 @@ import (
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/scalar"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php7"
)
func TestDoubleQuotedScalarString(t *testing.T) {
@ -22,7 +22,7 @@ func TestDoubleQuotedScalarString(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -39,7 +39,7 @@ func TestDoubleQuotedScalarStringWithEscapedVar(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -59,7 +59,7 @@ func TestMultilineDoubleQuotedScalarString(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -77,7 +77,7 @@ func TestSingleQuotedScalarString(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -97,7 +97,7 @@ func TestMultilineSingleQuotedScalarString(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -118,7 +118,7 @@ CAD;
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -139,7 +139,7 @@ CAD;
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)
@ -160,7 +160,7 @@ CAD;
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
if diff := pretty.Compare(expected, actual); diff != "" {
t.Errorf("diff: (-expected +actual)\n%s", diff)

View File

@ -14,12 +14,12 @@ type AltIf struct {
}
// NewAltIf node constructor
func NewAltIf(Cond node.Node, Stmt node.Node) *AltIf {
func NewAltIf(Cond node.Node, Stmt node.Node, ElseIf []node.Node, Else node.Node) *AltIf {
return &AltIf{
Cond,
Stmt,
nil,
nil,
ElseIf,
Else,
}
}

View File

@ -10,7 +10,7 @@ import (
"github.com/kylelemons/godebug/pretty"
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/parser"
"github.com/z7zmey/php-parser/php7"
)
func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
@ -41,7 +41,7 @@ func TestAltIf(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -68,7 +68,7 @@ func TestAltElseIf(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -92,7 +92,7 @@ func TestAltElse(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}
@ -128,7 +128,7 @@ func TestAltElseElseIf(t *testing.T) {
},
}
actual, _, _ := parser.ParsePhp7(bytes.NewBufferString(src), "test.php")
actual, _, _ := php7.Parse(bytes.NewBufferString(src), "test.php")
assertEqual(t, expected, actual)
}

View File

@ -10,16 +10,16 @@ type If struct {
Cond node.Node
Stmt node.Node
ElseIf []node.Node
_else node.Node
Else node.Node
}
// NewIf node constructor
func NewIf(Cond node.Node, Stmt node.Node) *If {
func NewIf(Cond node.Node, Stmt node.Node, ElseIf []node.Node, Else node.Node) *If {
return &If{
Cond,
Stmt,
nil,
nil,
ElseIf,
Else,
}
}
@ -38,8 +38,8 @@ func (n *If) AddElseIf(ElseIf node.Node) node.Node {
return n
}
func (n *If) SetElse(_else node.Node) node.Node {
n._else = _else
func (n *If) SetElse(Else node.Node) node.Node {
n.Else = Else
return n
}
@ -70,9 +70,9 @@ func (n *If) Walk(v walker.Visitor) {
}
}
if n._else != nil {
if n.Else != nil {
vv := v.GetChildrenVisitor("else")
n._else.Walk(vv)
n.Else.Walk(vv)
}
v.LeaveNode(n)

View File

@ -31,7 +31,7 @@ var nodesToTest = []struct {
{
&node.Argument{Variadic: true, Expr: &expr.Variable{VarName: &node.Identifier{Value: "foo"}}},
[]string{"Expr"},
map[string]interface{}{"Variadic": true},
map[string]interface{}{"IsReference": false, "Variadic": true},
},
{
&node.Parameter{

View File

@ -1,119 +0,0 @@
package parser
import (
"bufio"
"bytes"
"go/token"
"io"
"unicode"
"github.com/cznic/golex/lex"
"github.com/z7zmey/php-parser/comment"
t "github.com/z7zmey/php-parser/token"
)
// Character classes for non-ASCII runes. They are allocated in [0x80, 0xFF]
// (0x80..0x83 here) so they never collide with ASCII runes, which rune2Class
// returns unchanged.
const (
// classUnicodeLeter is returned by rune2Class when unicode.IsLetter reports true.
classUnicodeLeter = iota + 0x80
// classUnicodeDigit is returned by rune2Class when unicode.IsDigit reports true.
classUnicodeDigit
// classUnicodeGraphic is returned by rune2Class when unicode.IsGraphic reports true.
classUnicodeGraphic
// classOther is declared but not returned by rune2Class, which yields -1 instead.
classOther
)
// lexer embeds *lex.Lexer and adds the scanner state kept between tokens.
type lexer struct {
*lex.Lexer
// stateStack is a stack of scanner states; its last element is the current
// state (see getCurrentState). newLexer seeds it with a single 0 entry.
stateStack []int
// phpDocComment holds doc-comment text; it is not written by any method
// in this file. NOTE(review): presumably set by the scanner rules - confirm.
phpDocComment string
// comments collects comments added through addComment; newToken attaches
// the current slice to every token it builds.
comments []comment.Comment
}
// rune2Class maps a rune to the character class handed to the lexer.
//
// ASCII runes (0 through 0x7F) are their own class. Any other rune is tested
// as a letter, then a digit, then a graphic character, and mapped to the
// matching class constant. Runes that fit none of these yield -1.
func rune2Class(r rune) int {
	switch {
	case r >= 0 && r < 0x80:
		// ASCII passes through untouched.
		return int(r)
	case unicode.IsLetter(r):
		return classUnicodeLeter
	case unicode.IsDigit(r):
		return classUnicodeDigit
	case unicode.IsGraphic(r):
		return classUnicodeGraphic
	default:
		// classOther is not returned; unclassified runes map to -1.
		return -1
	}
}
// newLexer creates a lexer that reads from src, registering fName as the
// file name in a fresh token.FileSet. Runes are classified with rune2Class.
// The state stack starts with a single 0 entry, the doc comment is empty and
// no comments are collected yet.
//
// It panics if lex.New reports an error.
func newLexer(src io.Reader, fName string) *lexer {
	fset := token.NewFileSet()
	file := fset.AddFile(fName, -1, 1<<31-1)

	lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(rune2Class))
	if err != nil {
		panic(err)
	}

	return &lexer{
		Lexer:         lx,
		stateStack:    []int{0},
		phpDocComment: "",
		comments:      nil,
	}
}
// ungetChars pushes the current lookahead and then the last n chars of the
// current token back through l.Unget, last char first. It returns the token
// chars read after that, with the final n entries sliced off.
//
// It panics with an index out of range if n exceeds the token length.
// NOTE(review): the final slice assumes Token() still includes the n ungot
// chars after Unget - confirm against the lex.Lexer documentation.
func (l *lexer) ungetChars(n int) []lex.Char {
// The lookahead goes back first, before any token chars.
l.Unget(l.Lookahead())
chars := l.Token()
// Walk the token from its end: i = 1 is the last char.
for i := 1; i <= n; i++ {
char := chars[len(chars)-i]
l.Unget(char)
}
// Re-read the token and drop its trailing n chars from the returned slice.
buf := l.Token()
buf = buf[:len(buf)-n]
return buf
}
// pushState makes state the current scanner state by appending it to the
// top of the state stack; the previous states stay underneath.
func (l *lexer) pushState(state int) {
	stack := l.stateStack
	l.stateStack = append(stack, state)
}
// popState drops the top entry of the state stack. The bottom entry is never
// removed: with one or zero entries the call does nothing.
func (l *lexer) popState() {
	if depth := len(l.stateStack); depth > 1 {
		l.stateStack = l.stateStack[:depth-1]
	}
}
// begin replaces the current scanner state (the top of the state stack) with
// state, leaving the states beneath it untouched.
//
// If the stack is empty, as in a zero-value lexer, state is pushed instead of
// slicing to a negative length, which would panic.
func (l *lexer) begin(state int) {
	top := len(l.stateStack) - 1
	if top < 0 {
		l.stateStack = append(l.stateStack, state)
		return
	}
	// Overwriting in place has the same effect as truncating by one and
	// appending, without shadowing the builtin len.
	l.stateStack[top] = state
}
// getCurrentState reports the scanner state on top of the state stack.
// It panics if the stack is empty.
func (l *lexer) getCurrentState() int {
	top := len(l.stateStack) - 1
	return l.stateStack[top]
}
// newToken builds a token from chars. The token value is the chars encoded
// as bytes; its start and end line and position come from the first and last
// char. The lexer's currently collected comments are attached to the token.
//
// chars must not be empty: the first and last elements are indexed directly.
func (l *lexer) newToken(chars []lex.Char) t.Token {
	first, last := chars[0], chars[len(chars)-1]
	startPos, endPos := first.Pos(), last.Pos()
	startLine, endLine := l.File.Line(startPos), l.File.Line(endPos)

	tok := t.NewToken(l.charsToBytes(chars), startLine, endLine, int(startPos), int(endPos))
	return tok.SetComments(l.comments)
}
// addComment appends c to the comments collected by the lexer.
func (l *lexer) addComment(c comment.Comment) {
	collected := append(l.comments, c)
	l.comments = collected
}
// charsToBytes encodes the Rune of every char, in order, as UTF-8 and
// returns the resulting bytes.
func (l *lexer) charsToBytes(chars []lex.Char) []byte {
	var buf bytes.Buffer
	for i := range chars {
		buf.WriteRune(chars[i].Rune)
	}
	return buf.Bytes()
}

View File

@ -1,609 +0,0 @@
%{
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// blame: jnml, labs.nic.cz
package parser
import (
"fmt"
"bytes"
"github.com/cznic/golex/lex"
"github.com/z7zmey/php-parser/comment"
)
const (
INITIAL = iota
PHP
STRING
STRING_VAR
STRING_VAR_INDEX
STRING_VAR_NAME
PROPERTY
HEREDOC_END
NOWDOC
HEREDOC
BACKQUOTE
)
var heredocLabel []lex.Char
func (l *lexer) Lex(lval *yySymType) int {
l.comments = nil
c := l.Enter()
%}
%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY HEREDOC_END NOWDOC HEREDOC BACKQUOTE
%yyb last == '\n' || last = '\0'
%yyt l.getCurrentState()
%yyc c
%yyn c = l.Next()
%yym l.Mark()
%optioncase-insensitive
LNUM [0-9]+
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
HNUM 0x[0-9a-fA-F]+
BNUM 0b[01]+
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
VAR_NAME [a-zA-Z_\x7f-\xff^0-9/][a-zA-Z0-9_\x7f-\xff]*
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
NEW_LINE (\r|\n|\r\n)
%%
c = l.Rule0()
<INITIAL>[ \t\n\r]+ lval.token = l.newToken(l.Token());
<INITIAL>.
tb := []lex.Char{}
for {
if c == -1 {
tb = l.Token();
break;
}
if '?' == rune(c) {
tb = l.Token();
if (len(tb) < 2 || tb[len(tb)-1].Rune != '<') {
c = l.Next()
continue;
}
tb = l.ungetChars(1)
break;
}
c = l.Next()
}
lval.token = l.newToken(tb)
return T_INLINE_HTML
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.begin(PHP);lval.token = l.newToken(l.Token());// return T_OPEN_TAG;
<INITIAL>\<\? l.begin(PHP);lval.token = l.newToken(l.Token());// return T_OPEN_TAG;
<INITIAL>\<\?= l.begin(PHP);lval.token = l.newToken(l.Token()); return T_ECHO;
<PHP>[ \t\n\r]+ lval.token = l.newToken(l.Token());// return T_WHITESPACE
<PHP>\?\>{NEW_LINE}? l.begin(INITIAL);lval.token = l.newToken(l.Token()); return rune2Class(';');
<PHP>{DNUM}|{EXPONENT_DNUM} lval.token = l.newToken(l.Token()); return T_DNUMBER
<PHP>{BNUM}
tb := l.Token()
i:=2
BNUMFOR:for {
if i > len(tb)-1 {
break BNUMFOR;
}
switch tb[i].Rune {
case '0': i++;
default: break BNUMFOR;
}
}
if len(tb) - i < 64 {
lval.token = l.newToken(l.Token()); return T_LNUMBER
} else {
lval.token = l.newToken(l.Token()); return T_DNUMBER
}
<PHP>{LNUM}
if len(l.Token()) < 20 {
lval.token = l.newToken(l.Token()); return T_LNUMBER
} else {
lval.token = l.newToken(l.Token()); return T_DNUMBER
}
<PHP>{HNUM}
tb := l.Token()
i:=2
HNUMFOR:for {
if i > len(tb)-1 {
break HNUMFOR;
}
switch tb[i].Rune {
case '0': i++;
default: break HNUMFOR;
}
}
length := len(tb) - i
if length < 16 || (length == 16 && tb[i].Rune <= '7') {
lval.token = l.newToken(l.Token()); return T_LNUMBER
} else {
lval.token = l.newToken(l.Token()); return T_DNUMBER
}
<PHP>abstract lval.token = l.newToken(l.Token()); return T_ABSTRACT
<PHP>array lval.token = l.newToken(l.Token()); return T_ARRAY
<PHP>as lval.token = l.newToken(l.Token()); return T_AS
<PHP>break lval.token = l.newToken(l.Token()); return T_BREAK
<PHP>callable lval.token = l.newToken(l.Token()); return T_CALLABLE
<PHP>case lval.token = l.newToken(l.Token()); return T_CASE
<PHP>catch lval.token = l.newToken(l.Token()); return T_CATCH
<PHP>class lval.token = l.newToken(l.Token()); return T_CLASS
<PHP>clone lval.token = l.newToken(l.Token()); return T_CLONE
<PHP>const lval.token = l.newToken(l.Token()); return T_CONST;
<PHP>continue lval.token = l.newToken(l.Token()); return T_CONTINUE;
<PHP>declare lval.token = l.newToken(l.Token()); return T_DECLARE;
<PHP>default lval.token = l.newToken(l.Token()); return T_DEFAULT;
<PHP>do lval.token = l.newToken(l.Token()); return T_DO;
<PHP>echo lval.token = l.newToken(l.Token()); return T_ECHO;
<PHP>else lval.token = l.newToken(l.Token()); return T_ELSE;
<PHP>elseif lval.token = l.newToken(l.Token()); return T_ELSEIF;
<PHP>empty lval.token = l.newToken(l.Token()); return T_EMPTY;
<PHP>enddeclare lval.token = l.newToken(l.Token()); return T_ENDDECLARE
<PHP>endfor lval.token = l.newToken(l.Token()); return T_ENDFOR
<PHP>endforeach lval.token = l.newToken(l.Token()); return T_ENDFOREACH
<PHP>endif lval.token = l.newToken(l.Token()); return T_ENDIF
<PHP>endswitch lval.token = l.newToken(l.Token()); return T_ENDSWITCH
<PHP>endwhile lval.token = l.newToken(l.Token()); return T_ENDWHILE
<PHP>eval lval.token = l.newToken(l.Token()); return T_EVAL
<PHP>exit|die lval.token = l.newToken(l.Token()); return T_EXIT
<PHP>extends lval.token = l.newToken(l.Token()); return T_EXTENDS
<PHP>final lval.token = l.newToken(l.Token()); return T_FINAL
<PHP>finally lval.token = l.newToken(l.Token()); return T_FINALLY
<PHP>for lval.token = l.newToken(l.Token()); return T_FOR
<PHP>foreach lval.token = l.newToken(l.Token()); return T_FOREACH
<PHP>function|cfunction lval.token = l.newToken(l.Token()); return T_FUNCTION
<PHP>global lval.token = l.newToken(l.Token()); return T_GLOBAL
<PHP>goto lval.token = l.newToken(l.Token()); return T_GOTO
<PHP>if lval.token = l.newToken(l.Token()); return T_IF
<PHP>isset lval.token = l.newToken(l.Token()); return T_ISSET
<PHP>implements lval.token = l.newToken(l.Token()); return T_IMPLEMENTS
<PHP>instanceof lval.token = l.newToken(l.Token()); return T_INSTANCEOF
<PHP>insteadof lval.token = l.newToken(l.Token()); return T_INSTEADOF
<PHP>interface lval.token = l.newToken(l.Token()); return T_INTERFACE
<PHP>list lval.token = l.newToken(l.Token()); return T_LIST
<PHP>namespace lval.token = l.newToken(l.Token()); return T_NAMESPACE
<PHP>private lval.token = l.newToken(l.Token()); return T_PRIVATE
<PHP>public lval.token = l.newToken(l.Token()); return T_PUBLIC
<PHP>print lval.token = l.newToken(l.Token()); return T_PRINT
<PHP>protected lval.token = l.newToken(l.Token()); return T_PROTECTED
<PHP>return lval.token = l.newToken(l.Token()); return T_RETURN
<PHP>static lval.token = l.newToken(l.Token()); return T_STATIC
<PHP>switch lval.token = l.newToken(l.Token()); return T_SWITCH
<PHP>throw lval.token = l.newToken(l.Token()); return T_THROW
<PHP>trait lval.token = l.newToken(l.Token()); return T_TRAIT
<PHP>try lval.token = l.newToken(l.Token()); return T_TRY
<PHP>unset lval.token = l.newToken(l.Token()); return T_UNSET
<PHP>use lval.token = l.newToken(l.Token()); return T_USE
<PHP>var lval.token = l.newToken(l.Token()); return T_VAR
<PHP>while lval.token = l.newToken(l.Token()); return T_WHILE
<PHP>yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] lval.token = l.newToken(l.Token()); return T_YIELD_FROM
<PHP>yield lval.token = l.newToken(l.Token()); return T_YIELD
<PHP>include lval.token = l.newToken(l.Token()); return T_INCLUDE
<PHP>include_once lval.token = l.newToken(l.Token()); return T_INCLUDE_ONCE
<PHP>require lval.token = l.newToken(l.Token()); return T_REQUIRE
<PHP>require_once lval.token = l.newToken(l.Token()); return T_REQUIRE_ONCE
<PHP>__CLASS__ lval.token = l.newToken(l.Token()); return T_CLASS_C
<PHP>__DIR__ lval.token = l.newToken(l.Token()); return T_DIR
<PHP>__FILE__ lval.token = l.newToken(l.Token()); return T_FILE
<PHP>__FUNCTION__ lval.token = l.newToken(l.Token()); return T_FUNC_C
<PHP>__LINE__ lval.token = l.newToken(l.Token()); return T_LINE
<PHP>__NAMESPACE__ lval.token = l.newToken(l.Token()); return T_NS_C
<PHP>__METHOD__ lval.token = l.newToken(l.Token()); return T_METHOD_C
<PHP>__TRAIT__ lval.token = l.newToken(l.Token()); return T_TRAIT_C
<PHP>__halt_compiler lval.token = l.newToken(l.Token()); return T_HALT_COMPILER
<PHP>\([ \t]*array[ \t]*\) lval.token = l.newToken(l.Token()); return T_ARRAY_CAST
<PHP>\([ \t]*(bool|boolean)[ \t]*\) lval.token = l.newToken(l.Token()); return T_BOOL_CAST
<PHP>\([ \t]*(real|double|float)[ \t]*\) lval.token = l.newToken(l.Token()); return T_DOUBLE_CAST
<PHP>\([ \t]*(int|integer)[ \t]*\) lval.token = l.newToken(l.Token()); return T_INT_CAST
<PHP>\([ \t]*object[ \t]*\) lval.token = l.newToken(l.Token()); return T_OBJECT_CAST
<PHP>\([ \t]*string[ \t]*\) lval.token = l.newToken(l.Token()); return T_STRING_CAST
<PHP>\([ \t]*unset[ \t]*\) lval.token = l.newToken(l.Token()); return T_UNSET_CAST
<PHP>new lval.token = l.newToken(l.Token()); return T_NEW
<PHP>and lval.token = l.newToken(l.Token()); return T_LOGICAL_AND
<PHP>or lval.token = l.newToken(l.Token()); return T_LOGICAL_OR
<PHP>xor lval.token = l.newToken(l.Token()); return T_LOGICAL_XOR
<PHP>\\ lval.token = l.newToken(l.Token()); return T_NS_SEPARATOR
<PHP>\.\.\. lval.token = l.newToken(l.Token()); return T_ELLIPSIS;
<PHP>:: lval.token = l.newToken(l.Token()); return T_PAAMAYIM_NEKUDOTAYIM; // T_DOUBLE_COLON
<PHP>&& lval.token = l.newToken(l.Token()); return T_BOOLEAN_AND
<PHP>\|\| lval.token = l.newToken(l.Token()); return T_BOOLEAN_OR
<PHP>&= lval.token = l.newToken(l.Token()); return T_AND_EQUAL
<PHP>\|= lval.token = l.newToken(l.Token()); return T_OR_EQUAL
<PHP>\.= lval.token = l.newToken(l.Token()); return T_CONCAT_EQUAL;
<PHP>\*= lval.token = l.newToken(l.Token()); return T_MUL_EQUAL
<PHP>\*\*= lval.token = l.newToken(l.Token()); return T_POW_EQUAL
<PHP>[/]= lval.token = l.newToken(l.Token()); return T_DIV_EQUAL;
<PHP>\+= lval.token = l.newToken(l.Token()); return T_PLUS_EQUAL
<PHP>-= lval.token = l.newToken(l.Token()); return T_MINUS_EQUAL
<PHP>\^= lval.token = l.newToken(l.Token()); return T_XOR_EQUAL
<PHP>%= lval.token = l.newToken(l.Token()); return T_MOD_EQUAL
<PHP>-- lval.token = l.newToken(l.Token()); return T_DEC;
<PHP>\+\+ lval.token = l.newToken(l.Token()); return T_INC
<PHP>=> lval.token = l.newToken(l.Token()); return T_DOUBLE_ARROW;
<PHP>\<=\> lval.token = l.newToken(l.Token()); return T_SPACESHIP
<PHP>\!=|\<\> lval.token = l.newToken(l.Token()); return T_IS_NOT_EQUAL
<PHP>\!== lval.token = l.newToken(l.Token()); return T_IS_NOT_IDENTICAL
<PHP>== lval.token = l.newToken(l.Token()); return T_IS_EQUAL
<PHP>=== lval.token = l.newToken(l.Token()); return T_IS_IDENTICAL
<PHP>\<\<= lval.token = l.newToken(l.Token()); return T_SL_EQUAL
<PHP>\>\>= lval.token = l.newToken(l.Token()); return T_SR_EQUAL
<PHP>\>= lval.token = l.newToken(l.Token()); return T_IS_GREATER_OR_EQUAL
<PHP>\<= lval.token = l.newToken(l.Token()); return T_IS_SMALLER_OR_EQUAL
<PHP>\*\* lval.token = l.newToken(l.Token()); return T_POW
<PHP>\<\< lval.token = l.newToken(l.Token()); return T_SL
<PHP>\>\> lval.token = l.newToken(l.Token()); return T_SR
<PHP>\?\? lval.token = l.newToken(l.Token()); return T_COALESCE
<PHP>(#|[/][/]).*{NEW_LINE} lval.token = l.newToken(l.Token());// return T_COMMENT; // TODO: handle ?>
<PHP>([/][*])|([/][*][*])
tb := l.Token()
is_doc_comment := false
if len(tb) > 2 {
is_doc_comment = true
l.phpDocComment = ""
}
for {
if c == -1 {
break; // TODO: Unterminated comment starting line %d
}
p := c
c = l.Next()
if rune(p) == '*' && rune(c) == '/' {
c = l.Next()
break;
}
}
lval.token = l.newToken(l.Token())
if is_doc_comment {
l.phpDocComment = string(l.TokenBytes(nil))
l.addComment(comment.NewDocComment(string(l.TokenBytes(nil))))
// return T_DOC_COMMENT
} else {
l.addComment(comment.NewPlainComment(string(l.TokenBytes(nil))))
// return T_COMMENT
}
<PHP>{OPERATORS} lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
<PHP>\{ l.pushState(PHP); lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
<PHP>\} l.popState(); lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0])); l.phpDocComment = ""
<PHP>\${VAR_NAME} lval.token = l.newToken(l.Token()); return T_VARIABLE
<PHP>{VAR_NAME} lval.token = l.newToken(l.Token()); return T_STRING
<PHP>-> l.begin(PROPERTY);lval.token = l.newToken(l.Token()); return T_OBJECT_OPERATOR;
<PROPERTY>[ \t\n\r]+ lval.token = l.newToken(l.Token()); return T_WHITESPACE;
<PROPERTY>-> lval.token = l.newToken(l.Token()); return T_OBJECT_OPERATOR;
<PROPERTY>{VAR_NAME} l.begin(PHP);lval.token = l.newToken(l.Token()); return T_STRING;
<PROPERTY>. l.ungetChars(1);l.begin(PHP)
<PHP>[\']([^\\\']*([\\].)*)*[\'] lval.token = l.newToken(l.Token()); return T_CONSTANT_ENCAPSED_STRING;
<PHP>` l.begin(BACKQUOTE); lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
<BACKQUOTE>` l.begin(PHP); lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
<PHP>[b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE}
tb := l.Token()
binPrefix := 0
if tb[0].Rune == 'b' {
binPrefix = 1
}
lblFirst := 3 + binPrefix
lblLast := len(tb)-2
if tb[lblLast].Rune == '\r' {
lblLast--
}
for {
if tb[lblFirst].Rune == ' ' || tb[lblFirst].Rune == '\t' {
lblFirst++
continue
}
break
}
switch tb[lblFirst].Rune {
case '\'' :
lblFirst++
lblLast--
l.begin(NOWDOC)
case '"' :
lblFirst++
lblLast--
l.begin(HEREDOC)
default:
l.begin(HEREDOC)
}
heredocLabel = make([]lex.Char, lblLast - lblFirst + 1)
copy(heredocLabel, tb[lblFirst:lblLast+1])
ungetCnt := len(heredocLabel)
searchLabelAhead := []lex.Char{}
for i := 0; i < len(heredocLabel); i++ {
if c == -1 {
break;
}
searchLabelAhead = append(searchLabelAhead, l.Lookahead())
c = l.Next()
}
if bytes.Equal(l.charsToBytes(heredocLabel), l.charsToBytes(searchLabelAhead)) && ';' == rune(c) {
ungetCnt++
c = l.Next()
if '\n' == rune(c) || '\r' == rune(c) {
l.begin(HEREDOC_END)
}
}
l.ungetChars(ungetCnt)
lval.token = l.newToken(tb);
return T_START_HEREDOC
<NOWDOC>.|[ \t\n\r]
searchLabel := []byte{}
tb := []lex.Char{}
for {
if c == -1 {
break;
}
if '\n' == rune(c) || '\r' == rune(c) {
if bytes.Equal(append(l.charsToBytes(heredocLabel), ';'), searchLabel) {
l.begin(HEREDOC_END)
tb = l.ungetChars(len(heredocLabel)+1)
break;
}
if bytes.Equal(l.charsToBytes(heredocLabel), searchLabel) {
l.begin(HEREDOC_END)
tb = l.ungetChars(len(heredocLabel))
break;
}
searchLabel = []byte{}
} else {
searchLabel = append(searchLabel, byte(rune(c)))
}
c = l.Next()
}
lval.token = l.newToken(tb)
return T_ENCAPSED_AND_WHITESPACE
<HEREDOC_END>{VAR_NAME}\; l.begin(PHP);lval.token = l.newToken(l.ungetChars(1)); return T_END_HEREDOC
<HEREDOC_END>{VAR_NAME} l.begin(PHP);lval.token = l.newToken(l.Token()); return T_END_HEREDOC
<PHP>[b]?[\"]
binPrefix := l.Token()[0].Rune == 'b'
beginString := func() int {
cnt := 1; if (binPrefix) {cnt = 2}
l.ungetChars(len(l.Token())-cnt)
chars := l.Token()[:cnt]
l.pushState(STRING)
lval.token = l.newToken(chars); return rune2Class('"')
}
F:for {
if c == -1 {
break;
}
switch c {
case '"' :
c = l.Next();
lval.token = l.newToken(l.Token()); return T_CONSTANT_ENCAPSED_STRING
break F;
case '$':
c = l.Next();
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
return beginString()
break F;
}
l.ungetChars(0)
case '{':
c = l.Next();
if rune(c) == '$' {
return beginString()
break F;
}
l.ungetChars(0)
case '\\':
c = l.Next();
}
c = l.Next()
}
<STRING>\" l.popState(); lval.token = l.newToken(l.Token()); return rune2Class(l.Token()[0].Rune)
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.token = l.newToken(l.ungetChars(1)); l.pushState(PHP); return T_CURLY_OPEN
<STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME); lval.token = l.newToken(l.Token()); return T_DOLLAR_OPEN_CURLY_BRACES
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetChars(1);l.pushState(STRING_VAR)
<STRING>.|[ \t\n\r]
F1:for {
if c == -1 {
break;
}
switch c {
case '"' :
lval.token = l.newToken(l.Token());
return T_ENCAPSED_AND_WHITESPACE
break F1;
case '$':
c = l.Next();
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetChars(1)
tb := l.Token()
lval.token = l.newToken(tb[:len(tb)-1]);
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
l.ungetChars(0)
case '{':
c = l.Next();
if rune(c) == '$' {
l.ungetChars(1)
tb := l.Token()
lval.token = l.newToken(tb[:len(tb)-1]);
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
l.ungetChars(0)
case '\\':
c = l.Next();
}
c = l.Next()
}
<BACKQUOTE>.|[ \t\n\r]
F2:for {
if c == -1 {
break;
}
switch c {
case '`' :
lval.token = l.newToken(l.Token());
return T_ENCAPSED_AND_WHITESPACE
break F2;
case '$':
c = l.Next();
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetChars(1)
tb := l.Token()
lval.token = l.newToken(tb[:len(tb)-1]);
return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetChars(0)
case '{':
c = l.Next();
if rune(c) == '$' {
l.ungetChars(1)
tb := l.Token()
lval.token = l.newToken(tb[:len(tb)-1]);
return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetChars(0)
case '\\':
c = l.Next();
}
c = l.Next()
}
<HEREDOC>.|[ \t\n\r]
searchLabel := []byte{}
tb := []lex.Char{}
HEREDOCFOR:for {
if c == -1 {
break;
}
switch c {
case '\n': fallthrough
case '\r':
if bytes.Equal(append(l.charsToBytes(heredocLabel), ';'), searchLabel) {
l.begin(HEREDOC_END)
tb = l.ungetChars(len(heredocLabel)+1)
break HEREDOCFOR;
}
if bytes.Equal(l.charsToBytes(heredocLabel), searchLabel) {
l.begin(HEREDOC_END)
tb = l.ungetChars(len(heredocLabel))
break HEREDOCFOR;
}
searchLabel = []byte{}
case '$':
c = l.Next();
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
tb = l.ungetChars(1)
break HEREDOCFOR;
}
l.ungetChars(0)
searchLabel = []byte{}
case '{':
c = l.Next();
if rune(c) == '$' {
tb = l.ungetChars(1)
break HEREDOCFOR;
}
l.ungetChars(0)
searchLabel = []byte{}
case '\\':
c = l.Next();
searchLabel = []byte{}
default:
searchLabel = append(searchLabel, byte(rune(c)))
}
c = l.Next()
}
lval.token = l.newToken(tb);
return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR>\${VAR_NAME} lval.token = l.newToken(l.Token()); return T_VARIABLE
<STRING_VAR>->{VAR_NAME} lval.token = l.newToken(l.ungetChars(len(l.Token())-2)); return T_OBJECT_OPERATOR
<STRING_VAR>{VAR_NAME} l.popState();lval.token = l.newToken(l.Token()); return T_STRING
<STRING_VAR>\[ l.pushState(STRING_VAR_INDEX);lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR>.|[ \t\n\r] l.ungetChars(1);l.popState()
<STRING_VAR_INDEX>{LNUM}|{HNUM}|{BNUM} lval.token = l.newToken(l.Token()); return T_NUM_STRING
<STRING_VAR_INDEX>\${VAR_NAME} lval.token = l.newToken(l.Token()); return T_VARIABLE
<STRING_VAR_INDEX>{VAR_NAME} lval.token = l.newToken(l.Token()); return T_STRING
<STRING_VAR_INDEX>\] l.popState(); l.popState();lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.token = l.newToken(l.Token()); return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR_INDEX>{OPERATORS} lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>. lval.token = l.newToken(l.Token()); return rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_NAME>{VAR_NAME}[\[\}] l.popState();l.pushState(PHP);lval.token = l.newToken(l.ungetChars(1)); return T_STRING_VARNAME
<STRING_VAR_NAME>. l.ungetChars(1);l.popState();l.pushState(PHP)
%%
if c, ok := l.Abort(); ok { return int(c) }
goto yyAction
}

41
php5/lexer.go Normal file
View File

@ -0,0 +1,41 @@
package php5
import (
"bufio"
goToken "go/token"
"io"
"github.com/cznic/golex/lex"
"github.com/z7zmey/php-parser/scanner"
"github.com/z7zmey/php-parser/token"
)
// lexer wraps the shared scanner.Lexer so that it can be handed to the
// generated PHP 5 parser (see Lex, which accepts this package's *yySymType).
type lexer struct {
scanner.Lexer
}
// Lex scans the next token by delegating to the embedded scanner.Lexer.
// The token value is stored through lval and the token identifier is returned.
func (l *lexer) Lex(lval *yySymType) int {
	tokenID := l.Lexer.Lex(lval)
	return tokenID
}
// Token stores t as the token value of the symbol. The scanner calls it
// through its Lval interface to hand the scanned token to the parser.
func (sym *yySymType) Token(t token.Token) {
	sym.token = t
}
// newLexer creates a lexer reading PHP source from src. fName is the name
// under which the input is registered in a fresh go/token file set.
// The state stack starts with the single state 0 and no collected comments.
// It panics if the underlying lex.Lexer cannot be constructed.
func newLexer(src io.Reader, fName string) *lexer {
	fileSet := goToken.NewFileSet()
	srcFile := fileSet.AddFile(fName, -1, 1<<31-1)

	lx, err := lex.New(srcFile, bufio.NewReader(src), lex.RuneClass(scanner.Rune2Class))
	if err != nil {
		panic(err)
	}

	base := scanner.Lexer{
		Lexer:         lx,
		StateStack:    []int{0},
		PhpDocComment: "",
		Comments:      nil,
	}
	return &lexer{Lexer: base}
}

61
php5/parser.go Normal file
View File

@ -0,0 +1,61 @@
package php5
import (
"io"
"github.com/z7zmey/php-parser/node/expr"
"github.com/z7zmey/php-parser/comment"
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/position"
"github.com/z7zmey/php-parser/token"
)
// Package-level parser state. Parse re-initialises rootnode, comments,
// positions and positionBuilder on every call and returns the first three.
var rootnode node.Node
var comments comment.Comments
var positions position.Positions
var positionBuilder position.Builder
// parentNode is not reset by Parse.
// NOTE(review): presumably maintained by the grammar actions in php5.y — confirm.
var parentNode node.Node
// Parse reads PHP source from src and runs the generated PHP 5 parser over it.
// fName is the file name passed on to the lexer.
//
// It returns the root node, the collected comments and the recorded node
// positions. All three live in package-level variables that are reset at the
// start of every call, so concurrent calls would share that state.
// The status returned by yyParse is not inspected.
func Parse(src io.Reader, fName string) (node.Node, comment.Comments, position.Positions) {
	// Parser diagnostics configuration.
	yyDebug = 0
	yyErrorVerbose = true

	// Drop whatever a previous run left behind.
	rootnode = stmt.NewStmtList([]node.Node{})
	comments = comment.Comments{}
	positions = position.Positions{}
	positionBuilder = position.Builder{&positions}

	lx := newLexer(src, fName)
	yyParse(lx)

	return rootnode, comments, positions
}
// ListGetFirstNodeComments returns the comments recorded in the package-level
// comments collection for the first node of list, or nil when list is empty.
func ListGetFirstNodeComments(list []node.Node) []comment.Comment {
	if len(list) > 0 {
		return comments[list[0]]
	}
	return nil
}
// foreachVariable pairs a node with a by-reference flag.
// NOTE(review): presumably the value type of the grammar's foreach variable rules — confirm in php5.y.
type foreachVariable struct {
node node.Node
byRef bool
}
// nodesWithEndToken bundles a list of nodes with a token.
// NOTE(review): endToken is presumably the token that closes the list — confirm in php5.y.
type nodesWithEndToken struct {
nodes []node.Node
endToken token.Token
}
// boolWithToken carries a boolean together with an optional token pointer.
// NOTE(review): token is presumably the token that produced the flag, nil when absent — confirm in php5.y.
type boolWithToken struct {
value bool
token *token.Token
}
// simpleIndirectReference holds a list of variable nodes plus a separate
// pointer to one of them.
// NOTE(review): presumably built for chained "$$...$var" references, with last being the final element of all — confirm in php5.y.
type simpleIndirectReference struct {
all []*expr.Variable
last *expr.Variable
}

6651
php5/php5.go Normal file

File diff suppressed because it is too large Load Diff

3741
php5/php5.y Normal file

File diff suppressed because it is too large Load Diff

41
php7/lexer.go Normal file
View File

@ -0,0 +1,41 @@
package php7
import (
"bufio"
goToken "go/token"
"io"
"github.com/cznic/golex/lex"
"github.com/z7zmey/php-parser/scanner"
"github.com/z7zmey/php-parser/token"
)
// lexer wraps the shared scanner.Lexer so that it can be handed to the
// generated PHP 7 parser (see Lex, which accepts this package's *yySymType).
type lexer struct {
scanner.Lexer
}
// Lex produces the next token for the parser by forwarding to the embedded
// scanner.Lexer; the token value is written through lval.
func (l *lexer) Lex(lval *yySymType) int {
	id := l.Lexer.Lex(lval)
	return id
}
// Token records t as the symbol's token value. It is the hook the scanner
// invokes through its Lval interface.
func (sym *yySymType) Token(t token.Token) {
	sym.token = t
}
// newLexer returns a lexer over the PHP source in src. The input is registered
// as fName in a newly created go/token file set. The lexer starts with state 0
// on its state stack, an empty doc comment and no comments.
// A failure to create the underlying lex.Lexer results in a panic.
func newLexer(src io.Reader, fName string) *lexer {
	file := goToken.NewFileSet().AddFile(fName, -1, 1<<31-1)
	input := bufio.NewReader(src)
	classify := lex.RuneClass(scanner.Rune2Class)

	lx, err := lex.New(file, input, classify)
	if err != nil {
		panic(err)
	}

	result := &lexer{}
	result.Lexer = scanner.Lexer{
		Lexer:         lx,
		StateStack:    []int{0},
		PhpDocComment: "",
		Comments:      nil,
	}
	return result
}

52
php7/parser.go Normal file
View File

@ -0,0 +1,52 @@
package php7
import (
"io"
"github.com/z7zmey/php-parser/comment"
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/stmt"
"github.com/z7zmey/php-parser/position"
"github.com/z7zmey/php-parser/token"
)
// Package-level parser state. Parse re-initialises all four variables on every
// call and returns rootnode, comments and positions to the caller.
var rootnode node.Node
var comments comment.Comments
var positions position.Positions
var positionBuilder position.Builder
// Parse reads PHP source from src and runs the generated PHP 7 parser over it.
// fName is the file name passed on to the lexer.
//
// The returned root node, comments and positions are package-level values
// that are re-created at the beginning of each call; concurrent calls would
// therefore share state. The status returned by yyParse is not inspected.
func Parse(src io.Reader, fName string) (node.Node, comment.Comments, position.Positions) {
	// Start from a clean slate.
	positions = position.Positions{}
	positionBuilder = position.Builder{&positions}
	comments = comment.Comments{}
	rootnode = stmt.NewStmtList([]node.Node{})

	// Parser diagnostics configuration.
	yyErrorVerbose = true
	yyDebug = 0

	yyParse(newLexer(src, fName))

	return rootnode, comments, positions
}
// ListGetFirstNodeComments looks up, in the package-level comments collection,
// the comments recorded for the first element of list. An empty list yields nil.
func ListGetFirstNodeComments(list []node.Node) []comment.Comment {
	if len(list) == 0 {
		return nil
	}
	first := list[0]
	return comments[first]
}
// foreachVariable pairs a node with a by-reference flag.
// NOTE(review): presumably the value type of the grammar's foreach variable rules — confirm in php7.y.
type foreachVariable struct {
node node.Node
byRef bool
}
// nodesWithEndToken bundles a list of nodes with a token.
// NOTE(review): endToken is presumably the token that closes the list — confirm in php7.y.
type nodesWithEndToken struct {
nodes []node.Node
endToken token.Token
}
// boolWithToken carries a boolean together with an optional token pointer.
// The grammar declares is_reference, is_variadic and returns_ref with this type.
type boolWithToken struct {
value bool
token *token.Token
}

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
%{
package parser
package php7
import (
"io"
"strings"
"strconv"
"github.com/z7zmey/php-parser/token"
"github.com/z7zmey/php-parser/node"
"github.com/z7zmey/php-parser/node/scalar"
@ -14,51 +14,8 @@ import (
"github.com/z7zmey/php-parser/node/expr/assign_op"
"github.com/z7zmey/php-parser/node/expr/binary_op"
"github.com/z7zmey/php-parser/node/expr/cast"
"github.com/z7zmey/php-parser/comment"
"github.com/z7zmey/php-parser/position"
)
var rootnode node.Node
var comments comment.Comments
var positions position.Positions
var positionBuilder position.Builder
func ParsePhp7(src io.Reader, fName string) (node.Node, comment.Comments, position.Positions) {
yyDebug = 0
yyErrorVerbose = true
rootnode = stmt.NewStmtList([]node.Node{}) //reset
comments = comment.Comments{}
positions = position.Positions{}
positionBuilder = position.Builder{&positions}
yyParse(newLexer(src, fName))
return rootnode, comments, positions
}
func ListGetFirstNodeComments(list []node.Node) []comment.Comment {
if len(list) == 0 {
return nil
}
node := list[0]
return comments[node]
}
type foreachVariable struct {
node node.Node
byRef bool
}
type nodesWithEndToken struct {
nodes []node.Node
endToken token.Token
}
type boolWithToken struct {
value bool
token *token.Token
}
%}
%union{
@ -71,40 +28,6 @@ type boolWithToken struct {
str string
}
%left T_INCLUDE T_INCLUDE_ONCE T_EVAL T_REQUIRE T_REQUIRE_ONCE
%left ','
%left T_LOGICAL_OR
%left T_LOGICAL_XOR
%left T_LOGICAL_AND
%right T_PRINT
%right T_YIELD
%right T_DOUBLE_ARROW
%right T_YIELD_FROM
%left '=' T_PLUS_EQUAL T_MINUS_EQUAL T_MUL_EQUAL T_DIV_EQUAL T_CONCAT_EQUAL T_MOD_EQUAL T_AND_EQUAL T_OR_EQUAL T_XOR_EQUAL T_SL_EQUAL T_SR_EQUAL T_POW_EQUAL
%left '?' ':'
%right T_COALESCE
%left T_BOOLEAN_OR
%left T_BOOLEAN_AND
%left '|'
%left '^'
%left '&'
%nonassoc T_IS_EQUAL T_IS_NOT_EQUAL T_IS_IDENTICAL T_IS_NOT_IDENTICAL T_SPACESHIP
%nonassoc '<' T_IS_SMALLER_OR_EQUAL '>' T_IS_GREATER_OR_EQUAL
%left T_SL T_SR
%left '+' '-' '.'
%left '*' '/' '%'
%right '!'
%nonassoc T_INSTANCEOF
%right '~' T_INC T_DEC T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST T_BOOL_CAST T_UNSET_CAST '@'
%right T_POW
%right '['
%nonassoc T_NEW T_CLONE
%left T_NOELSE
%left T_ELSEIF
%left T_ELSE
%left T_ENDIF
%right T_STATIC T_ABSTRACT T_FINAL T_PRIVATE T_PROTECTED T_PUBLIC
%type <token> $unk
%token <token> T_INCLUDE
%token <token> T_INCLUDE_ONCE
@ -216,6 +139,9 @@ type boolWithToken struct {
%token <token> T_OBJECT_CAST
%token <token> T_BOOL_CAST
%token <token> T_UNSET_CAST
%token <token> T_COALESCE
%token <token> T_SPACESHIP
%token <token> T_NOELSE
%token <token> '"'
%token <token> '`'
%token <token> '{'
@ -235,6 +161,40 @@ type boolWithToken struct {
%token <token> '@'
%token <token> '$'
%left T_INCLUDE T_INCLUDE_ONCE T_EVAL T_REQUIRE T_REQUIRE_ONCE
%left ','
%left T_LOGICAL_OR
%left T_LOGICAL_XOR
%left T_LOGICAL_AND
%right T_PRINT
%right T_YIELD
%right T_DOUBLE_ARROW
%right T_YIELD_FROM
%left '=' T_PLUS_EQUAL T_MINUS_EQUAL T_MUL_EQUAL T_DIV_EQUAL T_CONCAT_EQUAL T_MOD_EQUAL T_AND_EQUAL T_OR_EQUAL T_XOR_EQUAL T_SL_EQUAL T_SR_EQUAL T_POW_EQUAL
%left '?' ':'
%right T_COALESCE
%left T_BOOLEAN_OR
%left T_BOOLEAN_AND
%left '|'
%left '^'
%left '&'
%nonassoc T_IS_EQUAL T_IS_NOT_EQUAL T_IS_IDENTICAL T_IS_NOT_IDENTICAL T_SPACESHIP
%nonassoc '<' T_IS_SMALLER_OR_EQUAL '>' T_IS_GREATER_OR_EQUAL
%left T_SL T_SR
%left '+' '-' '.'
%left '*' '/' '%'
%right '!'
%nonassoc T_INSTANCEOF
%right '~' T_INC T_DEC T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST T_BOOL_CAST T_UNSET_CAST '@'
%right T_POW
%right '['
%nonassoc T_NEW T_CLONE
%left T_NOELSE
%left T_ELSEIF
%left T_ELSE
%left T_ENDIF
%right T_STATIC T_ABSTRACT T_FINAL T_PRIVATE T_PROTECTED T_PUBLIC
%type <boolWithToken> is_reference is_variadic returns_ref
%type <token> reserved_non_modifiers
@ -942,7 +902,7 @@ while_statement:
if_stmt_without_else:
T_IF '(' expr ')' statement
{
$$ = stmt.NewIf($3, $5)
$$ = stmt.NewIf($3, $5, nil, nil)
positions.AddPosition($$, positionBuilder.NewTokenNodePosition($1, $5))
comments.AddComments($$, $1.Comments())
}
@ -975,7 +935,7 @@ alt_if_stmt_without_else:
{
stmts := stmt.NewStmtList($6)
positions.AddPosition(stmts, positionBuilder.NewNodeListPosition($6))
$$ = stmt.NewAltIf($3, stmts)
$$ = stmt.NewAltIf($3, stmts, nil, nil)
positions.AddPosition($$, positionBuilder.NewTokenNodeListPosition($1, $6))
comments.AddComments(stmts, $5.Comments())
@ -1132,13 +1092,13 @@ non_empty_argument_list:
argument:
expr
{
$$ = node.NewArgument($1, false)
$$ = node.NewArgument($1, false, false)
positions.AddPosition($$, positionBuilder.NewNodePosition($1))
comments.AddComments($$, comments[$1])
}
| T_ELLIPSIS expr
{
$$ = node.NewArgument($2, true)
$$ = node.NewArgument($2, true, false)
positions.AddPosition($$, positionBuilder.NewTokenNodePosition($1, $2))
comments.AddComments($$, $1.Comments())
}
@ -1952,7 +1912,7 @@ expr_without_variable:
;
backup_doc_comment:
/* empty */ { $$ = yylex.(*lexer).phpDocComment; yylex.(*lexer).phpDocComment = "" }
/* empty */ { $$ = yylex.(*lexer).PhpDocComment; yylex.(*lexer).PhpDocComment = "" }
;
returns_ref:

260
scanner/lexer.go Normal file
View File

@ -0,0 +1,260 @@
package scanner
import (
"bufio"
"bytes"
"go/token"
"io"
"unicode"
"github.com/cznic/golex/lex"
"github.com/z7zmey/php-parser/comment"
t "github.com/z7zmey/php-parser/token"
)
// Character classes for non-ASCII runes, allocated in [0x80, 0xFF] so they
// cannot collide with ASCII runes, which Rune2Class passes through unchanged.
// Note: classOther is declared but Rune2Class returns -1 instead of it.
const (
classUnicodeLeter = iota + 0x80
classUnicodeDigit
classUnicodeGraphic
classOther
)
// Token identifiers returned by the scanner's Lex method.
// NOTE(review): the numeric values presumably have to stay in sync with the
// token numbers goyacc assigns in the generated php5 and php7 parsers —
// verify whenever a token is added or reordered in either grammar.
const T_INCLUDE = 57346
const T_INCLUDE_ONCE = 57347
const T_EXIT = 57348
const T_IF = 57349
const T_LNUMBER = 57350
const T_DNUMBER = 57351
const T_STRING = 57352
const T_STRING_VARNAME = 57353
const T_VARIABLE = 57354
const T_NUM_STRING = 57355
const T_INLINE_HTML = 57356
const T_CHARACTER = 57357
const T_BAD_CHARACTER = 57358
const T_ENCAPSED_AND_WHITESPACE = 57359
const T_CONSTANT_ENCAPSED_STRING = 57360
const T_ECHO = 57361
const T_DO = 57362
const T_WHILE = 57363
const T_ENDWHILE = 57364
const T_FOR = 57365
const T_ENDFOR = 57366
const T_FOREACH = 57367
const T_ENDFOREACH = 57368
const T_DECLARE = 57369
const T_ENDDECLARE = 57370
const T_AS = 57371
const T_SWITCH = 57372
const T_ENDSWITCH = 57373
const T_CASE = 57374
const T_DEFAULT = 57375
const T_BREAK = 57376
const T_CONTINUE = 57377
const T_GOTO = 57378
const T_FUNCTION = 57379
const T_CONST = 57380
const T_RETURN = 57381
const T_TRY = 57382
const T_CATCH = 57383
const T_FINALLY = 57384
const T_THROW = 57385
const T_USE = 57386
const T_INSTEADOF = 57387
const T_GLOBAL = 57388
const T_VAR = 57389
const T_UNSET = 57390
const T_ISSET = 57391
const T_EMPTY = 57392
const T_HALT_COMPILER = 57393
const T_CLASS = 57394
const T_TRAIT = 57395
const T_INTERFACE = 57396
const T_EXTENDS = 57397
const T_IMPLEMENTS = 57398
const T_OBJECT_OPERATOR = 57399
const T_DOUBLE_ARROW = 57400
const T_LIST = 57401
const T_ARRAY = 57402
const T_CALLABLE = 57403
const T_CLASS_C = 57404
const T_TRAIT_C = 57405
const T_METHOD_C = 57406
const T_FUNC_C = 57407
const T_LINE = 57408
const T_FILE = 57409
const T_COMMENT = 57410
const T_DOC_COMMENT = 57411
const T_OPEN_TAG = 57412
const T_OPEN_TAG_WITH_ECHO = 57413
const T_CLOSE_TAG = 57414
const T_WHITESPACE = 57415
const T_START_HEREDOC = 57416
const T_END_HEREDOC = 57417
const T_DOLLAR_OPEN_CURLY_BRACES = 57418
const T_CURLY_OPEN = 57419
const T_PAAMAYIM_NEKUDOTAYIM = 57420
const T_NAMESPACE = 57421
const T_NS_C = 57422
const T_DIR = 57423
const T_NS_SEPARATOR = 57424
const T_ELLIPSIS = 57425
const T_EVAL = 57426
const T_REQUIRE = 57427
const T_REQUIRE_ONCE = 57428
const T_LOGICAL_OR = 57429
const T_LOGICAL_XOR = 57430
const T_LOGICAL_AND = 57431
const T_INSTANCEOF = 57432
const T_NEW = 57433
const T_CLONE = 57434
const T_ELSEIF = 57435
const T_ELSE = 57436
const T_ENDIF = 57437
const T_PRINT = 57438
const T_YIELD = 57439
const T_STATIC = 57440
const T_ABSTRACT = 57441
const T_FINAL = 57442
const T_PRIVATE = 57443
const T_PROTECTED = 57444
const T_PUBLIC = 57445
const T_INC = 57446
const T_DEC = 57447
const T_YIELD_FROM = 57448
const T_INT_CAST = 57449
const T_DOUBLE_CAST = 57450
const T_STRING_CAST = 57451
const T_ARRAY_CAST = 57452
const T_OBJECT_CAST = 57453
const T_BOOL_CAST = 57454
const T_UNSET_CAST = 57455
const T_COALESCE = 57456
const T_SPACESHIP = 57457
const T_NOELSE = 57458
const T_PLUS_EQUAL = 57459
const T_MINUS_EQUAL = 57460
const T_MUL_EQUAL = 57461
const T_DIV_EQUAL = 57462
const T_CONCAT_EQUAL = 57463
const T_MOD_EQUAL = 57464
const T_AND_EQUAL = 57465
const T_OR_EQUAL = 57466
const T_XOR_EQUAL = 57467
const T_SL_EQUAL = 57468
const T_SR_EQUAL = 57469
const T_POW_EQUAL = 57470
const T_BOOLEAN_OR = 57471
const T_BOOLEAN_AND = 57472
const T_IS_EQUAL = 57473
const T_IS_NOT_EQUAL = 57474
const T_IS_IDENTICAL = 57475
const T_IS_NOT_IDENTICAL = 57476
const T_IS_SMALLER_OR_EQUAL = 57477
const T_IS_GREATER_OR_EQUAL = 57478
const T_SL = 57479
const T_SR = 57480
const T_POW = 57481
// Lval is the destination the scanner writes each scanned token to.
// The php5 and php7 packages implement it on their *yySymType.
type Lval interface {
// Token receives the token produced for the current match.
Token(tkn t.Token)
}
// Lexer is the PHP scanner state shared by the php5 and php7 parsers.
type Lexer struct {
// Embedded golex runtime lexer providing the low-level character access.
*lex.Lexer
// StateStack is the stack of scanner start conditions; its last element is the current one.
StateStack []int
// PhpDocComment holds the text of the most recently scanned doc comment.
PhpDocComment string
// Comments collects comments seen since the start of the current Lex call;
// newToken attaches them to the token it builds.
Comments []comment.Comment
}
// Rune2Class maps a rune to the character class used by the scanner.
// ASCII runes map to themselves. Other runes collapse into the letter, digit
// or graphic class, checked in that order. Everything else yields -1.
func Rune2Class(r rune) int {
	switch {
	case r >= 0 && r < 0x80:
		// ASCII is kept as it is.
		return int(r)
	case unicode.IsLetter(r):
		return classUnicodeLeter
	case unicode.IsDigit(r):
		return classUnicodeDigit
	case unicode.IsGraphic(r):
		return classUnicodeGraphic
	default:
		// classOther is not returned here; unclassified runes map to -1.
		return -1
	}
}
// NewLexer creates a Lexer reading PHP source from src. fName is the name
// under which the input is registered in a fresh go/token file set.
// The returned Lexer starts with state 0 on its state stack, an empty doc
// comment and no comments. It panics if lex.New reports an error.
func NewLexer(src io.Reader, fName string) *Lexer {
	fileSet := token.NewFileSet()
	srcFile := fileSet.AddFile(fName, -1, 1<<31-1)

	lx, err := lex.New(srcFile, bufio.NewReader(src), lex.RuneClass(Rune2Class))
	if err != nil {
		panic(err)
	}

	return &Lexer{
		Lexer:         lx,
		StateStack:    []int{0},
		PhpDocComment: "",
		Comments:      nil,
	}
}
// ungetChars pushes the lookahead character and then the last n characters of
// the current token back to the input, last character first. It returns the
// current token with its final n characters sliced off.
func (l *Lexer) ungetChars(n int) []lex.Char {
	l.Unget(l.Lookahead())

	matched := l.Token()
	lastIdx := len(matched) - 1
	for back := 0; back < n; back++ {
		l.Unget(matched[lastIdx-back])
	}

	remaining := l.Token()
	return remaining[:len(remaining)-n]
}
// pushState makes state the current scanner state while keeping the previous
// states underneath it on the stack.
func (l *Lexer) pushState(state int) {
	stack := append(l.StateStack, state)
	l.StateStack = stack
}
// popState discards the current scanner state, returning to the one below it.
// The bottom-most state is never removed: with one or zero entries on the
// stack the call is a no-op.
func (l *Lexer) popState() {
	if depth := len(l.StateStack); depth > 1 {
		l.StateStack = l.StateStack[:depth-1]
	}
}
// begin replaces the current scanner state (the top of the state stack) with
// state, leaving the states below it untouched.
//
// If the stack is empty, for example on a Lexer that was not built by
// NewLexer, state simply becomes the only entry instead of triggering a
// slice-bounds panic.
func (l *Lexer) begin(state int) {
	depth := len(l.StateStack)
	if depth == 0 {
		l.StateStack = append(l.StateStack, state)
		return
	}
	// Overwriting in place is equivalent to the pop-then-append sequence.
	l.StateStack[depth-1] = state
}
// getCurrentState reports the scanner state on top of the state stack.
func (l *Lexer) getCurrentState() int {
	top := len(l.StateStack) - 1
	return l.StateStack[top]
}
// newToken builds a token from chars: its value is the UTF-8 encoding of the
// characters, its start/end line and position come from the first and last
// character, and the comments collected so far are attached to it.
//
// chars may be empty. The heredoc and nowdoc rules pass an empty slice when
// the input ends before any content was collected. In that case the token has
// an empty value and is positioned at the current lookahead character, rather
// than panicking on chars[0].
func (l *Lexer) newToken(chars []lex.Char) t.Token {
	if len(chars) == 0 {
		pos := l.Lookahead().Pos()
		line := l.File.Line(pos)
		return t.NewToken(l.charsToBytes(chars), line, line, int(pos), int(pos)).SetComments(l.Comments)
	}

	firstChar := chars[0]
	lastChar := chars[len(chars)-1]

	startLine := l.File.Line(firstChar.Pos())
	endLine := l.File.Line(lastChar.Pos())
	startPos := int(firstChar.Pos())
	endPos := int(lastChar.Pos())

	return t.NewToken(l.charsToBytes(chars), startLine, endLine, startPos, endPos).SetComments(l.Comments)
}
// addComment appends c to the comments collected by the lexer.
func (l *Lexer) addComment(c comment.Comment) {
	collected := append(l.Comments, c)
	l.Comments = collected
}
// charsToBytes returns the UTF-8 encoding of the runes held in chars,
// concatenated in order.
func (l *Lexer) charsToBytes(chars []lex.Char) []byte {
	var encoded bytes.Buffer
	for i := range chars {
		encoded.WriteRune(chars[i].Rune)
	}
	return encoded.Bytes()
}

File diff suppressed because it is too large Load Diff

609
scanner/scanner.l Normal file
View File

@ -0,0 +1,609 @@
%{
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// blame: jnml, labs.nic.cz
package scanner
import (
"fmt"
"bytes"
"github.com/cznic/golex/lex"
"github.com/z7zmey/php-parser/comment"
)
// Scanner start conditions. The names after INITIAL match the %s declaration
// below, and the rules select them with the <NAME> prefix; the current one is
// the top of Lexer.StateStack.
const (
INITIAL = iota
PHP
STRING
STRING_VAR
STRING_VAR_INDEX
STRING_VAR_NAME
PROPERTY
HEREDOC_END
NOWDOC
HEREDOC
BACKQUOTE
)
// heredocLabel holds the label of the heredoc/nowdoc being scanned. It is set
// by the heredoc start rule and compared by the NOWDOC/HEREDOC rules to find
// the closing label. It is package-level, so it is shared by all Lexer values.
var heredocLabel []lex.Char
func (l *Lexer) Lex(lval Lval) int {
l.Comments = nil
c := l.Enter()
%}
%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY HEREDOC_END NOWDOC HEREDOC BACKQUOTE
%yyb last == '\n' || last = '\0'
%yyt l.getCurrentState()
%yyc c
%yyn c = l.Next()
%yym l.Mark()
%optioncase-insensitive
LNUM [0-9]+
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
HNUM 0x[0-9a-fA-F]+
BNUM 0b[01]+
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
VAR_NAME [a-zA-Z_\x7f-\xff^0-9/][a-zA-Z0-9_\x7f-\xff]*
OPERATORS [;:,.\[\]()|\/\^&\+-*=%!~$<>?@]
NEW_LINE (\r|\n|\r\n)
%%
c = l.Rule0()
<INITIAL>[ \t\n\r]+ lval.Token(l.newToken(l.Token()));
<INITIAL>.
tb := []lex.Char{}
for {
if c == -1 {
tb = l.Token();
break;
}
if '?' == rune(c) {
tb = l.Token();
if (len(tb) < 2 || tb[len(tb)-1].Rune != '<') {
c = l.Next()
continue;
}
tb = l.ungetChars(1)
break;
}
c = l.Next()
}
lval.Token(l.newToken(tb))
return T_INLINE_HTML
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.begin(PHP);lval.Token(l.newToken(l.Token()));// return T_OPEN_TAG;
<INITIAL>\<\? l.begin(PHP);lval.Token(l.newToken(l.Token()));// return T_OPEN_TAG;
<INITIAL>\<\?= l.begin(PHP);lval.Token(l.newToken(l.Token())); return T_ECHO;
<PHP>[ \t\n\r]+ lval.Token(l.newToken(l.Token()));// return T_WHITESPACE
<PHP>\?\>{NEW_LINE}? l.begin(INITIAL);lval.Token(l.newToken(l.Token())); return Rune2Class(';');
<PHP>{DNUM}|{EXPONENT_DNUM} lval.Token(l.newToken(l.Token())); return T_DNUMBER
<PHP>{BNUM}
tb := l.Token()
i:=2
BNUMFOR:for {
if i > len(tb)-1 {
break BNUMFOR;
}
switch tb[i].Rune {
case '0': i++;
default: break BNUMFOR;
}
}
if len(tb) - i < 64 {
lval.Token(l.newToken(l.Token())); return T_LNUMBER
} else {
lval.Token(l.newToken(l.Token())); return T_DNUMBER
}
<PHP>{LNUM}
if len(l.Token()) < 20 {
lval.Token(l.newToken(l.Token())); return T_LNUMBER
} else {
lval.Token(l.newToken(l.Token())); return T_DNUMBER
}
<PHP>{HNUM}
tb := l.Token()
i:=2
HNUMFOR:for {
if i > len(tb)-1 {
break HNUMFOR;
}
switch tb[i].Rune {
case '0': i++;
default: break HNUMFOR;
}
}
length := len(tb) - i
if length < 16 || (length == 16 && tb[i].Rune <= '7') {
lval.Token(l.newToken(l.Token())); return T_LNUMBER
} else {
lval.Token(l.newToken(l.Token())); return T_DNUMBER
}
<PHP>abstract lval.Token(l.newToken(l.Token())); return T_ABSTRACT
<PHP>array lval.Token(l.newToken(l.Token())); return T_ARRAY
<PHP>as lval.Token(l.newToken(l.Token())); return T_AS
<PHP>break lval.Token(l.newToken(l.Token())); return T_BREAK
<PHP>callable lval.Token(l.newToken(l.Token())); return T_CALLABLE
<PHP>case lval.Token(l.newToken(l.Token())); return T_CASE
<PHP>catch lval.Token(l.newToken(l.Token())); return T_CATCH
<PHP>class lval.Token(l.newToken(l.Token())); return T_CLASS
<PHP>clone lval.Token(l.newToken(l.Token())); return T_CLONE
<PHP>const lval.Token(l.newToken(l.Token())); return T_CONST;
<PHP>continue lval.Token(l.newToken(l.Token())); return T_CONTINUE;
<PHP>declare lval.Token(l.newToken(l.Token())); return T_DECLARE;
<PHP>default lval.Token(l.newToken(l.Token())); return T_DEFAULT;
<PHP>do lval.Token(l.newToken(l.Token())); return T_DO;
<PHP>echo lval.Token(l.newToken(l.Token())); return T_ECHO;
<PHP>else lval.Token(l.newToken(l.Token())); return T_ELSE;
<PHP>elseif lval.Token(l.newToken(l.Token())); return T_ELSEIF;
<PHP>empty lval.Token(l.newToken(l.Token())); return T_EMPTY;
<PHP>enddeclare lval.Token(l.newToken(l.Token())); return T_ENDDECLARE
<PHP>endfor lval.Token(l.newToken(l.Token())); return T_ENDFOR
<PHP>endforeach lval.Token(l.newToken(l.Token())); return T_ENDFOREACH
<PHP>endif lval.Token(l.newToken(l.Token())); return T_ENDIF
<PHP>endswitch lval.Token(l.newToken(l.Token())); return T_ENDSWITCH
<PHP>endwhile lval.Token(l.newToken(l.Token())); return T_ENDWHILE
<PHP>eval lval.Token(l.newToken(l.Token())); return T_EVAL
<PHP>exit|die lval.Token(l.newToken(l.Token())); return T_EXIT
<PHP>extends lval.Token(l.newToken(l.Token())); return T_EXTENDS
<PHP>final lval.Token(l.newToken(l.Token())); return T_FINAL
<PHP>finally lval.Token(l.newToken(l.Token())); return T_FINALLY
<PHP>for lval.Token(l.newToken(l.Token())); return T_FOR
<PHP>foreach lval.Token(l.newToken(l.Token())); return T_FOREACH
<PHP>function|cfunction lval.Token(l.newToken(l.Token())); return T_FUNCTION
<PHP>global lval.Token(l.newToken(l.Token())); return T_GLOBAL
<PHP>goto lval.Token(l.newToken(l.Token())); return T_GOTO
<PHP>if lval.Token(l.newToken(l.Token())); return T_IF
<PHP>isset lval.Token(l.newToken(l.Token())); return T_ISSET
<PHP>implements lval.Token(l.newToken(l.Token())); return T_IMPLEMENTS
<PHP>instanceof lval.Token(l.newToken(l.Token())); return T_INSTANCEOF
<PHP>insteadof lval.Token(l.newToken(l.Token())); return T_INSTEADOF
<PHP>interface lval.Token(l.newToken(l.Token())); return T_INTERFACE
<PHP>list lval.Token(l.newToken(l.Token())); return T_LIST
<PHP>namespace lval.Token(l.newToken(l.Token())); return T_NAMESPACE
<PHP>private lval.Token(l.newToken(l.Token())); return T_PRIVATE
<PHP>public lval.Token(l.newToken(l.Token())); return T_PUBLIC
<PHP>print lval.Token(l.newToken(l.Token())); return T_PRINT
<PHP>protected lval.Token(l.newToken(l.Token())); return T_PROTECTED
<PHP>return lval.Token(l.newToken(l.Token())); return T_RETURN
<PHP>static lval.Token(l.newToken(l.Token())); return T_STATIC
<PHP>switch lval.Token(l.newToken(l.Token())); return T_SWITCH
<PHP>throw lval.Token(l.newToken(l.Token())); return T_THROW
<PHP>trait lval.Token(l.newToken(l.Token())); return T_TRAIT
<PHP>try lval.Token(l.newToken(l.Token())); return T_TRY
<PHP>unset lval.Token(l.newToken(l.Token())); return T_UNSET
<PHP>use lval.Token(l.newToken(l.Token())); return T_USE
<PHP>var lval.Token(l.newToken(l.Token())); return T_VAR
<PHP>while lval.Token(l.newToken(l.Token())); return T_WHILE
<PHP>yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] lval.Token(l.newToken(l.Token())); return T_YIELD_FROM
<PHP>yield lval.Token(l.newToken(l.Token())); return T_YIELD
<PHP>include lval.Token(l.newToken(l.Token())); return T_INCLUDE
<PHP>include_once lval.Token(l.newToken(l.Token())); return T_INCLUDE_ONCE
<PHP>require lval.Token(l.newToken(l.Token())); return T_REQUIRE
<PHP>require_once lval.Token(l.newToken(l.Token())); return T_REQUIRE_ONCE
<PHP>__CLASS__ lval.Token(l.newToken(l.Token())); return T_CLASS_C
<PHP>__DIR__ lval.Token(l.newToken(l.Token())); return T_DIR
<PHP>__FILE__ lval.Token(l.newToken(l.Token())); return T_FILE
<PHP>__FUNCTION__ lval.Token(l.newToken(l.Token())); return T_FUNC_C
<PHP>__LINE__ lval.Token(l.newToken(l.Token())); return T_LINE
<PHP>__NAMESPACE__ lval.Token(l.newToken(l.Token())); return T_NS_C
<PHP>__METHOD__ lval.Token(l.newToken(l.Token())); return T_METHOD_C
<PHP>__TRAIT__ lval.Token(l.newToken(l.Token())); return T_TRAIT_C
<PHP>__halt_compiler lval.Token(l.newToken(l.Token())); return T_HALT_COMPILER
<PHP>\([ \t]*array[ \t]*\) lval.Token(l.newToken(l.Token())); return T_ARRAY_CAST
<PHP>\([ \t]*(bool|boolean)[ \t]*\) lval.Token(l.newToken(l.Token())); return T_BOOL_CAST
<PHP>\([ \t]*(real|double|float)[ \t]*\) lval.Token(l.newToken(l.Token())); return T_DOUBLE_CAST
<PHP>\([ \t]*(int|integer)[ \t]*\) lval.Token(l.newToken(l.Token())); return T_INT_CAST
<PHP>\([ \t]*object[ \t]*\) lval.Token(l.newToken(l.Token())); return T_OBJECT_CAST
<PHP>\([ \t]*string[ \t]*\) lval.Token(l.newToken(l.Token())); return T_STRING_CAST
<PHP>\([ \t]*unset[ \t]*\) lval.Token(l.newToken(l.Token())); return T_UNSET_CAST
<PHP>new lval.Token(l.newToken(l.Token())); return T_NEW
<PHP>and lval.Token(l.newToken(l.Token())); return T_LOGICAL_AND
<PHP>or lval.Token(l.newToken(l.Token())); return T_LOGICAL_OR
<PHP>xor lval.Token(l.newToken(l.Token())); return T_LOGICAL_XOR
<PHP>\\ lval.Token(l.newToken(l.Token())); return T_NS_SEPARATOR
<PHP>\.\.\. lval.Token(l.newToken(l.Token())); return T_ELLIPSIS;
<PHP>:: lval.Token(l.newToken(l.Token())); return T_PAAMAYIM_NEKUDOTAYIM; // T_DOUBLE_COLON
<PHP>&& lval.Token(l.newToken(l.Token())); return T_BOOLEAN_AND
<PHP>\|\| lval.Token(l.newToken(l.Token())); return T_BOOLEAN_OR
<PHP>&= lval.Token(l.newToken(l.Token())); return T_AND_EQUAL
<PHP>\|= lval.Token(l.newToken(l.Token())); return T_OR_EQUAL
<PHP>\.= lval.Token(l.newToken(l.Token())); return T_CONCAT_EQUAL;
<PHP>\*= lval.Token(l.newToken(l.Token())); return T_MUL_EQUAL
<PHP>\*\*= lval.Token(l.newToken(l.Token())); return T_POW_EQUAL
<PHP>[/]= lval.Token(l.newToken(l.Token())); return T_DIV_EQUAL;
<PHP>\+= lval.Token(l.newToken(l.Token())); return T_PLUS_EQUAL
<PHP>-= lval.Token(l.newToken(l.Token())); return T_MINUS_EQUAL
<PHP>\^= lval.Token(l.newToken(l.Token())); return T_XOR_EQUAL
<PHP>%= lval.Token(l.newToken(l.Token())); return T_MOD_EQUAL
<PHP>-- lval.Token(l.newToken(l.Token())); return T_DEC;
<PHP>\+\+ lval.Token(l.newToken(l.Token())); return T_INC
<PHP>=> lval.Token(l.newToken(l.Token())); return T_DOUBLE_ARROW;
<PHP>\<=\> lval.Token(l.newToken(l.Token())); return T_SPACESHIP
<PHP>\!=|\<\> lval.Token(l.newToken(l.Token())); return T_IS_NOT_EQUAL
<PHP>\!== lval.Token(l.newToken(l.Token())); return T_IS_NOT_IDENTICAL
<PHP>== lval.Token(l.newToken(l.Token())); return T_IS_EQUAL
<PHP>=== lval.Token(l.newToken(l.Token())); return T_IS_IDENTICAL
<PHP>\<\<= lval.Token(l.newToken(l.Token())); return T_SL_EQUAL
<PHP>\>\>= lval.Token(l.newToken(l.Token())); return T_SR_EQUAL
<PHP>\>= lval.Token(l.newToken(l.Token())); return T_IS_GREATER_OR_EQUAL
<PHP>\<= lval.Token(l.newToken(l.Token())); return T_IS_SMALLER_OR_EQUAL
<PHP>\*\* lval.Token(l.newToken(l.Token())); return T_POW
<PHP>\<\< lval.Token(l.newToken(l.Token())); return T_SL
<PHP>\>\> lval.Token(l.newToken(l.Token())); return T_SR
<PHP>\?\? lval.Token(l.newToken(l.Token())); return T_COALESCE
<PHP>(#|[/][/]).*{NEW_LINE} lval.Token(l.newToken(l.Token()));// return T_COMMENT; // TODO: handle ?>
<PHP>([/][*])|([/][*][*])
    // Block comment ("/* ... */") or doc comment ("/** ... */").
    // The pattern only matches the opener; the action consumes input up to and
    // including the closing "*/", records the comment through l.addComment and
    // returns no token. A doc comment is additionally kept in l.PhpDocComment.
    tb := l.Token()
    isDocComment := len(tb) > 2
    if isDocComment && rune(c) == '/' {
        // "/**/": the second '*' of the match is really the first char of the
        // terminator, so this is an empty plain comment. Consume the '/' and
        // skip the search below, which would otherwise run to the next "*/".
        isDocComment = false
        c = l.Next()
    } else {
        if isDocComment {
            l.PhpDocComment = ""
        }
        for {
            if c == -1 {
                break // TODO: Unterminated comment starting line %d
            }
            // p is the char being consumed, c becomes the new lookahead.
            p := c
            c = l.Next()
            if rune(p) == '*' && rune(c) == '/' {
                // Consume the closing '/' so that it becomes part of the token.
                c = l.Next()
                break
            }
        }
    }
    lval.Token(l.newToken(l.Token()))
    text := string(l.TokenBytes(nil))
    if isDocComment {
        l.PhpDocComment = text
        l.addComment(comment.NewDocComment(text))
        // return T_DOC_COMMENT
    } else {
        l.addComment(comment.NewPlainComment(text))
        // return T_COMMENT
    }
<PHP>{OPERATORS} lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) // token id is derived from the first byte of the match
<PHP>\{ l.pushState(PHP); lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) // "{" pushes another PHP state, undone by the matching "}"
<PHP>\} l.popState(); lval.Token(l.newToken(l.Token())); l.PhpDocComment = ""; return Rune2Class(rune(l.TokenBytes(nil)[0])) // "}" pops the state and clears the pending doc comment; the reset has to precede the return to take effect
<PHP>\${VAR_NAME} lval.Token(l.newToken(l.Token())); return T_VARIABLE // "$name"
<PHP>{VAR_NAME} lval.Token(l.newToken(l.Token())); return T_STRING // bare identifier
<PHP>-> l.begin(PROPERTY);lval.Token(l.newToken(l.Token())); return T_OBJECT_OPERATOR; // "->": switch to PROPERTY so the member name is lexed by the PROPERTY rules
<PROPERTY>[ \t\n\r]+ lval.Token(l.newToken(l.Token())); return T_WHITESPACE; // whitespace after "->" is returned as its own token
<PROPERTY>-> lval.Token(l.newToken(l.Token())); return T_OBJECT_OPERATOR; // a further "->" while still in PROPERTY
<PROPERTY>{VAR_NAME} l.begin(PHP);lval.Token(l.newToken(l.Token())); return T_STRING; // member name: emit it and switch back to PHP
<PROPERTY>. l.ungetChars(1);l.begin(PHP) // anything else: hand the char to ungetChars and switch to PHP; no token is returned here
<PHP>[\']([^\\\']*([\\].)*)*[\'] lval.Token(l.newToken(l.Token())); return T_CONSTANT_ENCAPSED_STRING; // whole single-quoted string, backslash escapes included, as one token
<PHP>` l.begin(BACKQUOTE); lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) // opening backquote: enter BACKQUOTE
<BACKQUOTE>` l.begin(PHP); lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) // closing backquote: back to PHP
<PHP>[b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE}
    // Heredoc/nowdoc opener: optional 'b' prefix, "<<<", optional blanks, a bare or
    // quoted label and a line break. Stores the label in heredocLabel, selects the
    // NOWDOC, HEREDOC or HEREDOC_END start condition and returns T_START_HEREDOC
    // built from the chars of the opener only.
tb := l.Token()
binPrefix := 0
if tb[0].Rune == 'b' {
binPrefix = 1
}
    // lblFirst/lblLast are inclusive indexes of the label inside tb: skip the
    // optional 'b' plus "<<<" at the front and the last char of the match at the back.
lblFirst := 3 + binPrefix
lblLast := len(tb)-2
    // If the second-to-last char is '\r', step back over it as well.
if tb[lblLast].Rune == '\r' {
lblLast--
}
    // Skip the blanks allowed between "<<<" and the label.
for {
if tb[lblFirst].Rune == ' ' || tb[lblFirst].Rune == '\t' {
lblFirst++
continue
}
break
}
    // A quoted label loses its quotes; a single-quoted label starts NOWDOC,
    // a double-quoted or bare label starts HEREDOC.
switch tb[lblFirst].Rune {
case '\'' :
lblFirst++
lblLast--
l.begin(NOWDOC)
case '"' :
lblFirst++
lblLast--
l.begin(HEREDOC)
default:
l.begin(HEREDOC)
}
heredocLabel = make([]lex.Char, lblLast - lblFirst + 1)
copy(heredocLabel, tb[lblFirst:lblLast+1])
    // Look ahead by the length of the label: when the body starts with the label
    // followed by ';' and a line break, go straight to HEREDOC_END.
    // ungetCnt is the number of chars handed to ungetChars at the end.
    // NOTE(review): ungetCnt stays len(heredocLabel) even when the loop below stops
    // early at c == -1 — confirm ungetChars copes with a count larger than what was read.
    // NOTE(review): only the "LABEL;" form is recognised by this look-ahead.
ungetCnt := len(heredocLabel)
searchLabelAhead := []lex.Char{}
for i := 0; i < len(heredocLabel); i++ {
if c == -1 {
break;
}
searchLabelAhead = append(searchLabelAhead, l.Lookahead())
c = l.Next()
}
if bytes.Equal(l.charsToBytes(heredocLabel), l.charsToBytes(searchLabelAhead)) && ';' == rune(c) {
ungetCnt++
c = l.Next()
if '\n' == rune(c) || '\r' == rune(c) {
l.begin(HEREDOC_END)
}
}
l.ungetChars(ungetCnt)
    // tb was captured before the look-ahead, so the token covers the opener only.
lval.Token(l.newToken(tb));
return T_START_HEREDOC
<NOWDOC>.|[ \t\n\r]
    // Nowdoc body: consumes input until a line break is reached right after the
    // label (optionally followed by ';'), then switches to HEREDOC_END and returns
    // the chars reported by ungetChars as T_ENCAPSED_AND_WHITESPACE.
    // searchLabel holds the bytes seen since the last line break.
    // NOTE(review): accumulation starts with the lookahead c, so the char matched
    // by the rule pattern itself never reaches searchLabel — confirm a first line
    // such as "XLABEL" cannot be taken for the closing label.
    // NOTE(review): byte(rune(c)) keeps only the low 8 bits of c; verify against
    // charsToBytes for labels containing non-ASCII chars.
searchLabel := []byte{}
tb := []lex.Char{}
for {
if c == -1 {
    // End of input without a closing label: tb is still empty here.
break;
}
if '\n' == rune(c) || '\r' == rune(c) {
    // "LABEL;" form: the label and the ';' are both handed to ungetChars.
if bytes.Equal(append(l.charsToBytes(heredocLabel), ';'), searchLabel) {
l.begin(HEREDOC_END)
tb = l.ungetChars(len(heredocLabel)+1)
break;
}
    // Plain "LABEL" form.
if bytes.Equal(l.charsToBytes(heredocLabel), searchLabel) {
l.begin(HEREDOC_END)
tb = l.ungetChars(len(heredocLabel))
break;
}
    // Not the closing label: start collecting the next line.
searchLabel = []byte{}
} else {
searchLabel = append(searchLabel, byte(rune(c)))
}
c = l.Next()
}
lval.Token(l.newToken(tb) )
return T_ENCAPSED_AND_WHITESPACE
<HEREDOC_END>{VAR_NAME}\; l.begin(PHP);lval.Token(l.newToken(l.ungetChars(1))); return T_END_HEREDOC // closing label followed by ';': the token is built from what ungetChars(1) returns (presumably the label with the ';' given back — TODO confirm), then back to PHP
<HEREDOC_END>{VAR_NAME} l.begin(PHP);lval.Token(l.newToken(l.Token())); return T_END_HEREDOC // closing label without ';'
<PHP>[b]?[\"]
    // Opening double quote, optionally prefixed with 'b'. The action scans ahead:
    //   - closing quote reached without interpolation: the whole literal is
    //     returned as one T_CONSTANT_ENCAPSED_STRING;
    //   - "$name", "${" or "{$" found: everything after the opening quote is
    //     given back, STRING is pushed and only the '"' token is returned.
    binPrefix := l.Token()[0].Rune == 'b'

    // beginString rewinds to just after the opening quote (and its 'b' prefix),
    // enters STRING and yields the token id for '"'.
    beginString := func() int {
        cnt := 1
        if binPrefix {
            cnt = 2
        }
        l.ungetChars(len(l.Token()) - cnt)
        chars := l.Token()[:cnt]
        l.pushState(STRING)
        lval.Token(l.newToken(chars))
        return Rune2Class('"')
    }

    for {
        if c == -1 {
            // End of input before the closing quote: leave the action without a token.
            break
        }
        switch c {
        case '"':
            // Consume the closing quote so that it is part of the token.
            c = l.Next()
            lval.Token(l.newToken(l.Token()))
            return T_CONSTANT_ENCAPSED_STRING
        case '$':
            c = l.Next()
            if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
                return beginString()
            }
            // NOTE(review): ungetChars(0) presumably re-arms the lookahead so that the
            // l.Next() after the switch yields the char just inspected — confirm.
            l.ungetChars(0)
        case '{':
            c = l.Next()
            if rune(c) == '$' {
                return beginString()
            }
            l.ungetChars(0)
        case '\\':
            // Skip the escaped char so that \" or \$ is not interpreted.
            c = l.Next()
        }
        c = l.Next()
    }
<STRING>\" l.popState(); lval.Token(l.newToken(l.Token())); return Rune2Class(l.Token()[0].Rune) // closing quote: leave STRING
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.Token(l.newToken(l.ungetChars(1))); l.pushState(PHP); return T_CURLY_OPEN // "{$": the token is built from what ungetChars(1) returns (presumably "{" with the "$" given back — TODO confirm), then PHP is pushed
<STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME); lval.Token(l.newToken(l.Token())); return T_DOLLAR_OPEN_CURLY_BRACES // "${": the name inside is lexed in STRING_VAR_NAME
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetChars(1);l.pushState(STRING_VAR) // "$": hand it to ungetChars and push STRING_VAR; no token is returned here
<STRING>.|[ \t\n\r]
    // Literal text inside a double-quoted string. Consumes input until the closing
    // quote or the start of an interpolation ("$name", "${", "{$") and returns the
    // text before it as T_ENCAPSED_AND_WHITESPACE. The quote or interpolation
    // itself is left for the other STRING rules.
    if l.Token()[0].Rune == '\\' && c != -1 {
        // The char matched by the rule is itself a backslash: swallow the char it
        // escapes, otherwise an escaped '"' or '$' at the very start of this chunk
        // would be interpreted by the loop below.
        c = l.Next()
    }
    for {
        if c == -1 {
            // End of input inside the string: leave the action without a token.
            break
        }
        switch c {
        case '"':
            // The quote is still the lookahead, so it is not part of the token.
            lval.Token(l.newToken(l.Token()))
            return T_ENCAPSED_AND_WHITESPACE
        case '$':
            c = l.Next()
            if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
                // Give the '$' back and cut it off the returned text.
                l.ungetChars(1)
                tb := l.Token()
                lval.Token(l.newToken(tb[:len(tb)-1]))
                return T_ENCAPSED_AND_WHITESPACE
            }
            // NOTE(review): ungetChars(0) presumably re-arms the lookahead so that the
            // l.Next() after the switch yields the char just inspected — confirm.
            l.ungetChars(0)
        case '{':
            c = l.Next()
            if rune(c) == '$' {
                // Give the '{' back and cut it off the returned text.
                l.ungetChars(1)
                tb := l.Token()
                lval.Token(l.newToken(tb[:len(tb)-1]))
                return T_ENCAPSED_AND_WHITESPACE
            }
            l.ungetChars(0)
        case '\\':
            // Skip the escaped char.
            c = l.Next()
        }
        c = l.Next()
    }
<BACKQUOTE>.|[ \t\n\r]
    // Literal text inside a backquoted string. Consumes input until the closing
    // backquote or the start of an interpolation ("$name", "${", "{$") and returns
    // the text before it as T_ENCAPSED_AND_WHITESPACE. The backquote or
    // interpolation itself is left for the other BACKQUOTE rules.
    if l.Token()[0].Rune == '\\' && c != -1 {
        // The char matched by the rule is itself a backslash: swallow the char it
        // escapes, otherwise an escaped '`' or '$' at the very start of this chunk
        // would be interpreted by the loop below.
        c = l.Next()
    }
    for {
        if c == -1 {
            // End of input inside the string: leave the action without a token.
            break
        }
        switch c {
        case '`':
            // The backquote is still the lookahead, so it is not part of the token.
            lval.Token(l.newToken(l.Token()))
            return T_ENCAPSED_AND_WHITESPACE
        case '$':
            c = l.Next()
            if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
                // Give the '$' back and cut it off the returned text.
                l.ungetChars(1)
                tb := l.Token()
                lval.Token(l.newToken(tb[:len(tb)-1]))
                return T_ENCAPSED_AND_WHITESPACE
            }
            // NOTE(review): ungetChars(0) presumably re-arms the lookahead so that the
            // l.Next() after the switch yields the char just inspected — confirm.
            l.ungetChars(0)
        case '{':
            c = l.Next()
            if rune(c) == '$' {
                // Give the '{' back and cut it off the returned text.
                l.ungetChars(1)
                tb := l.Token()
                lval.Token(l.newToken(tb[:len(tb)-1]))
                return T_ENCAPSED_AND_WHITESPACE
            }
            l.ungetChars(0)
        case '\\':
            // Skip the escaped char.
            c = l.Next()
        }
        c = l.Next()
    }
<HEREDOC>.|[ \t\n\r]
    // Literal text of a heredoc body. Consumes input until either a line break is
    // reached right after the label (optionally followed by ';') or an
    // interpolation ("$name", "${", "{$") starts, then returns the chars reported
    // by ungetChars as T_ENCAPSED_AND_WHITESPACE.
    // searchLabel holds the plain bytes seen since the last line break, '$', '{' or '\\'.
    // NOTE(review): accumulation starts with the lookahead c, so the char matched
    // by the rule pattern itself never reaches searchLabel, and after an
    // interpolation the scan resumes mid-line — confirm neither can fake a closing label.
searchLabel := []byte{}
tb := []lex.Char{}
HEREDOCFOR:for {
if c == -1 {
    // End of input without a closing label: tb is still empty here.
break;
}
switch c {
case '\n': fallthrough
case '\r':
    // "LABEL;" form: the label and the ';' are both handed to ungetChars.
if bytes.Equal(append(l.charsToBytes(heredocLabel), ';'), searchLabel) {
l.begin(HEREDOC_END)
tb = l.ungetChars(len(heredocLabel)+1)
break HEREDOCFOR;
}
    // Plain "LABEL" form.
if bytes.Equal(l.charsToBytes(heredocLabel), searchLabel) {
l.begin(HEREDOC_END)
tb = l.ungetChars(len(heredocLabel))
break HEREDOCFOR;
}
searchLabel = []byte{}
case '$':
c = l.Next();
    // '$' followed by '{' or a name start char begins an interpolation: stop here
    // and hand one char to ungetChars.
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
tb = l.ungetChars(1)
break HEREDOCFOR;
}
    // NOTE(review): ungetChars(0) presumably re-arms the lookahead so that the
    // l.Next() after the switch yields the char just inspected — confirm.
l.ungetChars(0)
searchLabel = []byte{}
case '{':
c = l.Next();
    // "{$" begins an interpolation as well.
if rune(c) == '$' {
tb = l.ungetChars(1)
break HEREDOCFOR;
}
l.ungetChars(0)
searchLabel = []byte{}
case '\\':
    // Skip the escaped char.
c = l.Next();
searchLabel = []byte{}
default:
    // NOTE(review): byte(rune(c)) keeps only the low 8 bits of c; verify against
    // charsToBytes for labels containing non-ASCII chars.
searchLabel = append(searchLabel, byte(rune(c)))
}
c = l.Next()
}
lval.Token(l.newToken(tb));
return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR>\${VAR_NAME} lval.Token(l.newToken(l.Token())); return T_VARIABLE // "$name" inside a string; the state stays STRING_VAR for a following "->" or "["
<STRING_VAR>->{VAR_NAME} lval.Token(l.newToken(l.ungetChars(len(l.Token())-2))); return T_OBJECT_OPERATOR // "->name": all but the first two chars are handed to ungetChars, whose result forms the "->" token
<STRING_VAR>{VAR_NAME} l.popState();lval.Token(l.newToken(l.Token())); return T_STRING // member name: emit it and leave STRING_VAR
<STRING_VAR>\[ l.pushState(STRING_VAR_INDEX);lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) // "[": the index is lexed in STRING_VAR_INDEX
<STRING_VAR>.|[ \t\n\r] l.ungetChars(1);l.popState() // anything else: hand the char to ungetChars and leave STRING_VAR; no token is returned here
<STRING_VAR_INDEX>{LNUM}|{HNUM}|{BNUM} lval.Token(l.newToken(l.Token())); return T_NUM_STRING // numeric index
<STRING_VAR_INDEX>\${VAR_NAME} lval.Token(l.newToken(l.Token())); return T_VARIABLE // variable index
<STRING_VAR_INDEX>{VAR_NAME} lval.Token(l.newToken(l.Token())); return T_STRING // bare-word index
<STRING_VAR_INDEX>\] l.popState(); l.popState();lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) // "]": pops two states (STRING_VAR_INDEX and the state below it)
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.Token(l.newToken(l.Token())); return T_ENCAPSED_AND_WHITESPACE // whitespace, backslash, quote or '#' ends the index: pop two states and return the char as literal text
<STRING_VAR_INDEX>{OPERATORS} lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) // operator char: token id derived from its first byte
<STRING_VAR_INDEX>. lval.Token(l.newToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0])) // any other char: token id derived from its first byte
<STRING_VAR_NAME>{VAR_NAME}[\[\}] l.popState();l.pushState(PHP);lval.Token(l.newToken(l.ungetChars(1))); return T_STRING_VARNAME // name followed by "[" or "}": replace STRING_VAR_NAME with PHP and hand the trailing char to ungetChars
<STRING_VAR_NAME>. l.ungetChars(1);l.popState();l.pushState(PHP) // anything else: hand the char to ungetChars and replace STRING_VAR_NAME with PHP; no token is returned here
%%
// Trailer that follows the rules: ask the lexer's Abort; when it reports ok,
// return its value as the token id, otherwise jump back to yyAction.
// NOTE(review): presumably reached when no rule matches the input — confirm
// against the generated scanner.
if c, ok := l.Abort(); ok { return int(c) }
goto yyAction
}

View File

@ -1,4 +1,4 @@
package main
package visitor
import (
"fmt"