From c11d3ece0d56ada09108b3dfdda808f29483514b Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 23 Nov 2017 17:33:47 +0200 Subject: [PATCH] parser, first try --- Makefile | 9 +- lexer.go | 907 +++++++++++++++++++++++++++++++----------------------- lexer.l | 355 +++++++++++---------- parser.go | 828 +++++++++++++++++++++++++++++++++++++++++++++++++ parser.y | 193 ++++++++++++ 5 files changed, 1720 insertions(+), 572 deletions(-) create mode 100644 parser.go create mode 100644 parser.y diff --git a/Makefile b/Makefile index a1348ad..df22327 100644 --- a/Makefile +++ b/Makefile @@ -4,14 +4,19 @@ # blame: jnml, labs.nic.cz -all: lexer.go +all: parser.go lexer.go + rm -f y.output + gofmt -l -s -w *.go go build run: all ./php-parser lexer.go: lexer.l - golex -t $< | gofmt > $@ + golex -o $@ $< + +parser.go: parser.y + goyacc -o $@ $< clean: rm -f php-parser.go lex.yy.go y.output *~ diff --git a/lexer.go b/lexer.go index a8b4521..85006da 100644 --- a/lexer.go +++ b/lexer.go @@ -14,7 +14,6 @@ import ( "fmt" "go/token" "io" - "os" "unicode" "github.com/cznic/golex/lex" @@ -83,7 +82,8 @@ func rune2Class(r rune) int { if unicode.IsDigit(r) { return classUnicodeDigit } - return classOther + // return classOther + return -1 } func newLexer(src io.Reader, dst io.Writer, fName string) *lexer { @@ -95,8 +95,6 @@ func newLexer(src io.Reader, dst io.Writer, fName string) *lexer { return &lexer{lx} } -type yySymType struct{} - func (l *lexer) unget(r rune) []byte { l.Unget(l.Lookahead()) @@ -131,14 +129,13 @@ func (l *lexer) ungetN(n int) []byte { return buf } -func (l *lexer) Lex() int { // Lex(lval *yySymType) +func (l *lexer) Lex(lval *yySymType) int { // Lex(lval *yySymType) c := l.Enter() yystate0: yyrule := -1 _ = yyrule c = l.Rule0() - // ([\$]{NCH})* switch yyt := sc; yyt { default: @@ -160,11 +157,11 @@ yystate0: case 7: // start condition: HEREDOC_END goto yystart593 case 8: // start condition: NOWDOC - goto yystart598 + goto yystart596 case 9: // start condition: HEREDOC - goto yystart600 + goto yystart598 case 10: // start condition: BACKQUOTE - goto yystart604 + goto yystart602 } goto yystate0 // silence unused label error @@ -499,8 +496,6 @@ yyAction: goto yyrule163 case 164: goto yyrule164 - case 165: - goto yyrule165 } goto yystate1 // silence unused label error yystate1: @@ -773,9 +768,9 @@ yystate17: yystate18: c = l.Next() - yyrule = 144 + yyrule = 143 l.Mark() - goto yyrule144 + goto yyrule143 yystate19: c = l.Next() @@ -7053,49 +7048,49 @@ yystart546: yystate547: c = l.Next() - yyrule = 149 + yyrule = 148 l.Mark() - goto yyrule149 + goto yyrule148 yystate548: c = l.Next() - yyrule = 145 + yyrule = 144 l.Mark() - goto yyrule145 + goto yyrule144 yystate549: c = l.Next() - yyrule = 148 + yyrule = 147 l.Mark() switch { default: - goto yyrule148 + goto yyrule147 case c == '{': goto yystate550 } yystate550: c = l.Next() - yyrule = 147 + yyrule = 146 l.Mark() - goto yyrule147 + goto yyrule146 yystate551: c = l.Next() - yyrule = 149 + yyrule = 148 l.Mark() switch { default: - goto yyrule149 + goto yyrule148 case c == '$': goto yystate552 } yystate552: c = l.Next() - yyrule = 146 + yyrule = 145 l.Mark() - goto yyrule146 + goto yyrule145 goto yystate553 // silence unused label error yystate553: @@ -7118,39 +7113,39 @@ yystart553: yystate554: c = l.Next() - yyrule = 156 + yyrule = 155 l.Mark() - goto yyrule156 + goto yyrule155 yystate555: c = l.Next() - yyrule = 156 + yyrule = 155 l.Mark() switch { default: - goto yyrule156 + goto yyrule155 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate556 } yystate556: c = l.Next() - yyrule = 152 + yyrule = 151 l.Mark() switch { default: - goto yyrule152 + goto yyrule151 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate556 } yystate557: c = l.Next() - yyrule = 156 + yyrule = 155 l.Mark() switch { default: - goto yyrule156 + goto yyrule155 case c == '>': goto yystate558 } @@ -7166,42 +7161,42 @@ yystate558: yystate559: c = l.Next() - yyrule = 153 + yyrule = 152 l.Mark() switch { default: - goto yyrule153 + goto yyrule152 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate559 } yystate560: c = l.Next() - yyrule = 154 + yyrule = 153 l.Mark() switch { default: - goto yyrule154 + goto yyrule153 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate561 } yystate561: c = l.Next() - yyrule = 154 + yyrule = 153 l.Mark() switch { default: - goto yyrule154 + goto yyrule153 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate561 } yystate562: c = l.Next() - yyrule = 155 + yyrule = 154 l.Mark() - goto yyrule155 + goto yyrule154 goto yystate563 // silence unused label error yystate563: @@ -7231,58 +7226,58 @@ yystart563: } yystate564: - c = l.Next() - yyrule = 163 - l.Mark() - goto yyrule163 - -yystate565: - c = l.Next() - yyrule = 161 - l.Mark() - goto yyrule161 - -yystate566: - c = l.Next() - yyrule = 161 - l.Mark() - goto yyrule161 - -yystate567: c = l.Next() yyrule = 162 l.Mark() goto yyrule162 +yystate565: + c = l.Next() + yyrule = 160 + l.Mark() + goto yyrule160 + +yystate566: + c = l.Next() + yyrule = 160 + l.Mark() + goto yyrule160 + +yystate567: + c = l.Next() + yyrule = 161 + l.Mark() + goto yyrule161 + yystate568: c = l.Next() - yyrule = 162 + yyrule = 161 l.Mark() switch { default: - goto yyrule162 + goto yyrule161 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate569 } yystate569: c = l.Next() - yyrule = 158 + yyrule = 157 l.Mark() switch { default: - goto yyrule158 + goto yyrule157 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate569 } yystate570: c = l.Next() - yyrule = 157 + yyrule = 156 l.Mark() switch { default: - goto yyrule157 + goto yyrule156 case c == 'b': goto yystate572 case c == 'x': @@ -7293,11 +7288,11 @@ yystate570: yystate571: c = l.Next() - yyrule = 157 + yyrule = 156 l.Mark() switch { default: - goto yyrule157 + goto yyrule156 case c >= '0' && c <= '9': goto yystate571 } @@ -7313,11 +7308,11 @@ yystate572: yystate573: c = l.Next() - yyrule = 157 + yyrule = 156 l.Mark() switch { default: - goto yyrule157 + goto yyrule156 case c == '0' || c == '1': goto yystate573 } @@ -7333,53 +7328,53 @@ yystate574: yystate575: c = l.Next() - yyrule = 157 + yyrule = 156 l.Mark() switch { default: - goto yyrule157 + goto yyrule156 case c >= '0' && c <= '9' || c >= 'A' && c <= 'F' || c >= 'a' && c <= 'f': goto yystate575 } yystate576: c = l.Next() - yyrule = 157 + yyrule = 156 l.Mark() switch { default: - goto yyrule157 + goto yyrule156 case c >= '0' && c <= '9': goto yystate571 } yystate577: c = l.Next() - yyrule = 159 + yyrule = 158 l.Mark() switch { default: - goto yyrule159 + goto yyrule158 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate578 } yystate578: c = l.Next() - yyrule = 159 + yyrule = 158 l.Mark() switch { default: - goto yyrule159 + goto yyrule158 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate578 } yystate579: c = l.Next() - yyrule = 160 + yyrule = 159 l.Mark() - goto yyrule160 + goto yyrule159 goto yystate580 // silence unused label error yystate580: @@ -7396,17 +7391,17 @@ yystart580: yystate581: c = l.Next() - yyrule = 165 + yyrule = 164 l.Mark() - goto yyrule165 + goto yyrule164 yystate582: c = l.Next() - yyrule = 165 + yyrule = 164 l.Mark() switch { default: - goto yyrule165 + goto yyrule164 case c == '[' || c == '}': goto yystate584 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': @@ -7426,9 +7421,9 @@ yystate583: yystate584: c = l.Next() - yyrule = 164 + yyrule = 163 l.Mark() - goto yyrule164 + goto yyrule163 goto yystate585 // silence unused label error yystate585: @@ -7524,47 +7519,43 @@ yystart593: default: goto yyabort case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': - goto yystate595 - case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '@' || c >= '[' && c <= '^' || c == '`' || c >= '{' && c <= '~': goto yystate594 } yystate594: - c = l.Next() - yyrule = 143 - l.Mark() - goto yyrule143 - -yystate595: - c = l.Next() - yyrule = 143 - l.Mark() - switch { - default: - goto yyrule143 - case c == ';': - goto yystate597 - case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': - goto yystate596 - } - -yystate596: c = l.Next() switch { default: goto yyabort case c == ';': - goto yystate597 + goto yystate595 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': - goto yystate596 + goto yystate594 } -yystate597: +yystate595: c = l.Next() yyrule = 142 l.Mark() goto yyrule142 + goto yystate596 // silence unused label error +yystate596: + c = l.Next() +yystart596: + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate597 + } + +yystate597: + c = l.Next() + yyrule = 141 + l.Mark() + goto yyrule141 + goto yystate598 // silence unused label error yystate598: c = l.Next() @@ -7572,100 +7563,32 @@ yystart598: switch { default: goto yyabort - case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + case c == '$': + goto yystate600 + case c == '{': + goto yystate601 + case c >= '\x01' && c <= '#' || c >= '%' && c <= 'z' || c >= '|' && c <= 'ÿ': goto yystate599 } yystate599: - c = l.Next() - yyrule = 141 - l.Mark() - goto yyrule141 - - goto yystate600 // silence unused label error -yystate600: - c = l.Next() -yystart600: - switch { - default: - goto yyabort - case c == '$': - goto yystate602 - case c == '{': - goto yystate603 - case c >= '\x01' && c <= '#' || c >= '%' && c <= 'z' || c >= '|' && c <= 'ÿ': - goto yystate601 - } - -yystate601: - c = l.Next() - yyrule = 151 - l.Mark() - goto yyrule151 - -yystate602: - c = l.Next() - yyrule = 148 - l.Mark() - switch { - default: - goto yyrule148 - case c == '{': - goto yystate550 - } - -yystate603: - c = l.Next() - yyrule = 151 - l.Mark() - switch { - default: - goto yyrule151 - case c == '$': - goto yystate552 - } - - goto yystate604 // silence unused label error -yystate604: - c = l.Next() -yystart604: - switch { - default: - goto yyabort - case c == '$': - goto yystate606 - case c == '`': - goto yystate607 - case c == '{': - goto yystate608 - case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '#' || c >= '%' && c <= '_' || c >= 'a' && c <= 'z' || c >= '|' && c <= 'ÿ': - goto yystate605 - } - -yystate605: c = l.Next() yyrule = 150 l.Mark() goto yyrule150 -yystate606: +yystate600: c = l.Next() - yyrule = 148 + yyrule = 147 l.Mark() switch { default: - goto yyrule148 + goto yyrule147 case c == '{': goto yystate550 } -yystate607: - c = l.Next() - yyrule = 139 - l.Mark() - goto yyrule139 - -yystate608: +yystate601: c = l.Next() yyrule = 150 l.Mark() @@ -7676,6 +7599,57 @@ yystate608: goto yystate552 } + goto yystate602 // silence unused label error +yystate602: + c = l.Next() +yystart602: + switch { + default: + goto yyabort + case c == '$': + goto yystate604 + case c == '`': + goto yystate605 + case c == '{': + goto yystate606 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '#' || c >= '%' && c <= '_' || c >= 'a' && c <= 'z' || c >= '|' && c <= 'ÿ': + goto yystate603 + } + +yystate603: + c = l.Next() + yyrule = 149 + l.Mark() + goto yyrule149 + +yystate604: + c = l.Next() + yyrule = 147 + l.Mark() + switch { + default: + goto yyrule147 + case c == '{': + goto yystate550 + } + +yystate605: + c = l.Next() + yyrule = 139 + l.Mark() + goto yyrule139 + +yystate606: + c = l.Next() + yyrule = 149 + l.Mark() + switch { + default: + goto yyrule149 + case c == '$': + goto yystate552 + } + yyrule1: // [ \t\n\r]+ goto yystate0 @@ -7684,36 +7658,37 @@ yyrule2: // . goto yystate0 yyrule3: // \<\?php([ \t]|{NEW_LINE}) { - fmt.Println("T_OPEN_TAG") - begin(PHP) + begin(PHP) //lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG; goto yystate0 } yyrule4: // \<\? { - fmt.Println("T_OPEN_TAG") - begin(PHP) + begin(PHP) //lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG; goto yystate0 } yyrule5: // \<\?= { - fmt.Println("T_OPEN_TAG_WITH_ECHO") begin(PHP) + lval.token = string(l.TokenBytes(nil)) + return T_OPEN_TAG_WITH_ECHO goto yystate0 } yyrule6: // [ \t\n\r]+ { - fmt.Println("T_WHITESPACE") + //lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE goto yystate0 } yyrule7: // \?\>{NEW_LINE}? { - fmt.Println("T_CLOSE_TAG") begin(INITIAL) + lval.token = string(l.TokenBytes(nil)) + return T_CLOSE_TAG goto yystate0 } yyrule8: // {DNUM}|{EXPONENT_DNUM} { - fmt.Println("T_DNUMBER") + lval.token = string(l.TokenBytes(nil)) + return T_DNUMBER goto yystate0 } yyrule9: // {BNUM} @@ -7731,9 +7706,11 @@ yyrule9: // {BNUM} } } if len(tb)-i < 64 { - fmt.Println("T_LNUMBER") + lval.token = string(l.TokenBytes(nil)) + return T_LNUMBER } else { - fmt.Println("T_DNUMBER") + lval.token = string(l.TokenBytes(nil)) + return T_DNUMBER } goto yystate0 } @@ -7741,9 +7718,11 @@ yyrule10: // {LNUM} { if len(l.TokenBytes(nil)) < 20 { - fmt.Println("T_LNUMBER") + lval.token = string(l.TokenBytes(nil)) + return T_LNUMBER } else { - fmt.Println("T_DNUMBER") + lval.token = string(l.TokenBytes(nil)) + return T_DNUMBER } goto yystate0 } @@ -7763,634 +7742,763 @@ yyrule11: // {HNUM} } length := len(tb) - i if length < 16 || (length == 16 && tb[i] <= '7') { - fmt.Println("T_LNUMBER") + lval.token = string(l.TokenBytes(nil)) + return T_LNUMBER } else { - fmt.Println("T_DNUMBER") + lval.token = string(l.TokenBytes(nil)) + return T_DNUMBER } goto yystate0 } yyrule12: // abstract { - fmt.Println("T_ABSTRACT") + lval.token = string(l.TokenBytes(nil)) + return T_ABSTRACT goto yystate0 } yyrule13: // array { - fmt.Println("T_ARRAY") + lval.token = string(l.TokenBytes(nil)) + return T_ARRAY goto yystate0 } yyrule14: // as { - fmt.Println("T_AS") + lval.token = string(l.TokenBytes(nil)) + return T_AS goto yystate0 } yyrule15: // break { - fmt.Println("T_BREAK") + lval.token = string(l.TokenBytes(nil)) + return T_BREAK goto yystate0 } yyrule16: // callable { - fmt.Println("T_CALLABLE") + lval.token = string(l.TokenBytes(nil)) + return T_CALLABLE goto yystate0 } yyrule17: // case { - fmt.Println("T_CASE") + lval.token = string(l.TokenBytes(nil)) + return T_CASE goto yystate0 } yyrule18: // catch { - fmt.Println("T_CATCH") + lval.token = string(l.TokenBytes(nil)) + return T_CATCH goto yystate0 } yyrule19: // class { - fmt.Println("T_CLASS") + lval.token = string(l.TokenBytes(nil)) + return T_CLASS goto yystate0 } yyrule20: // clone { - fmt.Println("T_CLONE") + lval.token = string(l.TokenBytes(nil)) + return T_CLONE goto yystate0 } yyrule21: // const { - fmt.Println("T_CONST") + lval.token = string(l.TokenBytes(nil)) + return T_CONST goto yystate0 } yyrule22: // continue { - fmt.Println("T_CONTINUE") + lval.token = string(l.TokenBytes(nil)) + return T_CONTINUE goto yystate0 } yyrule23: // declare { - fmt.Println("T_DECLARE") + lval.token = string(l.TokenBytes(nil)) + return T_DECLARE goto yystate0 } yyrule24: // default { - fmt.Println("T_DEFAULT") + lval.token = string(l.TokenBytes(nil)) + return T_DEFAULT goto yystate0 } yyrule25: // do { - fmt.Println("T_DO") + lval.token = string(l.TokenBytes(nil)) + return T_DO goto yystate0 } yyrule26: // echo { - fmt.Println("T_ECHO") + lval.token = string(l.TokenBytes(nil)) + return T_ECHO goto yystate0 } yyrule27: // else { - fmt.Println("T_ELSE") + lval.token = string(l.TokenBytes(nil)) + return T_ELSE goto yystate0 } yyrule28: // elseif { - fmt.Println("T_ELSEIF") + lval.token = string(l.TokenBytes(nil)) + return T_ELSEIF goto yystate0 } yyrule29: // empty { - fmt.Println("T_EMPTY") + lval.token = string(l.TokenBytes(nil)) + return T_EMPTY goto yystate0 } yyrule30: // endfor { - fmt.Println("T_ENDFOR") + lval.token = string(l.TokenBytes(nil)) + return T_ENDFOR goto yystate0 } yyrule31: // endforeach { - fmt.Println("T_ENDFOREACH") + lval.token = string(l.TokenBytes(nil)) + return T_ENDFOREACH goto yystate0 } yyrule32: // endif { - fmt.Println("T_ENDIF") + lval.token = string(l.TokenBytes(nil)) + return T_ENDIF goto yystate0 } yyrule33: // endswitch { - fmt.Println("T_ENDSWITCH") + lval.token = string(l.TokenBytes(nil)) + return T_ENDSWITCH goto yystate0 } yyrule34: // endwhile { - fmt.Println("T_ENDWHILE") + lval.token = string(l.TokenBytes(nil)) + return T_ENDWHILE goto yystate0 } yyrule35: // eval { - fmt.Println("T_EVAL") + lval.token = string(l.TokenBytes(nil)) + return T_EVAL goto yystate0 } yyrule36: // exit|die { - fmt.Println("T_EXIT") + lval.token = string(l.TokenBytes(nil)) + return T_EXIT goto yystate0 } yyrule37: // extends { - fmt.Println("T_EXTENDS") + lval.token = string(l.TokenBytes(nil)) + return T_EXTENDS goto yystate0 } yyrule38: // final { - fmt.Println("T_FINAL") + lval.token = string(l.TokenBytes(nil)) + return T_FINAL goto yystate0 } yyrule39: // finally { - fmt.Println("T_FINALLY") + lval.token = string(l.TokenBytes(nil)) + return T_FINALLY goto yystate0 } yyrule40: // for { - fmt.Println("T_FOR") + lval.token = string(l.TokenBytes(nil)) + return T_FOR goto yystate0 } yyrule41: // foreach { - fmt.Println("T_FOREACH") + lval.token = string(l.TokenBytes(nil)) + return T_FOREACH goto yystate0 } yyrule42: // function|cfunction { - fmt.Println("T_FUNCTION") + lval.token = string(l.TokenBytes(nil)) + return T_FUNCTION goto yystate0 } yyrule43: // global { - fmt.Println("T_GLOBAL") + lval.token = string(l.TokenBytes(nil)) + return T_GLOBAL goto yystate0 } yyrule44: // goto { - fmt.Println("T_GOTO") + lval.token = string(l.TokenBytes(nil)) + return T_GOTO goto yystate0 } yyrule45: // if { - fmt.Println("T_IF") + lval.token = string(l.TokenBytes(nil)) + return T_IF goto yystate0 } yyrule46: // isset { - fmt.Println("T_ISSET") + lval.token = string(l.TokenBytes(nil)) + return T_ISSET goto yystate0 } yyrule47: // implements { - fmt.Println("T_IMPLEMENTS") + lval.token = string(l.TokenBytes(nil)) + return T_IMPLEMENTS goto yystate0 } yyrule48: // instanceof { - fmt.Println("T_INSTANCEOF") + lval.token = string(l.TokenBytes(nil)) + return T_INSTANCEOF goto yystate0 } yyrule49: // insteadof { - fmt.Println("T_INSTEADOF") + lval.token = string(l.TokenBytes(nil)) + return T_INSTEADOF goto yystate0 } yyrule50: // interface { - fmt.Println("T_INTERFACE") + lval.token = string(l.TokenBytes(nil)) + return T_INTERFACE goto yystate0 } yyrule51: // list { - fmt.Println("T_LIST") + lval.token = string(l.TokenBytes(nil)) + return T_LIST goto yystate0 } yyrule52: // namespace { - fmt.Println("T_NAMESPACE") + lval.token = string(l.TokenBytes(nil)) + return T_NAMESPACE goto yystate0 } yyrule53: // private { - fmt.Println("T_PRIVATE") + lval.token = string(l.TokenBytes(nil)) + return T_PRIVATE goto yystate0 } yyrule54: // public { - fmt.Println("T_PUBLIC") + lval.token = string(l.TokenBytes(nil)) + return T_PUBLIC goto yystate0 } yyrule55: // print { - fmt.Println("T_PRINT") + lval.token = string(l.TokenBytes(nil)) + return T_PRINT goto yystate0 } yyrule56: // protected { - fmt.Println("T_PROTECTED") + lval.token = string(l.TokenBytes(nil)) + return T_PROTECTED goto yystate0 } yyrule57: // return { - fmt.Println("T_RETURN") + lval.token = string(l.TokenBytes(nil)) + return T_RETURN goto yystate0 } yyrule58: // static { - fmt.Println("T_STATIC") + lval.token = string(l.TokenBytes(nil)) + return T_STATIC goto yystate0 } yyrule59: // switch { - fmt.Println("T_SWITCH") + lval.token = string(l.TokenBytes(nil)) + return T_SWITCH goto yystate0 } yyrule60: // throw { - fmt.Println("T_THROW") + lval.token = string(l.TokenBytes(nil)) + return T_THROW goto yystate0 } yyrule61: // trait { - fmt.Println("T_TRAIT") + lval.token = string(l.TokenBytes(nil)) + return T_TRAIT goto yystate0 } yyrule62: // try { - fmt.Println("T_TRY") + lval.token = string(l.TokenBytes(nil)) + return T_TRY goto yystate0 } yyrule63: // unset { - fmt.Println("T_UNSET") + lval.token = string(l.TokenBytes(nil)) + return T_UNSET goto yystate0 } yyrule64: // use { - fmt.Println("T_USE") + lval.token = string(l.TokenBytes(nil)) + return T_USE goto yystate0 } yyrule65: // var { - fmt.Println("T_VAR") + lval.token = string(l.TokenBytes(nil)) + return T_VAR goto yystate0 } yyrule66: // while { - fmt.Println("T_WHILE") + lval.token = string(l.TokenBytes(nil)) + return T_WHILE goto yystate0 } yyrule67: // yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] { - fmt.Println("T_YIELD_FROM") + lval.token = string(l.TokenBytes(nil)) + return T_YIELD_FROM goto yystate0 } yyrule68: // yield { - fmt.Println("T_YIELD") + lval.token = string(l.TokenBytes(nil)) + return T_YIELD goto yystate0 } yyrule69: // include { - fmt.Println("T_INCLUDE") + lval.token = string(l.TokenBytes(nil)) + return T_INCLUDE goto yystate0 } yyrule70: // include_once { - fmt.Println("T_INCLUDE_ONCE") + lval.token = string(l.TokenBytes(nil)) + return T_INCLUDE_ONCE goto yystate0 } yyrule71: // require { - fmt.Println("T_REQUIRE") + lval.token = string(l.TokenBytes(nil)) + return T_REQUIRE goto yystate0 } yyrule72: // require_once { - fmt.Println("T_REQUIRE_ONCE") + lval.token = string(l.TokenBytes(nil)) + return T_REQUIRE_ONCE goto yystate0 } yyrule73: // __CLASS__ { - fmt.Println("T_CLASS_C") + lval.token = string(l.TokenBytes(nil)) + return T_CLASS_C goto yystate0 } yyrule74: // __DIR__ { - fmt.Println("T_DIR") + lval.token = string(l.TokenBytes(nil)) + return T_DIR goto yystate0 } yyrule75: // __FILE__ { - fmt.Println("T_FILE") + lval.token = string(l.TokenBytes(nil)) + return T_FILE goto yystate0 } yyrule76: // __FUNCTION__ { - fmt.Println("T_FUNC_C") + lval.token = string(l.TokenBytes(nil)) + return T_FUNC_C goto yystate0 } yyrule77: // __LINE__ { - fmt.Println("T_LINE") + lval.token = string(l.TokenBytes(nil)) + return T_LINE goto yystate0 } yyrule78: // __NAMESPACE__ { - fmt.Println("T_NS_C") + lval.token = string(l.TokenBytes(nil)) + return T_NS_C goto yystate0 } yyrule79: // __METHOD__ { - fmt.Println("T_METHOD_C") + lval.token = string(l.TokenBytes(nil)) + return T_METHOD_C goto yystate0 } yyrule80: // __TRAIT__ { - fmt.Println("T_TRAIT_C") + lval.token = string(l.TokenBytes(nil)) + return T_TRAIT_C goto yystate0 } yyrule81: // __halt_compiler { - fmt.Println("T_HALT_COMPILER") + lval.token = string(l.TokenBytes(nil)) + return T_HALT_COMPILER goto yystate0 } yyrule82: // \([ \t]*array[ \t]*\) { - fmt.Println("T_ARRAY_CAST") + lval.token = string(l.TokenBytes(nil)) + return T_ARRAY_CAST goto yystate0 } yyrule83: // \([ \t]*(bool|boolean)[ \t]*\) { - fmt.Println("T_BOOL_CAST") + lval.token = string(l.TokenBytes(nil)) + return T_BOOL_CAST goto yystate0 } yyrule84: // \([ \t]*(real|double|float)[ \t]*\) { - fmt.Println("T_DOUBLE_CAST") + lval.token = string(l.TokenBytes(nil)) + return T_DOUBLE_CAST goto yystate0 } yyrule85: // \([ \t]*(int|integer)[ \t]*\) { - fmt.Println("T_INT_CAST") + lval.token = string(l.TokenBytes(nil)) + return T_INT_CAST goto yystate0 } yyrule86: // \([ \t]*object[ \t]*\) { - fmt.Println("T_OBJECT_CAST") + lval.token = string(l.TokenBytes(nil)) + return T_OBJECT_CAST goto yystate0 } yyrule87: // \([ \t]*string[ \t]*\) { - fmt.Println("T_STRING_CAST") + lval.token = string(l.TokenBytes(nil)) + return T_STRING_CAST goto yystate0 } yyrule88: // \([ \t]*unset[ \t]*\) { - fmt.Println("T_UNSET_CAST") + lval.token = string(l.TokenBytes(nil)) + return T_UNSET_CAST goto yystate0 } yyrule89: // new { - fmt.Println("T_NEW") + lval.token = string(l.TokenBytes(nil)) + return T_NEW goto yystate0 } yyrule90: // and { - fmt.Println("T_LOGICAL_AND") + lval.token = string(l.TokenBytes(nil)) + return T_LOGICAL_AND goto yystate0 } yyrule91: // or { - fmt.Println("T_LOGICAL_OR") + lval.token = string(l.TokenBytes(nil)) + return T_LOGICAL_OR goto yystate0 } yyrule92: // xor { - fmt.Println("T_LOGICAL_XOR") + lval.token = string(l.TokenBytes(nil)) + return T_LOGICAL_XOR goto yystate0 } yyrule93: // \\ { - fmt.Println("T_NS_SEPARATOR") + lval.token = string(l.TokenBytes(nil)) + return T_NS_SEPARATOR goto yystate0 } yyrule94: // \.\.\. { - fmt.Println("T_ELLIPSIS") + lval.token = string(l.TokenBytes(nil)) + return T_ELLIPSIS goto yystate0 } yyrule95: // :: { - fmt.Println("T_PAAMAYIM_NEKUDOTAYIM") // T_DOUBLE_COLON + lval.token = string(l.TokenBytes(nil)) + return T_PAAMAYIM_NEKUDOTAYIM // T_DOUBLE_COLON goto yystate0 } yyrule96: // && { - fmt.Println("T_BOOLEAN_AND") + lval.token = string(l.TokenBytes(nil)) + return T_BOOLEAN_AND goto yystate0 } yyrule97: // \|\| { - fmt.Println("T_BOOLEAN_OR") + lval.token = string(l.TokenBytes(nil)) + return T_BOOLEAN_OR goto yystate0 } yyrule98: // &= { - fmt.Println("T_AND_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_AND_EQUAL goto yystate0 } yyrule99: // \|= { - fmt.Println("T_OR_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_OR_EQUAL goto yystate0 } yyrule100: // \.= { - fmt.Println("T_CONCAT_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_CONCAT_EQUAL goto yystate0 } yyrule101: // \*= { - fmt.Println("T_MUL_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_MUL_EQUAL goto yystate0 } yyrule102: // \*\*= { - fmt.Println("T_POW_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_POW_EQUAL goto yystate0 } yyrule103: // [/]= { - fmt.Println("T_DIV_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_DIV_EQUAL goto yystate0 } yyrule104: // \+= { - fmt.Println("T_PLUS_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_PLUS_EQUAL goto yystate0 } yyrule105: // -= { - fmt.Println("T_MINUS_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_MINUS_EQUAL goto yystate0 } yyrule106: // \^= { - fmt.Println("T_XOR_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_XOR_EQUAL goto yystate0 } yyrule107: // %= { - fmt.Println("T_MOD_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_MOD_EQUAL goto yystate0 } yyrule108: // -- { - fmt.Println("T_DEC") + lval.token = string(l.TokenBytes(nil)) + return T_DEC goto yystate0 } yyrule109: // \+\+ { - fmt.Println("T_INC") + lval.token = string(l.TokenBytes(nil)) + return T_INC goto yystate0 } yyrule110: // => { - fmt.Println("T_DOUBLE_ARROW") + lval.token = string(l.TokenBytes(nil)) + return T_DOUBLE_ARROW goto yystate0 } yyrule111: // \<=\> { - fmt.Println("T_SPACESHIP") + lval.token = string(l.TokenBytes(nil)) + return T_SPACESHIP goto yystate0 } yyrule112: // \!=|\<\> { - fmt.Println("T_IS_NOT_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_IS_NOT_EQUAL goto yystate0 } yyrule113: // \!== { - fmt.Println("T_IS_NOT_IDENTICAL") + lval.token = string(l.TokenBytes(nil)) + return T_IS_NOT_IDENTICAL goto yystate0 } yyrule114: // == { - fmt.Println("T_IS_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_IS_EQUAL goto yystate0 } yyrule115: // === { - fmt.Println("T_IS_IDENTICAL") + lval.token = string(l.TokenBytes(nil)) + return T_IS_IDENTICAL goto yystate0 } yyrule116: // \<\<= { - fmt.Println("T_SL_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_SL_EQUAL goto yystate0 } yyrule117: // \>\>= { - fmt.Println("T_SR_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_SR_EQUAL goto yystate0 } yyrule118: // \>= { - fmt.Println("T_IS_GREATER_OR_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_IS_GREATER_OR_EQUAL goto yystate0 } yyrule119: // \<= { - fmt.Println("T_IS_SMALLER_OR_EQUAL") + lval.token = string(l.TokenBytes(nil)) + return T_IS_SMALLER_OR_EQUAL goto yystate0 } yyrule120: // \*\* { - fmt.Println("T_POW") + lval.token = string(l.TokenBytes(nil)) + return T_POW goto yystate0 } yyrule121: // \<\< { - fmt.Println("T_SL") + lval.token = string(l.TokenBytes(nil)) + return T_SL goto yystate0 } yyrule122: // \>\> { - fmt.Println("T_SR") + lval.token = string(l.TokenBytes(nil)) + return T_SR goto yystate0 } yyrule123: // (#|[/][/]){NEW_LINE} { - fmt.Println("T_COMMENT") // TODO: handle ?> + lval.token = string(l.TokenBytes(nil)) + return T_COMMENT // TODO: handle ?> goto yystate0 } yyrule124: // [/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] { - fmt.Println("T_COMMENT") // TODO: handle ?> + lval.token = string(l.TokenBytes(nil)) + return T_COMMENT // TODO: handle ?> goto yystate0 } yyrule125: // [/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/] { - fmt.Println("T_DOC_COMMENT") // TODO: handle ?> + lval.token = string(l.TokenBytes(nil)) + return T_DOC_COMMENT // TODO: handle ?> goto yystate0 } yyrule126: // '[^']*(\\')*' { - fmt.Println("T_CONSTANT_ENCAPSED_STRING") + lval.token = string(l.TokenBytes(nil)) + return T_CONSTANT_ENCAPSED_STRING goto yystate0 } yyrule127: // {OPERATORS} { - fmt.Printf("%s\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } yyrule128: // \{ { - fmt.Println("{") pushState(PHP) + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } yyrule129: // \} { - fmt.Println("}") popState() + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } yyrule130: // \${VAR_NAME} { - fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_VARIABLE goto yystate0 } yyrule131: // {VAR_NAME} { - fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil)) + if c == -1 { + fmt.Printf("%q\n", string(l.TokenBytes(nil))) + } + lval.token = string(l.TokenBytes(nil)) + return T_STRING goto yystate0 } yyrule132: // -> { - fmt.Println("T_OBJECT_OPERATOR") begin(PROPERTY) + lval.token = string(l.TokenBytes(nil)) + return T_OBJECT_OPERATOR goto yystate0 } yyrule133: // [ \t\n\r]+ { - fmt.Println("T_WHITESPACE") + lval.token = string(l.TokenBytes(nil)) + return T_WHITESPACE goto yystate0 } yyrule134: // -> { - fmt.Println("T_OBJECT_OPERATOR") + lval.token = string(l.TokenBytes(nil)) + return T_OBJECT_OPERATOR goto yystate0 } yyrule135: // {VAR_NAME} { - fmt.Println("T_STRING") begin(PHP) + lval.token = string(l.TokenBytes(nil)) + return T_STRING goto yystate0 } yyrule136: // . @@ -8401,19 +8509,22 @@ yyrule136: // . } yyrule137: // [\']([^\\\']*([\\][\'])*)*[\'] { - fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_CONSTANT_ENCAPSED_STRING goto yystate0 } yyrule138: // ` { - fmt.Println("`") begin(BACKQUOTE) + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } yyrule139: // ` { - fmt.Println("`") begin(PHP) + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } yyrule140: // [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE} @@ -8469,7 +8580,8 @@ yyrule140: // [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])) } } l.ungetN(ungetCnt) - fmt.Printf("T_START_HEREDOC: %q\n", tb) + lval.token = string(tb) + return T_START_HEREDOC goto yystate0 } yyrule141: // . @@ -8495,25 +8607,22 @@ yyrule141: // . } c = l.Next() } - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", tb) + lval.token = string(tb) + return T_ENCAPSED_AND_WHITESPACE goto yystate0 } yyrule142: // {VAR_NAME}\; { - fmt.Printf("T_END_HEREDOC: %q\n", l.ungetN(1)) begin(PHP) + lval.token = string(l.ungetN(1)) + return T_END_HEREDOC goto yystate0 } -yyrule143: // . - { - fmt.Printf("ERROR HEREDOC: %q\n", l.ungetN(1)) - goto yystate0 - } -yyrule144: // [b]?[\"] +yyrule143: // [b]?[\"] { binPrefix := l.TokenBytes(nil)[0] == 'b' - beginString := func() { + beginString := func() int { cnt := 1 if binPrefix { cnt = 2 @@ -8523,6 +8632,8 @@ yyrule144: // [b]?[\"] tokenBytes := l.TokenBytes(nil)[:cnt] fmt.Println(string(tokenBytes)) // TODO: RETURN TOKEN pushState(STRING) + lval.token = string(tokenBytes) + return rune2Class('"') } F: for { @@ -8532,13 +8643,14 @@ yyrule144: // [b]?[\"] switch c { case '"': c = l.Next() - fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_CONSTANT_ENCAPSED_STRING break F case '$': c = l.Next() if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' { - beginString() + return beginString() break F } l.ungetN(0) @@ -8546,7 +8658,7 @@ yyrule144: // [b]?[\"] case '{': c = l.Next() if rune(c) == '$' { - beginString() + return beginString() break F } l.ungetN(0) @@ -8557,31 +8669,33 @@ yyrule144: // [b]?[\"] } goto yystate0 } -yyrule145: // \" +yyrule144: // \" { - fmt.Println("\"") popState() + lval.token = "\"" + return c goto yystate0 } -yyrule146: // \{\$ +yyrule145: // \{\$ { - fmt.Printf("T_CURLY_OPEN: %q\n", l.ungetN(1)) - pushState(PHP) + lval.token = string(l.ungetN(1)) + return T_CURLY_OPEN goto yystate0 } -yyrule147: // \$\{ +yyrule146: // \$\{ { - fmt.Printf("T_DOLLAR_OPEN_CURLY_BRACES: %q\n", l.TokenBytes(nil)) pushState(STRING_VAR_NAME) + lval.token = string(l.TokenBytes(nil)) + return T_DOLLAR_OPEN_CURLY_BRACES goto yystate0 } -yyrule148: // \$ +yyrule147: // \$ { l.ungetN(1) pushState(STRING_VAR) goto yystate0 } -yyrule149: // . +yyrule148: // . { F1: @@ -8591,7 +8705,8 @@ yyrule149: // . } switch c { case '"': - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_ENCAPSED_AND_WHITESPACE break F1 case '$': @@ -8599,7 +8714,8 @@ yyrule149: // . if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' { l.ungetN(1) tb := l.TokenBytes(nil) - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]) + lval.token = string(tb[:len(tb)-1]) + return T_ENCAPSED_AND_WHITESPACE break F1 } l.ungetN(0) @@ -8609,7 +8725,8 @@ yyrule149: // . if rune(c) == '$' { l.ungetN(1) tb := l.TokenBytes(nil) - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]) + lval.token = string(tb[:len(tb)-1]) + return T_ENCAPSED_AND_WHITESPACE break F1 } l.ungetN(0) @@ -8620,7 +8737,7 @@ yyrule149: // . } goto yystate0 } -yyrule150: // . +yyrule149: // . { F2: @@ -8630,7 +8747,8 @@ yyrule150: // . } switch c { case '`': - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_ENCAPSED_AND_WHITESPACE break F2 case '$': @@ -8638,7 +8756,8 @@ yyrule150: // . if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' { l.ungetN(1) tb := l.TokenBytes(nil) - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]) + lval.token = string(tb[:len(tb)-1]) + return T_ENCAPSED_AND_WHITESPACE break F2 } l.ungetN(0) @@ -8648,7 +8767,8 @@ yyrule150: // . if rune(c) == '$' { l.ungetN(1) tb := l.TokenBytes(nil) - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]) + lval.token = string(tb[:len(tb)-1]) + return T_ENCAPSED_AND_WHITESPACE break F2 } l.ungetN(0) @@ -8659,7 +8779,7 @@ yyrule150: // . } goto yystate0 } -yyrule151: // .|[ \t\n\r] +yyrule150: // .|[ \t\n\r] { searchLabel := []byte{} @@ -8708,84 +8828,98 @@ yyrule151: // .|[ \t\n\r] } c = l.Next() } - fmt.Printf("T_ENCAPSED_AND_WHITESPACE(HEREDOC): %q\n", tb) + + lval.token = string(tb) + return T_ENCAPSED_AND_WHITESPACE goto yystate0 } -yyrule152: // \${VAR_NAME} +yyrule151: // \${VAR_NAME} { - fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_VARIABLE goto yystate0 } -yyrule153: // ->{VAR_NAME} +yyrule152: // ->{VAR_NAME} { - fmt.Printf("T_OBJECT_OPERATOR: %q\n", l.ungetN(len(l.TokenBytes(nil))-2)) + lval.token = string(l.ungetN(len(l.TokenBytes(nil)) - 2)) + return T_OBJECT_OPERATOR goto yystate0 } -yyrule154: // {VAR_NAME} +yyrule153: // {VAR_NAME} { - fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil)) popState() + lval.token = string(l.TokenBytes(nil)) + return T_STRING goto yystate0 } -yyrule155: // \[ +yyrule154: // \[ { - fmt.Println("[") pushState(STRING_VAR_INDEX) + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } -yyrule156: // .|[ \t\n\r] +yyrule155: // .|[ \t\n\r] { l.ungetN(1) popState() goto yystate0 } -yyrule157: // {LNUM}|{HNUM}|{BNUM} +yyrule156: // {LNUM}|{HNUM}|{BNUM} { - fmt.Printf("T_NUM_STRING: %q\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_NUM_STRING goto yystate0 } -yyrule158: // \${VAR_NAME} +yyrule157: // \${VAR_NAME} { - fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_VARIABLE goto yystate0 } -yyrule159: // {VAR_NAME} +yyrule158: // {VAR_NAME} { - fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return T_STRING goto yystate0 } -yyrule160: // \] +yyrule159: // \] { - fmt.Println("\"]\"") popState() popState() + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } -yyrule161: // [ \n\r\t\\'#] +yyrule160: // [ \n\r\t\\'#] { - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", l.ungetN(1)) popState() popState() + lval.token = string(l.TokenBytes(nil)) + return T_ENCAPSED_AND_WHITESPACE goto yystate0 } -yyrule162: // {OPERATORS} +yyrule161: // {OPERATORS} { - fmt.Printf("%q\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } -yyrule163: // . +yyrule162: // . { - fmt.Printf("%q\n", l.TokenBytes(nil)) + lval.token = string(l.TokenBytes(nil)) + return c goto yystate0 } -yyrule164: // {VAR_NAME}[\[\}] +yyrule163: // {VAR_NAME}[\[\}] { - fmt.Printf("T_STRING_VARNAME: %q\n", l.ungetN(1)) popState() pushState(PHP) + lval.token = string(l.ungetN(1)) + return T_STRING_VARNAME goto yystate0 } -yyrule165: // . +yyrule164: // . { l.ungetN(1) popState() @@ -8802,8 +8936,3 @@ yyabort: // no lexem recognized } goto yyAction } - -func main() { - l := newLexer(os.Stdin, os.Stdout, "file.name") - l.Lex() -} diff --git a/lexer.l b/lexer.l index ec00b72..71ee0c4 100644 --- a/lexer.l +++ b/lexer.l @@ -13,7 +13,6 @@ import ( "io" "unicode" "fmt" - "os" "bytes" "github.com/cznic/golex/lex" @@ -83,7 +82,8 @@ func rune2Class(r rune) int { if unicode.IsDigit(r) { return classUnicodeDigit } - return classOther + // return classOther + return -1 } func newLexer(src io.Reader, dst io.Writer, fName string) *lexer { @@ -93,8 +93,6 @@ func newLexer(src io.Reader, dst io.Writer, fName string) *lexer { return &lexer{lx} } -type yySymType struct {} - func (l *lexer) unget(r rune) []byte{ l.Unget(l.Lookahead()) @@ -129,7 +127,7 @@ func (l *lexer) ungetN(n int) []byte{ return buf } -func (l *lexer) Lex() int { // Lex(lval *yySymType) +func (l *lexer) Lex(lval *yySymType) int { // Lex(lval *yySymType) c := l.Enter() %} @@ -153,18 +151,17 @@ NEW_LINE (\r|\n|\r\n) %% c = l.Rule0() - // ([\$]{NCH})* [ \t\n\r]+ . -\<\?php([ \t]|{NEW_LINE}) fmt.Println("T_OPEN_TAG");begin(PHP) -\<\? fmt.Println("T_OPEN_TAG");begin(PHP) -\<\?= fmt.Println("T_OPEN_TAG_WITH_ECHO");begin(PHP) +\<\?php([ \t]|{NEW_LINE}) begin(PHP);//lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG; +\<\? begin(PHP);//lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG; +\<\?= begin(PHP);lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG_WITH_ECHO; -[ \t\n\r]+ fmt.Println("T_WHITESPACE") -\?\>{NEW_LINE}? fmt.Println("T_CLOSE_TAG");begin(INITIAL) +[ \t\n\r]+ //lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE +\?\>{NEW_LINE}? begin(INITIAL);lval.token = string(l.TokenBytes(nil)); return T_CLOSE_TAG; -{DNUM}|{EXPONENT_DNUM} fmt.Println("T_DNUMBER") +{DNUM}|{EXPONENT_DNUM} lval.token = string(l.TokenBytes(nil)); return T_DNUMBER {BNUM} tb := l.TokenBytes(nil) i:=2 @@ -175,15 +172,15 @@ NEW_LINE (\r|\n|\r\n) } } if len(tb) - i < 64 { - fmt.Println("T_LNUMBER") + lval.token = string(l.TokenBytes(nil)); return T_LNUMBER } else { - fmt.Println("T_DNUMBER") + lval.token = string(l.TokenBytes(nil)); return T_DNUMBER } {LNUM} if len(l.TokenBytes(nil)) < 20 { - fmt.Println("T_LNUMBER") + lval.token = string(l.TokenBytes(nil)); return T_LNUMBER } else { - fmt.Println("T_DNUMBER") + lval.token = string(l.TokenBytes(nil)); return T_DNUMBER } {HNUM} tb := l.TokenBytes(nil) @@ -196,143 +193,143 @@ NEW_LINE (\r|\n|\r\n) } length := len(tb) - i if length < 16 || (length == 16 && tb[i] <= '7') { - fmt.Println("T_LNUMBER") + lval.token = string(l.TokenBytes(nil)); return T_LNUMBER } else { - fmt.Println("T_DNUMBER") + lval.token = string(l.TokenBytes(nil)); return T_DNUMBER } -abstract fmt.Println("T_ABSTRACT") -array fmt.Println("T_ARRAY") -as fmt.Println("T_AS") -break fmt.Println("T_BREAK") -callable fmt.Println("T_CALLABLE") -case fmt.Println("T_CASE") -catch fmt.Println("T_CATCH") -class fmt.Println("T_CLASS") -clone fmt.Println("T_CLONE") -const fmt.Println("T_CONST"); -continue fmt.Println("T_CONTINUE"); -declare fmt.Println("T_DECLARE"); -default fmt.Println("T_DEFAULT"); -do fmt.Println("T_DO"); -echo fmt.Println("T_ECHO"); -else fmt.Println("T_ELSE"); -elseif fmt.Println("T_ELSEIF"); -empty fmt.Println("T_EMPTY"); -endfor fmt.Println("T_ENDFOR") -endforeach fmt.Println("T_ENDFOREACH") -endif fmt.Println("T_ENDIF") -endswitch fmt.Println("T_ENDSWITCH") -endwhile fmt.Println("T_ENDWHILE") -eval fmt.Println("T_EVAL") -exit|die fmt.Println("T_EXIT") -extends fmt.Println("T_EXTENDS") -final fmt.Println("T_FINAL") -finally fmt.Println("T_FINALLY") -for fmt.Println("T_FOR") -foreach fmt.Println("T_FOREACH") -function|cfunction fmt.Println("T_FUNCTION") -global fmt.Println("T_GLOBAL") -goto fmt.Println("T_GOTO") -if fmt.Println("T_IF") -isset fmt.Println("T_ISSET") -implements fmt.Println("T_IMPLEMENTS") -instanceof fmt.Println("T_INSTANCEOF") -insteadof fmt.Println("T_INSTEADOF") -interface fmt.Println("T_INTERFACE") -list fmt.Println("T_LIST") -namespace fmt.Println("T_NAMESPACE") -private fmt.Println("T_PRIVATE") -public fmt.Println("T_PUBLIC") -print fmt.Println("T_PRINT") -protected fmt.Println("T_PROTECTED") -return fmt.Println("T_RETURN") -static fmt.Println("T_STATIC") -switch fmt.Println("T_SWITCH") -throw fmt.Println("T_THROW") -trait fmt.Println("T_TRAIT") -try fmt.Println("T_TRY") -unset fmt.Println("T_UNSET") -use fmt.Println("T_USE") -var fmt.Println("T_VAR") -while fmt.Println("T_WHILE") -yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] fmt.Println("T_YIELD_FROM") -yield fmt.Println("T_YIELD") -include fmt.Println("T_INCLUDE") -include_once fmt.Println("T_INCLUDE_ONCE") -require fmt.Println("T_REQUIRE") -require_once fmt.Println("T_REQUIRE_ONCE") -__CLASS__ fmt.Println("T_CLASS_C") -__DIR__ fmt.Println("T_DIR") -__FILE__ fmt.Println("T_FILE") -__FUNCTION__ fmt.Println("T_FUNC_C") -__LINE__ fmt.Println("T_LINE") -__NAMESPACE__ fmt.Println("T_NS_C") -__METHOD__ fmt.Println("T_METHOD_C") -__TRAIT__ fmt.Println("T_TRAIT_C") -__halt_compiler fmt.Println("T_HALT_COMPILER") -\([ \t]*array[ \t]*\) fmt.Println("T_ARRAY_CAST") -\([ \t]*(bool|boolean)[ \t]*\) fmt.Println("T_BOOL_CAST") -\([ \t]*(real|double|float)[ \t]*\) fmt.Println("T_DOUBLE_CAST") -\([ \t]*(int|integer)[ \t]*\) fmt.Println("T_INT_CAST") -\([ \t]*object[ \t]*\) fmt.Println("T_OBJECT_CAST") -\([ \t]*string[ \t]*\) fmt.Println("T_STRING_CAST") -\([ \t]*unset[ \t]*\) fmt.Println("T_UNSET_CAST") -new fmt.Println("T_NEW") -and fmt.Println("T_LOGICAL_AND") -or fmt.Println("T_LOGICAL_OR") -xor fmt.Println("T_LOGICAL_XOR") -\\ fmt.Println("T_NS_SEPARATOR") -\.\.\. fmt.Println("T_ELLIPSIS"); -:: fmt.Println("T_PAAMAYIM_NEKUDOTAYIM"); // T_DOUBLE_COLON -&& fmt.Println("T_BOOLEAN_AND") -\|\| fmt.Println("T_BOOLEAN_OR") -&= fmt.Println("T_AND_EQUAL") -\|= fmt.Println("T_OR_EQUAL") -\.= fmt.Println("T_CONCAT_EQUAL"); -\*= fmt.Println("T_MUL_EQUAL") -\*\*= fmt.Println("T_POW_EQUAL") -[/]= fmt.Println("T_DIV_EQUAL"); -\+= fmt.Println("T_PLUS_EQUAL") --= fmt.Println("T_MINUS_EQUAL") -\^= fmt.Println("T_XOR_EQUAL") -%= fmt.Println("T_MOD_EQUAL") --- fmt.Println("T_DEC"); -\+\+ fmt.Println("T_INC") -=> fmt.Println("T_DOUBLE_ARROW"); -\<=\> fmt.Println("T_SPACESHIP") -\!=|\<\> fmt.Println("T_IS_NOT_EQUAL") -\!== fmt.Println("T_IS_NOT_IDENTICAL") -== fmt.Println("T_IS_EQUAL") -=== fmt.Println("T_IS_IDENTICAL") -\<\<= fmt.Println("T_SL_EQUAL") -\>\>= fmt.Println("T_SR_EQUAL") -\>= fmt.Println("T_IS_GREATER_OR_EQUAL") -\<= fmt.Println("T_IS_SMALLER_OR_EQUAL") -\*\* fmt.Println("T_POW") -\<\< fmt.Println("T_SL") -\>\> fmt.Println("T_SR") -(#|[/][/]){NEW_LINE} fmt.Println("T_COMMENT"); // TODO: handle ?> -[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] fmt.Println("T_COMMENT"); // TODO: handle ?> -[/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/] fmt.Println("T_DOC_COMMENT"); // TODO: handle ?> -'[^']*(\\')*' fmt.Println("T_CONSTANT_ENCAPSED_STRING") -{OPERATORS} fmt.Printf("%s\n", l.TokenBytes(nil)); +abstract lval.token = string(l.TokenBytes(nil)); return T_ABSTRACT +array lval.token = string(l.TokenBytes(nil)); return T_ARRAY +as lval.token = string(l.TokenBytes(nil)); return T_AS +break lval.token = string(l.TokenBytes(nil)); return T_BREAK +callable lval.token = string(l.TokenBytes(nil)); return T_CALLABLE +case lval.token = string(l.TokenBytes(nil)); return T_CASE +catch lval.token = string(l.TokenBytes(nil)); return T_CATCH +class lval.token = string(l.TokenBytes(nil)); return T_CLASS +clone lval.token = string(l.TokenBytes(nil)); return T_CLONE +const lval.token = string(l.TokenBytes(nil)); return T_CONST; +continue lval.token = string(l.TokenBytes(nil)); return T_CONTINUE; +declare lval.token = string(l.TokenBytes(nil)); return T_DECLARE; +default lval.token = string(l.TokenBytes(nil)); return T_DEFAULT; +do lval.token = string(l.TokenBytes(nil)); return T_DO; +echo lval.token = string(l.TokenBytes(nil)); return T_ECHO; +else lval.token = string(l.TokenBytes(nil)); return T_ELSE; +elseif lval.token = string(l.TokenBytes(nil)); return T_ELSEIF; +empty lval.token = string(l.TokenBytes(nil)); return T_EMPTY; +endfor lval.token = string(l.TokenBytes(nil)); return T_ENDFOR +endforeach lval.token = string(l.TokenBytes(nil)); return T_ENDFOREACH +endif lval.token = string(l.TokenBytes(nil)); return T_ENDIF +endswitch lval.token = string(l.TokenBytes(nil)); return T_ENDSWITCH +endwhile lval.token = string(l.TokenBytes(nil)); return T_ENDWHILE +eval lval.token = string(l.TokenBytes(nil)); return T_EVAL +exit|die lval.token = string(l.TokenBytes(nil)); return T_EXIT +extends lval.token = string(l.TokenBytes(nil)); return T_EXTENDS +final lval.token = string(l.TokenBytes(nil)); return T_FINAL +finally lval.token = string(l.TokenBytes(nil)); return T_FINALLY +for lval.token = string(l.TokenBytes(nil)); return T_FOR +foreach lval.token = string(l.TokenBytes(nil)); return T_FOREACH +function|cfunction lval.token = string(l.TokenBytes(nil)); return T_FUNCTION +global lval.token = string(l.TokenBytes(nil)); return T_GLOBAL +goto lval.token = string(l.TokenBytes(nil)); return T_GOTO +if lval.token = string(l.TokenBytes(nil)); return T_IF +isset lval.token = string(l.TokenBytes(nil)); return T_ISSET +implements lval.token = string(l.TokenBytes(nil)); return T_IMPLEMENTS +instanceof lval.token = string(l.TokenBytes(nil)); return T_INSTANCEOF +insteadof lval.token = string(l.TokenBytes(nil)); return T_INSTEADOF +interface lval.token = string(l.TokenBytes(nil)); return T_INTERFACE +list lval.token = string(l.TokenBytes(nil)); return T_LIST +namespace lval.token = string(l.TokenBytes(nil)); return T_NAMESPACE +private lval.token = string(l.TokenBytes(nil)); return T_PRIVATE +public lval.token = string(l.TokenBytes(nil)); return T_PUBLIC +print lval.token = string(l.TokenBytes(nil)); return T_PRINT +protected lval.token = string(l.TokenBytes(nil)); return T_PROTECTED +return lval.token = string(l.TokenBytes(nil)); return T_RETURN +static lval.token = string(l.TokenBytes(nil)); return T_STATIC +switch lval.token = string(l.TokenBytes(nil)); return T_SWITCH +throw lval.token = string(l.TokenBytes(nil)); return T_THROW +trait lval.token = string(l.TokenBytes(nil)); return T_TRAIT +try lval.token = string(l.TokenBytes(nil)); return T_TRY +unset lval.token = string(l.TokenBytes(nil)); return T_UNSET +use lval.token = string(l.TokenBytes(nil)); return T_USE +var lval.token = string(l.TokenBytes(nil)); return T_VAR +while lval.token = string(l.TokenBytes(nil)); return T_WHILE +yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] lval.token = string(l.TokenBytes(nil)); return T_YIELD_FROM +yield lval.token = string(l.TokenBytes(nil)); return T_YIELD +include lval.token = string(l.TokenBytes(nil)); return T_INCLUDE +include_once lval.token = string(l.TokenBytes(nil)); return T_INCLUDE_ONCE +require lval.token = string(l.TokenBytes(nil)); return T_REQUIRE +require_once lval.token = string(l.TokenBytes(nil)); return T_REQUIRE_ONCE +__CLASS__ lval.token = string(l.TokenBytes(nil)); return T_CLASS_C +__DIR__ lval.token = string(l.TokenBytes(nil)); return T_DIR +__FILE__ lval.token = string(l.TokenBytes(nil)); return T_FILE +__FUNCTION__ lval.token = string(l.TokenBytes(nil)); return T_FUNC_C +__LINE__ lval.token = string(l.TokenBytes(nil)); return T_LINE +__NAMESPACE__ lval.token = string(l.TokenBytes(nil)); return T_NS_C +__METHOD__ lval.token = string(l.TokenBytes(nil)); return T_METHOD_C +__TRAIT__ lval.token = string(l.TokenBytes(nil)); return T_TRAIT_C +__halt_compiler lval.token = string(l.TokenBytes(nil)); return T_HALT_COMPILER +\([ \t]*array[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_ARRAY_CAST +\([ \t]*(bool|boolean)[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_BOOL_CAST +\([ \t]*(real|double|float)[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_DOUBLE_CAST +\([ \t]*(int|integer)[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_INT_CAST +\([ \t]*object[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_OBJECT_CAST +\([ \t]*string[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_STRING_CAST +\([ \t]*unset[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_UNSET_CAST +new lval.token = string(l.TokenBytes(nil)); return T_NEW +and lval.token = string(l.TokenBytes(nil)); return T_LOGICAL_AND +or lval.token = string(l.TokenBytes(nil)); return T_LOGICAL_OR +xor lval.token = string(l.TokenBytes(nil)); return T_LOGICAL_XOR +\\ lval.token = string(l.TokenBytes(nil)); return T_NS_SEPARATOR +\.\.\. lval.token = string(l.TokenBytes(nil)); return T_ELLIPSIS; +:: lval.token = string(l.TokenBytes(nil)); return T_PAAMAYIM_NEKUDOTAYIM; // T_DOUBLE_COLON +&& lval.token = string(l.TokenBytes(nil)); return T_BOOLEAN_AND +\|\| lval.token = string(l.TokenBytes(nil)); return T_BOOLEAN_OR +&= lval.token = string(l.TokenBytes(nil)); return T_AND_EQUAL +\|= lval.token = string(l.TokenBytes(nil)); return T_OR_EQUAL +\.= lval.token = string(l.TokenBytes(nil)); return T_CONCAT_EQUAL; +\*= lval.token = string(l.TokenBytes(nil)); return T_MUL_EQUAL +\*\*= lval.token = string(l.TokenBytes(nil)); return T_POW_EQUAL +[/]= lval.token = string(l.TokenBytes(nil)); return T_DIV_EQUAL; +\+= lval.token = string(l.TokenBytes(nil)); return T_PLUS_EQUAL +-= lval.token = string(l.TokenBytes(nil)); return T_MINUS_EQUAL +\^= lval.token = string(l.TokenBytes(nil)); return T_XOR_EQUAL +%= lval.token = string(l.TokenBytes(nil)); return T_MOD_EQUAL +-- lval.token = string(l.TokenBytes(nil)); return T_DEC; +\+\+ lval.token = string(l.TokenBytes(nil)); return T_INC +=> lval.token = string(l.TokenBytes(nil)); return T_DOUBLE_ARROW; +\<=\> lval.token = string(l.TokenBytes(nil)); return T_SPACESHIP +\!=|\<\> lval.token = string(l.TokenBytes(nil)); return T_IS_NOT_EQUAL +\!== lval.token = string(l.TokenBytes(nil)); return T_IS_NOT_IDENTICAL +== lval.token = string(l.TokenBytes(nil)); return T_IS_EQUAL +=== lval.token = string(l.TokenBytes(nil)); return T_IS_IDENTICAL +\<\<= lval.token = string(l.TokenBytes(nil)); return T_SL_EQUAL +\>\>= lval.token = string(l.TokenBytes(nil)); return T_SR_EQUAL +\>= lval.token = string(l.TokenBytes(nil)); return T_IS_GREATER_OR_EQUAL +\<= lval.token = string(l.TokenBytes(nil)); return T_IS_SMALLER_OR_EQUAL +\*\* lval.token = string(l.TokenBytes(nil)); return T_POW +\<\< lval.token = string(l.TokenBytes(nil)); return T_SL +\>\> lval.token = string(l.TokenBytes(nil)); return T_SR +(#|[/][/]){NEW_LINE} lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?> +[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?> +[/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/] lval.token = string(l.TokenBytes(nil)); return T_DOC_COMMENT; // TODO: handle ?> +'[^']*(\\')*' lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING +{OPERATORS} lval.token = string(l.TokenBytes(nil)); return c -\{ fmt.Println("{"); pushState(PHP); -\} fmt.Println("}"); popState(); -\${VAR_NAME} fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil)) -{VAR_NAME} fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil)); +\{ pushState(PHP); lval.token = string(l.TokenBytes(nil)); return c +\} popState(); lval.token = string(l.TokenBytes(nil)); return c +\${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE +{VAR_NAME} if c == -1 {fmt.Printf("%q\n", string(l.TokenBytes(nil)))};lval.token = string(l.TokenBytes(nil)); return T_STRING --> fmt.Println("T_OBJECT_OPERATOR");begin(PROPERTY) -[ \t\n\r]+ fmt.Println("T_WHITESPACE"); --> fmt.Println("T_OBJECT_OPERATOR"); -{VAR_NAME} fmt.Println("T_STRING");begin(PHP) +-> begin(PROPERTY);lval.token = string(l.TokenBytes(nil)); return T_OBJECT_OPERATOR; +[ \t\n\r]+ lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE; +-> lval.token = string(l.TokenBytes(nil)); return T_OBJECT_OPERATOR; +{VAR_NAME} begin(PHP);lval.token = string(l.TokenBytes(nil)); return T_STRING; . l.ungetN(1);begin(PHP) -[\']([^\\\']*([\\][\'])*)*[\'] fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil)); +[\']([^\\\']*([\\][\'])*)*[\'] lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING; -` fmt.Println("`");begin(BACKQUOTE) -` fmt.Println("`");begin(PHP) +` begin(BACKQUOTE); lval.token = string(l.TokenBytes(nil)); return c +` begin(PHP); lval.token = string(l.TokenBytes(nil)); return c [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE} tb := l.TokenBytes(nil) @@ -392,7 +389,7 @@ NEW_LINE (\r|\n|\r\n) l.ungetN(ungetCnt) - fmt.Printf("T_START_HEREDOC: %q\n", tb); + lval.token = string(tb); return T_START_HEREDOC . searchLabel := []byte{} @@ -418,21 +415,22 @@ NEW_LINE (\r|\n|\r\n) c = l.Next() } - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", tb); + lval.token = string(tb); return T_ENCAPSED_AND_WHITESPACE -{VAR_NAME}\; fmt.Printf("T_END_HEREDOC: %q\n", l.ungetN(1));begin(PHP) -. fmt.Printf("ERROR HEREDOC: %q\n", l.ungetN(1)); +{VAR_NAME}\; begin(PHP);lval.token = string(l.ungetN(1)); return T_END_HEREDOC [b]?[\"] binPrefix := l.TokenBytes(nil)[0] == 'b' - beginString := func() { + beginString := func() int { cnt := 1; if (binPrefix) {cnt = 2} l.ungetN(len(l.TokenBytes(nil))-cnt) tokenBytes := l.TokenBytes(nil)[:cnt] fmt.Println(string(tokenBytes)) // TODO: RETURN TOKEN pushState(STRING) + + lval.token = string(tokenBytes); return rune2Class('"') } F:for { @@ -443,13 +441,13 @@ NEW_LINE (\r|\n|\r\n) switch c { case '"' : c = l.Next(); - fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil)); + lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING break F; case '$': c = l.Next(); if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' { - beginString() + return beginString() break F; } l.ungetN(0) @@ -457,7 +455,7 @@ NEW_LINE (\r|\n|\r\n) case '{': c = l.Next(); if rune(c) == '$' { - beginString() + return beginString() break F; } l.ungetN(0) @@ -469,9 +467,9 @@ NEW_LINE (\r|\n|\r\n) c = l.Next() } -\" fmt.Println("\""); popState() -\{\$ fmt.Printf("T_CURLY_OPEN: %q\n", l.ungetN(1));pushState(PHP) -\$\{ fmt.Printf("T_DOLLAR_OPEN_CURLY_BRACES: %q\n", l.TokenBytes(nil));pushState(STRING_VAR_NAME) +\" popState(); lval.token = "\""; return c +\{\$ lval.token = string(l.ungetN(1)); return T_CURLY_OPEN +\$\{ pushState(STRING_VAR_NAME);lval.token = string(l.TokenBytes(nil)); return T_DOLLAR_OPEN_CURLY_BRACES \$ l.ungetN(1);pushState(STRING_VAR) . F1:for { @@ -481,7 +479,7 @@ NEW_LINE (\r|\n|\r\n) switch c { case '"' : - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", l.TokenBytes(nil)); + lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE break F1; case '$': @@ -489,7 +487,7 @@ NEW_LINE (\r|\n|\r\n) if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' { l.ungetN(1) tb := l.TokenBytes(nil) - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]); + lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE break F1; } l.ungetN(0) @@ -499,7 +497,7 @@ NEW_LINE (\r|\n|\r\n) if rune(c) == '$' { l.ungetN(1) tb := l.TokenBytes(nil) - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]); + lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE break F1; } l.ungetN(0) @@ -519,7 +517,7 @@ NEW_LINE (\r|\n|\r\n) switch c { case '`' : - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", l.TokenBytes(nil)); + lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE break F2; case '$': @@ -527,7 +525,7 @@ NEW_LINE (\r|\n|\r\n) if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' { l.ungetN(1) tb := l.TokenBytes(nil) - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]); + lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE break F2; } l.ungetN(0) @@ -537,7 +535,7 @@ NEW_LINE (\r|\n|\r\n) if rune(c) == '$' { l.ungetN(1) tb := l.TokenBytes(nil) - fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]); + lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE break F2; } l.ungetN(0) @@ -597,32 +595,27 @@ NEW_LINE (\r|\n|\r\n) c = l.Next() } + + lval.token = string(tb); return T_ENCAPSED_AND_WHITESPACE - fmt.Printf("T_ENCAPSED_AND_WHITESPACE(HEREDOC): %q\n", tb); - -\${VAR_NAME} fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil)); -->{VAR_NAME} fmt.Printf("T_OBJECT_OPERATOR: %q\n", l.ungetN(len(l.TokenBytes(nil))-2)); -{VAR_NAME} fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil));popState() -\[ fmt.Println("["); pushState(STRING_VAR_INDEX) +\${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE +->{VAR_NAME} lval.token = string(l.ungetN(len(l.TokenBytes(nil))-2)); return T_OBJECT_OPERATOR +{VAR_NAME} popState();lval.token = string(l.TokenBytes(nil)); return T_STRING +\[ pushState(STRING_VAR_INDEX);lval.token = string(l.TokenBytes(nil)); return c .|[ \t\n\r] l.ungetN(1);popState() -{LNUM}|{HNUM}|{BNUM} fmt.Printf("T_NUM_STRING: %q\n", l.TokenBytes(nil)); -\${VAR_NAME} fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil)); -{VAR_NAME} fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil)); -\] fmt.Println("\"]\""); popState(); popState() -[ \n\r\t\\'#] fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", l.ungetN(1)); popState(); popState() -{OPERATORS} fmt.Printf("%q\n", l.TokenBytes(nil)); -. fmt.Printf("%q\n", l.TokenBytes(nil)); +{LNUM}|{HNUM}|{BNUM} lval.token = string(l.TokenBytes(nil)); return T_NUM_STRING +\${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE +{VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_STRING +\] popState(); popState();lval.token = string(l.TokenBytes(nil)); return c +[ \n\r\t\\'#] popState(); popState();lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE +{OPERATORS} lval.token = string(l.TokenBytes(nil)); return c +. lval.token = string(l.TokenBytes(nil)); return c -{VAR_NAME}[\[\}] fmt.Printf("T_STRING_VARNAME: %q\n", l.ungetN(1));popState();pushState(PHP) +{VAR_NAME}[\[\}] popState();pushState(PHP);lval.token = string(l.ungetN(1)); return T_STRING_VARNAME . l.ungetN(1);popState();pushState(PHP) %% if c, ok := l.Abort(); ok { return int(c) } goto yyAction -} - -func main() { - l := newLexer(os.Stdin, os.Stdout, "file.name") - l.Lex(); } \ No newline at end of file diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..305873e --- /dev/null +++ b/parser.go @@ -0,0 +1,828 @@ +package main + +import __yyfmt__ "fmt" + +import ( + "bytes" + "fmt" + "io" + "os" +) + +type node struct { + name string + children []node +} + +func (n node) String() string { + buf := new(bytes.Buffer) + n.print(buf, " ") + return buf.String() +} + +func (n node) print(out io.Writer, indent string) { + fmt.Fprintf(out, "\n%v%v", indent, n.name) + for _, nn := range n.children { + nn.print(out, indent+" ") + } +} + +func Node(name string) node { return node{name: name} } +func (n node) append(nn ...node) node { n.children = append(n.children, nn...); return n } + +type yySymType struct { + yys int + node node + token string +} + +const T_INCLUDE = 57346 +const T_INCLUDE_ONCE = 57347 +const T_EVAL = 57348 +const T_REQUIRE = 57349 +const T_REQUIRE_ONCE = 57350 +const T_LOGICAL_OR = 57351 +const T_LOGICAL_XOR = 57352 +const T_LOGICAL_AND = 57353 +const T_PRINT = 57354 +const T_YIELD = 57355 +const T_DOUBLE_ARROW = 57356 +const T_YIELD_FROM = 57357 +const T_PLUS_EQUAL = 57358 +const T_MINUS_EQUAL = 57359 +const T_MUL_EQUAL = 57360 +const T_DIV_EQUAL = 57361 +const T_CONCAT_EQUAL = 57362 +const T_MOD_EQUAL = 57363 +const T_AND_EQUAL = 57364 +const T_OR_EQUAL = 57365 +const T_XOR_EQUAL = 57366 +const T_SL_EQUAL = 57367 +const T_SR_EQUAL = 57368 +const T_POW_EQUAL = 57369 +const T_COALESCE = 57370 +const T_BOOLEAN_OR = 57371 +const T_BOOLEAN_AND = 57372 +const T_IS_EQUAL = 57373 +const T_IS_NOT_EQUAL = 57374 +const T_IS_IDENTICAL = 57375 +const T_IS_NOT_IDENTICAL = 57376 +const T_SPACESHIP = 57377 +const T_IS_SMALLER_OR_EQUAL = 57378 +const T_IS_GREATER_OR_EQUAL = 57379 +const T_SL = 57380 +const T_SR = 57381 +const T_INSTANCEOF = 57382 +const T_INC = 57383 +const T_DEC = 57384 +const T_INT_CAST = 57385 +const T_DOUBLE_CAST = 57386 +const T_STRING_CAST = 57387 +const T_ARRAY_CAST = 57388 +const T_OBJECT_CAST = 57389 +const T_BOOL_CAST = 57390 +const T_UNSET_CAST = 57391 +const T_POW = 57392 +const T_NEW = 57393 +const T_CLONE = 57394 +const T_ELSEIF = 57395 +const T_ELSE = 57396 +const T_ENDIF = 57397 +const T_STATIC = 57398 +const T_ABSTRACT = 57399 +const T_FINAL = 57400 +const T_PRIVATE = 57401 +const T_PROTECTED = 57402 +const T_PUBLIC = 57403 +const T_EXIT = 57404 +const T_IF = 57405 +const T_LNUMBER = 57406 +const T_DNUMBER = 57407 +const T_STRING = 57408 +const T_STRING_VARNAME = 57409 +const T_VARIABLE = 57410 +const T_NUM_STRING = 57411 +const T_INLINE_HTML = 57412 +const T_CHARACTER = 57413 +const T_BAD_CHARACTER = 57414 +const T_ENCAPSED_AND_WHITESPACE = 57415 +const T_CONSTANT_ENCAPSED_STRING = 57416 +const T_ECHO = 57417 +const T_DO = 57418 +const T_WHILE = 57419 +const T_ENDWHILE = 57420 +const T_FOR = 57421 +const T_ENDFOR = 57422 +const T_FOREACH = 57423 +const T_ENDFOREACH = 57424 +const T_DECLARE = 57425 +const T_ENDDECLARE = 57426 +const T_AS = 57427 +const T_SWITCH = 57428 +const T_ENDSWITCH = 57429 +const T_CASE = 57430 +const T_DEFAULT = 57431 +const T_BREAK = 57432 +const T_CONTINUE = 57433 +const T_GOTO = 57434 +const T_FUNCTION = 57435 +const T_CONST = 57436 +const T_RETURN = 57437 +const T_TRY = 57438 +const T_CATCH = 57439 +const T_FINALLY = 57440 +const T_THROW = 57441 +const T_USE = 57442 +const T_INSTEADOF = 57443 +const T_GLOBAL = 57444 +const T_VAR = 57445 +const T_UNSET = 57446 +const T_ISSET = 57447 +const T_EMPTY = 57448 +const T_HALT_COMPILER = 57449 +const T_CLASS = 57450 +const T_TRAIT = 57451 +const T_INTERFACE = 57452 +const T_EXTENDS = 57453 +const T_IMPLEMENTS = 57454 +const T_OBJECT_OPERATOR = 57455 +const T_LIST = 57456 +const T_ARRAY = 57457 +const T_CALLABLE = 57458 +const T_CLASS_C = 57459 +const T_TRAIT_C = 57460 +const T_METHOD_C = 57461 +const T_FUNC_C = 57462 +const T_LINE = 57463 +const T_FILE = 57464 +const T_COMMENT = 57465 +const T_DOC_COMMENT = 57466 +const T_OPEN_TAG = 57467 +const T_OPEN_TAG_WITH_ECHO = 57468 +const T_CLOSE_TAG = 57469 +const T_WHITESPACE = 57470 +const T_START_HEREDOC = 57471 +const T_END_HEREDOC = 57472 +const T_DOLLAR_OPEN_CURLY_BRACES = 57473 +const T_CURLY_OPEN = 57474 +const T_PAAMAYIM_NEKUDOTAYIM = 57475 +const T_NAMESPACE = 57476 +const T_NS_C = 57477 +const T_DIR = 57478 +const T_NS_SEPARATOR = 57479 +const T_ELLIPSIS = 57480 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "T_INCLUDE", + "T_INCLUDE_ONCE", + "T_EVAL", + "T_REQUIRE", + "T_REQUIRE_ONCE", + "','", + "T_LOGICAL_OR", + "T_LOGICAL_XOR", + "T_LOGICAL_AND", + "T_PRINT", + "T_YIELD", + "T_DOUBLE_ARROW", + "T_YIELD_FROM", + "'='", + "T_PLUS_EQUAL", + "T_MINUS_EQUAL", + "T_MUL_EQUAL", + "T_DIV_EQUAL", + "T_CONCAT_EQUAL", + "T_MOD_EQUAL", + "T_AND_EQUAL", + "T_OR_EQUAL", + "T_XOR_EQUAL", + "T_SL_EQUAL", + "T_SR_EQUAL", + "T_POW_EQUAL", + "'?'", + "':'", + "T_COALESCE", + "T_BOOLEAN_OR", + "T_BOOLEAN_AND", + "'|'", + "'^'", + "'&'", + "T_IS_EQUAL", + "T_IS_NOT_EQUAL", + "T_IS_IDENTICAL", + "T_IS_NOT_IDENTICAL", + "T_SPACESHIP", + "'<'", + "T_IS_SMALLER_OR_EQUAL", + "'>'", + "T_IS_GREATER_OR_EQUAL", + "T_SL", + "T_SR", + "'+'", + "'-'", + "'.'", + "'*'", + "'/'", + "'%'", + "'!'", + "T_INSTANCEOF", + "'~'", + "T_INC", + "T_DEC", + "T_INT_CAST", + "T_DOUBLE_CAST", + "T_STRING_CAST", + "T_ARRAY_CAST", + "T_OBJECT_CAST", + "T_BOOL_CAST", + "T_UNSET_CAST", + "'@'", + "T_POW", + "'['", + "T_NEW", + "T_CLONE", + "T_ELSEIF", + "T_ELSE", + "T_ENDIF", + "T_STATIC", + "T_ABSTRACT", + "T_FINAL", + "T_PRIVATE", + "T_PROTECTED", + "T_PUBLIC", + "T_EXIT", + "T_IF", + "T_LNUMBER", + "T_DNUMBER", + "T_STRING", + "T_STRING_VARNAME", + "T_VARIABLE", + "T_NUM_STRING", + "T_INLINE_HTML", + "T_CHARACTER", + "T_BAD_CHARACTER", + "T_ENCAPSED_AND_WHITESPACE", + "T_CONSTANT_ENCAPSED_STRING", + "T_ECHO", + "T_DO", + "T_WHILE", + "T_ENDWHILE", + "T_FOR", + "T_ENDFOR", + "T_FOREACH", + "T_ENDFOREACH", + "T_DECLARE", + "T_ENDDECLARE", + "T_AS", + "T_SWITCH", + "T_ENDSWITCH", + "T_CASE", + "T_DEFAULT", + "T_BREAK", + "T_CONTINUE", + "T_GOTO", + "T_FUNCTION", + "T_CONST", + "T_RETURN", + "T_TRY", + "T_CATCH", + "T_FINALLY", + "T_THROW", + "T_USE", + "T_INSTEADOF", + "T_GLOBAL", + "T_VAR", + "T_UNSET", + "T_ISSET", + "T_EMPTY", + "T_HALT_COMPILER", + "T_CLASS", + "T_TRAIT", + "T_INTERFACE", + "T_EXTENDS", + "T_IMPLEMENTS", + "T_OBJECT_OPERATOR", + "T_LIST", + "T_ARRAY", + "T_CALLABLE", + "T_CLASS_C", + "T_TRAIT_C", + "T_METHOD_C", + "T_FUNC_C", + "T_LINE", + "T_FILE", + "T_COMMENT", + "T_DOC_COMMENT", + "T_OPEN_TAG", + "T_OPEN_TAG_WITH_ECHO", + "T_CLOSE_TAG", + "T_WHITESPACE", + "T_START_HEREDOC", + "T_END_HEREDOC", + "T_DOLLAR_OPEN_CURLY_BRACES", + "T_CURLY_OPEN", + "T_PAAMAYIM_NEKUDOTAYIM", + "T_NAMESPACE", + "T_NS_C", + "T_DIR", + "T_NS_SEPARATOR", + "T_ELLIPSIS", +} +var yyStatenames = [...]string{} + +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 + +const src = `= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] + } + } + return __yyfmt__.Sprintf("tok-%v", c) +} + +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } + } + return __yyfmt__.Sprintf("state-%v", s) +} + +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 + + if !yyErrorVerbose { + return "syntax error" + } + + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg + } + } + + res := "syntax error: unexpected " + yyTokname(lookAhead) + + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) + + // Look for shiftable tokens. + base := yyPact[state] + for tok := TOKSTART; tok-1 < len(yyToknames); tok++ { + if n := base + tok; n >= 0 && n < yyLast && yyChk[yyAct[n]] == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + } + + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || yyExca[i+1] != state { + i += 2 + } + + // Look for tokens that we accept or reduce. + for i += 2; yyExca[i] >= 0; i += 2 { + tok := yyExca[i] + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + + // If the default action is to accept or reduce, give up. + if yyExca[i+1] != 0 { + return res + } + } + + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) + } + return res +} + +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = yyTok1[0] + goto out + } + if char < len(yyTok1) { + token = yyTok1[char] + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = yyTok2[char-yyPrivate] + goto out + } + } + for i := 0; i < len(yyTok3); i += 2 { + token = yyTok3[i+0] + if token == char { + token = yyTok3[i+1] + goto out + } + } + +out: + if token == 0 { + token = yyTok2[1] /* unknown char */ + } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) + } + return char, token +} + +func yyParse(yylex yyLexer) int { + return yyNewParser().Parse(yylex) +} + +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. + yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) + } + + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate + +yynewstate: + yyn = yyPact[yystate] + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = yyAct[yyn] + if yyChk[yyn] == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- + } + goto yystack + } + +yydefault: + /* default state action */ + yyn = yyDef[yystate] + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && yyExca[xi+1] == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = yyExca[xi+0] + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = yyExca[xi+1] + if yyn < 0 { + goto ret0 + } + } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = yyPact[yyS[yyp].yys] + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = yyAct[yyn] /* simulate a shift of "error" */ + if yyChk[yystate] == yyErrCode { + goto yystack + } + } + + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... abort */ + goto ret1 + + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } + } + + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } + + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= yyR2[yyn] + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. + if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = yyR1[yyn] + yyg := yyPgo[yyn] + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = yyAct[yyg] + } else { + yystate = yyAct[yyj] + if yyChk[yystate] != -yyn { + yystate = yyAct[yyg] + } + } + // dummy call; replaced with literal code + switch yynt { + + case 1: + yyDollar = yyS[yypt-1 : yypt+1] + { + fmt.Println(yyDollar[1].node) + } + case 76: + yyDollar = yyS[yypt-1 : yypt+1] + { + yyVAL.node = Node("identifier") + } + case 77: + yyDollar = yyS[yypt-1 : yypt+1] + { + yyVAL.node = Node("reserved") + } + } + goto yystack /* stack new state and value */ +} diff --git a/parser.y b/parser.y new file mode 100644 index 0000000..c7ff86b --- /dev/null +++ b/parser.y @@ -0,0 +1,193 @@ +%{ +package main + +import ( + "bytes" + "fmt" + "os" + "io" +) + +type node struct { + name string + children []node +} + +func (n node) String() string { + buf := new(bytes.Buffer) + n.print(buf, " ") + return buf.String() +} + +func (n node) print(out io.Writer, indent string) { + fmt.Fprintf(out, "\n%v%v", indent, n.name) + for _, nn := range n.children { nn.print(out, indent + " ") } +} + +func Node(name string) node { return node{name: name} } +func (n node) append(nn...node) node { n.children = append(n.children, nn...); return n } + +%} + +%union{ + node node + token string +} + +%left T_INCLUDE T_INCLUDE_ONCE T_EVAL T_REQUIRE T_REQUIRE_ONCE +%left ',' +%left T_LOGICAL_OR +%left T_LOGICAL_XOR +%left T_LOGICAL_AND +%right T_PRINT +%right T_YIELD +%right T_DOUBLE_ARROW +%right T_YIELD_FROM +%left '=' T_PLUS_EQUAL T_MINUS_EQUAL T_MUL_EQUAL T_DIV_EQUAL T_CONCAT_EQUAL T_MOD_EQUAL T_AND_EQUAL T_OR_EQUAL T_XOR_EQUAL T_SL_EQUAL T_SR_EQUAL T_POW_EQUAL +%left '?' ':' +%right T_COALESCE +%left T_BOOLEAN_OR +%left T_BOOLEAN_AND +%left '|' +%left '^' +%left '&' +%nonassoc T_IS_EQUAL T_IS_NOT_EQUAL T_IS_IDENTICAL T_IS_NOT_IDENTICAL T_SPACESHIP +%nonassoc '<' T_IS_SMALLER_OR_EQUAL '>' T_IS_GREATER_OR_EQUAL +%left T_SL T_SR +%left '+' '-' '.' +%left '*' '/' '%' +%right '!' +%nonassoc T_INSTANCEOF +%right '~' T_INC T_DEC T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST T_BOOL_CAST T_UNSET_CAST '@' +%right T_POW +%right '[' +%nonassoc T_NEW T_CLONE +%left T_ELSEIF +%left T_ELSE +%left T_ENDIF +%right T_STATIC T_ABSTRACT T_FINAL T_PRIVATE T_PROTECTED T_PUBLIC + +%token T_EXIT +%token T_IF +%token T_LNUMBER +%token T_DNUMBER +%token T_STRING +%token T_STRING_VARNAME +%token T_VARIABLE +%token T_NUM_STRING +%token T_INLINE_HTML +%token T_CHARACTER +%token T_BAD_CHARACTER +%token T_ENCAPSED_AND_WHITESPACE +%token T_CONSTANT_ENCAPSED_STRING +%token T_ECHO +%token T_DO +%token T_WHILE +%token T_ENDWHILE +%token T_FOR +%token T_ENDFOR +%token T_FOREACH +%token T_ENDFOREACH +%token T_DECLARE +%token T_ENDDECLARE +%token T_AS +%token T_SWITCH +%token T_ENDSWITCH +%token T_CASE +%token T_DEFAULT +%token T_BREAK +%token T_CONTINUE +%token T_GOTO +%token T_FUNCTION +%token T_CONST +%token T_RETURN +%token T_TRY +%token T_CATCH +%token T_FINALLY +%token T_THROW +%token T_USE +%token T_INSTEADOF +%token T_GLOBAL +%token T_VAR +%token T_UNSET +%token T_ISSET +%token T_EMPTY +%token T_HALT_COMPILER +%token T_CLASS +%token T_TRAIT +%token T_INTERFACE +%token T_EXTENDS +%token T_IMPLEMENTS +%token T_OBJECT_OPERATOR +%token T_DOUBLE_ARROW +%token T_LIST +%token T_ARRAY +%token T_CALLABLE +%token T_CLASS_C +%token T_TRAIT_C +%token T_METHOD_C +%token T_FUNC_C +%token T_LINE +%token T_FILE +%token T_COMMENT +%token T_DOC_COMMENT +%token T_OPEN_TAG +%token T_OPEN_TAG_WITH_ECHO +%token T_CLOSE_TAG +%token T_WHITESPACE +%token T_START_HEREDOC +%token T_END_HEREDOC +%token T_DOLLAR_OPEN_CURLY_BRACES +%token T_CURLY_OPEN +%token T_PAAMAYIM_NEKUDOTAYIM +%token T_NAMESPACE +%token T_NS_C +%token T_DIR +%token T_NS_SEPARATOR +%token T_ELLIPSIS + +%type identifier + + +%% + +///////////////////////////////////////////////////////////////////////// + +start: + identifier { fmt.Println($1) } +; + +reserved_non_modifiers: + T_INCLUDE | T_INCLUDE_ONCE | T_EVAL | T_REQUIRE | T_REQUIRE_ONCE | T_LOGICAL_OR | T_LOGICAL_XOR | T_LOGICAL_AND + | T_INSTANCEOF | T_NEW | T_CLONE | T_EXIT | T_IF | T_ELSEIF | T_ELSE | T_ENDIF | T_ECHO | T_DO | T_WHILE | T_ENDWHILE + | T_FOR | T_ENDFOR | T_FOREACH | T_ENDFOREACH | T_DECLARE | T_ENDDECLARE | T_AS | T_TRY | T_CATCH | T_FINALLY + | T_THROW | T_USE | T_INSTEADOF | T_GLOBAL | T_VAR | T_UNSET | T_ISSET | T_EMPTY | T_CONTINUE | T_GOTO + | T_FUNCTION | T_CONST | T_RETURN | T_PRINT | T_YIELD | T_LIST | T_SWITCH | T_ENDSWITCH | T_CASE | T_DEFAULT | T_BREAK + | T_ARRAY | T_CALLABLE | T_EXTENDS | T_IMPLEMENTS | T_NAMESPACE | T_TRAIT | T_INTERFACE | T_CLASS + | T_CLASS_C | T_TRAIT_C | T_FUNC_C | T_METHOD_C | T_LINE | T_FILE | T_DIR | T_NS_C +; + +semi_reserved: + reserved_non_modifiers + | T_STATIC | T_ABSTRACT | T_FINAL | T_PRIVATE | T_PROTECTED | T_PUBLIC +; + +identifier: + T_STRING { $$ = Node("identifier") } + | semi_reserved { $$ = Node("reserved") } +; + +///////////////////////////////////////////////////////////////////////// + +%% + +const src = `