parser, first try

This commit is contained in:
vadim 2017-11-23 17:33:47 +02:00
parent 427714df44
commit c11d3ece0d
5 changed files with 1720 additions and 572 deletions

View File

@ -4,14 +4,19 @@
# blame: jnml, labs.nic.cz
all: lexer.go
all: parser.go lexer.go
rm -f y.output
gofmt -l -s -w *.go
go build
run: all
./php-parser
lexer.go: lexer.l
golex -t $< | gofmt > $@
golex -o $@ $<
parser.go: parser.y
goyacc -o $@ $<
clean:
rm -f php-parser.go lex.yy.go y.output *~

907
lexer.go

File diff suppressed because it is too large Load Diff

355
lexer.l
View File

@ -13,7 +13,6 @@ import (
"io"
"unicode"
"fmt"
"os"
"bytes"
"github.com/cznic/golex/lex"
@ -83,7 +82,8 @@ func rune2Class(r rune) int {
if unicode.IsDigit(r) {
return classUnicodeDigit
}
return classOther
// return classOther
return -1
}
func newLexer(src io.Reader, dst io.Writer, fName string) *lexer {
@ -93,8 +93,6 @@ func newLexer(src io.Reader, dst io.Writer, fName string) *lexer {
return &lexer{lx}
}
type yySymType struct {}
func (l *lexer) unget(r rune) []byte{
l.Unget(l.Lookahead())
@ -129,7 +127,7 @@ func (l *lexer) ungetN(n int) []byte{
return buf
}
func (l *lexer) Lex() int { // Lex(lval *yySymType)
func (l *lexer) Lex(lval *yySymType) int { // Lex(lval *yySymType)
c := l.Enter()
%}
@ -153,18 +151,17 @@ NEW_LINE (\r|\n|\r\n)
%%
c = l.Rule0()
// ([\$]{NCH})*
<INITIAL>[ \t\n\r]+
<INITIAL>.
<INITIAL>\<\?php([ \t]|{NEW_LINE}) fmt.Println("T_OPEN_TAG");begin(PHP)
<INITIAL>\<\? fmt.Println("T_OPEN_TAG");begin(PHP)
<INITIAL>\<\?= fmt.Println("T_OPEN_TAG_WITH_ECHO");begin(PHP)
<INITIAL>\<\?php([ \t]|{NEW_LINE}) begin(PHP);//lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG;
<INITIAL>\<\? begin(PHP);//lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG;
<INITIAL>\<\?= begin(PHP);lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG_WITH_ECHO;
<PHP>[ \t\n\r]+ fmt.Println("T_WHITESPACE")
<PHP>\?\>{NEW_LINE}? fmt.Println("T_CLOSE_TAG");begin(INITIAL)
<PHP>[ \t\n\r]+ //lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE
<PHP>\?\>{NEW_LINE}? begin(INITIAL);lval.token = string(l.TokenBytes(nil)); return T_CLOSE_TAG;
<PHP>{DNUM}|{EXPONENT_DNUM} fmt.Println("T_DNUMBER")
<PHP>{DNUM}|{EXPONENT_DNUM} lval.token = string(l.TokenBytes(nil)); return T_DNUMBER
<PHP>{BNUM}
tb := l.TokenBytes(nil)
i:=2
@ -175,15 +172,15 @@ NEW_LINE (\r|\n|\r\n)
}
}
if len(tb) - i < 64 {
fmt.Println("T_LNUMBER")
lval.token = string(l.TokenBytes(nil)); return T_LNUMBER
} else {
fmt.Println("T_DNUMBER")
lval.token = string(l.TokenBytes(nil)); return T_DNUMBER
}
<PHP>{LNUM}
if len(l.TokenBytes(nil)) < 20 {
fmt.Println("T_LNUMBER")
lval.token = string(l.TokenBytes(nil)); return T_LNUMBER
} else {
fmt.Println("T_DNUMBER")
lval.token = string(l.TokenBytes(nil)); return T_DNUMBER
}
<PHP>{HNUM}
tb := l.TokenBytes(nil)
@ -196,143 +193,143 @@ NEW_LINE (\r|\n|\r\n)
}
length := len(tb) - i
if length < 16 || (length == 16 && tb[i] <= '7') {
fmt.Println("T_LNUMBER")
lval.token = string(l.TokenBytes(nil)); return T_LNUMBER
} else {
fmt.Println("T_DNUMBER")
lval.token = string(l.TokenBytes(nil)); return T_DNUMBER
}
<PHP>abstract fmt.Println("T_ABSTRACT")
<PHP>array fmt.Println("T_ARRAY")
<PHP>as fmt.Println("T_AS")
<PHP>break fmt.Println("T_BREAK")
<PHP>callable fmt.Println("T_CALLABLE")
<PHP>case fmt.Println("T_CASE")
<PHP>catch fmt.Println("T_CATCH")
<PHP>class fmt.Println("T_CLASS")
<PHP>clone fmt.Println("T_CLONE")
<PHP>const fmt.Println("T_CONST");
<PHP>continue fmt.Println("T_CONTINUE");
<PHP>declare fmt.Println("T_DECLARE");
<PHP>default fmt.Println("T_DEFAULT");
<PHP>do fmt.Println("T_DO");
<PHP>echo fmt.Println("T_ECHO");
<PHP>else fmt.Println("T_ELSE");
<PHP>elseif fmt.Println("T_ELSEIF");
<PHP>empty fmt.Println("T_EMPTY");
<PHP>endfor fmt.Println("T_ENDFOR")
<PHP>endforeach fmt.Println("T_ENDFOREACH")
<PHP>endif fmt.Println("T_ENDIF")
<PHP>endswitch fmt.Println("T_ENDSWITCH")
<PHP>endwhile fmt.Println("T_ENDWHILE")
<PHP>eval fmt.Println("T_EVAL")
<PHP>exit|die fmt.Println("T_EXIT")
<PHP>extends fmt.Println("T_EXTENDS")
<PHP>final fmt.Println("T_FINAL")
<PHP>finally fmt.Println("T_FINALLY")
<PHP>for fmt.Println("T_FOR")
<PHP>foreach fmt.Println("T_FOREACH")
<PHP>function|cfunction fmt.Println("T_FUNCTION")
<PHP>global fmt.Println("T_GLOBAL")
<PHP>goto fmt.Println("T_GOTO")
<PHP>if fmt.Println("T_IF")
<PHP>isset fmt.Println("T_ISSET")
<PHP>implements fmt.Println("T_IMPLEMENTS")
<PHP>instanceof fmt.Println("T_INSTANCEOF")
<PHP>insteadof fmt.Println("T_INSTEADOF")
<PHP>interface fmt.Println("T_INTERFACE")
<PHP>list fmt.Println("T_LIST")
<PHP>namespace fmt.Println("T_NAMESPACE")
<PHP>private fmt.Println("T_PRIVATE")
<PHP>public fmt.Println("T_PUBLIC")
<PHP>print fmt.Println("T_PRINT")
<PHP>protected fmt.Println("T_PROTECTED")
<PHP>return fmt.Println("T_RETURN")
<PHP>static fmt.Println("T_STATIC")
<PHP>switch fmt.Println("T_SWITCH")
<PHP>throw fmt.Println("T_THROW")
<PHP>trait fmt.Println("T_TRAIT")
<PHP>try fmt.Println("T_TRY")
<PHP>unset fmt.Println("T_UNSET")
<PHP>use fmt.Println("T_USE")
<PHP>var fmt.Println("T_VAR")
<PHP>while fmt.Println("T_WHILE")
<PHP>yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] fmt.Println("T_YIELD_FROM")
<PHP>yield fmt.Println("T_YIELD")
<PHP>include fmt.Println("T_INCLUDE")
<PHP>include_once fmt.Println("T_INCLUDE_ONCE")
<PHP>require fmt.Println("T_REQUIRE")
<PHP>require_once fmt.Println("T_REQUIRE_ONCE")
<PHP>__CLASS__ fmt.Println("T_CLASS_C")
<PHP>__DIR__ fmt.Println("T_DIR")
<PHP>__FILE__ fmt.Println("T_FILE")
<PHP>__FUNCTION__ fmt.Println("T_FUNC_C")
<PHP>__LINE__ fmt.Println("T_LINE")
<PHP>__NAMESPACE__ fmt.Println("T_NS_C")
<PHP>__METHOD__ fmt.Println("T_METHOD_C")
<PHP>__TRAIT__ fmt.Println("T_TRAIT_C")
<PHP>__halt_compiler fmt.Println("T_HALT_COMPILER")
<PHP>\([ \t]*array[ \t]*\) fmt.Println("T_ARRAY_CAST")
<PHP>\([ \t]*(bool|boolean)[ \t]*\) fmt.Println("T_BOOL_CAST")
<PHP>\([ \t]*(real|double|float)[ \t]*\) fmt.Println("T_DOUBLE_CAST")
<PHP>\([ \t]*(int|integer)[ \t]*\) fmt.Println("T_INT_CAST")
<PHP>\([ \t]*object[ \t]*\) fmt.Println("T_OBJECT_CAST")
<PHP>\([ \t]*string[ \t]*\) fmt.Println("T_STRING_CAST")
<PHP>\([ \t]*unset[ \t]*\) fmt.Println("T_UNSET_CAST")
<PHP>new fmt.Println("T_NEW")
<PHP>and fmt.Println("T_LOGICAL_AND")
<PHP>or fmt.Println("T_LOGICAL_OR")
<PHP>xor fmt.Println("T_LOGICAL_XOR")
<PHP>\\ fmt.Println("T_NS_SEPARATOR")
<PHP>\.\.\. fmt.Println("T_ELLIPSIS");
<PHP>:: fmt.Println("T_PAAMAYIM_NEKUDOTAYIM"); // T_DOUBLE_COLON
<PHP>&& fmt.Println("T_BOOLEAN_AND")
<PHP>\|\| fmt.Println("T_BOOLEAN_OR")
<PHP>&= fmt.Println("T_AND_EQUAL")
<PHP>\|= fmt.Println("T_OR_EQUAL")
<PHP>\.= fmt.Println("T_CONCAT_EQUAL");
<PHP>\*= fmt.Println("T_MUL_EQUAL")
<PHP>\*\*= fmt.Println("T_POW_EQUAL")
<PHP>[/]= fmt.Println("T_DIV_EQUAL");
<PHP>\+= fmt.Println("T_PLUS_EQUAL")
<PHP>-= fmt.Println("T_MINUS_EQUAL")
<PHP>\^= fmt.Println("T_XOR_EQUAL")
<PHP>%= fmt.Println("T_MOD_EQUAL")
<PHP>-- fmt.Println("T_DEC");
<PHP>\+\+ fmt.Println("T_INC")
<PHP>=> fmt.Println("T_DOUBLE_ARROW");
<PHP>\<=\> fmt.Println("T_SPACESHIP")
<PHP>\!=|\<\> fmt.Println("T_IS_NOT_EQUAL")
<PHP>\!== fmt.Println("T_IS_NOT_IDENTICAL")
<PHP>== fmt.Println("T_IS_EQUAL")
<PHP>=== fmt.Println("T_IS_IDENTICAL")
<PHP>\<\<= fmt.Println("T_SL_EQUAL")
<PHP>\>\>= fmt.Println("T_SR_EQUAL")
<PHP>\>= fmt.Println("T_IS_GREATER_OR_EQUAL")
<PHP>\<= fmt.Println("T_IS_SMALLER_OR_EQUAL")
<PHP>\*\* fmt.Println("T_POW")
<PHP>\<\< fmt.Println("T_SL")
<PHP>\>\> fmt.Println("T_SR")
<PHP>(#|[/][/]){NEW_LINE} fmt.Println("T_COMMENT"); // TODO: handle ?>
<PHP>[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] fmt.Println("T_COMMENT"); // TODO: handle ?>
<PHP>[/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/] fmt.Println("T_DOC_COMMENT"); // TODO: handle ?>
<PHP>'[^']*(\\')*' fmt.Println("T_CONSTANT_ENCAPSED_STRING")
<PHP>{OPERATORS} fmt.Printf("%s\n", l.TokenBytes(nil));
<PHP>abstract lval.token = string(l.TokenBytes(nil)); return T_ABSTRACT
<PHP>array lval.token = string(l.TokenBytes(nil)); return T_ARRAY
<PHP>as lval.token = string(l.TokenBytes(nil)); return T_AS
<PHP>break lval.token = string(l.TokenBytes(nil)); return T_BREAK
<PHP>callable lval.token = string(l.TokenBytes(nil)); return T_CALLABLE
<PHP>case lval.token = string(l.TokenBytes(nil)); return T_CASE
<PHP>catch lval.token = string(l.TokenBytes(nil)); return T_CATCH
<PHP>class lval.token = string(l.TokenBytes(nil)); return T_CLASS
<PHP>clone lval.token = string(l.TokenBytes(nil)); return T_CLONE
<PHP>const lval.token = string(l.TokenBytes(nil)); return T_CONST;
<PHP>continue lval.token = string(l.TokenBytes(nil)); return T_CONTINUE;
<PHP>declare lval.token = string(l.TokenBytes(nil)); return T_DECLARE;
<PHP>default lval.token = string(l.TokenBytes(nil)); return T_DEFAULT;
<PHP>do lval.token = string(l.TokenBytes(nil)); return T_DO;
<PHP>echo lval.token = string(l.TokenBytes(nil)); return T_ECHO;
<PHP>else lval.token = string(l.TokenBytes(nil)); return T_ELSE;
<PHP>elseif lval.token = string(l.TokenBytes(nil)); return T_ELSEIF;
<PHP>empty lval.token = string(l.TokenBytes(nil)); return T_EMPTY;
<PHP>endfor lval.token = string(l.TokenBytes(nil)); return T_ENDFOR
<PHP>endforeach lval.token = string(l.TokenBytes(nil)); return T_ENDFOREACH
<PHP>endif lval.token = string(l.TokenBytes(nil)); return T_ENDIF
<PHP>endswitch lval.token = string(l.TokenBytes(nil)); return T_ENDSWITCH
<PHP>endwhile lval.token = string(l.TokenBytes(nil)); return T_ENDWHILE
<PHP>eval lval.token = string(l.TokenBytes(nil)); return T_EVAL
<PHP>exit|die lval.token = string(l.TokenBytes(nil)); return T_EXIT
<PHP>extends lval.token = string(l.TokenBytes(nil)); return T_EXTENDS
<PHP>final lval.token = string(l.TokenBytes(nil)); return T_FINAL
<PHP>finally lval.token = string(l.TokenBytes(nil)); return T_FINALLY
<PHP>for lval.token = string(l.TokenBytes(nil)); return T_FOR
<PHP>foreach lval.token = string(l.TokenBytes(nil)); return T_FOREACH
<PHP>function|cfunction lval.token = string(l.TokenBytes(nil)); return T_FUNCTION
<PHP>global lval.token = string(l.TokenBytes(nil)); return T_GLOBAL
<PHP>goto lval.token = string(l.TokenBytes(nil)); return T_GOTO
<PHP>if lval.token = string(l.TokenBytes(nil)); return T_IF
<PHP>isset lval.token = string(l.TokenBytes(nil)); return T_ISSET
<PHP>implements lval.token = string(l.TokenBytes(nil)); return T_IMPLEMENTS
<PHP>instanceof lval.token = string(l.TokenBytes(nil)); return T_INSTANCEOF
<PHP>insteadof lval.token = string(l.TokenBytes(nil)); return T_INSTEADOF
<PHP>interface lval.token = string(l.TokenBytes(nil)); return T_INTERFACE
<PHP>list lval.token = string(l.TokenBytes(nil)); return T_LIST
<PHP>namespace lval.token = string(l.TokenBytes(nil)); return T_NAMESPACE
<PHP>private lval.token = string(l.TokenBytes(nil)); return T_PRIVATE
<PHP>public lval.token = string(l.TokenBytes(nil)); return T_PUBLIC
<PHP>print lval.token = string(l.TokenBytes(nil)); return T_PRINT
<PHP>protected lval.token = string(l.TokenBytes(nil)); return T_PROTECTED
<PHP>return lval.token = string(l.TokenBytes(nil)); return T_RETURN
<PHP>static lval.token = string(l.TokenBytes(nil)); return T_STATIC
<PHP>switch lval.token = string(l.TokenBytes(nil)); return T_SWITCH
<PHP>throw lval.token = string(l.TokenBytes(nil)); return T_THROW
<PHP>trait lval.token = string(l.TokenBytes(nil)); return T_TRAIT
<PHP>try lval.token = string(l.TokenBytes(nil)); return T_TRY
<PHP>unset lval.token = string(l.TokenBytes(nil)); return T_UNSET
<PHP>use lval.token = string(l.TokenBytes(nil)); return T_USE
<PHP>var lval.token = string(l.TokenBytes(nil)); return T_VAR
<PHP>while lval.token = string(l.TokenBytes(nil)); return T_WHILE
<PHP>yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] lval.token = string(l.TokenBytes(nil)); return T_YIELD_FROM
<PHP>yield lval.token = string(l.TokenBytes(nil)); return T_YIELD
<PHP>include lval.token = string(l.TokenBytes(nil)); return T_INCLUDE
<PHP>include_once lval.token = string(l.TokenBytes(nil)); return T_INCLUDE_ONCE
<PHP>require lval.token = string(l.TokenBytes(nil)); return T_REQUIRE
<PHP>require_once lval.token = string(l.TokenBytes(nil)); return T_REQUIRE_ONCE
<PHP>__CLASS__ lval.token = string(l.TokenBytes(nil)); return T_CLASS_C
<PHP>__DIR__ lval.token = string(l.TokenBytes(nil)); return T_DIR
<PHP>__FILE__ lval.token = string(l.TokenBytes(nil)); return T_FILE
<PHP>__FUNCTION__ lval.token = string(l.TokenBytes(nil)); return T_FUNC_C
<PHP>__LINE__ lval.token = string(l.TokenBytes(nil)); return T_LINE
<PHP>__NAMESPACE__ lval.token = string(l.TokenBytes(nil)); return T_NS_C
<PHP>__METHOD__ lval.token = string(l.TokenBytes(nil)); return T_METHOD_C
<PHP>__TRAIT__ lval.token = string(l.TokenBytes(nil)); return T_TRAIT_C
<PHP>__halt_compiler lval.token = string(l.TokenBytes(nil)); return T_HALT_COMPILER
<PHP>\([ \t]*array[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_ARRAY_CAST
<PHP>\([ \t]*(bool|boolean)[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_BOOL_CAST
<PHP>\([ \t]*(real|double|float)[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_DOUBLE_CAST
<PHP>\([ \t]*(int|integer)[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_INT_CAST
<PHP>\([ \t]*object[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_OBJECT_CAST
<PHP>\([ \t]*string[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_STRING_CAST
<PHP>\([ \t]*unset[ \t]*\) lval.token = string(l.TokenBytes(nil)); return T_UNSET_CAST
<PHP>new lval.token = string(l.TokenBytes(nil)); return T_NEW
<PHP>and lval.token = string(l.TokenBytes(nil)); return T_LOGICAL_AND
<PHP>or lval.token = string(l.TokenBytes(nil)); return T_LOGICAL_OR
<PHP>xor lval.token = string(l.TokenBytes(nil)); return T_LOGICAL_XOR
<PHP>\\ lval.token = string(l.TokenBytes(nil)); return T_NS_SEPARATOR
<PHP>\.\.\. lval.token = string(l.TokenBytes(nil)); return T_ELLIPSIS;
<PHP>:: lval.token = string(l.TokenBytes(nil)); return T_PAAMAYIM_NEKUDOTAYIM; // T_DOUBLE_COLON
<PHP>&& lval.token = string(l.TokenBytes(nil)); return T_BOOLEAN_AND
<PHP>\|\| lval.token = string(l.TokenBytes(nil)); return T_BOOLEAN_OR
<PHP>&= lval.token = string(l.TokenBytes(nil)); return T_AND_EQUAL
<PHP>\|= lval.token = string(l.TokenBytes(nil)); return T_OR_EQUAL
<PHP>\.= lval.token = string(l.TokenBytes(nil)); return T_CONCAT_EQUAL;
<PHP>\*= lval.token = string(l.TokenBytes(nil)); return T_MUL_EQUAL
<PHP>\*\*= lval.token = string(l.TokenBytes(nil)); return T_POW_EQUAL
<PHP>[/]= lval.token = string(l.TokenBytes(nil)); return T_DIV_EQUAL;
<PHP>\+= lval.token = string(l.TokenBytes(nil)); return T_PLUS_EQUAL
<PHP>-= lval.token = string(l.TokenBytes(nil)); return T_MINUS_EQUAL
<PHP>\^= lval.token = string(l.TokenBytes(nil)); return T_XOR_EQUAL
<PHP>%= lval.token = string(l.TokenBytes(nil)); return T_MOD_EQUAL
<PHP>-- lval.token = string(l.TokenBytes(nil)); return T_DEC;
<PHP>\+\+ lval.token = string(l.TokenBytes(nil)); return T_INC
<PHP>=> lval.token = string(l.TokenBytes(nil)); return T_DOUBLE_ARROW;
<PHP>\<=\> lval.token = string(l.TokenBytes(nil)); return T_SPACESHIP
<PHP>\!=|\<\> lval.token = string(l.TokenBytes(nil)); return T_IS_NOT_EQUAL
<PHP>\!== lval.token = string(l.TokenBytes(nil)); return T_IS_NOT_IDENTICAL
<PHP>== lval.token = string(l.TokenBytes(nil)); return T_IS_EQUAL
<PHP>=== lval.token = string(l.TokenBytes(nil)); return T_IS_IDENTICAL
<PHP>\<\<= lval.token = string(l.TokenBytes(nil)); return T_SL_EQUAL
<PHP>\>\>= lval.token = string(l.TokenBytes(nil)); return T_SR_EQUAL
<PHP>\>= lval.token = string(l.TokenBytes(nil)); return T_IS_GREATER_OR_EQUAL
<PHP>\<= lval.token = string(l.TokenBytes(nil)); return T_IS_SMALLER_OR_EQUAL
<PHP>\*\* lval.token = string(l.TokenBytes(nil)); return T_POW
<PHP>\<\< lval.token = string(l.TokenBytes(nil)); return T_SL
<PHP>\>\> lval.token = string(l.TokenBytes(nil)); return T_SR
<PHP>(#|[/][/]){NEW_LINE} lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
<PHP>[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
<PHP>[/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/] lval.token = string(l.TokenBytes(nil)); return T_DOC_COMMENT; // TODO: handle ?>
<PHP>'[^']*(\\')*' lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING
<PHP>{OPERATORS} lval.token = string(l.TokenBytes(nil)); return c
<PHP>\{ fmt.Println("{"); pushState(PHP);
<PHP>\} fmt.Println("}"); popState();
<PHP>\${VAR_NAME} fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil))
<PHP>{VAR_NAME} fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil));
<PHP>\{ pushState(PHP); lval.token = string(l.TokenBytes(nil)); return c
<PHP>\} popState(); lval.token = string(l.TokenBytes(nil)); return c
<PHP>\${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE
<PHP>{VAR_NAME} if c == -1 {fmt.Printf("%q\n", string(l.TokenBytes(nil)))};lval.token = string(l.TokenBytes(nil)); return T_STRING
<PHP>-> fmt.Println("T_OBJECT_OPERATOR");begin(PROPERTY)
<PROPERTY>[ \t\n\r]+ fmt.Println("T_WHITESPACE");
<PROPERTY>-> fmt.Println("T_OBJECT_OPERATOR");
<PROPERTY>{VAR_NAME} fmt.Println("T_STRING");begin(PHP)
<PHP>-> begin(PROPERTY);lval.token = string(l.TokenBytes(nil)); return T_OBJECT_OPERATOR;
<PROPERTY>[ \t\n\r]+ lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE;
<PROPERTY>-> lval.token = string(l.TokenBytes(nil)); return T_OBJECT_OPERATOR;
<PROPERTY>{VAR_NAME} begin(PHP);lval.token = string(l.TokenBytes(nil)); return T_STRING;
<PROPERTY>. l.ungetN(1);begin(PHP)
<PHP>[\']([^\\\']*([\\][\'])*)*[\'] fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil));
<PHP>[\']([^\\\']*([\\][\'])*)*[\'] lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING;
<PHP>` fmt.Println("`");begin(BACKQUOTE)
<BACKQUOTE>` fmt.Println("`");begin(PHP)
<PHP>` begin(BACKQUOTE); lval.token = string(l.TokenBytes(nil)); return c
<BACKQUOTE>` begin(PHP); lval.token = string(l.TokenBytes(nil)); return c
<PHP>[b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE}
tb := l.TokenBytes(nil)
@ -392,7 +389,7 @@ NEW_LINE (\r|\n|\r\n)
l.ungetN(ungetCnt)
fmt.Printf("T_START_HEREDOC: %q\n", tb);
lval.token = string(tb); return T_START_HEREDOC
<NOWDOC>.
searchLabel := []byte{}
@ -418,21 +415,22 @@ NEW_LINE (\r|\n|\r\n)
c = l.Next()
}
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", tb);
lval.token = string(tb); return T_ENCAPSED_AND_WHITESPACE
<HEREDOC_END>{VAR_NAME}\; fmt.Printf("T_END_HEREDOC: %q\n", l.ungetN(1));begin(PHP)
<HEREDOC_END>. fmt.Printf("ERROR HEREDOC: %q\n", l.ungetN(1));
<HEREDOC_END>{VAR_NAME}\; begin(PHP);lval.token = string(l.ungetN(1)); return T_END_HEREDOC
<PHP>[b]?[\"]
binPrefix := l.TokenBytes(nil)[0] == 'b'
beginString := func() {
beginString := func() int {
cnt := 1; if (binPrefix) {cnt = 2}
l.ungetN(len(l.TokenBytes(nil))-cnt)
tokenBytes := l.TokenBytes(nil)[:cnt]
fmt.Println(string(tokenBytes)) // TODO: RETURN TOKEN
pushState(STRING)
lval.token = string(tokenBytes); return rune2Class('"')
}
F:for {
@ -443,13 +441,13 @@ NEW_LINE (\r|\n|\r\n)
switch c {
case '"' :
c = l.Next();
fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil));
lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING
break F;
case '$':
c = l.Next();
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
beginString()
return beginString()
break F;
}
l.ungetN(0)
@ -457,7 +455,7 @@ NEW_LINE (\r|\n|\r\n)
case '{':
c = l.Next();
if rune(c) == '$' {
beginString()
return beginString()
break F;
}
l.ungetN(0)
@ -469,9 +467,9 @@ NEW_LINE (\r|\n|\r\n)
c = l.Next()
}
<STRING>\" fmt.Println("\""); popState()
<STRING,HEREDOC,BACKQUOTE>\{\$ fmt.Printf("T_CURLY_OPEN: %q\n", l.ungetN(1));pushState(PHP)
<STRING,HEREDOC,BACKQUOTE>\$\{ fmt.Printf("T_DOLLAR_OPEN_CURLY_BRACES: %q\n", l.TokenBytes(nil));pushState(STRING_VAR_NAME)
<STRING>\" popState(); lval.token = "\""; return c
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.token = string(l.ungetN(1)); pushState(PHP); return T_CURLY_OPEN // push PHP so the matching '}' (which calls popState) returns to the enclosing string state
<STRING,HEREDOC,BACKQUOTE>\$\{ pushState(STRING_VAR_NAME);lval.token = string(l.TokenBytes(nil)); return T_DOLLAR_OPEN_CURLY_BRACES
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetN(1);pushState(STRING_VAR)
<STRING>.
F1:for {
@ -481,7 +479,7 @@ NEW_LINE (\r|\n|\r\n)
switch c {
case '"' :
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", l.TokenBytes(nil));
lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE
break F1;
case '$':
@ -489,7 +487,7 @@ NEW_LINE (\r|\n|\r\n)
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetN(1)
tb := l.TokenBytes(nil)
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]);
lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE
break F1;
}
l.ungetN(0)
@ -499,7 +497,7 @@ NEW_LINE (\r|\n|\r\n)
if rune(c) == '$' {
l.ungetN(1)
tb := l.TokenBytes(nil)
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]);
lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE
break F1;
}
l.ungetN(0)
@ -519,7 +517,7 @@ NEW_LINE (\r|\n|\r\n)
switch c {
case '`' :
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", l.TokenBytes(nil));
lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE
break F2;
case '$':
@ -527,7 +525,7 @@ NEW_LINE (\r|\n|\r\n)
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetN(1)
tb := l.TokenBytes(nil)
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]);
lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetN(0)
@ -537,7 +535,7 @@ NEW_LINE (\r|\n|\r\n)
if rune(c) == '$' {
l.ungetN(1)
tb := l.TokenBytes(nil)
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %s\n", tb[:len(tb)-1]);
lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetN(0)
@ -597,32 +595,27 @@ NEW_LINE (\r|\n|\r\n)
c = l.Next()
}
lval.token = string(tb); return T_ENCAPSED_AND_WHITESPACE
fmt.Printf("T_ENCAPSED_AND_WHITESPACE(HEREDOC): %q\n", tb);
<STRING_VAR>\${VAR_NAME} fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil));
<STRING_VAR>->{VAR_NAME} fmt.Printf("T_OBJECT_OPERATOR: %q\n", l.ungetN(len(l.TokenBytes(nil))-2));
<STRING_VAR>{VAR_NAME} fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil));popState()
<STRING_VAR>\[ fmt.Println("["); pushState(STRING_VAR_INDEX)
<STRING_VAR>\${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE
<STRING_VAR>->{VAR_NAME} lval.token = string(l.ungetN(len(l.TokenBytes(nil))-2)); return T_OBJECT_OPERATOR
<STRING_VAR>{VAR_NAME} popState();lval.token = string(l.TokenBytes(nil)); return T_STRING
<STRING_VAR>\[ pushState(STRING_VAR_INDEX);lval.token = string(l.TokenBytes(nil)); return c
<STRING_VAR>.|[ \t\n\r] l.ungetN(1);popState()
<STRING_VAR_INDEX>{LNUM}|{HNUM}|{BNUM} fmt.Printf("T_NUM_STRING: %q\n", l.TokenBytes(nil));
<STRING_VAR_INDEX>\${VAR_NAME} fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil));
<STRING_VAR_INDEX>{VAR_NAME} fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil));
<STRING_VAR_INDEX>\] fmt.Println("\"]\""); popState(); popState()
<STRING_VAR_INDEX>[ \n\r\t\\'#] fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", l.ungetN(1)); popState(); popState()
<STRING_VAR_INDEX>{OPERATORS} fmt.Printf("%q\n", l.TokenBytes(nil));
<STRING_VAR_INDEX>. fmt.Printf("%q\n", l.TokenBytes(nil));
<STRING_VAR_INDEX>{LNUM}|{HNUM}|{BNUM} lval.token = string(l.TokenBytes(nil)); return T_NUM_STRING
<STRING_VAR_INDEX>\${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE
<STRING_VAR_INDEX>{VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_STRING
<STRING_VAR_INDEX>\] popState(); popState();lval.token = string(l.TokenBytes(nil)); return c
<STRING_VAR_INDEX>[ \n\r\t\\'#] popState(); popState();lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR_INDEX>{OPERATORS} lval.token = string(l.TokenBytes(nil)); return c
<STRING_VAR_INDEX>. lval.token = string(l.TokenBytes(nil)); return c
<STRING_VAR_NAME>{VAR_NAME}[\[\}] fmt.Printf("T_STRING_VARNAME: %q\n", l.ungetN(1));popState();pushState(PHP)
<STRING_VAR_NAME>{VAR_NAME}[\[\}] popState();pushState(PHP);lval.token = string(l.ungetN(1)); return T_STRING_VARNAME
<STRING_VAR_NAME>. l.ungetN(1);popState();pushState(PHP)
%%
if c, ok := l.Abort(); ok { return int(c) }
goto yyAction
}
func main() {
l := newLexer(os.Stdin, os.Stdout, "file.name")
l.Lex();
}

828
parser.go Normal file
View File

@ -0,0 +1,828 @@
package main
import __yyfmt__ "fmt"
import (
"bytes"
"fmt"
"io"
"os"
)
// node is one vertex of the parse tree: a label plus its ordered children.
type node struct {
	name     string
	children []node
}

// String renders the subtree rooted at n using print, starting with an
// indent of one space.
func (n node) String() string {
	buf := new(bytes.Buffer)
	n.print(buf, " ")
	return buf.String()
}

// print writes a newline, indent and the node name to out, then prints each
// child in order with the indent extended by one space.
// Errors returned by out are not reported.
func (n node) print(out io.Writer, indent string) {
	fmt.Fprintf(out, "\n%v%v", indent, n.name)
	for _, nn := range n.children {
		nn.print(out, indent+" ")
	}
}

// Node returns a childless node labelled name.
func Node(name string) node { return node{name: name} }

// append returns a copy of n with nn added after its existing children.
// The receiver is a value, so the caller's node is left untouched.
func (n node) append(nn ...node) node {
	// Cap the capacity at the current length so the built-in append always
	// copies into a fresh array; otherwise two nodes derived from the same
	// parent would share spare capacity and overwrite each other's children.
	own := n.children[:len(n.children):len(n.children)]
	n.children = append(own, nn...)
	return n
}
// yySymType is the value record passed between the lexer and the parser;
// Lex receives a pointer to one as lval.
type yySymType struct {
// NOTE(review): presumably bookkeeping for the goyacc runtime; nothing in the visible code reads or writes it.
yys int
// node holds a parse-tree value of the node type declared in this file.
node node
// token is the matched source text; the lexer rules assign it through lval.token.
token string
}
// Token codes returned by the lexer for multi-character tokens. The values
// run consecutively from 57346 (T_INCLUDE) to 57480 (T_ELLIPSIS) and follow
// the order of the named entries in yyToknames.
const (
	T_INCLUDE                  = 57346
	T_INCLUDE_ONCE             = 57347
	T_EVAL                     = 57348
	T_REQUIRE                  = 57349
	T_REQUIRE_ONCE             = 57350
	T_LOGICAL_OR               = 57351
	T_LOGICAL_XOR              = 57352
	T_LOGICAL_AND              = 57353
	T_PRINT                    = 57354
	T_YIELD                    = 57355
	T_DOUBLE_ARROW             = 57356
	T_YIELD_FROM               = 57357
	T_PLUS_EQUAL               = 57358
	T_MINUS_EQUAL              = 57359
	T_MUL_EQUAL                = 57360
	T_DIV_EQUAL                = 57361
	T_CONCAT_EQUAL             = 57362
	T_MOD_EQUAL                = 57363
	T_AND_EQUAL                = 57364
	T_OR_EQUAL                 = 57365
	T_XOR_EQUAL                = 57366
	T_SL_EQUAL                 = 57367
	T_SR_EQUAL                 = 57368
	T_POW_EQUAL                = 57369
	T_COALESCE                 = 57370
	T_BOOLEAN_OR               = 57371
	T_BOOLEAN_AND              = 57372
	T_IS_EQUAL                 = 57373
	T_IS_NOT_EQUAL             = 57374
	T_IS_IDENTICAL             = 57375
	T_IS_NOT_IDENTICAL         = 57376
	T_SPACESHIP                = 57377
	T_IS_SMALLER_OR_EQUAL      = 57378
	T_IS_GREATER_OR_EQUAL      = 57379
	T_SL                       = 57380
	T_SR                       = 57381
	T_INSTANCEOF               = 57382
	T_INC                      = 57383
	T_DEC                      = 57384
	T_INT_CAST                 = 57385
	T_DOUBLE_CAST              = 57386
	T_STRING_CAST              = 57387
	T_ARRAY_CAST               = 57388
	T_OBJECT_CAST              = 57389
	T_BOOL_CAST                = 57390
	T_UNSET_CAST               = 57391
	T_POW                      = 57392
	T_NEW                      = 57393
	T_CLONE                    = 57394
	T_ELSEIF                   = 57395
	T_ELSE                     = 57396
	T_ENDIF                    = 57397
	T_STATIC                   = 57398
	T_ABSTRACT                 = 57399
	T_FINAL                    = 57400
	T_PRIVATE                  = 57401
	T_PROTECTED                = 57402
	T_PUBLIC                   = 57403
	T_EXIT                     = 57404
	T_IF                       = 57405
	T_LNUMBER                  = 57406
	T_DNUMBER                  = 57407
	T_STRING                   = 57408
	T_STRING_VARNAME           = 57409
	T_VARIABLE                 = 57410
	T_NUM_STRING               = 57411
	T_INLINE_HTML              = 57412
	T_CHARACTER                = 57413
	T_BAD_CHARACTER            = 57414
	T_ENCAPSED_AND_WHITESPACE  = 57415
	T_CONSTANT_ENCAPSED_STRING = 57416
	T_ECHO                     = 57417
	T_DO                       = 57418
	T_WHILE                    = 57419
	T_ENDWHILE                 = 57420
	T_FOR                      = 57421
	T_ENDFOR                   = 57422
	T_FOREACH                  = 57423
	T_ENDFOREACH               = 57424
	T_DECLARE                  = 57425
	T_ENDDECLARE               = 57426
	T_AS                       = 57427
	T_SWITCH                   = 57428
	T_ENDSWITCH                = 57429
	T_CASE                     = 57430
	T_DEFAULT                  = 57431
	T_BREAK                    = 57432
	T_CONTINUE                 = 57433
	T_GOTO                     = 57434
	T_FUNCTION                 = 57435
	T_CONST                    = 57436
	T_RETURN                   = 57437
	T_TRY                      = 57438
	T_CATCH                    = 57439
	T_FINALLY                  = 57440
	T_THROW                    = 57441
	T_USE                      = 57442
	T_INSTEADOF                = 57443
	T_GLOBAL                   = 57444
	T_VAR                      = 57445
	T_UNSET                    = 57446
	T_ISSET                    = 57447
	T_EMPTY                    = 57448
	T_HALT_COMPILER            = 57449
	T_CLASS                    = 57450
	T_TRAIT                    = 57451
	T_INTERFACE                = 57452
	T_EXTENDS                  = 57453
	T_IMPLEMENTS               = 57454
	T_OBJECT_OPERATOR          = 57455
	T_LIST                     = 57456
	T_ARRAY                    = 57457
	T_CALLABLE                 = 57458
	T_CLASS_C                  = 57459
	T_TRAIT_C                  = 57460
	T_METHOD_C                 = 57461
	T_FUNC_C                   = 57462
	T_LINE                     = 57463
	T_FILE                     = 57464
	T_COMMENT                  = 57465
	T_DOC_COMMENT              = 57466
	T_OPEN_TAG                 = 57467
	T_OPEN_TAG_WITH_ECHO       = 57468
	T_CLOSE_TAG                = 57469
	T_WHITESPACE               = 57470
	T_START_HEREDOC            = 57471
	T_END_HEREDOC              = 57472
	T_DOLLAR_OPEN_CURLY_BRACES = 57473
	T_CURLY_OPEN               = 57474
	T_PAAMAYIM_NEKUDOTAYIM     = 57475
	T_NAMESPACE                = 57476
	T_NS_C                     = 57477
	T_DIR                      = 57478
	T_NS_SEPARATOR             = 57479
	T_ELLIPSIS                 = 57480
)
// yyToknames lists the printable name of every grammar symbol: the three
// reserved entries "$end", "error" and "$unk", followed by the named T_*
// tokens interleaved with quoted single-character literals.
// NOTE(review): presumably consumed by the generated parser for debug and
// error messages; the consumer is outside this view.
var yyToknames = [...]string{
"$end",
"error",
"$unk",
"T_INCLUDE",
"T_INCLUDE_ONCE",
"T_EVAL",
"T_REQUIRE",
"T_REQUIRE_ONCE",
"','",
"T_LOGICAL_OR",
"T_LOGICAL_XOR",
"T_LOGICAL_AND",
"T_PRINT",
"T_YIELD",
"T_DOUBLE_ARROW",
"T_YIELD_FROM",
"'='",
"T_PLUS_EQUAL",
"T_MINUS_EQUAL",
"T_MUL_EQUAL",
"T_DIV_EQUAL",
"T_CONCAT_EQUAL",
"T_MOD_EQUAL",
"T_AND_EQUAL",
"T_OR_EQUAL",
"T_XOR_EQUAL",
"T_SL_EQUAL",
"T_SR_EQUAL",
"T_POW_EQUAL",
"'?'",
"':'",
"T_COALESCE",
"T_BOOLEAN_OR",
"T_BOOLEAN_AND",
"'|'",
"'^'",
"'&'",
"T_IS_EQUAL",
"T_IS_NOT_EQUAL",
"T_IS_IDENTICAL",
"T_IS_NOT_IDENTICAL",
"T_SPACESHIP",
"'<'",
"T_IS_SMALLER_OR_EQUAL",
"'>'",
"T_IS_GREATER_OR_EQUAL",
"T_SL",
"T_SR",
"'+'",
"'-'",
"'.'",
"'*'",
"'/'",
"'%'",
"'!'",
"T_INSTANCEOF",
"'~'",
"T_INC",
"T_DEC",
"T_INT_CAST",
"T_DOUBLE_CAST",
"T_STRING_CAST",
"T_ARRAY_CAST",
"T_OBJECT_CAST",
"T_BOOL_CAST",
"T_UNSET_CAST",
"'@'",
"T_POW",
"'['",
"T_NEW",
"T_CLONE",
"T_ELSEIF",
"T_ELSE",
"T_ENDIF",
"T_STATIC",
"T_ABSTRACT",
"T_FINAL",
"T_PRIVATE",
"T_PROTECTED",
"T_PUBLIC",
"T_EXIT",
"T_IF",
"T_LNUMBER",
"T_DNUMBER",
"T_STRING",
"T_STRING_VARNAME",
"T_VARIABLE",
"T_NUM_STRING",
"T_INLINE_HTML",
"T_CHARACTER",
"T_BAD_CHARACTER",
"T_ENCAPSED_AND_WHITESPACE",
"T_CONSTANT_ENCAPSED_STRING",
"T_ECHO",
"T_DO",
"T_WHILE",
"T_ENDWHILE",
"T_FOR",
"T_ENDFOR",
"T_FOREACH",
"T_ENDFOREACH",
"T_DECLARE",
"T_ENDDECLARE",
"T_AS",
"T_SWITCH",
"T_ENDSWITCH",
"T_CASE",
"T_DEFAULT",
"T_BREAK",
"T_CONTINUE",
"T_GOTO",
"T_FUNCTION",
"T_CONST",
"T_RETURN",
"T_TRY",
"T_CATCH",
"T_FINALLY",
"T_THROW",
"T_USE",
"T_INSTEADOF",
"T_GLOBAL",
"T_VAR",
"T_UNSET",
"T_ISSET",
"T_EMPTY",
"T_HALT_COMPILER",
"T_CLASS",
"T_TRAIT",
"T_INTERFACE",
"T_EXTENDS",
"T_IMPLEMENTS",
"T_OBJECT_OPERATOR",
"T_LIST",
"T_ARRAY",
"T_CALLABLE",
"T_CLASS_C",
"T_TRAIT_C",
"T_METHOD_C",
"T_FUNC_C",
"T_LINE",
"T_FILE",
"T_COMMENT",
"T_DOC_COMMENT",
"T_OPEN_TAG",
"T_OPEN_TAG_WITH_ECHO",
"T_CLOSE_TAG",
"T_WHITESPACE",
"T_START_HEREDOC",
"T_END_HEREDOC",
"T_DOLLAR_OPEN_CURLY_BRACES",
"T_CURLY_OPEN",
"T_PAAMAYIM_NEKUDOTAYIM",
"T_NAMESPACE",
"T_NS_C",
"T_DIR",
"T_NS_SEPARATOR",
"T_ELLIPSIS",
}
// yyStatenames is an empty array; no state names are recorded in this file.
var yyStatenames = [...]string{}
// NOTE(review): presumably the internal codes of the "$end" and "error"
// symbols (the first and second entries of yyToknames) — confirm against the
// goyacc runtime, which is outside this view.
const yyEofCode = 1
const yyErrCode = 2
// NOTE(review): presumably the starting depth of the parser stack; its use is not visible here.
const yyInitialStackSize = 16
const src = `<?
class
`
func main() {
yyDebug = 0
yyErrorVerbose = true
l := newLexer(bytes.NewBufferString(src), os.Stdout, "file.name")
yyParse(l)
}
// yyExca is a generated table of three integer pairs.
// NOTE(review): presumably goyacc's exception table (state / lookahead /
// action triples terminated by -2); the code that reads it is outside this view.
var yyExca = [...]int{
-1, 1,
1, -1,
-2, 0,
}
// yyPrivate is 57344, two below the first named token code (T_INCLUDE = 57346).
// NOTE(review): presumably the base of the private token-number range.
const yyPrivate = 57344
// yyLast is 152, which equals the number of entries in yyAct.
const yyLast = 152
// yyAct is a generated table of 152 integers (its length matches yyLast).
// NOTE(review): presumably the packed parser action table indexed via
// yyPact/yyPgo; the code that reads it is outside this view. Do not edit by hand.
var yyAct = [...]int{
12, 13, 14, 15, 16, 4, 17, 18, 19, 55,
56, 5, 1, 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 20, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 21, 22, 25, 26,
27, 6, 7, 8, 9, 10, 11, 23, 24, 0,
0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
38, 58, 59, 60, 61, 62, 50, 51, 52, 53,
54, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 0, 70, 68, 69, 65, 66, 0, 57,
63, 64, 71, 72, 74, 73, 75, 76, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 67,
78, 77,
}
// yyPact is a generated table of 79 integers: the first entry is -4 and
// every other entry is -1000.
// NOTE(review): presumably a per-state offset into yyAct, with -1000 as a
// "no shift possible" marker; the consumer is outside this view.
var yyPact = [...]int{
-4, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
-1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000,
}
// yyPgo is a generated table of five integers.
// NOTE(review): presumably one goto offset into yyAct per nonterminal; the
// consumer is outside this view.
var yyPgo = [...]int{
0, 13, 12, 11, 5,
}
// yyR1 maps each production number to the number of the nonterminal on its
// left-hand side; Parse uses it to select the goto entry after a reduction.
var yyR1 = [...]int{
0, 2, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 4,
4, 4, 4, 4, 4, 4, 1, 1,
}
// yyR2 maps each production number to the number of stack entries Parse pops
// when reducing by it (the length of the production's right-hand side).
// Every production in this grammar has exactly one symbol on the right.
var yyR2 = [...]int{
0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
}
// yyChk is the check table, indexed by state. A shift into state s is valid
// only if yyChk[s] equals the lookahead token; a goto into state s is valid
// only if yyChk[s] equals the negated nonterminal number.
var yyChk = [...]int{
-1000, -2, -1, 85, -4, -3, 75, 76, 77, 78,
79, 80, 4, 5, 6, 7, 8, 10, 11, 12,
56, 70, 71, 81, 82, 72, 73, 74, 94, 95,
96, 97, 98, 99, 100, 101, 102, 103, 104, 115,
116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
110, 111, 112, 113, 114, 13, 14, 133, 105, 106,
107, 108, 109, 134, 135, 130, 131, 153, 128, 129,
127, 136, 137, 139, 138, 140, 141, 155, 154,
}
// yyDef holds the default action for each state: a positive value is the
// production to reduce by, 0 means syntax error, and -2 tells Parse to look
// the action up in yyExca.
var yyDef = [...]int{
0, -2, 1, 76, 77, 69, 70, 71, 72, 73,
74, 75, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68,
}
// yyTok1 translates small lexer return values (0 <= char < len(yyTok1)) to
// internal token numbers. Entry 0 is also used for any non-positive lexer
// result, i.e. end of input.
var yyTok1 = [...]int{
1, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 55, 3, 3, 3, 54, 37, 3,
3, 3, 52, 49, 9, 50, 51, 53, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 31, 3,
43, 17, 45, 30, 67, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 69, 3, 3, 36, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 35, 3, 57,
}
// yyTok2 translates lexer return values at or above yyPrivate to internal
// token numbers; it is indexed by char-yyPrivate. yylex1 also uses yyTok2[1]
// as the token for any character it cannot translate.
var yyTok2 = [...]int{
2, 3, 4, 5, 6, 7, 8, 10, 11, 12,
13, 14, 15, 16, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 32, 33, 34, 38,
39, 40, 41, 42, 44, 46, 47, 48, 56, 58,
59, 60, 61, 62, 63, 64, 65, 66, 68, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
151, 152, 153, 154, 155, 156, 157,
}
// yyTok3 is scanned by yylex1 in steps of two as (lexer char, internal token)
// pairs for values covered by neither yyTok1 nor yyTok2. Here it holds only a
// single 0 entry, which never matches because yylex1 reaches it with char > 0.
var yyTok3 = [...]int{
0,
}
// yyErrorMessages lists custom syntax-error texts keyed by parser state and
// lookahead token; yyErrorMessage checks it before building a generic
// message. The table is empty in this build.
var yyErrorMessages = [...]struct {
state int
token int
msg string
}{}
/* parser for yacc output */

// yyDebug selects how much tracing the parser prints: >= 1 reports the state
// and token at each new syntax error, >= 2 adds reductions and error-recovery
// steps, >= 3 adds every token read from the lexer, and >= 4 adds every push
// onto the parse stack.
var yyDebug = 0

// yyErrorVerbose makes yyErrorMessage name the unexpected token and the
// tokens it expected instead of returning the bare text "syntax error".
var yyErrorVerbose = false
// yyLexer is the token source and error sink the parser works against.
type yyLexer interface {
// Lex returns the next token code; lval is passed by pointer so the lexer
// can fill in the token's value. A result <= 0 is treated as end of input.
Lex(lval *yySymType) int
// Error is called with a message each time a new syntax error is detected.
Error(s string)
}
// yyParser is the interface of a parser instance created by yyNewParser.
type yyParser interface {
// Parse consumes tokens from the lexer; yyParserImpl returns 0 when the
// input is accepted and 1 when parsing is aborted.
Parse(yyLexer) int
// Lookahead returns the lexer code of the pending lookahead token;
// yyParserImpl reports -1 when no token is pending.
Lookahead() int
}
// yyParserImpl is the concrete parser returned by yyNewParser.
type yyParserImpl struct {
// lval receives the value of the token most recently read from the lexer.
lval yySymType
// stack is the initial storage for the parse stack; Parse switches to a
// larger heap-allocated slice once it fills up.
stack [yyInitialStackSize]yySymType
// char is the lexer code of the current lookahead token, or -1 if none.
char int
}
// Lookahead reports the lexer code of the token the parser has read but not
// yet consumed. Parse keeps this at -1 whenever no token is pending.
func (yyrcvr *yyParserImpl) Lookahead() int {
	return yyrcvr.char
}
// yyNewParser allocates a fresh, zero-valued parser and returns it behind the
// yyParser interface.
func yyNewParser() yyParser {
	return new(yyParserImpl)
}
// yyFlag is the sentinel stored in yyPact for states that need no lookahead:
// Parse jumps to the default action whenever yyPact[state] <= yyFlag.
const yyFlag = -1000
// yyTokname returns the display name of internal token number c. Names are
// stored in yyToknames at index c-1; when c is out of range or the slot is
// empty, the result is the placeholder "tok-<c>".
func yyTokname(c int) string {
	if idx := c - 1; idx >= 0 && idx < len(yyToknames) {
		if name := yyToknames[idx]; name != "" {
			return name
		}
	}
	return __yyfmt__.Sprintf("tok-%v", c)
}
// yyStatname returns the display name of parser state s, taken from
// yyStatenames when a non-empty entry exists and otherwise rendered as
// "state-<s>".
func yyStatname(s int) string {
	if s >= 0 && s < len(yyStatenames) {
		if name := yyStatenames[s]; name != "" {
			return name
		}
	}
	return __yyfmt__.Sprintf("state-%v", s)
}
// yyErrorMessage builds the text reported for a syntax error detected in
// parser state `state` with internal lookahead token `lookAhead`.
//
// With yyErrorVerbose off it returns just "syntax error". Otherwise a custom
// message from yyErrorMessages wins if one matches; failing that, the message
// names the unexpected token and, when at most four tokens would have been
// acceptable, lists them as "expecting A or B ...".
func yyErrorMessage(state, lookAhead int) string {
	// Tokens numbered below TOKSTART are never offered as suggestions.
	const TOKSTART = 4

	if !yyErrorVerbose {
		return "syntax error"
	}

	for _, custom := range yyErrorMessages {
		if custom.state == state && custom.token == lookAhead {
			return "syntax error: " + custom.msg
		}
	}

	msg := "syntax error: unexpected " + yyTokname(lookAhead)

	// To match Bison, suggest at most four expected tokens; as soon as a
	// fifth candidate shows up the list is dropped altogether.
	candidates := make([]int, 0, 4)

	// Collect the tokens this state can shift.
	offset := yyPact[state]
	for tok := TOKSTART; tok-1 < len(yyToknames); tok++ {
		idx := offset + tok
		if idx < 0 || idx >= yyLast || yyChk[yyAct[idx]] != tok {
			continue
		}
		if len(candidates) == cap(candidates) {
			return msg
		}
		candidates = append(candidates, tok)
	}

	if yyDef[state] == -2 {
		// Locate this state's section of the exception table.
		pos := 0
		for yyExca[pos] != -1 || yyExca[pos+1] != state {
			pos += 2
		}

		// Collect the tokens that the section accepts or reduces on.
		for pos += 2; yyExca[pos] >= 0; pos += 2 {
			tok := yyExca[pos]
			if tok < TOKSTART || yyExca[pos+1] == 0 {
				continue
			}
			if len(candidates) == cap(candidates) {
				return msg
			}
			candidates = append(candidates, tok)
		}

		// If the default action is to accept or reduce, give up.
		if yyExca[pos+1] != 0 {
			return msg
		}
	}

	for n, tok := range candidates {
		sep := " or "
		if n == 0 {
			sep = ", expecting "
		}
		msg += sep + yyTokname(tok)
	}
	return msg
}
// yylex1 reads one token from lex and returns both the raw code the lexer
// produced (char) and that code translated into the parser's internal token
// numbering (token).
//
// Translation order: non-positive codes map to yyTok1[0] (end of input),
// codes below len(yyTok1) go through yyTok1, codes in the yyPrivate range go
// through yyTok2, and everything else is searched for in the yyTok3 pairs.
// A code that translates to 0 becomes yyTok2[1], the "unknown char" token.
func yylex1(lex yyLexer, lval *yySymType) (char, token int) {
	char = lex.Lex(lval)

	switch {
	case char <= 0:
		token = yyTok1[0]
	case char < len(yyTok1):
		token = yyTok1[char]
	case char >= yyPrivate && char < yyPrivate+len(yyTok2):
		token = yyTok2[char-yyPrivate]
	default:
		// token deliberately keeps the last key inspected when nothing
		// matches, exactly as the pair scan has always behaved.
		for i := 0; i < len(yyTok3); i += 2 {
			token = yyTok3[i+0]
			if token == char {
				token = yyTok3[i+1]
				break
			}
		}
	}

	if token == 0 {
		token = yyTok2[1] /* unknown char */
	}
	if yyDebug >= 3 {
		__yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char))
	}
	return char, token
}
// yyParse is the convenience entry point: it creates a new parser, runs it
// over the tokens supplied by yylex, and returns the parser's status.
func yyParse(yylex yyLexer) int {
	parser := yyNewParser()
	return parser.Parse(yylex)
}
// Parse runs the table-driven shift/reduce loop, pulling tokens from yylex
// until the input is accepted or parsing has to be abandoned.
//
// It returns 0 when the input is accepted and 1 when error recovery fails
// (no state on the stack can shift the "error" token, or end of input is hit
// while discarding tokens). Each new syntax error is reported through
// yylex.Error. The semantic actions of the grammar are the cases of the
// switch at the end of the function.
func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int {
var yyn int
// yyVAL is the value pushed with the next state: the token value after a
// shift, or the result ($$) of the semantic action after a reduction.
var yyVAL yySymType
// yyDollar is the stack window holding $1..$n for the current reduction.
var yyDollar []yySymType
_ = yyDollar // silence set and not used
// Start on the fixed-size array inside the receiver; replaced by a larger
// slice when the stack overflows.
yyS := yyrcvr.stack[:]
Nerrs := 0 /* number of errors */
Errflag := 0 /* error recovery flag */
yystate := 0
yyrcvr.char = -1
yytoken := -1 // yyrcvr.char translated into internal numbering
defer func() {
// Make sure we report no lookahead when not parsing.
yystate = -1
yyrcvr.char = -1
yytoken = -1
}()
// yyp is the index of the top of the stack; -1 means empty.
yyp := -1
goto yystack
// ret0: the input was accepted.
ret0:
return 0
// ret1: parsing was aborted.
ret1:
return 1
yystack:
/* put a state and value onto the stack */
if yyDebug >= 4 {
__yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate))
}
yyp++
// Double the stack when it is full.
if yyp >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
yyS[yyp] = yyVAL
yyS[yyp].yys = yystate
// yynewstate: decide what to do in yystate without pushing anything.
yynewstate:
yyn = yyPact[yystate]
if yyn <= yyFlag {
goto yydefault /* simple state */
}
// Fetch a lookahead token only if none is pending.
if yyrcvr.char < 0 {
yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
}
yyn += yytoken
if yyn < 0 || yyn >= yyLast {
goto yydefault
}
yyn = yyAct[yyn]
if yyChk[yyn] == yytoken { /* valid shift */
// Consume the lookahead and carry its value onto the stack.
yyrcvr.char = -1
yytoken = -1
yyVAL = yyrcvr.lval
yystate = yyn
// Each successful shift moves one step out of error-recovery mode.
if Errflag > 0 {
Errflag--
}
goto yystack
}
yydefault:
/* default state action */
yyn = yyDef[yystate]
if yyn == -2 {
if yyrcvr.char < 0 {
yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
}
/* look through exception table */
// Find the (-1, state) header of this state's section.
xi := 0
for {
if yyExca[xi+0] == -1 && yyExca[xi+1] == yystate {
break
}
xi += 2
}
// Walk the section until the lookahead matches or the closing
// negative-token entry (the section default) is reached.
for xi += 2; ; xi += 2 {
yyn = yyExca[xi+0]
if yyn < 0 || yyn == yytoken {
break
}
}
yyn = yyExca[xi+1]
// A negative action in the exception table means accept.
if yyn < 0 {
goto ret0
}
}
if yyn == 0 {
/* error ... attempt to resume parsing */
switch Errflag {
case 0: /* brand new error */
yylex.Error(yyErrorMessage(yystate, yytoken))
Nerrs++
if yyDebug >= 1 {
__yyfmt__.Printf("%s", yyStatname(yystate))
__yyfmt__.Printf(" saw %s\n", yyTokname(yytoken))
}
fallthrough
case 1, 2: /* incompletely recovered error ... try again */
// Three tokens must be shifted cleanly before errors are reported again.
Errflag = 3
/* find a state where "error" is a legal shift action */
for yyp >= 0 {
yyn = yyPact[yyS[yyp].yys] + yyErrCode
if yyn >= 0 && yyn < yyLast {
yystate = yyAct[yyn] /* simulate a shift of "error" */
if yyChk[yystate] == yyErrCode {
goto yystack
}
}
/* the current p has no shift on "error", pop stack */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys)
}
yyp--
}
/* there is no state on the stack with an error shift ... abort */
goto ret1
case 3: /* no shift yet; clobber input char */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken))
}
// Nothing left to discard at end of input: give up.
if yytoken == yyEofCode {
goto ret1
}
yyrcvr.char = -1
yytoken = -1
goto yynewstate /* try again in the same state */
}
}
/* reduction by production yyn */
if yyDebug >= 2 {
__yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate))
}
// yynt remembers the production number for the action switch below; yypt
// remembers the stack top before popping so $1..$n can still be sliced out.
yynt := yyn
yypt := yyp
_ = yypt // guard against "declared and not used"
yyp -= yyR2[yyn]
// yyp is now the index of $0. Perform the default action. Iff the
// reduced production is ε, $1 is possibly out of range.
if yyp+1 >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
// Default semantic action: $$ = $1.
yyVAL = yyS[yyp+1]
/* consult goto table to find next state */
yyn = yyR1[yyn]
yyg := yyPgo[yyn]
yyj := yyg + yyS[yyp].yys + 1
if yyj >= yyLast {
yystate = yyAct[yyg]
} else {
yystate = yyAct[yyj]
// Fall back to the nonterminal's default goto when the entry belongs
// to a different symbol.
if yyChk[yystate] != -yyn {
yystate = yyAct[yyg]
}
}
// dummy call; replaced with literal code
// Semantic actions copied from the grammar, keyed by production number.
switch yynt {
case 1:
// start: identifier -- print the resulting node.
yyDollar = yyS[yypt-1 : yypt+1]
{
fmt.Println(yyDollar[1].node)
}
case 76:
// identifier: T_STRING
yyDollar = yyS[yypt-1 : yypt+1]
{
yyVAL.node = Node("identifier")
}
case 77:
// identifier: semi_reserved
yyDollar = yyS[yypt-1 : yypt+1]
{
yyVAL.node = Node("reserved")
}
}
goto yystack /* stack new state and value */
}

193
parser.y Normal file
View File

@ -0,0 +1,193 @@
%{
package main
import (
"bytes"
"fmt"
"os"
"io"
)
type node struct {
name string
children []node
}
func (n node) String() string {
buf := new(bytes.Buffer)
n.print(buf, " ")
return buf.String()
}
func (n node) print(out io.Writer, indent string) {
fmt.Fprintf(out, "\n%v%v", indent, n.name)
for _, nn := range n.children { nn.print(out, indent + " ") }
}
func Node(name string) node { return node{name: name} }
func (n node) append(nn...node) node { n.children = append(n.children, nn...); return n }
%}
// Semantic value attached to grammar symbols; goyacc turns this into the
// fields of yySymType.
%union{
// node is the parse-tree fragment built by a rule's action.
node node
// token is a string slot that the lexer assigns the matched text to.
token string
}
// Operator tokens with their associativity. By yacc convention every
// following line binds tighter than the one before it. The rules section
// currently contains no expression rules, so for now these lines mainly
// serve to declare the tokens.
%left T_INCLUDE T_INCLUDE_ONCE T_EVAL T_REQUIRE T_REQUIRE_ONCE
%left ','
%left T_LOGICAL_OR
%left T_LOGICAL_XOR
%left T_LOGICAL_AND
%right T_PRINT
%right T_YIELD
%right T_DOUBLE_ARROW
%right T_YIELD_FROM
%left '=' T_PLUS_EQUAL T_MINUS_EQUAL T_MUL_EQUAL T_DIV_EQUAL T_CONCAT_EQUAL T_MOD_EQUAL T_AND_EQUAL T_OR_EQUAL T_XOR_EQUAL T_SL_EQUAL T_SR_EQUAL T_POW_EQUAL
%left '?' ':'
%right T_COALESCE
%left T_BOOLEAN_OR
%left T_BOOLEAN_AND
%left '|'
%left '^'
%left '&'
%nonassoc T_IS_EQUAL T_IS_NOT_EQUAL T_IS_IDENTICAL T_IS_NOT_IDENTICAL T_SPACESHIP
%nonassoc '<' T_IS_SMALLER_OR_EQUAL '>' T_IS_GREATER_OR_EQUAL
%left T_SL T_SR
%left '+' '-' '.'
%left '*' '/' '%'
%right '!'
%nonassoc T_INSTANCEOF
%right '~' T_INC T_DEC T_INT_CAST T_DOUBLE_CAST T_STRING_CAST T_ARRAY_CAST T_OBJECT_CAST T_BOOL_CAST T_UNSET_CAST '@'
%right T_POW
%right '['
%nonassoc T_NEW T_CLONE
// Control-flow and modifier keywords that also carry a precedence level.
%left T_ELSEIF
%left T_ELSE
%left T_ENDIF
%right T_STATIC T_ABSTRACT T_FINAL T_PRIVATE T_PROTECTED T_PUBLIC
// Remaining terminals, declared without precedence or associativity.
%token T_EXIT
%token T_IF
// Literals, names and raw text.
%token T_LNUMBER
%token T_DNUMBER
%token T_STRING
%token T_STRING_VARNAME
%token T_VARIABLE
%token T_NUM_STRING
%token T_INLINE_HTML
%token T_CHARACTER
%token T_BAD_CHARACTER
%token T_ENCAPSED_AND_WHITESPACE
%token T_CONSTANT_ENCAPSED_STRING
// Statement keywords.
%token T_ECHO
%token T_DO
%token T_WHILE
%token T_ENDWHILE
%token T_FOR
%token T_ENDFOR
%token T_FOREACH
%token T_ENDFOREACH
%token T_DECLARE
%token T_ENDDECLARE
%token T_AS
%token T_SWITCH
%token T_ENDSWITCH
%token T_CASE
%token T_DEFAULT
%token T_BREAK
%token T_CONTINUE
%token T_GOTO
%token T_FUNCTION
%token T_CONST
%token T_RETURN
%token T_TRY
%token T_CATCH
%token T_FINALLY
%token T_THROW
%token T_USE
%token T_INSTEADOF
%token T_GLOBAL
%token T_VAR
%token T_UNSET
%token T_ISSET
%token T_EMPTY
%token T_HALT_COMPILER
// Class-like declarations.
%token T_CLASS
%token T_TRAIT
%token T_INTERFACE
%token T_EXTENDS
%token T_IMPLEMENTS
%token T_OBJECT_OPERATOR
// NOTE(review): T_DOUBLE_ARROW is already declared by the earlier
// "%right T_DOUBLE_ARROW" line, so this declaration looks redundant --
// confirm it can be dropped.
%token T_DOUBLE_ARROW
%token T_LIST
%token T_ARRAY
%token T_CALLABLE
// Magic constants.
%token T_CLASS_C
%token T_TRAIT_C
%token T_METHOD_C
%token T_FUNC_C
%token T_LINE
%token T_FILE
// Comments, tags and whitespace.
%token T_COMMENT
%token T_DOC_COMMENT
%token T_OPEN_TAG
%token T_OPEN_TAG_WITH_ECHO
%token T_CLOSE_TAG
%token T_WHITESPACE
%token T_START_HEREDOC
%token T_END_HEREDOC
%token T_DOLLAR_OPEN_CURLY_BRACES
%token T_CURLY_OPEN
%token T_PAAMAYIM_NEKUDOTAYIM
%token T_NAMESPACE
%token T_NS_C
%token T_DIR
%token T_NS_SEPARATOR
%token T_ELLIPSIS
// identifier is the only nonterminal with a typed value: it yields the
// union's node field.
%type <node> identifier
%%
/////////////////////////////////////////////////////////////////////////
// start is the grammar's entry point: the whole input must be a single
// identifier, whose node is printed to standard output.
start:
identifier { fmt.Println($1) }
;
// reserved_non_modifiers lists the keyword tokens, other than the modifier
// keywords, that may stand where an identifier is expected.
reserved_non_modifiers:
T_INCLUDE | T_INCLUDE_ONCE | T_EVAL | T_REQUIRE | T_REQUIRE_ONCE | T_LOGICAL_OR | T_LOGICAL_XOR | T_LOGICAL_AND
| T_INSTANCEOF | T_NEW | T_CLONE | T_EXIT | T_IF | T_ELSEIF | T_ELSE | T_ENDIF | T_ECHO | T_DO | T_WHILE | T_ENDWHILE
| T_FOR | T_ENDFOR | T_FOREACH | T_ENDFOREACH | T_DECLARE | T_ENDDECLARE | T_AS | T_TRY | T_CATCH | T_FINALLY
| T_THROW | T_USE | T_INSTEADOF | T_GLOBAL | T_VAR | T_UNSET | T_ISSET | T_EMPTY | T_CONTINUE | T_GOTO
| T_FUNCTION | T_CONST | T_RETURN | T_PRINT | T_YIELD | T_LIST | T_SWITCH | T_ENDSWITCH | T_CASE | T_DEFAULT | T_BREAK
| T_ARRAY | T_CALLABLE | T_EXTENDS | T_IMPLEMENTS | T_NAMESPACE | T_TRAIT | T_INTERFACE | T_CLASS
| T_CLASS_C | T_TRAIT_C | T_FUNC_C | T_METHOD_C | T_LINE | T_FILE | T_DIR | T_NS_C
;
// semi_reserved extends reserved_non_modifiers with the modifier keywords.
semi_reserved:
reserved_non_modifiers
| T_STATIC | T_ABSTRACT | T_FINAL | T_PRIVATE | T_PROTECTED | T_PUBLIC
;
// identifier yields a node named "identifier" for a plain T_STRING and a node
// named "reserved" for any semi-reserved keyword; the matched text itself is
// not stored in the node.
identifier:
T_STRING { $$ = Node("identifier") }
| semi_reserved { $$ = Node("reserved") }
;
/////////////////////////////////////////////////////////////////////////
%%
const src = `<?
class
`
func main() {
yyDebug = 0
yyErrorVerbose = true
l := newLexer(bytes.NewBufferString(src), os.Stdout, "file.name")
yyParse(l)
}