From abf8af04f828539a28252111c897933de11f1a5c Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 16 Nov 2017 19:00:34 +0200 Subject: [PATCH] base heredoc handler --- php-parser.go | 386 ++++++++++++++++++++++++++++++-------------------- php-parser.l | 71 +++++++++- 2 files changed, 300 insertions(+), 157 deletions(-) diff --git a/php-parser.go b/php-parser.go index 854fb90..cb958b5 100644 --- a/php-parser.go +++ b/php-parser.go @@ -10,6 +10,7 @@ package main import ( "bufio" + "bytes" "fmt" "go/token" "io" @@ -36,6 +37,7 @@ const ( STRING_VAR_INDEX STRING_VAR_NAME PROPERTY + HEREDOC_END ) type lexer struct { @@ -43,6 +45,7 @@ type lexer struct { } var stateStack = []int{PHP} +var heredocLabel []byte func pushState(state int) { sc = state @@ -150,6 +153,8 @@ yystate0: goto yystart501 case 6: // start condition: PROPERTY goto yystart506 + case 7: // start condition: HEREDOC_END + goto yystart514 } goto yystate0 // silence unused label error @@ -436,6 +441,8 @@ yyAction: goto yyrule139 case 140: goto yyrule140 + case 141: + goto yyrule141 } goto yystate1 // silence unused label error yystate1: @@ -639,9 +646,9 @@ yystart11: yystate12: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() - goto yyrule140 + goto yyrule141 yystate13: c = l.Next() @@ -695,17 +702,17 @@ yystate17: yystate18: c = l.Next() - yyrule = 120 + yyrule = 121 l.Mark() - goto yyrule120 + goto yyrule121 yystate19: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == '\n': goto yystate20 case c == '\r': @@ -795,11 +802,11 @@ yystate28: yystate29: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == '\'': goto yystate31 case c == '\\': @@ -1979,11 +1986,11 @@ yystate151: yystate152: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == '_': goto yystate153 } @@ -2520,11 +2527,11 @@ yystate212: yystate213: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'b': goto yystate214 case c == 'n': @@ -2651,11 +2658,11 @@ yystate227: yystate228: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == '"': goto yystate229 case c == '<': @@ -2666,9 +2673,9 @@ yystate228: yystate229: c = l.Next() - yyrule = 120 + yyrule = 121 l.Mark() - goto yyrule120 + goto yyrule121 yystate230: c = l.Next() @@ -2723,11 +2730,11 @@ yystate235: yystate236: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'a': goto yystate237 case c == 'f': @@ -3048,11 +3055,11 @@ yystate272: yystate273: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'e': goto yystate274 case c == 'i': @@ -3179,11 +3186,11 @@ yystate287: yystate288: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'c': goto yystate289 case c == 'l': @@ -3555,11 +3562,11 @@ yystate329: yystate330: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'i': goto yystate331 case c == 'o': @@ -3676,11 +3683,11 @@ yystate342: yystate343: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'l': goto yystate344 case c == 'o': @@ -3755,11 +3762,11 @@ yystate351: yystate352: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'f': goto yystate353 case c == 'm': @@ -4038,11 +4045,11 @@ yystate383: yystate384: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'a': goto yystate385 case c == 'e': @@ -4135,11 +4142,11 @@ yystate394: yystate395: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'r': goto yystate396 } @@ -4152,11 +4159,11 @@ yystate396: yystate397: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'r': goto yystate398 case c == 'u': @@ -4320,11 +4327,11 @@ yystate415: yystate416: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'e': goto yystate417 } @@ -4373,11 +4380,11 @@ yystate421: yystate422: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 't': goto yystate423 case c == 'w': @@ -4470,11 +4477,11 @@ yystate432: yystate433: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'h': goto yystate434 case c == 'r': @@ -4557,11 +4564,11 @@ yystate442: yystate443: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 's': goto yystate444 } @@ -4583,11 +4590,11 @@ yystate445: yystate446: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'a': goto yystate447 } @@ -4609,11 +4616,11 @@ yystate448: yystate449: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'h': goto yystate450 } @@ -4653,11 +4660,11 @@ yystate453: yystate454: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'o': goto yystate455 } @@ -4679,11 +4686,11 @@ yystate456: yystate457: c = l.Next() - yyrule = 140 + yyrule = 141 l.Mark() switch { default: - goto yyrule140 + goto yyrule141 case c == 'i': goto yystate458 } @@ -4829,49 +4836,49 @@ yystart472: yystate473: c = l.Next() - yyrule = 125 + yyrule = 126 l.Mark() - goto yyrule125 + goto yyrule126 yystate474: c = l.Next() - yyrule = 121 + yyrule = 122 l.Mark() - goto yyrule121 + goto yyrule122 yystate475: - c = l.Next() - yyrule = 124 - l.Mark() - switch { - default: - goto yyrule124 - case c == '{': - goto yystate476 - } - -yystate476: - c = l.Next() - yyrule = 123 - l.Mark() - goto yyrule123 - -yystate477: c = l.Next() yyrule = 125 l.Mark() switch { default: goto yyrule125 + case c == '{': + goto yystate476 + } + +yystate476: + c = l.Next() + yyrule = 124 + l.Mark() + goto yyrule124 + +yystate477: + c = l.Next() + yyrule = 126 + l.Mark() + switch { + default: + goto yyrule126 case c == '$': goto yystate478 } yystate478: c = l.Next() - yyrule = 122 + yyrule = 123 l.Mark() - goto yyrule122 + goto yyrule123 goto yystate479 // silence unused label error yystate479: @@ -4894,39 +4901,39 @@ yystart479: yystate480: c = l.Next() - yyrule = 130 + yyrule = 131 l.Mark() - goto yyrule130 + goto yyrule131 yystate481: c = l.Next() - yyrule = 130 + yyrule = 131 l.Mark() switch { default: - goto yyrule130 + goto yyrule131 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate482 } yystate482: c = l.Next() - yyrule = 126 + yyrule = 127 l.Mark() switch { default: - goto yyrule126 + goto yyrule127 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate482 } yystate483: c = l.Next() - yyrule = 130 + yyrule = 131 l.Mark() switch { default: - goto yyrule130 + goto yyrule131 case c == '>': goto yystate484 } @@ -4942,42 +4949,42 @@ yystate484: yystate485: c = l.Next() - yyrule = 127 + yyrule = 128 l.Mark() switch { default: - goto yyrule127 + goto yyrule128 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate485 } yystate486: c = l.Next() - yyrule = 128 + yyrule = 129 l.Mark() switch { default: - goto yyrule128 + goto yyrule129 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate487 } yystate487: c = l.Next() - yyrule = 128 + yyrule = 129 l.Mark() switch { default: - goto yyrule128 + goto yyrule129 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate487 } yystate488: c = l.Next() - yyrule = 129 + yyrule = 130 l.Mark() - goto yyrule129 + goto yyrule130 goto yystate489 // silence unused label error yystate489: @@ -5006,99 +5013,99 @@ yystart489: yystate490: c = l.Next() - yyrule = 137 + yyrule = 138 l.Mark() - goto yyrule137 + goto yyrule138 yystate491: - c = l.Next() - yyrule = 135 - l.Mark() - goto yyrule135 - -yystate492: - c = l.Next() - yyrule = 135 - l.Mark() - goto yyrule135 - -yystate493: c = l.Next() yyrule = 136 l.Mark() goto yyrule136 -yystate494: +yystate492: c = l.Next() yyrule = 136 l.Mark() + goto yyrule136 + +yystate493: + c = l.Next() + yyrule = 137 + l.Mark() + goto yyrule137 + +yystate494: + c = l.Next() + yyrule = 137 + l.Mark() switch { default: - goto yyrule136 + goto yyrule137 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate495 } yystate495: c = l.Next() - yyrule = 132 + yyrule = 133 l.Mark() switch { default: - goto yyrule132 + goto yyrule133 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate495 } yystate496: c = l.Next() - yyrule = 131 + yyrule = 132 l.Mark() switch { default: - goto yyrule131 + goto yyrule132 case c >= '0' && c <= '9': goto yystate497 } yystate497: c = l.Next() - yyrule = 131 + yyrule = 132 l.Mark() switch { default: - goto yyrule131 + goto yyrule132 case c >= '0' && c <= '9': goto yystate497 } yystate498: c = l.Next() - yyrule = 133 + yyrule = 134 l.Mark() switch { default: - goto yyrule133 + goto yyrule134 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate499 } yystate499: c = l.Next() - yyrule = 133 + yyrule = 134 l.Mark() switch { default: - goto yyrule133 + goto yyrule134 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': goto yystate499 } yystate500: c = l.Next() - yyrule = 134 + yyrule = 135 l.Mark() - goto yyrule134 + goto yyrule135 goto yystate501 // silence unused label error yystate501: @@ -5115,17 +5122,17 @@ yystart501: yystate502: c = l.Next() - yyrule = 139 + yyrule = 140 l.Mark() - goto yyrule139 + goto yyrule140 yystate503: c = l.Next() - yyrule = 139 + yyrule = 140 l.Mark() switch { default: - goto yyrule139 + goto yyrule140 case c == '[' || c == '}': goto yystate505 case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': @@ -5145,9 +5152,9 @@ yystate504: yystate505: c = l.Next() - yyrule = 138 + yyrule = 139 l.Mark() - goto yyrule138 + goto yyrule139 goto yystate506 // silence unused label error yystate506: @@ -5235,6 +5242,34 @@ yystate513: goto yystate513 } + goto yystate514 // silence unused label error +yystate514: + c = l.Next() +yystart514: + switch { + default: + goto yyabort + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': + goto yystate515 + } + +yystate515: + c = l.Next() + switch { + default: + goto yyabort + case c == ';': + goto yystate516 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ': + goto yystate515 + } + +yystate516: + c = l.Next() + yyrule = 120 + l.Mark() + goto yyrule120 + yyrule1: // [ \t\n\r]+ goto yystate0 @@ -5832,15 +5867,66 @@ yyrule119: // [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])) { tb := l.TokenBytes(nil) - fmt.Println(string(tb[len(tb)-2])) - // switch la { - // case '\'' : fmt.Println("ST_NOWDOC");//begin(ST_NOWDOC) - // case '"' : fmt.Println("ST_HEREDOC");//begin(ST_HEREDOC) - // } - fmt.Printf("T_START_HEREDOC: %s\n", l.TokenBytes(nil)) + binPrefix := 0 + if tb[0] == 'b' { + binPrefix = 1 + } + + lblFirst := 3 + binPrefix + lblLast := len(tb) - 2 + if tb[lblLast] == '\r' { + lblLast-- + } + for { + if tb[lblFirst] == ' ' || tb[lblFirst] == '\t' { + lblFirst++ + continue + } + break + } + switch tb[lblFirst] { + case '\'': + lblFirst++ + lblLast-- + fmt.Println("NOWDOC") + //begin(NOWDOC) + case '"': + lblFirst++ + lblLast-- + fmt.Println("HEREDOC") + //begin(HEREDOC) + default: + //begin(HEREDOC) + } + heredocLabel = tb[lblFirst : lblLast+1] + + ungetCnt := len(heredocLabel) + searchLabelAhead := []byte{} + for i := 0; i < len(heredocLabel); i++ { + if c == -1 { + break + } + searchLabelAhead = append(searchLabelAhead, byte(rune(c))) + c = l.Next() + } + if bytes.Equal(heredocLabel, searchLabelAhead) && ';' == rune(c) { + ungetCnt++ + c = l.Next() + if '\n' == rune(c) || '\r' == rune(c) { + begin(HEREDOC_END) + } + } + l.ungetN(ungetCnt) + fmt.Printf("T_START_HEREDOC: %q\n", tb) goto yystate0 } -yyrule120: // [b]?[\"] +yyrule120: // {VAR_NAME}\; + { + fmt.Printf("T_END_HEREDOC: %q\n", l.ungetN(1)) + begin(PHP) + goto yystate0 + } +yyrule121: // [b]?[\"] { binPrefix := l.TokenBytes(nil)[0] == 'b' @@ -5888,31 +5974,31 @@ yyrule120: // [b]?[\"] } goto yystate0 } -yyrule121: // \" +yyrule122: // \" { fmt.Println("\"") popState() goto yystate0 } -yyrule122: // \{\$ +yyrule123: // \{\$ { fmt.Printf("T_CURLY_OPEN: %q\n", l.ungetN(1)) pushState(PHP) goto yystate0 } -yyrule123: // \$\{ +yyrule124: // \$\{ { fmt.Printf("T_DOLLAR_OPEN_CURLY_BRACES: %q\n", l.TokenBytes(nil)) pushState(STRING_VAR_NAME) goto yystate0 } -yyrule124: // \$ +yyrule125: // \$ { l.ungetN(1) begin(STRING_VAR) goto yystate0 } -yyrule125: // . +yyrule126: // . { F1: @@ -5951,86 +6037,86 @@ yyrule125: // . } goto yystate0 } -yyrule126: // \${VAR_NAME} +yyrule127: // \${VAR_NAME} { fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil)) goto yystate0 } -yyrule127: // ->{VAR_NAME} +yyrule128: // ->{VAR_NAME} { fmt.Printf("T_OBJECT_OPERATOR: %q\n", l.ungetN(len(l.TokenBytes(nil))-2)) goto yystate0 } -yyrule128: // {VAR_NAME} +yyrule129: // {VAR_NAME} { fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil)) begin(STRING) goto yystate0 } -yyrule129: // \[ +yyrule130: // \[ { fmt.Println("[") begin(STRING_VAR_INDEX) goto yystate0 } -yyrule130: // . +yyrule131: // . { l.ungetN(1) begin(STRING) goto yystate0 } -yyrule131: // {LNUM} +yyrule132: // {LNUM} { fmt.Printf("T_NUM_STRING: %q\n", l.TokenBytes(nil)) goto yystate0 } -yyrule132: // \${VAR_NAME} +yyrule133: // \${VAR_NAME} { fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil)) goto yystate0 } -yyrule133: // {VAR_NAME} +yyrule134: // {VAR_NAME} { fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil)) goto yystate0 } -yyrule134: // \] +yyrule135: // \] { fmt.Println("\"]\"") begin(STRING) goto yystate0 } -yyrule135: // [ \n\r\t\\'#] +yyrule136: // [ \n\r\t\\'#] { fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", l.ungetN(1)) begin(STRING) goto yystate0 } -yyrule136: // {OPERATORS} +yyrule137: // {OPERATORS} { fmt.Printf("%q\n", l.TokenBytes(nil)) goto yystate0 } -yyrule137: // . +yyrule138: // . { fmt.Printf("%q\n", l.TokenBytes(nil)) goto yystate0 } -yyrule138: // {VAR_NAME}[\[\}] +yyrule139: // {VAR_NAME}[\[\}] { fmt.Printf("T_STRING_VARNAME: %q\n", l.ungetN(1)) popState() pushState(PHP) goto yystate0 } -yyrule139: // . +yyrule140: // . { l.ungetN(1) popState() pushState(PHP) goto yystate0 } -yyrule140: // . +yyrule141: // . { fmt.Printf("other: %q\n", l.TokenBytes(nil)) goto yystate0 diff --git a/php-parser.l b/php-parser.l index d43da45..bf25c7c 100644 --- a/php-parser.l +++ b/php-parser.l @@ -14,6 +14,7 @@ import ( "unicode" "fmt" "os" + "bytes" "github.com/cznic/golex/lex" ) @@ -35,6 +36,7 @@ const ( STRING_VAR_INDEX STRING_VAR_NAME PROPERTY + HEREDOC_END ) @@ -43,6 +45,7 @@ type lexer struct { } var stateStack = []int{PHP} +var heredocLabel []byte func pushState(state int) { sc = state @@ -127,7 +130,7 @@ func (l *lexer) Lex() int { // Lex(lval *yySymType) %} -%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY +%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY HEREDOC_END %yyb last == '\n' || last = '\0' %yyt sc @@ -274,12 +277,66 @@ NEW_LINE (\r|\n|\r\n) [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE} tb := l.TokenBytes(nil) - fmt.Println(string(tb[len(tb)-2])) - // switch la { - // case '\'' : fmt.Println("ST_NOWDOC");//begin(ST_NOWDOC) - // case '"' : fmt.Println("ST_HEREDOC");//begin(ST_HEREDOC) - // } - fmt.Printf("T_START_HEREDOC: %s\n", l.TokenBytes(nil)); + binPrefix := 0 + if tb[0] == 'b' { + binPrefix = 1 + } + + lblFirst := 3 + binPrefix + lblLast := len(tb)-2 + if tb[lblLast] == '\r' { + lblLast-- + } + + for { + if tb[lblFirst] == ' ' || tb[lblFirst] == '\t' { + lblFirst++ + continue + } + + break + } + + switch tb[lblFirst] { + case '\'' : + lblFirst++ + lblLast-- + fmt.Println("NOWDOC"); + //begin(NOWDOC) + case '"' : + lblFirst++ + lblLast-- + fmt.Println("HEREDOC"); + //begin(HEREDOC) + default: + //begin(HEREDOC) + } + + heredocLabel = tb[lblFirst:lblLast+1] + + ungetCnt := len(heredocLabel) + searchLabelAhead := []byte{} + for i := 0; i < len(heredocLabel); i++ { + if c == -1 { + break; + } + searchLabelAhead = append(searchLabelAhead, byte(rune(c))) + c = l.Next() + } + + if bytes.Equal(heredocLabel, searchLabelAhead) && ';' == rune(c) { + ungetCnt++ + c = l.Next() + if '\n' == rune(c) || '\r' == rune(c) { + begin(HEREDOC_END) + } + } + + l.ungetN(ungetCnt) + + fmt.Printf("T_START_HEREDOC: %q\n", tb); + +{VAR_NAME}\; fmt.Printf("T_END_HEREDOC: %q\n", l.ungetN(1));begin(PHP) [b]?[\"] binPrefix := l.TokenBytes(nil)[0] == 'b'