base heredoc handler

This commit is contained in:
vadim 2017-11-16 19:00:34 +02:00
parent 25dfb3f277
commit abf8af04f8
2 changed files with 300 additions and 157 deletions

View File

@ -10,6 +10,7 @@ package main
import (
"bufio"
"bytes"
"fmt"
"go/token"
"io"
@ -36,6 +37,7 @@ const (
STRING_VAR_INDEX
STRING_VAR_NAME
PROPERTY
HEREDOC_END
)
type lexer struct {
@ -43,6 +45,7 @@ type lexer struct {
}
var stateStack = []int{PHP}
var heredocLabel []byte
func pushState(state int) {
sc = state
@ -150,6 +153,8 @@ yystate0:
goto yystart501
case 6: // start condition: PROPERTY
goto yystart506
case 7: // start condition: HEREDOC_END
goto yystart514
}
goto yystate0 // silence unused label error
@ -436,6 +441,8 @@ yyAction:
goto yyrule139
case 140:
goto yyrule140
case 141:
goto yyrule141
}
goto yystate1 // silence unused label error
yystate1:
@ -639,9 +646,9 @@ yystart11:
yystate12:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
goto yyrule140
goto yyrule141
yystate13:
c = l.Next()
@ -695,17 +702,17 @@ yystate17:
yystate18:
c = l.Next()
yyrule = 120
yyrule = 121
l.Mark()
goto yyrule120
goto yyrule121
yystate19:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == '\n':
goto yystate20
case c == '\r':
@ -795,11 +802,11 @@ yystate28:
yystate29:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == '\'':
goto yystate31
case c == '\\':
@ -1979,11 +1986,11 @@ yystate151:
yystate152:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == '_':
goto yystate153
}
@ -2520,11 +2527,11 @@ yystate212:
yystate213:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'b':
goto yystate214
case c == 'n':
@ -2651,11 +2658,11 @@ yystate227:
yystate228:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == '"':
goto yystate229
case c == '<':
@ -2666,9 +2673,9 @@ yystate228:
yystate229:
c = l.Next()
yyrule = 120
yyrule = 121
l.Mark()
goto yyrule120
goto yyrule121
yystate230:
c = l.Next()
@ -2723,11 +2730,11 @@ yystate235:
yystate236:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'a':
goto yystate237
case c == 'f':
@ -3048,11 +3055,11 @@ yystate272:
yystate273:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'e':
goto yystate274
case c == 'i':
@ -3179,11 +3186,11 @@ yystate287:
yystate288:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'c':
goto yystate289
case c == 'l':
@ -3555,11 +3562,11 @@ yystate329:
yystate330:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'i':
goto yystate331
case c == 'o':
@ -3676,11 +3683,11 @@ yystate342:
yystate343:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'l':
goto yystate344
case c == 'o':
@ -3755,11 +3762,11 @@ yystate351:
yystate352:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'f':
goto yystate353
case c == 'm':
@ -4038,11 +4045,11 @@ yystate383:
yystate384:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'a':
goto yystate385
case c == 'e':
@ -4135,11 +4142,11 @@ yystate394:
yystate395:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'r':
goto yystate396
}
@ -4152,11 +4159,11 @@ yystate396:
yystate397:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'r':
goto yystate398
case c == 'u':
@ -4320,11 +4327,11 @@ yystate415:
yystate416:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'e':
goto yystate417
}
@ -4373,11 +4380,11 @@ yystate421:
yystate422:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 't':
goto yystate423
case c == 'w':
@ -4470,11 +4477,11 @@ yystate432:
yystate433:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'h':
goto yystate434
case c == 'r':
@ -4557,11 +4564,11 @@ yystate442:
yystate443:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 's':
goto yystate444
}
@ -4583,11 +4590,11 @@ yystate445:
yystate446:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'a':
goto yystate447
}
@ -4609,11 +4616,11 @@ yystate448:
yystate449:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'h':
goto yystate450
}
@ -4653,11 +4660,11 @@ yystate453:
yystate454:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'o':
goto yystate455
}
@ -4679,11 +4686,11 @@ yystate456:
yystate457:
c = l.Next()
yyrule = 140
yyrule = 141
l.Mark()
switch {
default:
goto yyrule140
goto yyrule141
case c == 'i':
goto yystate458
}
@ -4829,49 +4836,49 @@ yystart472:
yystate473:
c = l.Next()
yyrule = 125
yyrule = 126
l.Mark()
goto yyrule125
goto yyrule126
yystate474:
c = l.Next()
yyrule = 121
yyrule = 122
l.Mark()
goto yyrule121
goto yyrule122
yystate475:
c = l.Next()
yyrule = 124
l.Mark()
switch {
default:
goto yyrule124
case c == '{':
goto yystate476
}
yystate476:
c = l.Next()
yyrule = 123
l.Mark()
goto yyrule123
yystate477:
c = l.Next()
yyrule = 125
l.Mark()
switch {
default:
goto yyrule125
case c == '{':
goto yystate476
}
yystate476:
c = l.Next()
yyrule = 124
l.Mark()
goto yyrule124
yystate477:
c = l.Next()
yyrule = 126
l.Mark()
switch {
default:
goto yyrule126
case c == '$':
goto yystate478
}
yystate478:
c = l.Next()
yyrule = 122
yyrule = 123
l.Mark()
goto yyrule122
goto yyrule123
goto yystate479 // silence unused label error
yystate479:
@ -4894,39 +4901,39 @@ yystart479:
yystate480:
c = l.Next()
yyrule = 130
yyrule = 131
l.Mark()
goto yyrule130
goto yyrule131
yystate481:
c = l.Next()
yyrule = 130
yyrule = 131
l.Mark()
switch {
default:
goto yyrule130
goto yyrule131
case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate482
}
yystate482:
c = l.Next()
yyrule = 126
yyrule = 127
l.Mark()
switch {
default:
goto yyrule126
goto yyrule127
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate482
}
yystate483:
c = l.Next()
yyrule = 130
yyrule = 131
l.Mark()
switch {
default:
goto yyrule130
goto yyrule131
case c == '>':
goto yystate484
}
@ -4942,42 +4949,42 @@ yystate484:
yystate485:
c = l.Next()
yyrule = 127
yyrule = 128
l.Mark()
switch {
default:
goto yyrule127
goto yyrule128
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate485
}
yystate486:
c = l.Next()
yyrule = 128
yyrule = 129
l.Mark()
switch {
default:
goto yyrule128
goto yyrule129
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate487
}
yystate487:
c = l.Next()
yyrule = 128
yyrule = 129
l.Mark()
switch {
default:
goto yyrule128
goto yyrule129
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate487
}
yystate488:
c = l.Next()
yyrule = 129
yyrule = 130
l.Mark()
goto yyrule129
goto yyrule130
goto yystate489 // silence unused label error
yystate489:
@ -5006,99 +5013,99 @@ yystart489:
yystate490:
c = l.Next()
yyrule = 137
yyrule = 138
l.Mark()
goto yyrule137
goto yyrule138
yystate491:
c = l.Next()
yyrule = 135
l.Mark()
goto yyrule135
yystate492:
c = l.Next()
yyrule = 135
l.Mark()
goto yyrule135
yystate493:
c = l.Next()
yyrule = 136
l.Mark()
goto yyrule136
yystate494:
yystate492:
c = l.Next()
yyrule = 136
l.Mark()
goto yyrule136
yystate493:
c = l.Next()
yyrule = 137
l.Mark()
goto yyrule137
yystate494:
c = l.Next()
yyrule = 137
l.Mark()
switch {
default:
goto yyrule136
goto yyrule137
case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate495
}
yystate495:
c = l.Next()
yyrule = 132
yyrule = 133
l.Mark()
switch {
default:
goto yyrule132
goto yyrule133
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate495
}
yystate496:
c = l.Next()
yyrule = 131
yyrule = 132
l.Mark()
switch {
default:
goto yyrule131
goto yyrule132
case c >= '0' && c <= '9':
goto yystate497
}
yystate497:
c = l.Next()
yyrule = 131
yyrule = 132
l.Mark()
switch {
default:
goto yyrule131
goto yyrule132
case c >= '0' && c <= '9':
goto yystate497
}
yystate498:
c = l.Next()
yyrule = 133
yyrule = 134
l.Mark()
switch {
default:
goto yyrule133
goto yyrule134
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate499
}
yystate499:
c = l.Next()
yyrule = 133
yyrule = 134
l.Mark()
switch {
default:
goto yyrule133
goto yyrule134
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate499
}
yystate500:
c = l.Next()
yyrule = 134
yyrule = 135
l.Mark()
goto yyrule134
goto yyrule135
goto yystate501 // silence unused label error
yystate501:
@ -5115,17 +5122,17 @@ yystart501:
yystate502:
c = l.Next()
yyrule = 139
yyrule = 140
l.Mark()
goto yyrule139
goto yyrule140
yystate503:
c = l.Next()
yyrule = 139
yyrule = 140
l.Mark()
switch {
default:
goto yyrule139
goto yyrule140
case c == '[' || c == '}':
goto yystate505
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
@ -5145,9 +5152,9 @@ yystate504:
yystate505:
c = l.Next()
yyrule = 138
yyrule = 139
l.Mark()
goto yyrule138
goto yyrule139
goto yystate506 // silence unused label error
yystate506:
@ -5235,6 +5242,34 @@ yystate513:
goto yystate513
}
goto yystate514 // silence unused label error
// States 514-516 form the sub-automaton for the HEREDOC_END start condition
// (the start-condition dispatch jumps to yystart514 for case 7). Together they
// accept rule 120: an identifier immediately followed by ';'.
yystate514:
c = l.Next()
yystart514:
switch {
default:
// Anything that cannot start an identifier: jump to yyabort.
goto yyabort
case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
// First identifier character: letter, underscore, or a rune in \u007f..ÿ.
goto yystate515
}
yystate515:
c = l.Next()
switch {
default:
// Identifier not followed by ';': no rule matches in this start condition.
goto yyabort
case c == ';':
goto yystate516
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
// Further identifier characters (digits are allowed from the second position on).
goto yystate515
}
yystate516:
// Accepting state: select rule 120 and run its action.
// l.Mark() presumably records the accept position (lexer helper not shown here).
c = l.Next()
yyrule = 120
l.Mark()
goto yyrule120
yyrule1: // [ \t\n\r]+
goto yystate0
@ -5832,15 +5867,66 @@ yyrule119: // [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["]))
{
// Action for the heredoc/nowdoc opening line:
// optional 'b', "<<<", optional blanks, a bare or quoted label, then a newline.
// It extracts the label into heredocLabel and peeks ahead to detect a closing
// label that follows immediately.
// NOTE(review): this view interleaves removed and added diff lines; the debug
// print of tb[len(tb)-2], the commented-out switch and the "%s" T_START_HEREDOC
// print directly below appear to be the pre-change lines - confirm against the
// real file.
tb := l.TokenBytes(nil)
fmt.Println(string(tb[len(tb)-2]))
// switch la {
// case '\'' : fmt.Println("ST_NOWDOC");//begin(ST_NOWDOC)
// case '"' : fmt.Println("ST_HEREDOC");//begin(ST_HEREDOC)
// }
fmt.Printf("T_START_HEREDOC: %s\n", l.TokenBytes(nil))
// binPrefix is 1 when the token starts with the optional 'b' prefix.
binPrefix := 0
if tb[0] == 'b' {
binPrefix = 1
}
// lblFirst starts right after "<<<"; lblLast starts at the byte before the
// final newline byte and steps back once more when that byte is '\r'
// (a "\r\n" terminator).
lblFirst := 3 + binPrefix
lblLast := len(tb) - 2
if tb[lblLast] == '\r' {
lblLast--
}
// Skip the blanks between "<<<" and the label.
for {
if tb[lblFirst] == ' ' || tb[lblFirst] == '\t' {
lblFirst++
continue
}
break
}
// A quoted label loses both quote characters. The start-condition switches
// are still commented out, so this only prints which kind was seen.
switch tb[lblFirst] {
case '\'':
lblFirst++
lblLast--
fmt.Println("NOWDOC")
//begin(NOWDOC)
case '"':
lblFirst++
lblLast--
fmt.Println("HEREDOC")
//begin(HEREDOC)
default:
//begin(HEREDOC)
}
// NOTE(review): heredocLabel is a sub-slice of tb, not a copy. If the lexer
// reuses the buffer behind TokenBytes the stored label could change later -
// confirm, or copy the bytes.
heredocLabel = tb[lblFirst : lblLast+1]
ungetCnt := len(heredocLabel)
// Look ahead: collect up to len(heredocLabel) characters, starting with the
// current lookahead c, to see whether the closing label follows directly.
// NOTE(review): byte(rune(c)) keeps only one byte per rune, while labels may
// contain runes up to 'ÿ'; if tb is UTF-8 encoded such labels would never
// compare equal - confirm.
searchLabelAhead := []byte{}
for i := 0; i < len(heredocLabel); i++ {
if c == -1 {
break
}
searchLabelAhead = append(searchLabelAhead, byte(rune(c)))
c = l.Next()
}
// Closing label + ';' + line break right after the opening line: hand over to
// the HEREDOC_END start condition. In every other case no start condition is
// changed here.
if bytes.Equal(heredocLabel, searchLabelAhead) && ';' == rune(c) {
ungetCnt++
c = l.Next()
if '\n' == rune(c) || '\r' == rune(c) {
begin(HEREDOC_END)
}
}
// Push the peeked characters back so they are scanned again.
// NOTE(review): when the loop above stops early on c == -1, fewer than
// ungetCnt characters were read - confirm ungetN tolerates the larger count.
l.ungetN(ungetCnt)
fmt.Printf("T_START_HEREDOC: %q\n", tb)
goto yystate0
}
yyrule120: // [b]?[\"]
yyrule120: // {VAR_NAME}\;
{
// Closing heredoc label matched in the HEREDOC_END start condition.
// ungetN(1) presumably pushes the trailing ';' back and returns the remaining
// token bytes (helper not shown here - confirm), so only the label is printed.
fmt.Printf("T_END_HEREDOC: %q\n", l.ungetN(1))
// Continue scanning in the PHP start condition.
begin(PHP)
goto yystate0
}
yyrule121: // [b]?[\"]
{
binPrefix := l.TokenBytes(nil)[0] == 'b'
@ -5888,31 +5974,31 @@ yyrule120: // [b]?[\"]
}
goto yystate0
}
yyrule121: // \"
yyrule122: // \"
{
fmt.Println("\"")
popState()
goto yystate0
}
yyrule122: // \{\$
yyrule123: // \{\$
{
fmt.Printf("T_CURLY_OPEN: %q\n", l.ungetN(1))
pushState(PHP)
goto yystate0
}
yyrule123: // \$\{
yyrule124: // \$\{
{
fmt.Printf("T_DOLLAR_OPEN_CURLY_BRACES: %q\n", l.TokenBytes(nil))
pushState(STRING_VAR_NAME)
goto yystate0
}
yyrule124: // \$
yyrule125: // \$
{
l.ungetN(1)
begin(STRING_VAR)
goto yystate0
}
yyrule125: // .
yyrule126: // .
{
F1:
@ -5951,86 +6037,86 @@ yyrule125: // .
}
goto yystate0
}
yyrule126: // \${VAR_NAME}
yyrule127: // \${VAR_NAME}
{
fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule127: // ->{VAR_NAME}
yyrule128: // ->{VAR_NAME}
{
fmt.Printf("T_OBJECT_OPERATOR: %q\n", l.ungetN(len(l.TokenBytes(nil))-2))
goto yystate0
}
yyrule128: // {VAR_NAME}
yyrule129: // {VAR_NAME}
{
fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil))
begin(STRING)
goto yystate0
}
yyrule129: // \[
yyrule130: // \[
{
fmt.Println("[")
begin(STRING_VAR_INDEX)
goto yystate0
}
yyrule130: // .
yyrule131: // .
{
l.ungetN(1)
begin(STRING)
goto yystate0
}
yyrule131: // {LNUM}
yyrule132: // {LNUM}
{
fmt.Printf("T_NUM_STRING: %q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule132: // \${VAR_NAME}
yyrule133: // \${VAR_NAME}
{
fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule133: // {VAR_NAME}
yyrule134: // {VAR_NAME}
{
fmt.Printf("T_STRING: %q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule134: // \]
yyrule135: // \]
{
fmt.Println("\"]\"")
begin(STRING)
goto yystate0
}
yyrule135: // [ \n\r\t\\'#]
yyrule136: // [ \n\r\t\\'#]
{
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", l.ungetN(1))
begin(STRING)
goto yystate0
}
yyrule136: // {OPERATORS}
yyrule137: // {OPERATORS}
{
fmt.Printf("%q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule137: // .
yyrule138: // .
{
fmt.Printf("%q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule138: // {VAR_NAME}[\[\}]
yyrule139: // {VAR_NAME}[\[\}]
{
fmt.Printf("T_STRING_VARNAME: %q\n", l.ungetN(1))
popState()
pushState(PHP)
goto yystate0
}
yyrule139: // .
yyrule140: // .
{
l.ungetN(1)
popState()
pushState(PHP)
goto yystate0
}
yyrule140: // .
yyrule141: // .
{
fmt.Printf("other: %q\n", l.TokenBytes(nil))
goto yystate0

View File

@ -14,6 +14,7 @@ import (
"unicode"
"fmt"
"os"
"bytes"
"github.com/cznic/golex/lex"
)
@ -35,6 +36,7 @@ const (
STRING_VAR_INDEX
STRING_VAR_NAME
PROPERTY
HEREDOC_END
)
@ -43,6 +45,7 @@ type lexer struct {
}
var stateStack = []int{PHP}
var heredocLabel []byte
func pushState(state int) {
sc = state
@ -127,7 +130,7 @@ func (l *lexer) Lex() int { // Lex(lval *yySymType)
%}
%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY
%s PHP STRING STRING_VAR STRING_VAR_INDEX STRING_VAR_NAME PROPERTY HEREDOC_END
%yyb last == '\n' || last == '\0'
%yyt sc
@ -274,12 +277,66 @@ NEW_LINE (\r|\n|\r\n)
<PHP>[b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE}
tb := l.TokenBytes(nil)
// Action for the heredoc/nowdoc opening line matched by the pattern above:
// optional 'b', "<<<", optional blanks, a bare or quoted label, then NEW_LINE.
// It extracts the label into heredocLabel and peeks ahead to detect a closing
// label that follows immediately.
// NOTE(review): this view interleaves removed and added diff lines; the debug
// print of tb[len(tb)-2], the commented-out switch and the "%s" T_START_HEREDOC
// print directly below appear to be the pre-change lines - confirm against the
// real file.
fmt.Println(string(tb[len(tb)-2]))
// switch la {
// case '\'' : fmt.Println("ST_NOWDOC");//begin(ST_NOWDOC)
// case '"' : fmt.Println("ST_HEREDOC");//begin(ST_HEREDOC)
// }
fmt.Printf("T_START_HEREDOC: %s\n", l.TokenBytes(nil));
// binPrefix is 1 when the token starts with the optional 'b' prefix.
binPrefix := 0
if tb[0] == 'b' {
binPrefix = 1
}
// lblFirst starts right after "<<<"; lblLast starts at the byte before the
// final newline byte and steps back once more when that byte is '\r'
// (a "\r\n" terminator).
lblFirst := 3 + binPrefix
lblLast := len(tb)-2
if tb[lblLast] == '\r' {
lblLast--
}
// Skip the blanks between "<<<" and the label.
for {
if tb[lblFirst] == ' ' || tb[lblFirst] == '\t' {
lblFirst++
continue
}
break
}
// A quoted label loses both quote characters. The start-condition switches
// are still commented out, so this only prints which kind was seen.
switch tb[lblFirst] {
case '\'' :
lblFirst++
lblLast--
fmt.Println("NOWDOC");
//begin(NOWDOC)
case '"' :
lblFirst++
lblLast--
fmt.Println("HEREDOC");
//begin(HEREDOC)
default:
//begin(HEREDOC)
}
// NOTE(review): heredocLabel is a sub-slice of tb, not a copy. If the lexer
// reuses the buffer behind TokenBytes the stored label could change later -
// confirm, or copy the bytes.
heredocLabel = tb[lblFirst:lblLast+1]
ungetCnt := len(heredocLabel)
// Look ahead: collect up to len(heredocLabel) characters, starting with the
// current lookahead c, to see whether the closing label follows directly.
// NOTE(review): byte(rune(c)) keeps only one byte per rune, while VAR_NAME
// labels may contain non-ASCII runes; if tb is UTF-8 encoded such labels would
// never compare equal - confirm.
searchLabelAhead := []byte{}
for i := 0; i < len(heredocLabel); i++ {
if c == -1 {
break;
}
searchLabelAhead = append(searchLabelAhead, byte(rune(c)))
c = l.Next()
}
// Closing label + ';' + line break right after the opening line: hand over to
// the HEREDOC_END start condition. In every other case no start condition is
// changed here.
if bytes.Equal(heredocLabel, searchLabelAhead) && ';' == rune(c) {
ungetCnt++
c = l.Next()
if '\n' == rune(c) || '\r' == rune(c) {
begin(HEREDOC_END)
}
}
// Push the peeked characters back so they are scanned again.
// NOTE(review): when the loop above stops early on c == -1, fewer than
// ungetCnt characters were read - confirm ungetN tolerates the larger count.
l.ungetN(ungetCnt)
fmt.Printf("T_START_HEREDOC: %q\n", tb);
<HEREDOC_END>{VAR_NAME}\; fmt.Printf("T_END_HEREDOC: %q\n", l.ungetN(1));begin(PHP)
<PHP>[b]?[\"]
binPrefix := l.TokenBytes(nil)[0] == 'b'