#51 saving optional tokens and tokens that have different representation as meta

This commit is contained in:
z7zmey
2018-07-29 11:44:38 +03:00
parent 0138749c6d
commit 4989d31874
223 changed files with 9832 additions and 5976 deletions

View File

@@ -32,7 +32,7 @@ type Lexer struct {
*lex.Lexer
StateStack []int
PhpDocComment string
Meta []meta.Meta
Meta meta.Collection
heredocLabel string
tokenBytesBuf *bytes.Buffer
TokenPool *TokenPool
@@ -136,44 +136,6 @@ func (l *Lexer) createToken(chars []lex.Char) *Token {
return token
}
// addComments appends a comment meta entry built from chars to l.Meta.
// It is a no-op when l.WithMeta is false.
func (l *Lexer) addComments(chars []lex.Char) {
if !l.WithMeta {
return
}
// The position spans from the first to the last lexed character.
firstChar := chars[0]
lastChar := chars[len(chars)-1]
pos := position.NewPosition(
l.File.Line(firstChar.Pos()),
l.File.Line(lastChar.Pos()),
int(firstChar.Pos()),
int(lastChar.Pos()),
)
// The comment text is the string form of the lexed characters.
c := meta.NewComment(l.tokenString(chars), pos)
l.Meta = append(l.Meta, c)
}
// addWhiteSpace appends a whitespace meta entry built from chars to l.Meta.
// It is a no-op when l.WithMeta is false.
func (l *Lexer) addWhiteSpace(chars []lex.Char) {
if !l.WithMeta {
return
}
// The position spans from the first to the last lexed character.
firstChar := chars[0]
lastChar := chars[len(chars)-1]
pos := position.NewPosition(
l.File.Line(firstChar.Pos()),
l.File.Line(lastChar.Pos()),
int(firstChar.Pos()),
int(lastChar.Pos()),
)
// The whitespace text is the string form of the lexed characters.
c := meta.NewWhiteSpace(l.tokenString(chars), pos)
l.Meta = append(l.Meta, c)
}
func (l *Lexer) tokenString(chars []lex.Char) string {
l.tokenBytesBuf.Reset()
@@ -183,3 +145,27 @@ func (l *Lexer) tokenString(chars []lex.Char) string {
return string(l.tokenBytesBuf.Bytes())
}
// meta
// addMeta pushes a meta entry of type mt, built from the lexed characters in
// chars, onto l.Meta.
//
// It does nothing when l.WithMeta is false. It also does nothing when chars
// is empty: there is then no first or last character to derive a position
// from, and indexing the slice would panic.
//
// The entry's position runs from the first to the last character of chars,
// with line numbers resolved through l.File; its value is the string form of
// chars as produced by l.tokenString.
func (l *Lexer) addMeta(mt meta.Type, chars []lex.Char) {
	if !l.WithMeta || len(chars) == 0 {
		return
	}

	firstChar := chars[0]
	lastChar := chars[len(chars)-1]

	pos := position.NewPosition(
		l.File.Line(firstChar.Pos()),
		l.File.Line(lastChar.Pos()),
		int(firstChar.Pos()),
		int(lastChar.Pos()),
	)

	l.Meta.Push(&meta.Data{
		Value:    l.tokenString(chars),
		Type:     mt,
		Position: pos,
	})
}

View File

@@ -11,6 +11,7 @@ package scanner
import (
"fmt"
"github.com/cznic/golex/lex"
"github.com/z7zmey/php-parser/meta"
)
const (
@@ -7554,8 +7555,10 @@ yystate604:
goto yyrule168
yyrule1: // [ \t\n\r]+
goto yystate0
{
l.addMeta(meta.WhiteSpaceType, l.Token())
goto yystate0
}
yyrule2: // .
{
@@ -7581,12 +7584,14 @@ yyrule2: // .
}
yyrule3: // \<\?php([ \t]|{NEW_LINE})
{
l.addMeta(meta.TokenType, l.Token()[:5])
l.Begin(PHP)
l.ungetChars(len(l.Token()) - 5)
goto yystate0
}
yyrule4: // \<\?
{
l.addMeta(meta.TokenType, l.Token())
l.Begin(PHP)
goto yystate0
}
@@ -7599,7 +7604,7 @@ yyrule5: // \<\?=
}
yyrule6: // [ \t\n\r]+
{
l.addWhiteSpace(l.Token())
l.addMeta(meta.WhiteSpaceType, l.Token())
goto yystate0
}
yyrule7: // [;][ \t\n\r]*\?\>{NEW_LINE}?
@@ -8397,13 +8402,13 @@ yyrule126: // (#|[/][/])
}
break
}
l.addComments(tb)
l.addMeta(meta.CommentType, tb)
goto yystate0
}
yyrule127: // [/][*][*][/]
{
l.addComments(l.Token())
l.addMeta(meta.CommentType, l.Token())
goto yystate0
}
yyrule128: // ([/][*])|([/][*][*])
@@ -8428,9 +8433,9 @@ yyrule128: // ([/][*])|([/][*][*])
}
if is_doc_comment {
l.PhpDocComment = string(l.TokenBytes(nil))
l.addComments(l.Token())
l.addMeta(meta.CommentType, l.Token())
} else {
l.addComments(l.Token())
l.addMeta(meta.CommentType, l.Token())
}
goto yystate0
}
@@ -8476,7 +8481,7 @@ yyrule134: // ->
}
yyrule135: // [ \t\n\r]+
{
l.addWhiteSpace(l.Token())
l.addMeta(meta.WhiteSpaceType, l.Token())
goto yystate0
}
yyrule136: // ->

View File

@@ -9,6 +9,7 @@ package scanner
import (
"fmt"
"github.com/z7zmey/php-parser/meta"
"github.com/cznic/golex/lex"
)
@@ -58,7 +59,7 @@ NEW_LINE (\r|\n|\r\n)
%%
c = l.Rule0()
<INITIAL>[ \t\n\r]+
<INITIAL>[ \t\n\r]+ l.addMeta(meta.WhiteSpaceType, l.Token())
<INITIAL>.
tb := []lex.Char{}
@@ -85,11 +86,12 @@ NEW_LINE (\r|\n|\r\n)
lval.Token(l.createToken(tb))
return int(T_INLINE_HTML)
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.Begin(PHP);l.ungetChars(len(l.Token())-5)
<INITIAL>\<\? l.Begin(PHP);
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.addMeta(meta.TokenType, l.Token()[:5]);l.Begin(PHP);l.ungetChars(len(l.Token())-5)
<INITIAL>\<\? l.addMeta(meta.TokenType, l.Token());l.Begin(PHP);
<INITIAL>\<\?= l.Begin(PHP);lval.Token(l.createToken(l.Token())); return int(T_ECHO);
<PHP>[ \t\n\r]+ l.addWhiteSpace(l.Token())
<PHP>[ \t\n\r]+ l.addMeta(meta.WhiteSpaceType, l.Token())
<PHP>[;][ \t\n\r]*\?\>{NEW_LINE}? l.Begin(INITIAL);lval.Token(l.createToken(l.Token())); return Rune2Class(';');
<PHP>\?\>{NEW_LINE}? l.Begin(INITIAL);lval.Token(l.createToken(l.Token())); return Rune2Class(';');
@@ -286,10 +288,10 @@ NEW_LINE (\r|\n|\r\n)
break;
}
l.addComments(tb)
l.addMeta(meta.CommentType, tb)
<PHP>[/][*][*][/]
l.addComments(l.Token())
l.addMeta(meta.CommentType, l.Token())
<PHP>([/][*])|([/][*][*])
tb := l.Token()
is_doc_comment := false
@@ -314,9 +316,9 @@ NEW_LINE (\r|\n|\r\n)
if is_doc_comment {
l.PhpDocComment = string(l.TokenBytes(nil))
l.addComments(l.Token())
l.addMeta(meta.CommentType, l.Token())
} else {
l.addComments(l.Token())
l.addMeta(meta.CommentType, l.Token())
}
<PHP>{OPERATORS} lval.Token(l.createToken(l.Token())); return Rune2Class(rune(l.TokenBytes(nil)[0]))
@@ -327,7 +329,7 @@ NEW_LINE (\r|\n|\r\n)
<PHP>{VAR_NAME} lval.Token(l.createToken(l.Token())); return int(T_STRING)
<PHP>-> l.Begin(PROPERTY);lval.Token(l.createToken(l.Token())); return int(T_OBJECT_OPERATOR);
<PROPERTY>[ \t\n\r]+ l.addWhiteSpace(l.Token())
<PROPERTY>[ \t\n\r]+ l.addMeta(meta.WhiteSpaceType, l.Token())
<PROPERTY>-> lval.Token(l.createToken(l.Token())); return int(T_OBJECT_OPERATOR);
<PROPERTY>{VAR_NAME} l.Begin(PHP);lval.Token(l.createToken(l.Token())); return int(T_STRING);
<PROPERTY>. l.ungetChars(1);l.Begin(PHP)

View File

@@ -967,9 +967,22 @@ func TestSlashAfterVariable(t *testing.T) {
func TestCommentEnd(t *testing.T) {
src := `<?php //test`
expected := []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(1, 1, 6, 6)),
meta.NewComment("//test", position.NewPosition(1, 1, 7, 12)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 1, 6, 6),
},
&meta.Data{
Value: "//test",
Type: meta.CommentType,
Position: position.NewPosition(1, 1, 7, 12),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -986,9 +999,22 @@ func TestCommentEnd(t *testing.T) {
func TestCommentNewLine(t *testing.T) {
src := "<?php //test\n$a"
expected := []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(1, 1, 6, 6)),
meta.NewComment("//test\n", position.NewPosition(1, 1, 7, 13)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 1, 6, 6),
},
&meta.Data{
Value: "//test\n",
Type: meta.CommentType,
Position: position.NewPosition(1, 1, 7, 13),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -1005,9 +1031,22 @@ func TestCommentNewLine(t *testing.T) {
func TestCommentNewLine1(t *testing.T) {
src := "<?php //test\r$a"
expected := []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(1, 1, 6, 6)),
meta.NewComment("//test\r", position.NewPosition(1, 1, 7, 13)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 1, 6, 6),
},
&meta.Data{
Value: "//test\r",
Type: meta.CommentType,
Position: position.NewPosition(1, 1, 7, 13),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -1024,9 +1063,22 @@ func TestCommentNewLine1(t *testing.T) {
func TestCommentNewLine2(t *testing.T) {
src := "<?php #test\r\n$a"
expected := []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(1, 1, 6, 6)),
meta.NewComment("#test\r\n", position.NewPosition(1, 1, 7, 13)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 1, 6, 6),
},
&meta.Data{
Value: "#test\r\n",
Type: meta.CommentType,
Position: position.NewPosition(1, 1, 7, 13),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -1044,9 +1096,22 @@ func TestCommentWithPhpEndTag(t *testing.T) {
src := `<?php
//test?> test`
expected := []meta.Meta{
meta.NewWhiteSpace("\n\t", position.NewPosition(1, 2, 6, 7)),
meta.NewComment("//test", position.NewPosition(2, 2, 8, 13)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: "\n\t",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 2, 6, 7),
},
&meta.Data{
Value: "//test",
Type: meta.CommentType,
Position: position.NewPosition(2, 2, 8, 13),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -1064,9 +1129,22 @@ func TestInlineComment(t *testing.T) {
src := `<?php
/*test*/`
expected := []meta.Meta{
meta.NewWhiteSpace("\n\t", position.NewPosition(1, 2, 6, 7)),
meta.NewComment("/*test*/", position.NewPosition(2, 2, 8, 15)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: "\n\t",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 2, 6, 7),
},
&meta.Data{
Value: "/*test*/",
Type: meta.CommentType,
Position: position.NewPosition(2, 2, 8, 15),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -1084,9 +1162,22 @@ func TestInlineComment2(t *testing.T) {
src := `<?php
/*/*/`
expected := []meta.Meta{
meta.NewWhiteSpace("\n\t", position.NewPosition(1, 2, 6, 7)),
meta.NewComment("/*/*/", position.NewPosition(2, 2, 8, 12)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: "\n\t",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 2, 6, 7),
},
&meta.Data{
Value: "/*/*/",
Type: meta.CommentType,
Position: position.NewPosition(2, 2, 8, 12),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -1104,10 +1195,27 @@ func TestEmptyInlineComment(t *testing.T) {
src := `<?php
/**/ `
expected := []meta.Meta{
meta.NewWhiteSpace("\n\t", position.NewPosition(1, 2, 6, 7)),
meta.NewComment("/**/", position.NewPosition(2, 2, 8, 11)),
meta.NewWhiteSpace(" ", position.NewPosition(2, 2, 12, 12)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: "\n\t",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 2, 6, 7),
},
&meta.Data{
Value: "/**/",
Type: meta.CommentType,
Position: position.NewPosition(2, 2, 8, 11),
},
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(2, 2, 12, 12),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -1125,9 +1233,22 @@ func TestEmptyInlineComment2(t *testing.T) {
src := `<?php
/***/`
expected := []meta.Meta{
meta.NewWhiteSpace("\n\t", position.NewPosition(1, 2, 6, 7)),
meta.NewComment("/***/", position.NewPosition(2, 2, 8, 12)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: "\n\t",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 2, 6, 7),
},
&meta.Data{
Value: "/***/",
Type: meta.CommentType,
Position: position.NewPosition(2, 2, 8, 12),
},
}
lexer := scanner.NewLexer(bytes.NewBufferString(src), "test.php")
@@ -1149,50 +1270,83 @@ func TestMethodCallTokens(t *testing.T) {
lexer.WithMeta = true
lv := &lval{}
expected := []meta.Meta{
meta.NewWhiteSpace("\n\t", position.NewPosition(1, 2, 6, 7)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: "\n\t",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 2, 6, 7),
},
}
lexer.Lex(lv)
actual := lv.Tkn.Meta
assertEqual(t, expected, actual)
expected = []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(2, 2, 10, 10)),
expected = meta.Collection{
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(2, 2, 10, 10),
},
}
lexer.Lex(lv)
actual = lv.Tkn.Meta
assertEqual(t, expected, actual)
expected = []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(2, 2, 13, 13)),
expected = meta.Collection{
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(2, 2, 13, 13),
},
}
lexer.Lex(lv)
actual = lv.Tkn.Meta
assertEqual(t, expected, actual)
expected = []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(2, 2, 17, 17)),
expected = meta.Collection{
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(2, 2, 17, 17),
},
}
lexer.Lex(lv)
actual = lv.Tkn.Meta
assertEqual(t, expected, actual)
expected = []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(2, 2, 19, 19)),
expected = meta.Collection{
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(2, 2, 19, 19),
},
}
lexer.Lex(lv)
actual = lv.Tkn.Meta
assertEqual(t, expected, actual)
expected = []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(2, 2, 22, 22)),
expected = meta.Collection{
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(2, 2, 22, 22),
},
}
lexer.Lex(lv)
actual = lv.Tkn.Meta
assertEqual(t, expected, actual)
expected = []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(2, 2, 24, 24)),
expected = meta.Collection{
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(2, 2, 24, 24),
},
}
lexer.Lex(lv)
actual = lv.Tkn.Meta
@@ -1207,15 +1361,28 @@ func TestYieldFromTokens(t *testing.T) {
lexer.WithMeta = true
lv := &lval{}
expected := []meta.Meta{
meta.NewWhiteSpace("\n\t", position.NewPosition(1, 2, 6, 7)),
expected := meta.Collection{
&meta.Data{
Value: "<?php",
Type: meta.TokenType,
Position: position.NewPosition(1, 1, 1, 5),
},
&meta.Data{
Value: "\n\t",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(1, 2, 6, 7),
},
}
lexer.Lex(lv)
actual := lv.Tkn.Meta
assertEqual(t, expected, actual)
expected = []meta.Meta{
meta.NewWhiteSpace(" ", position.NewPosition(2, 2, 18, 18)),
expected = meta.Collection{
&meta.Data{
Value: " ",
Type: meta.WhiteSpaceType,
Position: position.NewPosition(2, 2, 18, 18),
},
}
lexer.Lex(lv)
actual = lv.Tkn.Meta

View File

@@ -7,7 +7,7 @@ import (
// Token value returned by lexer
type Token struct {
Value string
Meta []meta.Meta
Meta meta.Collection
StartLine int
EndLine int
StartPos int

View File

@@ -18,8 +18,12 @@ func TestToken(t *testing.T) {
EndPos: 3,
}
c := []meta.Meta{
meta.NewComment("test comment", nil),
c := meta.Collection{
&meta.Data{
Value: "test comment",
Type: meta.CommentType,
Position: nil,
},
}
tkn.Meta = c