php8: fixed a bug with # comments (#22)

Example from issue:
```php
<?php
#
# Comment
#

$a = 100;
```

In the example from the issue, `#` is immediately followed by a line break.
Because the lexer rule for such comments had been changed, this case was handled incorrectly.

```
(('#' ^'[') | '//') any_line* when is_not_comment_end => {
   lex.ungetStr("?>")
   lex.addFreeFloatingToken(tkn, token.T_COMMENT, lex.ts, lex.te)
};
```

This rule has one problem: it checks two characters at once, first matching `#` and then
requiring a character other than `[`. In the case of an empty comment, the first matcher
captures `#` and the second one captures the line break (`\n`). As a result, the `any_line`
matcher does not fire and the line number is not incremented.

The following rule was added specifically for this case.

```
'#' newline when is_not_comment_end => {
    lex.ungetStr("?>")
    lex.addFreeFloatingToken(tkn, token.T_COMMENT, lex.ts, lex.te)
};
```
This commit is contained in:
Makhnev Petr 2021-08-02 12:37:08 +03:00 committed by GitHub
parent d846011a9b
commit d85f5a4816
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 12654 additions and 12360 deletions

File diff suppressed because it is too large Load Diff

View File

@ -336,13 +336,21 @@ func (lex *Lexer) Lex() *token.Token {
'(' whitespace* ('string'i|'binary'i) whitespace* ')' => {lex.setTokenPosition(tkn); tok = token.T_STRING_CAST; fbreak;};
'(' whitespace* 'unset'i whitespace* ')' => {lex.error(fmt.Sprintf("The (unset) cast is no longer supported")); fbreak;};
# For case '#\n'
'#' newline when is_not_comment_end => {
lex.ungetStr("?>")
lex.addFreeFloatingToken(tkn, token.T_COMMENT, lex.ts, lex.te)
};
(('#' ^'[') | '//') any_line* when is_not_comment_end => {
lex.ungetStr("?>")
lex.addFreeFloatingToken(tkn, token.T_COMMENT, lex.ts, lex.te)
};
'#' => {
lex.addFreeFloatingToken(tkn, token.T_COMMENT, lex.ts, lex.te)
};
'/*' any_line* :>> '*/' {
isDocComment := false;
if lex.te - lex.ts > 4 && string(lex.data[lex.ts:lex.ts+3]) == "/**" {

View File

@ -6,6 +6,7 @@ import (
"github.com/VKCOM/php-parser/internal/php8"
"github.com/VKCOM/php-parser/internal/tester"
"github.com/VKCOM/php-parser/pkg/conf"
"github.com/VKCOM/php-parser/pkg/position"
"github.com/VKCOM/php-parser/pkg/token"
"gotest.tools/assert"
)
@ -344,3 +345,62 @@ func TestNamespaceRelativeTokens(t *testing.T) {
}
suite.Run()
}
// TestSharpCommentTokens checks that `#` comments, including empty ones where
// `#` is immediately followed by a line break, are reported as T_COMMENT
// free-floating tokens with the expected byte offsets and line numbers.
func TestSharpCommentTokens(t *testing.T) {
	suite := tester.NewLexerTokenStructTestSuite(t)
	suite.UsePHP8()
	suite.WithPosition()
	suite.WithFreeFloating()

	// The source is spelled with explicit escapes so the blank line between
	// the last comment and `$a;` cannot be lost: the expected whitespace token
	// at offsets 18-19 (line 5) and `$a` on line 6 both depend on it.
	suite.Code = "<?php\n" +
		"#\n" +
		"# Hello\n" +
		"#\n" +
		"\n" +
		"$a;\n"

	suite.Expected = []*token.Token{
		{
			ID:    php8.T_VARIABLE,
			Value: []byte(`$a`),
			// Everything preceding `$a` is attached to it as free-floating tokens.
			FreeFloating: []*token.Token{
				{
					ID:       php8.T_OPEN_TAG,
					Value:    []byte("<?php"),
					Position: &position.Position{StartLine: 1, EndLine: 1, EndPos: 5},
				},
				{
					ID:       php8.T_WHITESPACE,
					Value:    []byte("\n"),
					Position: &position.Position{StartLine: 1, EndLine: 1, StartPos: 5, EndPos: 6},
				},
				{
					// Empty comment: `#` directly followed by a line break.
					ID:       php8.T_COMMENT,
					Value:    []byte("#\n"),
					Position: &position.Position{StartLine: 2, EndLine: 2, StartPos: 6, EndPos: 8},
				},
				{
					ID:       php8.T_COMMENT,
					Value:    []byte("# Hello\n"),
					Position: &position.Position{StartLine: 3, EndLine: 3, StartPos: 8, EndPos: 16},
				},
				{
					// Second empty comment; its line number must still advance.
					ID:       php8.T_COMMENT,
					Value:    []byte("#\n"),
					Position: &position.Position{StartLine: 4, EndLine: 4, StartPos: 16, EndPos: 18},
				},
				{
					// The blank line separating the comments from the statement.
					ID:       php8.T_WHITESPACE,
					Value:    []byte("\n"),
					Position: &position.Position{StartLine: 5, EndLine: 5, StartPos: 18, EndPos: 19},
				},
			},
			Position: &position.Position{StartLine: 6, EndLine: 6, StartPos: 19, EndPos: 21},
		},
		{
			ID:       ';',
			Value:    []byte(";"),
			Position: &position.Position{StartLine: 6, EndLine: 6, StartPos: 21, EndPos: 22},
		},
	}

	suite.Run()
}

View File

@ -18,6 +18,9 @@ type LexerTokenStructTestSuite struct {
Expected []*token.Token
Version version.Version
withPosition bool
withFreeFloating bool
}
func NewLexerTokenStructTestSuite(t *testing.T) *LexerTokenStructTestSuite {
@ -34,6 +37,14 @@ func (l *LexerTokenStructTestSuite) UsePHP8() {
l.Version = version.Version{Major: 8, Minor: 0}
}
// WithPosition makes Run keep the Position of each lexed token, so that
// positions take part in the comparison against Expected.
func (l *LexerTokenStructTestSuite) WithPosition() {
l.withPosition = true
}
// WithFreeFloating makes Run keep the FreeFloating tokens of each lexed
// token, so that they take part in the comparison against Expected.
func (l *LexerTokenStructTestSuite) WithFreeFloating() {
l.withFreeFloating = true
}
func (l *LexerTokenStructTestSuite) Run() {
config := conf.Config{
Version: &l.Version,
@ -49,8 +60,12 @@ func (l *LexerTokenStructTestSuite) Run() {
for _, expected := range l.Expected {
actual := lexer.Lex()
actual.Position = nil
actual.FreeFloating = nil
if !l.withPosition {
actual.Position = nil
}
if !l.withFreeFloating {
actual.FreeFloating = nil
}
assert.DeepEqual(l.t, expected, actual)
}
}