handle new lines

This commit is contained in:
z7zmey 2017-12-03 09:17:05 +02:00
parent e7579c4e05
commit c23b899f55
4 changed files with 133 additions and 78 deletions

View File

@ -1,26 +1,40 @@
<?php
namespace z7zmey\Example;
use \Exception;
use z7zmey\Foo\{Bar, function Baz};
abstract class Foo extends Bar implements Buz, Buzz {
use \z7zmey\_Trait;
public const CC = 0;
public function &test(bool $a, string $b = null): ?void {
namespace z7zmey\Example {
use \Exception;
use z7zmey\Foo\{Bar, function Baz};
abstract class Foo extends Bar implements Buz, Buzz {
use \z7zmey\_Trait;
public const CC = 0;
public function &test(bool $a, string $b = null): ?void {
}
}
}
namespace z7zmey\Example2;
if ($a === true) {
} elseif ($a === false) {
} elseif ($a === null) {
} else {
}
$a = "string
with $var
";
$a = 'string
with out $var';
$a = <<<test
heredoc
$var
test;
?>
<?= $b; $b ?>

View File

@ -4,6 +4,7 @@ import (
"bufio"
"go/token"
"io"
"unicode"
"github.com/cznic/golex/lex"
)
@ -18,6 +19,21 @@ const (
// lexer extends the embedded *lex.Lexer (github.com/cznic/golex/lex) with a
// stack of scanner states and a running line counter.
//
// NOTE: newLexer builds this struct with a positional literal, so the field
// order below must not change.
type lexer struct {
*lex.Lexer
// stateStack holds scanner states; the last element is the current one
// (see getCurrentState). newLexer seeds it with a single 0 entry.
stateStack []int
// lineNumber is the current line; newLexer starts it at 1 and
// handleNewLine advances it once per line break in the bytes it is given.
lineNumber int
}
// rune2Class maps a rune to an integer class.
//
// ASCII runes (0x00-0x7F) are returned unchanged as their own code point.
// Any other rune is reported as classUnicodeLeter when it is a Unicode
// letter, classUnicodeDigit when it is a Unicode digit, and -1 otherwise.
func rune2Class(r rune) int {
	switch {
	case r >= 0 && r < 0x80:
		// Keep ASCII as it is.
		return int(r)
	case unicode.IsLetter(r):
		return classUnicodeLeter
	case unicode.IsDigit(r):
		return classUnicodeDigit
	default:
		// return classOther
		return -1
	}
}
func newLexer(src io.Reader, fName string) *lexer {
@ -26,7 +42,7 @@ func newLexer(src io.Reader, fName string) *lexer {
if err != nil {
panic(err)
}
return &lexer{lx, []int{0}}
return &lexer{lx, []int{0}, 1}
}
func (l *lexer) ungetN(n int) []byte {
@ -67,3 +83,24 @@ func (l *lexer) begin(state int) {
// getCurrentState returns the last element of the state stack without
// modifying the stack. It panics if the stack is empty.
func (l *lexer) getCurrentState() int {
	top := len(l.stateStack) - 1
	return l.stateStack[top]
}
// handleNewLine advances l.lineNumber by the number of line breaks found in
// str and returns the line number before and after the scan.
//
// A line break is "\n", a lone "\r", or the pair "\r\n" (counted once).
func (l *lexer) handleNewLine(str []byte) (int, int) {
	first := l.lineNumber

	for i, n := 0, len(str); i < n; i++ {
		switch str[i] {
		case '\n':
			l.lineNumber++
		case '\r':
			// A CR directly followed by LF is a single break that is
			// counted when the LF is reached; every other CR (including
			// one at the very end of str) is a break of its own.
			if i+1 == n || str[i+1] != '\n' {
				l.lineNumber++
			}
		}
	}

	return first, l.lineNumber
}

View File

@ -11,7 +11,6 @@ package main
import (
"bytes"
"fmt"
"unicode"
)
const (
@ -30,20 +29,6 @@ const (
var heredocLabel []byte
// rune2Class maps a rune to an integer class.
//
// ASCII runes (0x00-0x7F) are returned unchanged as their own code point.
// Any other rune is reported as classUnicodeLeter when it is a Unicode
// letter, classUnicodeDigit when it is a Unicode digit, and -1 otherwise.
func rune2Class(r rune) int {
	switch {
	case r >= 0 && r < 0x80:
		// Keep ASCII as it is.
		return int(r)
	case unicode.IsLetter(r):
		return classUnicodeLeter
	case unicode.IsDigit(r):
		return classUnicodeDigit
	default:
		// return classOther
		return -1
	}
}
func (l *lexer) Lex(lval *yySymType) int { // Lex(lval *yySymType)
c := l.Enter()
@ -7066,7 +7051,7 @@ yystart554:
goto yystate557
case c == '{':
goto yystate559
case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c == '#' || c >= '%' && c <= 'z' || c >= '|' && c <= 'ÿ':
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate555
}
@ -7675,8 +7660,10 @@ yystate614:
}
yyrule1: // [ \t\n\r]+
goto yystate0
{
l.handleNewLine(l.TokenBytes(nil))
goto yystate0
}
yyrule2: // .
{
@ -7697,12 +7684,13 @@ yyrule2: // .
}
c = l.Next()
}
l.handleNewLine(tb)
lval.token = string(tb)
return T_INLINE_HTML
goto yystate0
}
yyrule3: // \<\?php([ \t]|{NEW_LINE})
{
l.handleNewLine(l.TokenBytes(nil))
l.begin(PHP) //lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG;
goto yystate0
}
@ -7720,11 +7708,12 @@ yyrule5: // \<\?=
}
yyrule6: // [ \t\n\r]+
{
//lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE
l.handleNewLine(l.TokenBytes(nil)) //lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE
goto yystate0
}
yyrule7: // \?\>{NEW_LINE}?
{
l.handleNewLine(l.TokenBytes(nil))
l.begin(INITIAL)
lval.token = ";"
return rune2Class(';')
@ -8133,6 +8122,7 @@ yyrule67: // while
}
yyrule68: // yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff]
{
l.handleNewLine(l.TokenBytes(nil))
lval.token = string(l.TokenBytes(nil))
return T_YIELD_FROM
goto yystate0
@ -8475,21 +8465,22 @@ yyrule124: // \?\?
}
yyrule125: // (#|[/][/]).*{NEW_LINE}
{
// lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
l.handleNewLine(l.TokenBytes(nil)) // lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
goto yystate0
}
yyrule126: // [/][*][^*]*[*]+([^*/][^*]*[*]+)*[/]
{
// lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
l.handleNewLine(l.TokenBytes(nil)) // lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
goto yystate0
}
yyrule127: // [/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/]
{
// lval.token = string(l.TokenBytes(nil)); return T_DOC_COMMENT; // TODO: handle ?>
l.handleNewLine(l.TokenBytes(nil)) // lval.token = string(l.TokenBytes(nil)); return T_DOC_COMMENT; // TODO: handle ?>
goto yystate0
}
yyrule128: // '[^']*(\\')*'
{
l.handleNewLine(l.TokenBytes(nil))
lval.token = string(l.TokenBytes(nil))
return T_CONSTANT_ENCAPSED_STRING
goto yystate0
@ -8535,6 +8526,7 @@ yyrule134: // ->
}
yyrule135: // [ \t\n\r]+
{
l.handleNewLine(l.TokenBytes(nil))
lval.token = string(l.TokenBytes(nil))
return T_WHITESPACE
goto yystate0
@ -8631,9 +8623,9 @@ yyrule142: // [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["]))
}
}
l.ungetN(ungetCnt)
l.handleNewLine(tb)
lval.token = string(tb)
return T_START_HEREDOC
goto yystate0
}
yyrule143: // .
{
@ -8658,9 +8650,9 @@ yyrule143: // .
}
c = l.Next()
}
l.handleNewLine(tb)
lval.token = string(tb)
return T_ENCAPSED_AND_WHITESPACE
goto yystate0
}
yyrule144: // {VAR_NAME}\;
{
@ -8746,7 +8738,7 @@ yyrule149: // \$
l.pushState(STRING_VAR)
goto yystate0
}
yyrule150: // .
yyrule150: // .|[ \t\n\r]
{
F1:
@ -8756,6 +8748,7 @@ yyrule150: // .
}
switch c {
case '"':
l.handleNewLine(l.TokenBytes(nil))
lval.token = string(l.TokenBytes(nil))
return T_ENCAPSED_AND_WHITESPACE
break F1
@ -8765,6 +8758,7 @@ yyrule150: // .
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetN(1)
tb := l.TokenBytes(nil)
l.handleNewLine(tb[:len(tb)-1])
lval.token = string(tb[:len(tb)-1])
return T_ENCAPSED_AND_WHITESPACE
break F1
@ -8776,6 +8770,7 @@ yyrule150: // .
if rune(c) == '$' {
l.ungetN(1)
tb := l.TokenBytes(nil)
l.handleNewLine(tb[:len(tb)-1])
lval.token = string(tb[:len(tb)-1])
return T_ENCAPSED_AND_WHITESPACE
break F1
@ -8798,6 +8793,7 @@ yyrule151: // .
}
switch c {
case '`':
l.handleNewLine(l.TokenBytes(nil))
lval.token = string(l.TokenBytes(nil))
return T_ENCAPSED_AND_WHITESPACE
break F2
@ -8807,6 +8803,7 @@ yyrule151: // .
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetN(1)
tb := l.TokenBytes(nil)
l.handleNewLine(tb[:len(tb)-1])
lval.token = string(tb[:len(tb)-1])
return T_ENCAPSED_AND_WHITESPACE
break F2
@ -8818,6 +8815,7 @@ yyrule151: // .
if rune(c) == '$' {
l.ungetN(1)
tb := l.TokenBytes(nil)
l.handleNewLine(tb[:len(tb)-1])
lval.token = string(tb[:len(tb)-1])
return T_ENCAPSED_AND_WHITESPACE
break F2
@ -8880,9 +8878,9 @@ yyrule152: // .|[ \t\n\r]
c = l.Next()
}
l.handleNewLine(tb)
lval.token = string(tb)
return T_ENCAPSED_AND_WHITESPACE
goto yystate0
}
yyrule153: // \${VAR_NAME}
{
@ -8944,6 +8942,7 @@ yyrule161: // \]
}
yyrule162: // [ \n\r\t\\'#]
{
l.handleNewLine(l.TokenBytes(nil))
l.popState()
l.popState()
lval.token = string(l.TokenBytes(nil))

View File

@ -10,7 +10,6 @@ package main
import (
"fmt"
"bytes"
"unicode"
)
const (
@ -29,20 +28,6 @@ const (
var heredocLabel []byte
// rune2Class maps a rune to an integer class.
//
// ASCII runes (0x00-0x7F) are returned unchanged as their own code point.
// Any other rune is reported as classUnicodeLeter when it is a Unicode
// letter, classUnicodeDigit when it is a Unicode digit, and -1 otherwise.
func rune2Class(r rune) int {
	switch {
	case r >= 0 && r < 0x80:
		// Keep ASCII as it is.
		return int(r)
	case unicode.IsLetter(r):
		return classUnicodeLeter
	case unicode.IsDigit(r):
		return classUnicodeDigit
	default:
		// return classOther
		return -1
	}
}
func (l *lexer) Lex(lval *yySymType) int { // Lex(lval *yySymType)
c := l.Enter()
@ -68,7 +53,7 @@ NEW_LINE (\r|\n|\r\n)
%%
c = l.Rule0()
<INITIAL>[ \t\n\r]+
<INITIAL>[ \t\n\r]+ l.handleNewLine(l.TokenBytes(nil));
<INITIAL>.
tb := []byte{}
@ -92,14 +77,16 @@ NEW_LINE (\r|\n|\r\n)
c = l.Next()
}
lval.token = string(tb); return T_INLINE_HTML
l.handleNewLine(tb);
lval.token = string(tb);
return T_INLINE_HTML
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.begin(PHP);//lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG;
<INITIAL>\<\?php([ \t]|{NEW_LINE}) l.handleNewLine(l.TokenBytes(nil));l.begin(PHP);//lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG;
<INITIAL>\<\? l.begin(PHP);//lval.token = string(l.TokenBytes(nil)); return T_OPEN_TAG;
<INITIAL>\<\?= l.begin(PHP);lval.token = string(l.TokenBytes(nil)); return T_ECHO;
<PHP>[ \t\n\r]+ //lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE
<PHP>\?\>{NEW_LINE}? l.begin(INITIAL);lval.token = ";"; return rune2Class(';');
<PHP>[ \t\n\r]+ l.handleNewLine(l.TokenBytes(nil));//lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE
<PHP>\?\>{NEW_LINE}? l.handleNewLine(l.TokenBytes(nil));l.begin(INITIAL);lval.token = ";"; return rune2Class(';');
<PHP>{DNUM}|{EXPONENT_DNUM} lval.token = string(l.TokenBytes(nil)); return T_DNUMBER
<PHP>{BNUM}
@ -194,7 +181,7 @@ NEW_LINE (\r|\n|\r\n)
<PHP>use lval.token = string(l.TokenBytes(nil)); return T_USE
<PHP>var lval.token = string(l.TokenBytes(nil)); return T_VAR
<PHP>while lval.token = string(l.TokenBytes(nil)); return T_WHILE
<PHP>yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] lval.token = string(l.TokenBytes(nil)); return T_YIELD_FROM
<PHP>yield[ \t\n\r]+from[^a-zA-Z0-9_\x80-\xff] l.handleNewLine(l.TokenBytes(nil));lval.token = string(l.TokenBytes(nil)); return T_YIELD_FROM
<PHP>yield lval.token = string(l.TokenBytes(nil)); return T_YIELD
<PHP>include lval.token = string(l.TokenBytes(nil)); return T_INCLUDE
<PHP>include_once lval.token = string(l.TokenBytes(nil)); return T_INCLUDE_ONCE
@ -251,10 +238,10 @@ NEW_LINE (\r|\n|\r\n)
<PHP>\<\< lval.token = string(l.TokenBytes(nil)); return T_SL
<PHP>\>\> lval.token = string(l.TokenBytes(nil)); return T_SR
<PHP>\?\? lval.token = string(l.TokenBytes(nil)); return T_COALESCE
<PHP>(#|[/][/]).*{NEW_LINE} // lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
<PHP>[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] // lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
<PHP>[/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/] // lval.token = string(l.TokenBytes(nil)); return T_DOC_COMMENT; // TODO: handle ?>
<PHP>'[^']*(\\')*' lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING
<PHP>(#|[/][/]).*{NEW_LINE} l.handleNewLine(l.TokenBytes(nil));// lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
<PHP>[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] l.handleNewLine(l.TokenBytes(nil));// lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?>
<PHP>[/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/] l.handleNewLine(l.TokenBytes(nil));// lval.token = string(l.TokenBytes(nil)); return T_DOC_COMMENT; // TODO: handle ?>
<PHP>'[^']*(\\')*' l.handleNewLine(l.TokenBytes(nil));lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING
<PHP>{OPERATORS} lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0]))
<PHP>\{ l.pushState(PHP); lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0]))
@ -263,7 +250,7 @@ NEW_LINE (\r|\n|\r\n)
<PHP>{VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_STRING
<PHP>-> l.begin(PROPERTY);lval.token = string(l.TokenBytes(nil)); return T_OBJECT_OPERATOR;
<PROPERTY>[ \t\n\r]+ lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE;
<PROPERTY>[ \t\n\r]+ l.handleNewLine(l.TokenBytes(nil));lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE;
<PROPERTY>-> lval.token = string(l.TokenBytes(nil)); return T_OBJECT_OPERATOR;
<PROPERTY>{VAR_NAME} l.begin(PHP);lval.token = string(l.TokenBytes(nil)); return T_STRING;
<PROPERTY>. l.ungetN(1);l.begin(PHP)
@ -331,7 +318,9 @@ NEW_LINE (\r|\n|\r\n)
l.ungetN(ungetCnt)
lval.token = string(tb); return T_START_HEREDOC
l.handleNewLine(tb);
lval.token = string(tb);
return T_START_HEREDOC
<NOWDOC>.
searchLabel := []byte{}
@ -357,7 +346,9 @@ NEW_LINE (\r|\n|\r\n)
c = l.Next()
}
lval.token = string(tb); return T_ENCAPSED_AND_WHITESPACE
l.handleNewLine(tb);
lval.token = string(tb);
return T_ENCAPSED_AND_WHITESPACE
<HEREDOC_END>{VAR_NAME}\; l.begin(PHP);lval.token = string(l.ungetN(1)); return T_END_HEREDOC
@ -412,7 +403,7 @@ NEW_LINE (\r|\n|\r\n)
<STRING,HEREDOC,BACKQUOTE>\{\$ lval.token = string(l.ungetN(1)); l.pushState(PHP); return T_CURLY_OPEN
<STRING,HEREDOC,BACKQUOTE>\$\{ l.pushState(STRING_VAR_NAME);lval.token = string(l.TokenBytes(nil)); return T_DOLLAR_OPEN_CURLY_BRACES
<STRING,HEREDOC,BACKQUOTE>\$ l.ungetN(1);l.pushState(STRING_VAR)
<STRING>.
<STRING>.|[ \t\n\r]
F1:for {
if c == -1 {
break;
@ -420,7 +411,9 @@ NEW_LINE (\r|\n|\r\n)
switch c {
case '"' :
lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE
l.handleNewLine(l.TokenBytes(nil));
lval.token = string(l.TokenBytes(nil));
return T_ENCAPSED_AND_WHITESPACE
break F1;
case '$':
@ -428,7 +421,9 @@ NEW_LINE (\r|\n|\r\n)
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetN(1)
tb := l.TokenBytes(nil)
lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE
l.handleNewLine(tb[:len(tb)-1]);
lval.token = string(tb[:len(tb)-1]);
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
l.ungetN(0)
@ -438,7 +433,9 @@ NEW_LINE (\r|\n|\r\n)
if rune(c) == '$' {
l.ungetN(1)
tb := l.TokenBytes(nil)
lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE
l.handleNewLine(tb[:len(tb)-1]);
lval.token = string(tb[:len(tb)-1]);
return T_ENCAPSED_AND_WHITESPACE
break F1;
}
l.ungetN(0)
@ -457,8 +454,10 @@ NEW_LINE (\r|\n|\r\n)
}
switch c {
case '`' :
lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE
case '`' :
l.handleNewLine(l.TokenBytes(nil));
lval.token = string(l.TokenBytes(nil));
return T_ENCAPSED_AND_WHITESPACE
break F2;
case '$':
@ -466,7 +465,9 @@ NEW_LINE (\r|\n|\r\n)
if rune(c) == '{' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ' {
l.ungetN(1)
tb := l.TokenBytes(nil)
lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE
l.handleNewLine(tb[:len(tb)-1]);
lval.token = string(tb[:len(tb)-1]);
return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetN(0)
@ -476,7 +477,9 @@ NEW_LINE (\r|\n|\r\n)
if rune(c) == '$' {
l.ungetN(1)
tb := l.TokenBytes(nil)
lval.token = string(tb[:len(tb)-1]); return T_ENCAPSED_AND_WHITESPACE
l.handleNewLine(tb[:len(tb)-1]);
lval.token = string(tb[:len(tb)-1]);
return T_ENCAPSED_AND_WHITESPACE
break F2;
}
l.ungetN(0)
@ -537,7 +540,9 @@ NEW_LINE (\r|\n|\r\n)
c = l.Next()
}
lval.token = string(tb); return T_ENCAPSED_AND_WHITESPACE
l.handleNewLine(tb);
lval.token = string(tb);
return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR>\${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE
<STRING_VAR>->{VAR_NAME} lval.token = string(l.ungetN(len(l.TokenBytes(nil))-2)); return T_OBJECT_OPERATOR
@ -549,7 +554,7 @@ NEW_LINE (\r|\n|\r\n)
<STRING_VAR_INDEX>\${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE
<STRING_VAR_INDEX>{VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_STRING
<STRING_VAR_INDEX>\] l.popState(); l.popState();lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.popState(); l.popState();lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR_INDEX>[ \n\r\t\\'#] l.handleNewLine(l.TokenBytes(nil));l.popState(); l.popState();lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE
<STRING_VAR_INDEX>{OPERATORS} lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0]))
<STRING_VAR_INDEX>. lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0]))