first commit

This commit is contained in:
z7zmey 2017-11-07 08:21:38 +02:00
commit 599def1384
5 changed files with 1245 additions and 0 deletions

20
Makefile Normal file
View File

@ -0,0 +1,20 @@
# Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# blame: jnml, labs.nic.cz
# None of these targets name a real file, so always run them on request.
.PHONY: all run clean nuke

# Build the scanner binary from the generated Go source.
all: php-parser.go
	go build

# Build, then start the scanner.
run: all
	./php-parser

# Regenerate the Go scanner from the golex specification.
php-parser.go: php-parser.l
	golex -t $< | gofmt > $@

# Remove generated sources and editor backups.
clean:
	rm -f php-parser.go lex.yy.go y.output *~

# Additionally remove built binaries: php-parser is the one started by "run";
# example is kept for trees that still contain the old example binary.
nuke: clean
	rm -f php-parser example

79
c-like.l Normal file
View File

@ -0,0 +1,79 @@
%{
// Copyright (c) 2015 The golex Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This is an example program using golex run time library.
package main
import (
"bufio"
"go/token"
"io"
"unicode"
"github.com/cznic/golex/lex"
)
// Allocate Character classes anywhere in [0x80, 0xFF].
// rune2Class folds every non-ASCII rune into one of these classes; the
// unicodeLetter (\x80) and unicodeDigit (\x81) definitions in the scanner
// specification below refer to them by value.
const (
classUnicodeLeter = iota + 0x80 // 0x80: runes for which unicode.IsLetter is true
classUnicodeDigit // 0x81: runes for which unicode.IsDigit is true
classOther // 0x82: any other non-ASCII rune
)
// lexer embeds the golex run time *lex.Lexer so that the generated Lex
// method can be declared on it.
type lexer struct {
*lex.Lexer
}
// rune2Class maps a rune to the character class the scanner tables work on.
// ASCII runes (0..0x7F) are their own class; any other rune is reduced to
// classUnicodeLeter, classUnicodeDigit or classOther, tested in that order.
func rune2Class(r rune) int {
	switch {
	case r >= 0 && r < 0x80:
		// Keep ASCII as it is.
		return int(r)
	case unicode.IsLetter(r):
		return classUnicodeLeter
	case unicode.IsDigit(r):
		return classUnicodeDigit
	default:
		return classOther
	}
}
// newLexer builds a lexer that reads from src and classifies runes with
// rune2Class. fName is the name registered for the input in a fresh
// token.FileSet. dst is accepted but not used by this function.
//
// It panics if lex.New reports an error.
func newLexer(src io.Reader, dst io.Writer, fName string) *lexer {
	// A negative base lets the file set choose the base offset; the size is
	// the largest int32 because the real input length is not known up front.
	fset := token.NewFileSet()
	file := fset.AddFile(fName, -1, 1<<31-1)

	input := bufio.NewReader(src)
	lx, err := lex.New(file, input, lex.RuneClass(rune2Class))
	if err != nil {
		panic(err)
	}
	return &lexer{Lexer: lx}
}
// Lex is the scanner entry point; its body is produced by golex from the
// definitions and rules that follow this prologue.
//
// Runs of blanks, tabs, carriage returns and newlines are skipped. For every
// other rule the matched text is stored in lval.token and the rule's token
// code (COMMENT, FUNC, IDENT, INT or OP) is returned. When no rule matches,
// the trailer after the rules returns int(c) if l.Abort() reports ok and
// otherwise jumps back to the action dispatcher.
//
// A line comment is "//" followed by any number of non-newline characters,
// including none, so an empty "//" is a COMMENT and not two OP tokens.
func (l *lexer) Lex(lval *yySymType) int {
	c := l.Enter()
%}

%yyc c
%yyn c = l.Next()
%yym l.Mark()

digit [0-9]|{unicodeDigit}
identifier {letter}({letter}|{digit})*
int {digit}+
letter [_a-zA-Z]|{unicodeLetter}
unicodeDigit \x81
unicodeLetter \x80
op [-+*/]

%%
	c = l.Rule0()

[ \t\r\n]+

[/][/][^\n]* lval.token = string(l.TokenBytes(nil)); return COMMENT
func lval.token = string(l.TokenBytes(nil)); return FUNC
{identifier} lval.token = string(l.TokenBytes(nil)); return IDENT
{int} lval.token = string(l.TokenBytes(nil)); return INT
{op} lval.token = string(l.TokenBytes(nil)); return OP

%%
	if c, ok := l.Abort(); ok { return int(c) }
	goto yyAction
}

237
example.l Normal file
View File

@ -0,0 +1,237 @@
%{
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// blame: jnml, labs.nic.cz
package main
import (
"bufio"
"fmt"
"os"
"bytes"
)
// Scanner state shared by skipHtml, getc, begin and the scanner in main.
var (
src = bufio.NewReader(os.Stdin) // input source: buffered standard input
buf []byte // bytes accumulated by getc since the scanner last reset it (buf = buf[:0])
current byte // byte most recently returned by getc; 0 when nothing could be read
isPhp bool // set by skipHtml once the next two input bytes are "<?"
sc int // active start condition, read by the scanner (%yyt sc) and set by begin
)
// Start condition identifiers accepted by begin.
const (
INITIAL = iota
PHP
)
// skipHtml discards input one byte at a time until the next two bytes of src
// are "<?", then sets isPhp and leaves those two bytes unread. It also stops,
// without setting isPhp, as soon as a byte cannot be read. It does nothing
// when isPhp is already set.
func skipHtml() {
	openTag := []byte("<?")
	for !isPhp {
		if next, err := src.Peek(len(openTag)); err == nil && bytes.Equal(next, openTag) {
			isPhp = true
			return
		}
		if _, err := src.ReadByte(); err != nil {
			return
		}
	}
}
// getc advances the scanner by one byte. The previously returned byte, when
// non-zero, is first appended to buf; then the next byte of src becomes
// current and is returned. A failed read yields 0, so a zero byte in the
// input cannot be told apart from the end of input.
func getc() byte {
	// skipHtml()
	if current != 0 {
		buf = append(buf, current)
	}

	next, err := src.ReadByte()
	if err != nil {
		next = 0
	}
	current = next
	return current
}
// begin makes cond (INITIAL or PHP) the active start condition by storing it
// in sc, which the scanner consults through the %yyt directive.
func begin(cond int) {
sc = cond
}
// main is the scanner itself: golex expands the definitions and rules below
// into its body. It reads standard input through getc, prints the name of
// every token it recognizes and returns when getc yields a zero byte.
//
// Outside PHP tags (INITIAL) everything except an open tag is skipped. An
// open tag switches to the PHP start condition and "?>" switches back.
//
// The <PHP>. catch-all is listed after every other PHP rule. Rules matching
// the same length are resolved in favour of the earlier one, so listing the
// catch-all first made it win over a single blank of white space.
func main() { // This left brace is closed by *1
	c := getc() // init
%}

%s PHP

%yyt sc
%yyc c
%yyn c = getc()
%yyb last == '\n' || last == '\x00'

D [0-9]+

%%
	buf = buf[:0] // The code before the first rule is executed before every scan cycle (rule #0 / state 0 action)

<*>\0 return

<INITIAL>[ \t\n\r]+
<INITIAL>.
<INITIAL>\<\?|\<\?php fmt.Println("T_OPEN_TAG");begin(PHP)
<INITIAL>\<\?= fmt.Println("T_OPEN_TAG_WITH_ECHO");begin(PHP)

<PHP>[ \t\n\r]+ fmt.Println("T_WHITESPACE")
	// <PHP>\<\?|\<\?php fmt.Println("T_OPEN_TAG");
	// <PHP>\<\?= fmt.Println("T_OPEN_TAG_WITH_ECHO");
<PHP>\?\> fmt.Println("T_CLOSE_TAG");begin(INITIAL)
	// <PHP>abstract fmt.Println("T_ABSTRACT")
	// <PHP>&= fmt.Println("T_AND_EQUAL")
	// <PHP>\(array\) fmt.Println("T_ARRAY_CAST")
	// <PHP>\(bool\)|\(boolean\) fmt.Println("T_BOOL_CAST")
	// <PHP>\(real\)|\(double\)|\(float\) fmt.Println("T_DOUBLE_CAST")
	// <PHP>\(int\)|\(integer\) fmt.Println("T_INT_CAST")
	// <PHP>\(object\) fmt.Println("T_OBJECT_CAST")
	// <PHP>\(string\) fmt.Println("T_STRING_CAST")
	// <PHP>\(unset\) fmt.Println("T_UNSET_CAST")
	// <PHP>array fmt.Println("T_ARRAY")
	// <PHP>as fmt.Println("T_AS")
	// <PHP>&& fmt.Println("T_BOOLEAN_AND")
	// <PHP>\|\| fmt.Println("T_BOOLEAN_OR")
	// <PHP>break fmt.Println("T_BREAK")
	// <PHP>callable fmt.Println("T_CALLABLE")
	// <PHP>case fmt.Println("T_CASE")
	// <PHP>catch fmt.Println("T_CATCH")
	// <PHP>class fmt.Println("T_CLASS")
	// <PHP>__CLASS__ fmt.Println("T_CLASS_C")
	// <PHP>__DIR__ fmt.Println("T_DIR")
	// <PHP>clone fmt.Println("T_CLONE")
	// <PHP>[/][/][^\n]+ fmt.Println("T_COMMENT"); // TODO: multiline comment
	// <PHP>\.= fmt.Println("T_CONCAT_EQUAL");
	// <PHP>[/]= fmt.Println("T_DIV_EQUAL");
	// <PHP>const fmt.Println("T_CONST");
	// <PHP>continue fmt.Println("T_CONTINUE");
	// <PHP>-- fmt.Println("T_DEC");
	// <PHP>declare fmt.Println("T_DECLARE");
	// <PHP>default fmt.Println("T_DEFAULT");
	// <PHP>do fmt.Println("T_DO");
	// <PHP>{D}\.{D}?|\.{D} fmt.Println("T_DNUMBER");
	// <PHP>=> fmt.Println("T_DOUBLE_ARROW");
	// <PHP>:: fmt.Println("T_DOUBLE_COLON");
	// <PHP>echo fmt.Println("T_ECHO");
	// <PHP>\.\.\. fmt.Println("T_ELLIPSIS");
	// <PHP>else fmt.Println("T_ELSE");
	// <PHP>elseif fmt.Println("T_ELSEIF");
	// <PHP>empty fmt.Println("T_EMPTY");
	// <PHP>endfor fmt.Println("T_ENDFOR")
	// <PHP>endforeach fmt.Println("T_ENDFOREACH")
	// <PHP>endif fmt.Println("T_ENDIF")
	// <PHP>endswitch fmt.Println("T_ENDSWITCH")
	// <PHP>endwhile fmt.Println("T_ENDWHILE")
	// <PHP>exit|die fmt.Println("T_EXIT")
	// <PHP>extends fmt.Println("T_EXTENDS")
	// <PHP>__FILE__ fmt.Println("T_FILE")
	// <PHP>final fmt.Println("T_FINAL")
	// <PHP>finally fmt.Println("T_FINALLY")
	// <PHP>for fmt.Println("T_FOR")
	// <PHP>foreach fmt.Println("T_FOREACH")
	// <PHP>function|cfunction fmt.Println("T_FUNCTION")
	// <PHP>__FUNCTION__ fmt.Println("T_FUNC_C")
	// <PHP>global fmt.Println("T_GLOBAL")
	// <PHP>goto fmt.Println("T_GOTO")
	// <PHP>if fmt.Println("T_IF")
	// <PHP>implements fmt.Println("T_IMPLEMENTS")
	// <PHP>\+\+ fmt.Println("T_INC")
	// <PHP>instanceof fmt.Println("T_INSTANCEOF")
	// <PHP>insteadof fmt.Println("T_INSTEADOF")
	// <PHP>interface fmt.Println("T_INTERFACE")
	// <PHP>== fmt.Println("T_IS_EQUAL")
	// <PHP>\>= fmt.Println("T_IS_GREATER_OR_EQUAL")
	// <PHP>=== fmt.Println("T_IS_IDENTICAL")
	// <PHP>\!=|\<\> fmt.Println("T_IS_NOT_EQUAL")
	// <PHP>\!== fmt.Println("T_IS_NOT_IDENTICAL")
	// <PHP>\<= fmt.Println("T_IS_SMALLER_OR_EQUAL")
	// <PHP>\<=\> fmt.Println("T_SPACESHIP")
	// <PHP>__LINE__ fmt.Println("T_LINE")
	// <PHP>{D} fmt.Println("T_LNUMBER") // TODO: parse 0x1ac
	// <PHP>and fmt.Println("T_LOGICAL_AND")
	// <PHP>or fmt.Println("T_LOGICAL_OR")
	// <PHP>xor fmt.Println("T_LOGICAL_XOR")
	// <PHP>__METHOD__ fmt.Println("T_METHOD_C")
	// <PHP>-= fmt.Println("T_MINUS_EQUAL")
	// <PHP>%= fmt.Println("T_MOD_EQUAL")
	// <PHP>\*= fmt.Println("T_MUL_EQUAL")
	// <PHP>namespace fmt.Println("T_NAMESPACE")
	// <PHP>__NAMESPACE__ fmt.Println("T_NS_C")
	// <PHP>\\ fmt.Println("T_NS_SEPARATOR")
	// <PHP>new fmt.Println("T_NEW")
	// <PHP>-> fmt.Println("T_OBJECT_OPERATOR")
	// <PHP>\|= fmt.Println("T_OR_EQUAL")
	// <PHP>\+= fmt.Println("T_PLUS_EQUAL")
	// <PHP>\*\* fmt.Println("T_POW")
	// <PHP>\*\*= fmt.Println("T_POW_EQUAL")
	// <PHP>private fmt.Println("T_PRIVATE")
	// <PHP>public fmt.Println("T_PUBLIC")
	// <PHP>protected fmt.Println("T_PROTECTED")
	// <PHP>return fmt.Println("T_RETURN")
	// <PHP>\<\< fmt.Println("T_SL")
	// <PHP>\<\<= fmt.Println("T_SL_EQUAL")
	// <PHP>\>\> fmt.Println("T_SR")
	// <PHP>\>\>= fmt.Println("T_SR_EQUAL")
	// <PHP>static fmt.Println("T_STATIC")
	// <PHP>switch fmt.Println("T_SWITCH")
	// <PHP>throw fmt.Println("T_THROW")
	// <PHP>trait fmt.Println("T_TRAIT")
	// <PHP>__TRAIT__ fmt.Println("T_TRAIT_C")
	// <PHP>try fmt.Println("T_TRY")
	// <PHP>use fmt.Println("T_USE")
	// <PHP>var fmt.Println("T_VAR")
	// <PHP>while fmt.Println("T_WHILE")
	// <PHP>\^= fmt.Println("T_XOR_EQUAL")
	// <PHP>yield fmt.Println("T_YIELD")
	// <PHP>yield\nfrom fmt.Println("T_YIELD_FROM")
	// <PHP>'[^']*' fmt.Println("T_CONSTANT_ENCAPSED_STRING")
	// <PHP>\{\$ fmt.Println("T_CURLY_OPEN");src.UnreadByte();c='$'
	// <PHP>\$[A-Za-z][A-Za-z0-9]* fmt.Println("T_VARIABLE") // TODO allow -_ and other
	// <PHP>[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] fmt.Println("T_COMMENT");
	// <PHP>[/][*] panic("Unterminated comment")
	// Catch-all for PHP mode; keep it last so that it never shadows a rule above.
<PHP>. fmt.Printf("%q\n", buf)

%%
	// Tokens that have no rule yet:
	// \{\$ fmt.Println("T_CURLY_OPEN");
	// \$\{ fmt.Println("T_DOLLAR_OPEN_CURLY_BRACES");
	// T_DOC_COMMENT /** */ PHPDoc comments
	// T_ENCAPSED_AND_WHITESPACE " $a" constant part of a string that contains variables
	// T_ENDDECLARE enddeclare declare, alternative syntax
	// T_END_HEREDOC heredoc syntax
	// T_START_HEREDOC <<< heredoc syntax
	// T_EVAL eval() eval()
	// T_HALT_COMPILER __halt_compiler() __halt_compiler (available since PHP 5.1.0)
	// T_INCLUDE include() include
	// T_INCLUDE_ONCE include_once() include_once
	// T_INLINE_HTML text outside PHP
	// T_ISSET isset() isset()
	// T_LIST list() list()
	// T_NUM_STRING "$a[0]" numeric array index inside a string
	// T_PRINT print() print
	// T_REQUIRE require() require
	// T_REQUIRE_ONCE require_once() require_once
	// T_STRING parent, self, etc. identifiers, for example keywords such as parent and self; function names, class names and some others match here as well. See also T_CONSTANT_ENCAPSED_STRING
	// T_STRING_VARNAME "${a variables inside a string
	// T_UNSET unset() unset()
	// T_VARIABLE $foo variables
	// T_BAD_CHARACTER anything below ASCII 32 except \t (0x09), \n (0x0a) and \r (0x0d)
	// T_CONSTANT_ENCAPSED_STRING "foo" or 'bar' string syntax

	// The golex generated scanner enters top of the user code section when
	// lexem recognition fails. In this example it should never happen.
	fmt.Println("UNDEFENED LEXEM")
	goto yystate0
	// panic("scanner internal error")
} // *1 this right brace

765
php-parser.go Normal file
View File

@ -0,0 +1,765 @@
// Code generated by golex. DO NOT EDIT.
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// blame: jnml, labs.nic.cz
package main
import (
"bufio"
"fmt"
"go/token"
"io"
"os"
"unicode"
"github.com/cznic/golex/lex"
)
// Allocate Character classes anywhere in [0x80, 0xFF].
const (
classUnicodeLeter = iota + 0x80
classUnicodeDigit
classOther
)
var sc int
const (
INITIAL = iota
PHP
STRING
)
type lexer struct {
*lex.Lexer
}
func begin(cond int) {
sc = cond
}
func rune2Class(r rune) int {
if r >= 0 && r < 0x80 { // Keep ASCII as it is.
return int(r)
}
if unicode.IsLetter(r) {
return classUnicodeLeter
}
if unicode.IsDigit(r) {
return classUnicodeDigit
}
return classOther
}
func newLexer(src io.Reader, dst io.Writer, fName string) *lexer {
file := token.NewFileSet().AddFile(fName, -1, 1<<31-1)
lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(rune2Class))
if err != nil {
panic(err)
}
return &lexer{lx}
}
type yySymType struct{}
func (l *lexer) unget(r rune) []byte {
l.Unget(l.Lookahead())
chars := l.Token()
lastChar := chars[len(chars)-1]
if lastChar.Rune != r {
return l.TokenBytes(nil)
}
l.Unget(lastChar)
buf := l.TokenBytes(nil)
buf = buf[:len(buf)-1]
return buf
}
func (l *lexer) Lex() int { // Lex(lval *yySymType)
c := l.Enter()
yystate0:
yyrule := -1
_ = yyrule
c = l.Rule0()
// ([\$]{NCH})*
switch yyt := sc; yyt {
default:
panic(fmt.Errorf(`invalid start condition %d`, yyt))
case 0: // start condition: INITIAL
goto yystart1
case 1: // start condition: PHP
goto yystart11
case 2: // start condition: STRING
goto yystart32
}
goto yystate0 // silence unused label error
goto yyAction // silence unused label error
yyAction:
switch yyrule {
case 1:
goto yyrule1
case 2:
goto yyrule2
case 3:
goto yyrule3
case 4:
goto yyrule4
case 5:
goto yyrule5
case 6:
goto yyrule6
case 7:
goto yyrule7
case 8:
goto yyrule8
case 9:
goto yyrule9
case 10:
goto yyrule10
case 11:
goto yyrule11
case 12:
goto yyrule12
case 13:
goto yyrule13
case 14:
goto yyrule14
case 15:
goto yyrule15
case 16:
goto yyrule16
}
goto yystate1 // silence unused label error
yystate1:
c = l.Next()
yystart1:
switch {
default:
goto yyabort
case c == '<':
goto yystate5
case c == '\n':
goto yystate4
case c == '\t' || c == '\r' || c == ' ':
goto yystate3
case c >= '\x01' && c <= '\b' || c == '\v' || c == '\f' || c >= '\x0e' && c <= '\x1f' || c >= '!' && c <= ';' || c >= '=' && c <= 'ÿ':
goto yystate2
}
yystate2:
c = l.Next()
yyrule = 2
l.Mark()
goto yyrule2
yystate3:
c = l.Next()
yyrule = 1
l.Mark()
switch {
default:
goto yyrule1
case c == '\t' || c == '\n' || c == '\r' || c == ' ':
goto yystate4
}
yystate4:
c = l.Next()
yyrule = 1
l.Mark()
switch {
default:
goto yyrule1
case c == '\t' || c == '\n' || c == '\r' || c == ' ':
goto yystate4
}
yystate5:
c = l.Next()
yyrule = 2
l.Mark()
switch {
default:
goto yyrule2
case c == '?':
goto yystate6
}
yystate6:
c = l.Next()
yyrule = 3
l.Mark()
switch {
default:
goto yyrule3
case c == '=':
goto yystate7
case c == 'p':
goto yystate8
}
yystate7:
c = l.Next()
yyrule = 4
l.Mark()
goto yyrule4
yystate8:
c = l.Next()
switch {
default:
goto yyabort
case c == 'h':
goto yystate9
}
yystate9:
c = l.Next()
switch {
default:
goto yyabort
case c == 'p':
goto yystate10
}
yystate10:
c = l.Next()
yyrule = 3
l.Mark()
goto yyrule3
goto yystate11 // silence unused label error
yystate11:
c = l.Next()
yystart11:
switch {
default:
goto yyabort
case c == '"':
goto yystate15
case c == '$':
goto yystate24
case c == '?':
goto yystate30
case c == '\'':
goto yystate26
case c == '\n':
goto yystate14
case c == '\t' || c == '\r' || c == ' ':
goto yystate13
case c >= '\x01' && c <= '\b' || c == '\v' || c == '\f' || c >= '\x0e' && c <= '\x1f' || c == '!' || c == '#' || c == '%' || c == '&' || c >= '(' && c <= '>' || c >= '@' && c <= 'ÿ':
goto yystate12
}
yystate12:
c = l.Next()
yyrule = 10
l.Mark()
goto yyrule10
yystate13:
c = l.Next()
yyrule = 5
l.Mark()
switch {
default:
goto yyrule5
case c == '\t' || c == '\n' || c == '\r' || c == ' ':
goto yystate14
}
yystate14:
c = l.Next()
yyrule = 5
l.Mark()
switch {
default:
goto yyrule5
case c == '\t' || c == '\n' || c == '\r' || c == ' ':
goto yystate14
}
yystate15:
c = l.Next()
yyrule = 9
l.Mark()
switch {
default:
goto yyrule9
case c == '"':
goto yystate17
case c == '$':
goto yystate18
case c == '\\':
goto yystate19
case c == '{':
goto yystate20
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate16
}
yystate16:
c = l.Next()
switch {
default:
goto yyabort
case c == '"':
goto yystate17
case c == '$':
goto yystate18
case c == '\\':
goto yystate19
case c == '{':
goto yystate20
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate16
}
yystate17:
c = l.Next()
yyrule = 7
l.Mark()
goto yyrule7
yystate18:
c = l.Next()
switch {
default:
goto yyabort
case c == '"':
goto yystate17
case c == '\\':
goto yystate19
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '@' || c == '[' || c == ']' || c == '^' || c == '`' || c >= '|' && c <= '~':
goto yystate16
}
yystate19:
c = l.Next()
switch {
default:
goto yyabort
case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ':
goto yystate16
}
yystate20:
c = l.Next()
switch {
default:
goto yyabort
case c == '"':
goto yystate17
case c == '\\':
goto yystate21
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate16
}
yystate21:
c = l.Next()
switch {
default:
goto yyabort
case c == '"':
goto yystate22
case c == '\\':
goto yystate21
case c == '{':
goto yystate23
case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate16
}
yystate22:
c = l.Next()
yyrule = 7
l.Mark()
switch {
default:
goto yyrule7
case c == '"':
goto yystate17
case c == '$':
goto yystate18
case c == '\\':
goto yystate19
case c == '{':
goto yystate20
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate16
}
yystate23:
c = l.Next()
switch {
default:
goto yyabort
case c == '"':
goto yystate17
case c == '$':
goto yystate18
case c == '\\':
goto yystate21
case c == '{':
goto yystate23
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate16
}
yystate24:
c = l.Next()
yyrule = 10
l.Mark()
switch {
default:
goto yyrule10
case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate25
}
yystate25:
c = l.Next()
yyrule = 16
l.Mark()
switch {
default:
goto yyrule16
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate25
}
yystate26:
c = l.Next()
yyrule = 10
l.Mark()
switch {
default:
goto yyrule10
case c == '\'':
goto yystate28
case c == '\\':
goto yystate29
case c >= '\x01' && c <= '&' || c >= '(' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate27
}
yystate27:
c = l.Next()
switch {
default:
goto yyabort
case c == '\'':
goto yystate28
case c == '\\':
goto yystate29
case c >= '\x01' && c <= '&' || c >= '(' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate27
}
yystate28:
c = l.Next()
yyrule = 8
l.Mark()
goto yyrule8
yystate29:
c = l.Next()
switch {
default:
goto yyabort
case c == '\'':
goto yystate27
}
yystate30:
c = l.Next()
yyrule = 10
l.Mark()
switch {
default:
goto yyrule10
case c == '>':
goto yystate31
}
yystate31:
c = l.Next()
yyrule = 6
l.Mark()
goto yyrule6
goto yystate32 // silence unused label error
yystate32:
c = l.Next()
yystart32:
switch {
default:
goto yyabort
case c == '"':
goto yystate40
case c == '$':
goto yystate41
case c == '\\':
goto yystate36
case c == '{':
goto yystate44
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate33
}
yystate33:
c = l.Next()
yyrule = 15
l.Mark()
switch {
default:
goto yyrule15
case c == '"':
goto yystate34
case c == '$':
goto yystate35
case c == '\\':
goto yystate36
case c == '{':
goto yystate37
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate33
}
yystate34:
c = l.Next()
yyrule = 15
l.Mark()
goto yyrule15
yystate35:
c = l.Next()
switch {
default:
goto yyabort
case c == '"':
goto yystate34
case c == '\\':
goto yystate36
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '@' || c == '[' || c == ']' || c == '^' || c == '`' || c >= '|' && c <= '~':
goto yystate33
}
yystate36:
c = l.Next()
switch {
default:
goto yyabort
case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ':
goto yystate33
}
yystate37:
c = l.Next()
switch {
default:
goto yyabort
case c == '"':
goto yystate34
case c == '\\':
goto yystate38
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate33
}
yystate38:
c = l.Next()
yyrule = 15
l.Mark()
switch {
default:
goto yyrule15
case c == '\\':
goto yystate38
case c == '{':
goto yystate39
case c >= '\x01' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate33
}
yystate39:
c = l.Next()
yyrule = 15
l.Mark()
switch {
default:
goto yyrule15
case c == '"':
goto yystate34
case c == '$':
goto yystate35
case c == '\\':
goto yystate38
case c == '{':
goto yystate39
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'z' || c >= '|' && c <= 'ÿ':
goto yystate33
}
yystate40:
c = l.Next()
yyrule = 11
l.Mark()
goto yyrule11
yystate41:
c = l.Next()
yyrule = 14
l.Mark()
switch {
default:
goto yyrule14
case c == '\\':
goto yystate36
case c == '{':
goto yystate43
case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate42
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '@' || c == '[' || c == ']' || c == '^' || c == '`' || c >= '|' && c <= '~':
goto yystate33
}
yystate42:
c = l.Next()
yyrule = 14
l.Mark()
switch {
default:
goto yyrule14
case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z' || c >= '\u007f' && c <= 'ÿ':
goto yystate42
}
yystate43:
c = l.Next()
yyrule = 13
l.Mark()
goto yyrule13
yystate44:
c = l.Next()
yyrule = 12
l.Mark()
switch {
default:
goto yyrule12
case c == '\\':
goto yystate38
case c >= '\x01' && c <= '!' || c == '#' || c >= '%' && c <= '[' || c >= ']' && c <= 'ÿ':
goto yystate33
}
yyrule1: // [ \t\n\r]+
goto yystate0
yyrule2: // .
goto yystate0
yyrule3: // \<\?|\<\?php
{
fmt.Println("T_OPEN_TAG")
begin(PHP)
goto yystate0
}
yyrule4: // \<\?=
{
fmt.Println("T_OPEN_TAG_WITH_ECHO")
begin(PHP)
goto yystate0
}
yyrule5: // [ \t\n\r]+
{
fmt.Println("T_WHITESPACE")
goto yystate0
}
yyrule6: // \?\>
{
fmt.Println("T_CLOSE_TAG")
begin(INITIAL)
goto yystate0
}
yyrule7: // [\"]{STR}*[\{\$]?[\"]
{
fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil))
goto yystate0
}
yyrule8: // [\']([^\\\']*([\\][\'])*)*[\']
{
fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil))
goto yystate0
}
yyrule9: // [\"]
{
fmt.Println("\"")
begin(STRING)
goto yystate0
}
yyrule10: // .
{
fmt.Printf("other: %q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule11: // \"
{
fmt.Println("\"")
begin(PHP)
goto yystate0
}
yyrule12: // \{
{
fmt.Printf("T_CURLY_OPEN: %q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule13: // \$\{
{
fmt.Printf("T_DOLLAR_OPEN_CURLY_BRACES: %q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule14: // \${VAR}?
{
fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil))
goto yystate0
}
yyrule15: // {TPL}
{
fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", l.unget('"'))
goto yystate0
}
yyrule16: // \${VAR}
{
fmt.Println("T_VARIABLE")
goto yystate0
}
panic("unreachable")
goto yyabort // silence unused label error
yyabort: // no lexem recognized
if c, ok := l.Abort(); ok {
return int(c)
}
goto yyAction
}
func main() {
l := newLexer(os.Stdin, os.Stdout, "file.name")
l.Lex()
}

144
php-parser.l Normal file
View File

@ -0,0 +1,144 @@
%{
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// blame: jnml, labs.nic.cz
package main
import (
"bufio"
"go/token"
"io"
"unicode"
"fmt"
"os"
"github.com/cznic/golex/lex"
)
// Allocate Character classes anywhere in [0x80, 0xFF].
// rune2Class folds every non-ASCII rune into one of these classes.
const (
classUnicodeLeter = iota + 0x80 // 0x80: runes for which unicode.IsLetter is true
classUnicodeDigit // 0x81: runes for which unicode.IsDigit is true
classOther // 0x82: any other non-ASCII rune
)
// sc is the active start condition: the scanner consults it through the
// %yyt directive and begin changes it.
var sc int
// Start condition identifiers accepted by begin.
const (
INITIAL = iota // initial condition; an open tag switches to PHP
PHP // entered on an open tag; "?>" switches back to INITIAL
STRING // entered on a lone double quote in PHP; the next double quote switches back to PHP
)
// lexer embeds the golex run time *lex.Lexer so that unget and the generated
// Lex method can be declared on it.
type lexer struct {
*lex.Lexer
}
// begin makes cond (INITIAL, PHP or STRING) the active start condition by
// storing it in sc, which the scanner consults through the %yyt directive.
func begin(cond int) {
sc = cond
}
// rune2Class maps a rune to the character class the scanner tables work on.
// ASCII runes (0..0x7F) are their own class; any other rune is reduced to
// classUnicodeLeter, classUnicodeDigit or classOther, tested in that order.
func rune2Class(r rune) int {
	switch {
	case r >= 0 && r < 0x80:
		// Keep ASCII as it is.
		return int(r)
	case unicode.IsLetter(r):
		return classUnicodeLeter
	case unicode.IsDigit(r):
		return classUnicodeDigit
	default:
		return classOther
	}
}
// newLexer builds a lexer that reads from src and classifies runes with
// rune2Class. fName is the name registered for the input in a fresh
// token.FileSet. dst is accepted but not used by this function.
//
// It panics if lex.New reports an error.
func newLexer(src io.Reader, dst io.Writer, fName string) *lexer {
	// A negative base lets the file set choose the base offset; the size is
	// the largest int32 because the real input length is not known up front.
	fset := token.NewFileSet()
	file := fset.AddFile(fName, -1, 1<<31-1)

	input := bufio.NewReader(src)
	lx, err := lex.New(file, input, lex.RuneClass(rune2Class))
	if err != nil {
		panic(err)
	}
	return &lexer{Lexer: lx}
}
// yySymType is an empty placeholder for a parser's semantic value type. Lex
// does not take one yet; the intended signature is kept as a comment on Lex.
type yySymType struct {}
// unget hands the current lookahead back to the lexer and returns the bytes
// of the current token. When the token's last character is r, that character
// is handed back as well and the returned bytes end one byte earlier.
//
// Exactly one byte is trimmed, so r is expected to be encoded in a single
// byte; the only caller in this file passes '"'.
//
// An empty token is returned as is instead of indexing past its end.
func (l *lexer) unget(r rune) []byte {
	l.Unget(l.Lookahead())

	chars := l.Token()
	if len(chars) == 0 || chars[len(chars)-1].Rune != r {
		return l.TokenBytes(nil)
	}

	l.Unget(chars[len(chars)-1])
	buf := l.TokenBytes(nil)
	return buf[:len(buf)-1]
}
// Lex runs the scanner that golex generates from the definitions and rules
// following this prologue. Rule actions only print the recognized token and
// switch start conditions through begin; none of them returns, so Lex keeps
// scanning until no rule matches. The trailer after the rules then returns
// int(c) if l.Abort() reports ok and otherwise jumps back to the action
// dispatcher.
//
// Start conditions: INITIAL skips everything up to an open tag, PHP holds the
// code between the tags, and STRING splits a double-quoted string into its
// literal parts and the variables embedded in it.
func (l *lexer) Lex() int { // Lex(lval *yySymType)
	c := l.Enter()
%}

%s PHP STRING

%yyb last == '\n' || last == '\x00'
%yyt sc
%yyc c
%yyn c = l.Next()
%yym l.Mark()

D [0-9]+
NC ([^\\\$\"\{])
NCH [^a-zA-Z_\x7f-\xff]
ENSCAPED ([\\].)
DOLLAR_E ([\$]{ENSCAPED})
DOLLAR_N ([\$][^a-zA-Z_\x7f-\xff\\\$\"\{])
CURVE_E ([\{]{ENSCAPED})
CURVE ([\{][^\$\"])
ALLOWED ({NC}|{ENSCAPED}|{DOLLAR_E}|{DOLLAR_N}|{CURVE_E}|{CURVE})
STR_END ([\{\$]?[\"])?
STR {ALLOWED}+{ALLOWED}*
TPL {STR}{STR_END}
VAR [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*

%%
	c = l.Rule0()
	// ([\$]{NCH})*

<INITIAL>[ \t\n\r]+
<INITIAL>.
<INITIAL>\<\?|\<\?php fmt.Println("T_OPEN_TAG");begin(PHP)
<INITIAL>\<\?= fmt.Println("T_OPEN_TAG_WITH_ECHO");begin(PHP)

<PHP>[ \t\n\r]+ fmt.Println("T_WHITESPACE")
<PHP>\?\> fmt.Println("T_CLOSE_TAG");begin(INITIAL)
<PHP>[\"]{STR}*[\{\$]?[\"] fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil))
<PHP>[\']([^\\\']*([\\][\'])*)*[\'] fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil))
<PHP>[\"] fmt.Println("\"");begin(STRING)
<PHP>. fmt.Printf("other: %q\n", l.TokenBytes(nil))

<STRING>\" fmt.Println("\""); begin(PHP)
<STRING>\{ fmt.Printf("T_CURLY_OPEN: %q\n", l.TokenBytes(nil))
<STRING>\$\{ fmt.Printf("T_DOLLAR_OPEN_CURLY_BRACES: %q\n", l.TokenBytes(nil))
<STRING>\${VAR}? fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil))
<STRING>{TPL} fmt.Printf("T_ENCAPSED_AND_WHITESPACE: %q\n", l.unget('"'))

<PHP>\${VAR} fmt.Println("T_VARIABLE")

%%
	if c, ok := l.Abort(); ok { return int(c) }
	goto yyAction
}
// main scans standard input with a single call to Lex and discards the
// value Lex returns. The lexer is created with the placeholder file name
// "file.name".
func main() {
	newLexer(os.Stdin, os.Stdout, "file.name").Lex()
}