split scanner and lexer

This commit is contained in:
vadim 2017-12-01 15:36:46 +02:00
parent 4318df3473
commit bc6d25a7cc
4 changed files with 8966 additions and 9063 deletions

View File

@ -4,7 +4,7 @@
# blame: jnml, labs.nic.cz
all: parser.go lexer.go
all: parser.go scanner.go
rm -f y.output
gofmt -l -s -w *.go
go build
@ -12,7 +12,7 @@ all: parser.go lexer.go
run: all
./php-parser
lexer.go: lexer.l
scanner.go: scanner.l
golex -o $@ $<
parser.go: parser.y

8960
lexer.go

File diff suppressed because it is too large Load Diff

8964
scanner.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -8,25 +8,10 @@
package main
import (
"bufio"
"go/token"
"io"
"unicode"
"fmt"
"bytes"
"github.com/cznic/golex/lex"
)
// Allocate Character classes anywhere in [0x80, 0xFF].
//
// These are the class values handed out for non-ASCII runes; ASCII runes
// (below 0x80) are used as their own class, so the values start at 0x80.
const (
// classUnicodeLeter is returned by rune2Class for non-ASCII letters.
classUnicodeLeter = iota + 0x80
// classUnicodeDigit is returned by rune2Class for non-ASCII digits.
classUnicodeDigit
// classOther is declared but rune2Class currently returns -1 instead of it.
classOther
)
// sc is the currently active lexer state. pushState, popState and begin
// assign it so that it mirrors the top of stateStack.
// NOTE(review): presumably read by the generated scanner as its start
// condition — confirm against scanner.l.
var sc int
const (
INITIAL = iota
PHP
@ -41,92 +26,6 @@ const (
BACKQUOTE
)
// lexer embeds *lex.Lexer and adds the unget/ungetN helpers and the Lex
// method on top of it.
type lexer struct {
*lex.Lexer
}
// stateStack is the stack of lexer states manipulated by pushState, popState
// and begin; it starts with the single state PHP.
var stateStack = []int{PHP}
// heredocLabel is package-level scratch storage for a heredoc label.
// NOTE(review): its readers and writers are not visible in this chunk —
// presumably set and compared by the heredoc scanner rules; confirm.
var heredocLabel []byte
func pushState(state int) {
sc = state
stateStack = append(stateStack, state)
}
// popState discards the top of stateStack and makes the entry beneath it the
// active state (sc). The bottom entry is never removed: with one or zero
// entries on the stack the call does nothing.
func popState() {
	depth := len(stateStack)
	if depth > 1 {
		stateStack = stateStack[:depth-1]
		sc = stateStack[depth-2]
	}
}
// begin replaces the state on top of stateStack with state and makes it the
// active state (sc). The stack depth is unchanged. It panics if stateStack is
// empty.
func begin(state int) {
	top := len(stateStack) - 1
	stateStack = append(stateStack[:top], state)
	sc = state
}
// rune2Class maps a rune to the character class used by the scanner tables.
//
// ASCII runes (0 <= r < 0x80) are their own class. Non-ASCII letters map to
// classUnicodeLeter and non-ASCII digits to classUnicodeDigit. Every other
// rune, including negative values, yields -1.
func rune2Class(r rune) int {
	switch {
	case r >= 0 && r < 0x80:
		// Keep ASCII as it is.
		return int(r)
	case unicode.IsLetter(r):
		return classUnicodeLeter
	case unicode.IsDigit(r):
		return classUnicodeDigit
	default:
		// classOther is deliberately not returned here.
		return -1
	}
}
// newLexer builds a lexer that reads from src, classifying runes with
// rune2Class. fName is the file name registered with the token file set.
//
// dst is accepted but not used by this function. Any error from lex.New
// causes a panic.
func newLexer(src io.Reader, dst io.Writer, fName string) *lexer {
	// A negative base lets the file set pick the base offset; the size is
	// given as the maximum int32 value rather than a measured length.
	fset := token.NewFileSet()
	file := fset.AddFile(fName, -1, 1<<31-1)

	reader := bufio.NewReader(src)
	lx, err := lex.New(file, reader, lex.RuneClass(rune2Class))
	if err != nil {
		panic(err)
	}
	return &lexer{lx}
}
// unget pushes the lookahead character back and then, if the last character
// of the current token is r, pushes that character back as well.
//
// It returns the token bytes. When the trailing r was pushed back, the
// returned slice has its final byte removed; otherwise it is returned whole.
// It panics if the current token is empty.
// NOTE(review): exactly one byte is trimmed, so this presumably expects r to
// be a single-byte (ASCII) rune — confirm against the callers.
func (l *lexer) unget(r rune) []byte {
	l.Unget(l.Lookahead())

	tok := l.Token()
	if last := tok[len(tok)-1]; last.Rune == r {
		l.Unget(last)
		b := l.TokenBytes(nil)
		return b[:len(b)-1]
	}
	return l.TokenBytes(nil)
}
// ungetN pushes the lookahead character back and then pushes back the last n
// characters of the current token, last character first.
//
// It returns the token bytes with the final n bytes removed. It panics if n
// exceeds the number of characters in the token.
// NOTE(review): n is applied as a character count when ungetting but as a
// byte count when trimming, so this presumably expects the trailing
// characters to be single-byte — confirm against the callers.
func (l *lexer) ungetN(n int) []byte {
	l.Unget(l.Lookahead())

	tok := l.Token()
	end := len(tok)
	for idx := end - 1; idx >= end-n; idx-- {
		l.Unget(tok[idx])
	}

	b := l.TokenBytes(nil)
	return b[:len(b)-n]
}
func (l *lexer) Lex(lval *yySymType) int { // Lex(lval *yySymType)
c := l.Enter()