split scanner and lexer

2017-12-01 15:36:46 +02:00 · 2017-12-01 15:36:46 +02:00 · bc6d25a7cc
commit bc6d25a7cc
parent 4318df3473
4 changed files with 8966 additions and 9063 deletions
--- a/4
+++ b/4
@ -4,7 +4,7 @@
 # blame: jnml, labs.nic.cz
-all: parser.go lexer.go
+all: parser.go scanner.go
 	rm -f y.output
 	gofmt -l -s -w *.go
 	go build
@ -12,7 +12,7 @@ all: parser.go lexer.go
 run: all
 	./php-parser
-lexer.go: lexer.l
+scanner.go: scanner.l
 	golex -o $@ $<
 parser.go: parser.y
--- a/lexer.go
+++ b/lexer.go
--- a/scanner.go
+++ b/scanner.go
--- a/scanner.l
+++ b/scanner.l
@ -8,25 +8,10 @@
 package main
 import (
    "bufio"
    "go/token"
    "io"
    "unicode"
    "fmt"
    "bytes"
    "github.com/cznic/golex/lex"
 )
 // Allocate Character classes anywhere in [0x80, 0xFF].
 // These are the values rune2Class hands back for runes outside the ASCII
 // range; ASCII runes are passed through unchanged and occupy [0x00, 0x7F].
 const (
    classUnicodeLeter = iota + 0x80 // 0x80: non-ASCII rune for which unicode.IsLetter is true
    classUnicodeDigit               // 0x81: non-ASCII rune for which unicode.IsDigit is true
    classOther                      // 0x82: declared, but rune2Class currently returns -1 instead of this value
 )
 // sc is the currently active lexer state. It is assigned by pushState,
 // popState and begin, which keep it equal to the top entry of stateStack.
 // NOTE(review): presumably read by the golex-generated scanner as its start
 // condition — confirm against the rules section of this file.
 var sc int
 const (
    INITIAL = iota
    PHP
@ -41,92 +26,6 @@ const (
    BACKQUOTE
 )
 // lexer wraps a *lex.Lexer by embedding it, so the embedded lexer's methods
 // (Unget, Lookahead, Token, TokenBytes, Enter, ...) are callable directly on
 // a *lexer. The methods declared on lexer below add token push-back helpers
 // and the Lex entry point.
 type lexer struct {
    *lex.Lexer
 }
 // stateStack is the stack of lexer states; its last element is the active
 // state and mirrors sc. It starts with the single entry PHP, and popState
 // never removes the last remaining entry.
 var stateStack = []int{PHP}
 // heredocLabel is package-level scratch storage for a heredoc label.
 // NOTE(review): not referenced in the visible part of the file; presumably
 // set and compared by the heredoc scanner rules — confirm.
 var heredocLabel []byte
 func pushState(state int) {
    sc = state
    stateStack = append(stateStack, state)
 }
 // popState leaves the current lexer state: the top entry of stateStack is
 // discarded and sc is set to the entry beneath it. When the stack holds one
 // entry or fewer the call does nothing, so the bottom state is never removed.
 func popState() {
    depth := len(stateStack)
    if depth > 1 {
        stateStack = stateStack[:depth-1]
        // After truncation the new top sits at index depth-2.
        sc = stateStack[depth-2]
    }
 }
 // begin switches the active lexer state without changing the nesting depth:
 // the top entry of stateStack is overwritten with state and sc is updated to
 // match. Unlike pushState, the previously active state is not kept, so a
 // later popState will not return to it.
 //
 // If stateStack is empty, state is pushed instead, so the call cannot panic
 // on a negative slice bound.
 func begin(state int) {
    if top := len(stateStack) - 1; top >= 0 {
        stateStack[top] = state
    } else {
        stateStack = append(stateStack, state)
    }
    sc = state
 }
 // rune2Class maps a rune to the character class passed to the lexer via
 // lex.RuneClass in newLexer.
 //
 // ASCII runes (0x00-0x7F) are returned unchanged. Any other rune yields
 // classUnicodeLeter if unicode.IsLetter reports true, otherwise
 // classUnicodeDigit if unicode.IsDigit reports true, otherwise -1.
 // Negative runes also fall through to the unicode checks.
 func rune2Class(r rune) int {
    switch {
    case r >= 0 && r < 0x80: // ASCII is its own class.
        return int(r)
    case unicode.IsLetter(r):
        return classUnicodeLeter
    case unicode.IsDigit(r):
        return classUnicodeDigit
    default:
        // classOther is intentionally not returned here; -1 is used instead.
        // return classOther
        return -1
    }
 }
 // newLexer builds a lexer that reads its input from src.
 //
 // A fresh token.FileSet is created and a file named fName is registered in
 // it with base -1 and size 1<<31-1; src is wrapped in a bufio.Reader, and
 // rune2Class is installed as the rune classifier. dst is accepted but not
 // used by this function. If lex.New reports an error, newLexer panics with
 // that error.
 func newLexer(src io.Reader, dst io.Writer, fName string) *lexer {
    fset := token.NewFileSet()
    file := fset.AddFile(fName, -1, 1<<31-1)
    input := bufio.NewReader(src)

    lx, err := lex.New(file, input, lex.RuneClass(rune2Class))
    if err != nil {
        panic(err)
    }
    return &lexer{lx}
 }
 // unget pushes the lookahead character back into the lexer and then, if the
 // current token ends with rune r, pushes that final character back too.
 //
 // It returns the token bytes from TokenBytes(nil). When the last character
 // matched r, the returned slice is shortened by exactly one byte; otherwise
 // it is returned in full.
 // NOTE(review): the one-byte trim assumes r encodes to a single byte
 // (i.e. ASCII) — confirm against callers.
 // The token must contain at least one character, otherwise indexing its
 // last element panics.
 func (l *lexer) unget(r rune) []byte {
    l.Unget(l.Lookahead())

    tok := l.Token()
    tail := tok[len(tok)-1]
    if tail.Rune == r {
        l.Unget(tail)
        text := l.TokenBytes(nil)
        return text[:len(text)-1]
    }
    return l.TokenBytes(nil)
 }
 // ungetN pushes the lookahead character back into the lexer, followed by the
 // last n characters of the current token, handed to Unget one at a time
 // starting from the final character and moving toward the front.
 //
 // It returns the token bytes from TokenBytes(nil) with the last n bytes
 // removed.
 // NOTE(review): trimming n bytes for n characters assumes each pushed-back
 // character is a single byte — confirm against callers.
 // n must not exceed the number of characters in the token, otherwise the
 // indexing panics.
 func (l *lexer) ungetN(n int) []byte {
    l.Unget(l.Lookahead())

    tok := l.Token()
    for k := len(tok) - 1; k >= len(tok)-n; k-- {
        l.Unget(tok[k])
    }

    text := l.TokenBytes(nil)
    return text[:len(text)-n]
 }
 func (l *lexer) Lex(lval *yySymType) int { // Lex(lval *yySymType)
  c := l.Enter()