%{
// Copyright (c) 2011 CZ.NIC z.s.p.o. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// blame: jnml, labs.nic.cz

package main

import (
	"bufio"
	"fmt"
	"go/token"
	"io"
	"os"
	"unicode"

	"github.com/cznic/golex/lex"
)

// Allocate Character classes anywhere in [0x80, 0xFF].
const (
	classUnicodeLeter = iota + 0x80
	classUnicodeDigit
	classOther
)

// sc holds the current start condition; the %yyt directive below names it as
// the expression the generated scanner switches on.
var sc int

// Start conditions. INITIAL is the zero value of sc; PHP and STRING are the
// ones declared by the %s directive.
const (
	INITIAL = iota
	PHP
	STRING
)

// lexer wraps *lex.Lexer so the scanner helpers and Lex can be methods on it.
type lexer struct {
	*lex.Lexer
}

// begin switches the scanner to start condition cond.
func begin(cond int) {
	sc = cond
}

// rune2Class maps a rune to the character class the scanner tables work with.
// ASCII runes map to themselves; any other rune maps to classUnicodeLeter,
// classUnicodeDigit or classOther.
func rune2Class(r rune) int {
	if r >= 0 && r < 0x80 { // Keep ASCII as it is.
		return int(r)
	}
	if unicode.IsLetter(r) {
		return classUnicodeLeter
	}
	if unicode.IsDigit(r) {
		return classUnicodeDigit
	}
	return classOther
}

// newLexer builds a lexer reading from src, registering the input under the
// name fName. It panics if lex.New reports an error. dst is accepted but not
// used by this function.
func newLexer(src io.Reader, dst io.Writer, fName string) *lexer {
	file := token.NewFileSet().AddFile(fName, -1, 1<<31-1)
	lx, err := lex.New(file, bufio.NewReader(src), lex.RuneClass(rune2Class))
	if err != nil {
		panic(err)
	}
	return &lexer{lx}
}

type yySymType struct{}

// unget pushes the lookahead back and, when the last char of the current
// token is r, pushes that char back as well. It returns the token bytes
// without the char that was pushed back (or the whole token when the last
// char is not r).
func (l *lexer) unget(r rune) []byte {
	l.Unget(l.Lookahead())
	chars := l.Token()
	if len(chars) == 0 {
		// Nothing to inspect; avoid indexing an empty token.
		return l.TokenBytes(nil)
	}
	last := chars[len(chars)-1]
	if last.Rune != r {
		return l.TokenBytes(nil)
	}
	l.Unget(last)
	buf := l.TokenBytes(nil)
	// Trim by the encoded size of the rune, not by one byte.
	// NOTE(review): assumes TokenBytes is the UTF-8 encoding of Token() - confirm.
	trim := len(string(last.Rune))
	if trim > len(buf) {
		trim = len(buf)
	}
	return buf[:len(buf)-trim]
}

// ungetN pushes the lookahead back and then the last n chars of the current
// token, last char first. It returns the token bytes without those n chars.
// n is clamped to [0, len(token)].
func (l *lexer) ungetN(n int) []byte {
	l.Unget(l.Lookahead())
	chars := l.Token()
	if n > len(chars) {
		n = len(chars)
	}
	if n < 0 {
		n = 0
	}
	// The pushed-back chars may be non-ASCII (rune2Class places them in the
	// 0x80+ classes that the \x7f-\xff ranges accept), so count encoded bytes
	// rather than chars when trimming the result.
	// NOTE(review): assumes TokenBytes is the UTF-8 encoding of Token() - confirm.
	trim := 0
	for i := 1; i <= n; i++ {
		ch := chars[len(chars)-i]
		l.Unget(ch)
		trim += len(string(ch.Rune))
	}
	buf := l.TokenBytes(nil)
	if trim > len(buf) {
		trim = len(buf)
	}
	return buf[:len(buf)-trim]
}

// Lex runs the generated scanner. The rule actions below only print, so the
// only return visible here is the one taken when l.Abort reports ok.
func (l *lexer) Lex() int { // Lex(lval *yySymType)
	c := l.Enter()
%}

%s PHP STRING

%yyb last == '\n' || last == '\0'
%yyt sc
%yyc c
%yyn c = l.Next()
%yym l.Mark()

D		[0-9]+
NC		([^\\\$\"\{])
ENSCAPED	([\\].)
DOLLAR		([\$]+{ENSCAPED})|([\$]+[^a-zA-Z_\x7f-\xff\$\"\{])
CURVE		([\{]+{ENSCAPED})|([\{]+[^\{\$\"])
ALLOWED		({NC}|{ENSCAPED}|{DOLLAR}|{CURVE})
STR		{ALLOWED}*
STR_END		[\{\$]?[\"]
VAR_NAME	[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*

%%
	c = l.Rule0()
	// ([\$]{NCH})*
	// NOTE(review): start conditions PHP and STRING are declared and switched
	// via begin(), but none of the rules below carries a <COND> prefix, and
	// several patterns repeat. Presumably the prefixes were lost from this
	// copy - confirm against the original file before regenerating.

[ \t\n\r]+

.

\<\?|\<\?php	fmt.Println("T_OPEN_TAG");begin(PHP)
\<\?=	fmt.Println("T_OPEN_TAG_WITH_ECHO");begin(PHP)
[ \t\n\r]+	fmt.Println("T_WHITESPACE")
\?\>	fmt.Println("T_CLOSE_TAG");begin(INITIAL)
[\"]{STR}{STR_END}	fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil));
[\']([^\\\']*([\\][\'])*)*[\']	fmt.Printf("T_CONSTANT_ENCAPSED_STRING: %s\n", l.TokenBytes(nil));
[\"]	fmt.Println("\"");begin(STRING)
.	fmt.Printf("other: %q\n", l.TokenBytes(nil))
\"	fmt.Println("\""); begin(PHP)
\{\$	fmt.Printf("T_CURLY_OPEN: %q\n", l.ungetN(1));
\$\{	fmt.Printf("T_DOLLAR_OPEN_CURLY_BRACES: %q\n", l.TokenBytes(nil))
\$	fmt.Printf("T_VARIABLE: %q\n", l.TokenBytes(nil))
{STR}{STR_END}	fmt.Printf("T_ENCAPSED_AND_WHITESPACE1: %q\n", l.unget('"'));
{STR}[\{]+[\$]	fmt.Printf("T_ENCAPSED_AND_WHITESPACE2: %q\n", l.ungetN(2));
{STR}[\$]+[\{]	fmt.Printf("T_ENCAPSED_AND_WHITESPACE3: %q\n", l.ungetN(2));
{STR}[^\{][\$]+[a-zA-Z_\x7f-\xff]	fmt.Printf("T_ENCAPSED_AND_WHITESPACE4: %q\n", l.ungetN(2));
\${VAR_NAME}	fmt.Println("T_VARIABLE")

%%
	if c, ok := l.Abort(); ok {
		return int(c)
	}
	goto yyAction
}

func main() {
	l := newLexer(os.Stdin, os.Stdout, "file.name")
	l.Lex()
}