From b9ad178652a833bfd8965a68b2f308fe6f3d163f Mon Sep 17 00:00:00 2001 From: z7zmey Date: Tue, 28 Nov 2017 00:37:17 +0200 Subject: [PATCH] parse func declaration and class declaration --- lexer.go | 23 ++-- lexer.l | 22 ++-- parser.go | 379 ++++++++++++++++++++++++++++++++++++++++++++++-------- parser.y | 148 ++++++++++++++++++++- 4 files changed, 487 insertions(+), 85 deletions(-) diff --git a/lexer.go b/lexer.go index 312dc96..acaa953 100644 --- a/lexer.go +++ b/lexer.go @@ -8459,21 +8459,21 @@ yyrule127: // '[^']*(\\')*' yyrule128: // {OPERATORS} { lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + return rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule129: // \{ { pushState(PHP) lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + return rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule130: // \} { popState() lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + return rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule131: // \${VAR_NAME} @@ -8484,9 +8484,6 @@ yyrule131: // \${VAR_NAME} } yyrule132: // {VAR_NAME} { - if c == -1 { - fmt.Printf("%q\n", string(l.TokenBytes(nil))) - } lval.token = string(l.TokenBytes(nil)) return T_STRING goto yystate0 @@ -8533,14 +8530,14 @@ yyrule139: // ` { begin(BACKQUOTE) lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule140: // ` { begin(PHP) lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule141: // [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE} @@ -8689,7 +8686,7 @@ yyrule145: // \" { popState() lval.token = "\"" - return rune2Class(l.Prev.Rune) + return rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule146: // \{\$ @@ -8872,7 +8869,7 @@ yyrule155: // \[ { pushState(STRING_VAR_INDEX) lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + return rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule156: // .|[ \t\n\r] @@ -8904,7 +8901,7 @@ yyrule160: // \] popState() popState() lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + return rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule161: // [ \n\r\t\\'#] @@ -8918,13 +8915,13 @@ yyrule161: // [ \n\r\t\\'#] yyrule162: // {OPERATORS} { lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + return rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule163: // . { lval.token = string(l.TokenBytes(nil)) - return rune2Class(l.Prev.Rune) + return rune2Class(rune(l.TokenBytes(nil)[0])) goto yystate0 } yyrule164: // {VAR_NAME}[\[\}] diff --git a/lexer.l b/lexer.l index c3081df..0bf78d1 100644 --- a/lexer.l +++ b/lexer.l @@ -314,12 +314,12 @@ NEW_LINE (\r|\n|\r\n) [/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] lval.token = string(l.TokenBytes(nil)); return T_COMMENT; // TODO: handle ?> [/][*][*][^*]*[*]+([^*/][^*]*[*]+)*[/] lval.token = string(l.TokenBytes(nil)); return T_DOC_COMMENT; // TODO: handle ?> '[^']*(\\')*' lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING -{OPERATORS} lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) +{OPERATORS} lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0])) -\{ pushState(PHP); lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) -\} popState(); lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) +\{ pushState(PHP); lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0])) +\} popState(); lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0])) \${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE -{VAR_NAME} if c == -1 {fmt.Printf("%q\n", string(l.TokenBytes(nil)))};lval.token = string(l.TokenBytes(nil)); return T_STRING +{VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_STRING -> begin(PROPERTY);lval.token = string(l.TokenBytes(nil)); return T_OBJECT_OPERATOR; [ \t\n\r]+ lval.token = string(l.TokenBytes(nil)); return T_WHITESPACE; @@ -329,8 +329,8 @@ NEW_LINE (\r|\n|\r\n) [\']([^\\\']*([\\][\'])*)*[\'] lval.token = string(l.TokenBytes(nil)); return T_CONSTANT_ENCAPSED_STRING; -` begin(BACKQUOTE); lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) -` begin(PHP); lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) +` begin(BACKQUOTE); lval.token = string(l.TokenBytes(nil)); rune2Class(rune(l.TokenBytes(nil)[0])) +` begin(PHP); lval.token = string(l.TokenBytes(nil)); rune2Class(rune(l.TokenBytes(nil)[0])) [b]?\<\<\<[ \t]*({VAR_NAME}|([']{VAR_NAME}['])|(["]{VAR_NAME}["])){NEW_LINE} tb := l.TokenBytes(nil) @@ -468,7 +468,7 @@ NEW_LINE (\r|\n|\r\n) c = l.Next() } -\" popState(); lval.token = "\""; return rune2Class(l.Prev.Rune) +\" popState(); lval.token = "\""; return rune2Class(rune(l.TokenBytes(nil)[0])) \{\$ lval.token = string(l.ungetN(1)); return T_CURLY_OPEN \$\{ pushState(STRING_VAR_NAME);lval.token = string(l.TokenBytes(nil)); return T_DOLLAR_OPEN_CURLY_BRACES \$ l.ungetN(1);pushState(STRING_VAR) @@ -602,16 +602,16 @@ NEW_LINE (\r|\n|\r\n) \${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE ->{VAR_NAME} lval.token = string(l.ungetN(len(l.TokenBytes(nil))-2)); return T_OBJECT_OPERATOR {VAR_NAME} popState();lval.token = string(l.TokenBytes(nil)); return T_STRING -\[ pushState(STRING_VAR_INDEX);lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) +\[ pushState(STRING_VAR_INDEX);lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0])) .|[ \t\n\r] l.ungetN(1);popState() {LNUM}|{HNUM}|{BNUM} lval.token = string(l.TokenBytes(nil)); return T_NUM_STRING \${VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_VARIABLE {VAR_NAME} lval.token = string(l.TokenBytes(nil)); return T_STRING -\] popState(); popState();lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) +\] popState(); popState();lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0])) [ \n\r\t\\'#] popState(); popState();lval.token = string(l.TokenBytes(nil)); return T_ENCAPSED_AND_WHITESPACE -{OPERATORS} lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) -. lval.token = string(l.TokenBytes(nil)); return rune2Class(l.Prev.Rune) +{OPERATORS} lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0])) +. lval.token = string(l.TokenBytes(nil)); return rune2Class(rune(l.TokenBytes(nil)[0])) {VAR_NAME}[\[\}] popState();pushState(PHP);lval.token = string(l.ungetN(1)); return T_STRING_VARNAME . l.ungetN(1);popState();pushState(PHP) diff --git a/parser.go b/parser.go index bffd026..f5fd6d6 100644 --- a/parser.go +++ b/parser.go @@ -12,8 +12,9 @@ import ( ) type node struct { - name string - children []node + name string + children []node + attributes map[string]string } func (n node) String() string { @@ -23,16 +24,31 @@ func (n node) String() string { } func (n node) print(out io.Writer, indent string) { - fmt.Fprintf(out, "\n%v%v", indent, n.name) + if len(n.attributes) > 0 { + fmt.Fprintf(out, "\n%v%v %s", indent, n.name, n.attributes) + } else { + fmt.Fprintf(out, "\n%v%v", indent, n.name) + } for _, nn := range n.children { nn.print(out, indent+" ") } } -func Node(name string) node { return node{name: name} } -func (n node) append(nn ...node) node { n.children = append(n.children, nn...); return n } +func Node(name string) node { + return node{name: name, attributes: make(map[string]string)} +} -//line parser.y:32 +func (n node) append(nn ...node) node { + n.children = append(n.children, nn...) + return n +} + +func (n node) attribute(key string, value string) node { + n.attributes[key] = value + return n +} + +//line parser.y:50 type yySymType struct { yys int node node @@ -335,6 +351,10 @@ var yyToknames = [...]string{ "T_NS_SEPARATOR", "T_ELLIPSIS", "';'", + "'{'", + "'}'", + "'('", + "')'", } var yyStatenames = [...]string{} @@ -342,11 +362,14 @@ const yyEofCode = 1 const yyErrCode = 2 const yyInitialStackSize = 16 -//line parser.y:203 +//line parser.y:335 const src = ` 0) { + fmt.Fprintf(out, "\n%v%v %s", indent, n.name, n.attributes) + } else { + fmt.Fprintf(out, "\n%v%v", indent, n.name) + } + for _, nn := range n.children { + nn.print(out, indent + " ") + } } -func Node(name string) node { return node{name: name} } -func (n node) append(nn...node) node { n.children = append(n.children, nn...); return n } +func Node(name string) node { + return node{name: name, attributes: make(map[string]string)} +} + +func (n node) append(nn...node) node { + n.children = append(n.children, nn...) + return n +} + +func (n node) attribute(key string, value string) node { + n.attributes[key] = value + return n +} %} @@ -150,10 +168,28 @@ func (n node) append(nn...node) node { n.children = append(n.children, nn...); r %token T_NS_SEPARATOR %token T_ELLIPSIS +%type class_modifier +%type is_reference +%type is_variadic +%type returns_ref + %type identifier %type top_statement %type namespace_name %type top_statement_list +%type statement +%type inner_statement +%type inner_statement_list +%type class_modifiers +%type class_declaration_statement +%type function_declaration_statement +%type optional_type +%type return_type +%type type_expr +%type type +%type parameter_list +%type non_empty_parameter_list +%type parameter %% @@ -194,8 +230,103 @@ top_statement_list: ; top_statement: - T_INCLUDE identifier ';' { $$ = $2; /*TODO: identifier stub, refactor it*/ } + statement { $$ = $1 } + | function_declaration_statement { $$ = $1 } + | T_INCLUDE identifier ';' { $$ = $2; /*TODO: identifier stub, refactor it*/ } | T_NAMESPACE namespace_name ';' { $$ = $2; } + | class_declaration_statement { $$ = $1; } +; + +inner_statement_list: + inner_statement_list inner_statement { $$ = $1.append($2); } + | /* empty */ { $$ = Node("statement_list") } +; + +inner_statement: + statement { $$ = $1; } + | class_declaration_statement { $$ = $1; } + +statement: + '{' inner_statement_list '}' { $$ = $2; } + +class_declaration_statement: + class_modifiers T_CLASS T_STRING '{' '}' { $$ = $1.attribute("name", $3) } + | T_CLASS T_STRING '{' '}' { $$ = Node("Class").attribute("name", $2) } +; +class_modifiers: + class_modifier { $$ = Node("Class").attribute($1, "true") } + | class_modifiers class_modifier { $$ = $1.attribute($2, "true") } +; + +class_modifier: + T_ABSTRACT { $$ = "abstract" } + | T_FINAL { $$ = "final" } +; + +function_declaration_statement: + T_FUNCTION returns_ref T_STRING '(' parameter_list ')' return_type '{' inner_statement_list '}' + { + $$ = Node("Function"). + attribute("name", $3). + attribute("returns_ref", $2). + append($5). + append($7). + append($9); + } +; + +parameter_list: + non_empty_parameter_list { $$ = $1; } + | /* empty */ { $$ = Node("Parameter list"); } +; +non_empty_parameter_list: + parameter { $$ = Node("Parameter list").append($1) } + | non_empty_parameter_list ',' parameter { $$ = $1.append($3); } +; +parameter: + optional_type is_reference is_variadic T_VARIABLE + { + $$ = Node("Parameter"). + append($1). + attribute("is_reference", $2). + attribute("is_variadic", $3). + attribute("var", $4); + } +; + +optional_type: + /* empty */ { $$ = Node("No type") } + | type_expr { $$ = $1; } +; + +returns_ref: + /* empty */ { $$ = "false"; } + | '&' { $$ = "true"; } +; + +is_reference: + /* empty */ { $$ = "false"; } + | '&' { $$ = "true"; } +; + +is_variadic: + /* empty */ { $$ = "false"; } + | T_ELLIPSIS { $$ = "true"; } +; + +type_expr: + type { $$ = $1; } + | '?' type { $$ = $2; $$.attribute("nullable", "true") } +; + +type: + T_ARRAY { $$ = Node("array type"); } + | T_CALLABLE { $$ = Node("callable type"); } +; + +return_type: + /* empty */ { $$ = Node("void"); } + | ':' type_expr { $$ = $2; } ; ///////////////////////////////////////////////////////////////////////// @@ -204,8 +335,11 @@ top_statement: const src = `