| |
| /*================================================================*/ |
| /* |
| JavaCup Specification for the JavaCup Specification Language |
| by Scott Hudson, GVU Center, Georgia Tech, August 1995 |
| |
| This JavaCup specification is used to implement JavaCup itself. |
| It specifies the parser for the JavaCup specification language. |
| (It also serves as a reasonable example of what a typical JavaCup |
| spec looks like). |
| |
| The specification has the following parts: |
| Package and import declarations |
| These serve the same purpose as in a normal Java source file |
| (and will appear in the generated code for the parser). In this |
| case we are part of the java_cup package and we import both the |
| java_cup runtime system and Hashtable from the standard Java |
| utilities package. |
| |
| Action code |
| This section provides code that is included with the class encapsulating |
| the various pieces of user code embedded in the grammar (i.e., the |
| semantic actions). This provides a series of helper routines and |
| data structures that the semantic actions use. |
| |
| Parser code |
| This section provides code included in the parser class itself. In |
| this case we override the default error reporting routines. |
| |
| Init with and scan with |
| These sections provide small bits of code that initialize, then |
| indicate how to invoke the scanner. |
| |
| Symbols and grammar |
| These sections declare all the terminal and non terminal symbols |
| and the types of objects that they will be represented by at runtime, |
| then indicate the start symbol of the grammar (spec), and finally provide |
| the grammar itself (with embedded actions). |
| |
| Operation of the parser |
| The parser acts primarily by accumulating data structures representing |
| various parts of the specification. Various small parts (e.g., single |
| code strings) are stored as static variables of the emit class and |
| in a few cases as variables declared in the action code section. |
| Terminals, non terminals, and productions are maintained as collections |
| accessible via static methods of those classes. In addition, two |
| symbol tables are kept: |
| symbols maintains the name to object mapping for all symbols |
| non_terms maintains a separate mapping containing only the non terms |
| |
| Several intermediate working structures are also declared in the action |
| code section. These include: rhs_parts, rhs_pos, and lhs_nt which |
| build up parts of the current production while it is being parsed. |
| |
| Author(s) |
| Scott Hudson, GVU Center, Georgia Tech. |
| |
| Revisions |
| v0.9a First released version [SEH] 8/29/95 |
| v0.9b Updated for beta language (throws clauses) [SEH] 11/25/95 |
| */ |
| /*================================================================*/ |
| |
| package java_cup; |
| import java_cup.runtime.*; |
| import java.util.Hashtable; |
| |
| /*----------------------------------------------------------------*/ |
| |
| action code {: |
|   /** upper bound on the number of parts (symbols and actions) in one right hand side */ |
|   protected final int MAX_RHS = 200; |
| |
|   /** buffer in which the parts of the current right hand side are collected */ |
|   protected production_part[] rhs_parts = new production_part[MAX_RHS]; |
| |
|   /** index of the next free slot in rhs_parts */ |
|   protected int rhs_pos = 0; |
| |
|   /** multiple part name accumulated so far (empty when none is in progress) */ |
|   protected String multipart_name = new String(); |
| |
|   /** every declared symbol, stored as a production part keyed by its name */ |
|   protected Hashtable symbols = new Hashtable(); |
| |
|   /** the declared non terminals only, keyed by name */ |
|   protected Hashtable non_terms = new Hashtable(); |
| |
|   /** the start non terminal (null until one has been chosen) */ |
|   protected non_terminal start_nt = null; |
| |
|   /** non terminal on the left hand side of the production being parsed */ |
|   protected non_terminal lhs_nt; |
| |
|   /** Attach a label to a production part. |
|    *  The part is handed back unchanged when lab is null or the part is an |
|    *  action; otherwise a new symbol_part carrying the same symbol and the |
|    *  given label is returned. |
|    */ |
|   protected production_part add_lab(production_part part, String lab) |
|     throws internal_error |
|     { |
|       if (lab != null && !part.is_action()) |
|         { |
|           symbol_part unlabeled = (symbol_part)part; |
|           return new symbol_part(unlabeled.the_symbol(), lab); |
|         } |
| |
|       /* nothing to attach, or the part is an action: keep the original */ |
|       return part; |
|     } |
| |
|   /** Begin a new right hand side by rewinding rhs_pos to zero. */ |
|   protected void new_rhs() |
|     { |
|       rhs_pos = 0; |
|     } |
| |
|   /** Store one more part at the end of the right hand side being built. |
|    *  Throws an Exception when MAX_RHS parts are already stored. |
|    */ |
|   protected void add_rhs_part(production_part part) throws java.lang.Exception |
|     { |
|       if (rhs_pos < MAX_RHS) |
|         { |
|           rhs_parts[rhs_pos++] = part; |
|           return; |
|         } |
| |
|       throw new Exception("Internal Error: Productions limited to " + |
|         MAX_RHS + " symbols and actions"); |
|     } |
| |
|   /** Extend multipart_name with one more segment, putting a dot in front |
|    *  of the segment unless the accumulated name is still empty. |
|    */ |
|   protected void append_multipart(String name) |
|     { |
|       String separator = (multipart_name.length() == 0) ? "" : "."; |
|       multipart_name = multipart_name.concat(separator + name); |
|     } |
| |
| :}; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| parser code {: |
| |
|   /* Overrides of the error reporting routines: both pass the message |
|      on to lexer.emit_error(). */ |
| |
|   /** Report an error by handing the message to lexer.emit_error(). |
|    *  The info argument is not used. |
|    */ |
|   public void report_error(String message, Object info) |
|     { |
|       lexer.emit_error(message); |
|     } |
| |
|   /** Report an error that cannot be recovered from: calls done_parsing(), |
|    *  emits the message, prints a final notice on System.err and exits |
|    *  with status 1.  The info argument is not used. |
|    */ |
|   public void report_fatal_error(String message, Object info) |
|     { |
|       done_parsing(); |
|       lexer.emit_error(message); |
|       System.err.println("Can't recover from previous error(s), giving up."); |
|       System.exit(1); |
|     } |
| :}; |
| |
| /*----------------------------------------------------------------*/ |
| |
| /* initialization code: calls lexer.init() */ |
| init with {: lexer.init(); :}; |
| /* scanner invocation: each token is obtained from lexer.next_token() */ |
| scan with {: return lexer.next_token(); :}; |
| |
| /*----------------------------------------------------------------*/ |
| |
| /* terminals whose runtime objects are of type java_cup.runtime.token */ |
| terminal java_cup.runtime.token |
| PACKAGE, IMPORT, CODE, ACTION, PARSER, TERMINAL, NON, INIT, SCAN, WITH, |
| START, SEMI, COMMA, STAR, DOT, COLON, COLON_COLON_EQUALS, BAR, |
| DEBUG; |
| |
| /* terminals of type str_token; the actions below read their str_val field */ |
| terminal java_cup.runtime.str_token ID, CODE_STRING; |
| |
| /* non terminals whose runtime objects are of type java_cup.runtime.symbol */ |
| non terminal java_cup.runtime.symbol |
| spec, package_spec, import_list, code_part, action_code_part, |
| parser_code_part, symbol_list, start_spec, production_list, |
| multipart_id, import_spec, import_id, init_code, scan_code, symbol, |
| debug_grammar, |
| type_id, term_name_list, non_term_name_list, production, prod_part_list, |
| prod_part, new_term_id, new_non_term_id, rhs_list, rhs, empty; |
| |
| /* non terminals of type str_token; their actions set RESULT.str_val */ |
| non terminal java_cup.runtime.str_token nt_id, symbol_id, label_id, opt_label; |
| |
| /*----------------------------------------------------------------*/ |
| |
| /* the start symbol of this grammar is spec */ |
| start with spec; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* A whole specification: the leading sections (each of which may be |
| empty according to its own rule), then the symbol declarations, the |
| start declaration and the productions.  The action placed ahead of |
| package_spec enters "error" into the symbols table and "$START" into |
| the non_terms table. */ |
| spec ::= |
| {: |
| /* declare "error" as a terminal */ |
| symbols.put("error", new symbol_part(terminal.error)); |
| |
| /* declare start non terminal */ |
| non_terms.put("$START", non_terminal.START_nt); |
| :} |
| package_spec |
| import_list |
| code_part |
| debug_grammar |
| init_code |
| scan_code |
| symbol_list |
| start_spec |
| production_list |
| | |
| /* error recovery assuming something went wrong before symbols |
| and we have TERMINAL or NON TERMINAL to sync on. if we get |
| an error after that, we recover inside symbol_list or |
| production_list |
| */ |
| error |
| symbol_list |
| start_spec |
| production_list |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* optional package declaration: the multipart name accumulated while |
| parsing multipart_id is stored in emit.package_name */ |
| package_spec ::= |
| PACKAGE |
| multipart_id |
| {: |
| /* save the package name */ |
| emit.package_name = multipart_name; |
| |
| /* reset the accumulated multipart name */ |
| multipart_name = new String(); |
| :} |
| SEMI |
| | |
| empty |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* zero or more import declarations */ |
| import_list ::= |
| import_list |
| import_spec |
| | |
| empty |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* a single import declaration: the accumulated name is pushed onto |
| emit.import_list */ |
| import_spec ::= |
| IMPORT |
| import_id |
| {: |
| /* save this import on the imports list */ |
| emit.import_list.push(multipart_name); |
| |
| /* reset the accumulated multipart name */ |
| multipart_name = new String(); |
| :} |
| SEMI |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* the action code section followed by the parser code section (either may be empty) */ |
| code_part ::= action_code_part parser_code_part ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* optional "action code" section: the code string is stored in emit.action_code */ |
| action_code_part ::= |
| ACTION CODE CODE_STRING:user_code SEMI |
| {: |
| /* save the user included code string */ |
| emit.action_code = user_code.str_val; |
| :} |
| | |
| empty |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* optional "parser code" section: the code string is stored in emit.parser_code */ |
| parser_code_part ::= |
| PARSER CODE CODE_STRING:user_code SEMI |
| {: |
| /* save the user included code string */ |
| emit.parser_code = user_code.str_val; |
| :} |
| | |
| empty |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* optional "init with" section: the code string is stored in emit.init_code */ |
| init_code ::= |
| INIT WITH CODE_STRING:user_code SEMI |
| {: |
| /* save the user code */ |
| emit.init_code = user_code.str_val; |
| :} |
| | |
| empty |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* optional "scan with" section: the code string is stored in emit.scan_code */ |
| scan_code ::= |
| SCAN WITH CODE_STRING:user_code SEMI |
| {: |
| /* save the user code */ |
| emit.scan_code = user_code.str_val; |
| :} |
| | |
| empty |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* optional "debug with" section: the multipart name that follows is |
| stored in emit.debug_grammar */ |
| debug_grammar ::= |
| DEBUG WITH multipart_id SEMI |
| {: |
| /* save the procedure name */ |
| emit.debug_grammar = multipart_name; |
| /* reset the accumulated multipart name */ |
| multipart_name = new String(); |
| :} |
| | |
| empty |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* one or more symbol declarations */ |
| symbol_list ::= symbol_list symbol | symbol; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* one terminal or non terminal declaration: a type id, a list of names |
| and a closing semicolon.  In every alternative, including the two |
| error recovery ones, the accumulated multipart name is cleared just |
| before the SEMI. */ |
| symbol ::= |
| TERMINAL |
| type_id |
| term_name_list |
| {: |
| /* reset the accumulated multipart name */ |
| multipart_name = new String(); |
| :} |
| SEMI |
| | |
| NON |
| TERMINAL |
| type_id |
| non_term_name_list |
| {: |
| /* reset the accumulated multipart name */ |
| multipart_name = new String(); |
| :} |
| SEMI |
| | |
| |
| /* error recovery productions -- sync on semicolon */ |
| |
| TERMINAL |
| error |
| {: |
| /* reset the accumulated multipart name */ |
| multipart_name = new String(); |
| :} |
| SEMI |
| | |
| NON |
| TERMINAL |
| error |
| {: |
| /* reset the accumulated multipart name */ |
| multipart_name = new String(); |
| :} |
| SEMI |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* comma separated list of one or more new terminal names */ |
| term_name_list ::= term_name_list COMMA new_term_id | new_term_id; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* comma separated list of one or more new non terminal names */ |
| non_term_name_list ::= |
| non_term_name_list |
| COMMA |
| new_non_term_id |
| | |
| new_non_term_id |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* optional "start with" declaration.  When the named non terminal is |
|    known, it is recorded in start_nt and the special start production |
|    is built from it followed by EOF; otherwise an error is emitted. */ |
| start_spec ::= |
|   START WITH nt_id:start_name |
|   {: |
|     /* look the name up among the declared non terminals */ |
|     String wanted = start_name.str_val; |
|     Object declared = non_terms.get(wanted); |
| |
|     if (declared != null) |
|       { |
|         /* keep the non terminal around for later */ |
|         start_nt = (non_terminal)declared; |
| |
|         /* assemble the special start production: start_nt then EOF */ |
|         new_rhs(); |
|         add_rhs_part(new symbol_part(start_nt)); |
|         add_rhs_part(new symbol_part(terminal.EOF)); |
|         emit.start_production = |
|           new production(non_terminal.START_nt, rhs_parts, rhs_pos); |
|         new_rhs(); |
|       } |
|     else |
|       { |
|         lexer.emit_error("Start non terminal \"" + wanted + |
|           "\" has not been declared"); |
|       } |
|   :} |
|   SEMI |
|   | |
|   empty |
|   ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* one or more productions */ |
| production_list ::= production_list production | production; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* one production: a left hand side non terminal, "::=", a list of |
|    alternative right hand sides and a semicolon.  The second alternative |
|    recovers from a syntax error by syncing on the semicolon. */ |
| production ::= |
|   nt_id:lhs_id |
|   {: |
|     /* resolve the left hand side; lhs_nt stays null for an unknown name */ |
|     lhs_nt = (non_terminal)non_terms.get(lhs_id.str_val); |
| |
|     /* an unknown name is reported only while the lexer's error count is zero */ |
|     if (lhs_nt == null && lexer.error_count == 0) |
|       lexer.emit_error("LHS non terminal \"" + lhs_id.str_val + |
|         "\" has not been declared"); |
| |
|     /* start collecting right hand side parts from scratch */ |
|     new_rhs(); |
|   :} |
|   COLON_COLON_EQUALS |
|   rhs_list |
|   SEMI |
|   | |
|   error |
|   {: lexer.emit_error("Syntax Error"); :} |
|   SEMI |
|   ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* one or more right hand sides separated by BAR */ |
| rhs_list ::= rhs_list BAR rhs | rhs; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* one right hand side: once its parts have been collected, a production |
|    is built for the current left hand side (skipped when lhs_nt is null, |
|    i.e. the left hand side was not a declared non terminal). */ |
| rhs ::= |
|   prod_part_list |
|   {: |
|     if (lhs_nt != null) |
|       { |
|         /* build the production; the new object is not needed here |
|            (productions are reachable through the production class -- |
|            presumably the constructor registers it; confirm) */ |
|         new production(lhs_nt, rhs_parts, rhs_pos); |
| |
|         /* if we have no start non-terminal declared and this is |
|            the first production, make its lhs nt the start_nt |
|            and build a special start production for it. */ |
|         if (start_nt == null) |
|           { |
|             start_nt = lhs_nt; |
| |
|             /* special start production: start_nt followed by EOF */ |
|             new_rhs(); |
|             add_rhs_part(new symbol_part(start_nt)); |
|             add_rhs_part(new symbol_part(terminal.EOF)); |
|             emit.start_production = |
|               new production(non_terminal.START_nt, rhs_parts, rhs_pos); |
|           } |
|       } |
| |
|     /* reset the rhs accumulation in any case (this also clears the |
|        parts used for the start production above) */ |
|     new_rhs(); |
|   :} |
|   ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* zero or more production parts */ |
| prod_part_list ::= prod_part_list prod_part | empty; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* one part of a right hand side: either a symbol with an optional |
|    label, or a code string that becomes an action part. */ |
| prod_part ::= |
|   symbol_id:symid opt_label:labid |
|   {: |
|     /* fetch the production part registered under this name, if any */ |
|     production_part found_part = |
|       (production_part)symbols.get(symid.str_val); |
| |
|     if (found_part != null) |
|       { |
|         /* append it, with the label attached when one was given */ |
|         add_rhs_part(add_lab(found_part, labid.str_val)); |
|       } |
|     else if (lexer.error_count == 0) |
|       { |
|         /* unknown symbol: reported only while the error count is zero */ |
|         lexer.emit_error("Symbol \"" + symid.str_val + |
|           "\" has not been declared"); |
|       } |
|   :} |
|   | |
|   CODE_STRING:code_str |
|   {: |
|     /* wrap the code string in an action part and append it */ |
|     add_rhs_part(new action_part(code_str.str_val)); |
|   :} |
|   ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* optional label after a symbol: RESULT.str_val is the label text, or |
| null when no label was given */ |
| opt_label ::= |
| COLON label_id:labid |
| {: RESULT.str_val = labid.str_val; :} |
| | |
| empty |
| {: RESULT.str_val = null; :} |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* identifiers separated by DOT; each ID is appended to multipart_name |
| via append_multipart() */ |
| multipart_id ::= |
| multipart_id DOT ID:another_id |
| {: append_multipart(another_id.str_val); :} |
| | |
| ID:an_id |
| {: append_multipart(an_id.str_val); :} |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* name used in an import: a multipart id, optionally followed by DOT STAR |
| (in which case "*" is appended as a final segment) */ |
| import_id ::= |
| multipart_id DOT STAR |
| {: append_multipart("*"); :} |
| | |
| multipart_id |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* a type is written as a multipart id */ |
| type_id ::= multipart_id; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* a name being declared as a terminal: entered into the symbols table |
|    unless a symbol of that name already exists, which is an error. */ |
| new_term_id ::= |
|   ID:term_id |
|   {: |
|     String new_name = term_id.str_val; |
| |
|     if (symbols.get(new_name) == null) |
|       { |
|         /* first declaration: create the terminal from the name and the |
|            accumulated multipart name, then register its symbol part */ |
|         terminal new_term = new terminal(new_name, multipart_name); |
|         symbols.put(new_name, new symbol_part(new_term)); |
|       } |
|     else |
|       { |
|         /* the name is already taken */ |
|         lexer.emit_error("Symbol \"" + new_name + |
|           "\" has already been declared"); |
|       } |
|   :} |
|   ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* a name being declared as a non terminal: entered into both the |
|    non_terms and the symbols table unless a symbol of that name already |
|    exists, which is an error. */ |
| new_non_term_id ::= |
|   ID:non_term_id |
|   {: |
|     String new_name = non_term_id.str_val; |
| |
|     if (symbols.get(new_name) == null) |
|       { |
|         /* first declaration: create the non terminal from the name and |
|            the accumulated multipart name */ |
|         non_terminal fresh_nt = new non_terminal(new_name, multipart_name); |
| |
|         /* register it as a non terminal ... */ |
|         non_terms.put(new_name, fresh_nt); |
| |
|         /* ... and, wrapped in a symbol part, as a symbol */ |
|         symbols.put(new_name, new symbol_part(fresh_nt)); |
|       } |
|     else |
|       { |
|         /* the name is already taken */ |
|         lexer.emit_error( "Symbol \"" + new_name + |
|           "\" has already been declared"); |
|       } |
|   :} |
|   ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* identifier used where a non terminal name is expected; its text is |
| passed on in RESULT.str_val */ |
| nt_id ::= |
| ID:the_id |
| {: RESULT.str_val = the_id.str_val; :} |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* identifier used where a symbol name is expected; its text is passed |
| on in RESULT.str_val */ |
| symbol_id ::= |
| ID:the_id |
| {: RESULT.str_val = the_id.str_val; :} |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* identifier used as a label; its text is passed on in RESULT.str_val */ |
| label_id ::= |
| ID:the_id |
| {: RESULT.str_val = the_id.str_val; :} |
| ; |
| |
| /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ |
| |
| /* the empty production, used by the optional parts of the grammar */ |
| empty ::= /* nothing */; |
| |
| /*----------------------------------------------------------------*/ |