| # begin[licence] |
| # |
| # [The "BSD licence"] |
| # Copyright (c) 2005-2009 Terence Parr |
| # All rights reserved. |
| |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions |
| # are met: |
| # 1. Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # 2. Redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution. |
| # 3. The name of the author may not be used to endorse or promote products |
| # derived from this software without specific prior written permission. |
| |
| # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| # |
| # end[licence] |
| |
| import socket |
| from antlr3 import Parser, TokenStream, RecognitionException, Token |
| from antlr3.tree import CommonTreeAdaptor, TreeAdaptor, Tree |
| |
| class DebugParser(Parser): |
| def __init__(self, stream, state=None, dbg=None, *args, **kwargs): |
| # wrap token stream in DebugTokenStream (unless user already did so). |
| if not isinstance(stream, DebugTokenStream): |
| stream = DebugTokenStream(stream, dbg) |
| |
| super(DebugParser, self).__init__(stream, state, *args, **kwargs) |
| |
| # Who to notify when events in the parser occur. |
| self._dbg = None |
| |
| self.setDebugListener(dbg) |
| |
| |
| def setDebugListener(self, dbg): |
| """Provide a new debug event listener for this parser. Notify the |
| input stream too that it should send events to this listener. |
| """ |
| |
| if hasattr(self.input, 'dbg'): |
| self.input.dbg = dbg |
| |
| self._dbg = dbg |
| |
| def getDebugListener(self): |
| return self._dbg |
| |
| dbg = property(getDebugListener, setDebugListener) |
| |
| |
| def beginResync(self): |
| self._dbg.beginResync() |
| |
| |
| def endResync(self): |
| self._dbg.endResync() |
| |
| |
| def beginBacktrack(self, level): |
| self._dbg.beginBacktrack(level) |
| |
| |
| def endBacktrack(self, level, successful): |
| self._dbg.endBacktrack(level,successful) |
| |
| |
| def reportError(self, exc): |
| Parser.reportError(self, exc) |
| |
| if isinstance(exc, RecognitionException): |
| self._dbg.recognitionException(exc) |
| |
| |
| class DebugTokenStream(TokenStream): |
| def __init__(self, input, dbg=None): |
| self.input = input |
| self.initialStreamState = True |
| # Track the last mark() call result value for use in rewind(). |
| self.lastMarker = None |
| |
| self._dbg = None |
| self.setDebugListener(dbg) |
| |
| # force TokenStream to get at least first valid token |
| # so we know if there are any hidden tokens first in the stream |
| self.input.LT(1) |
| |
| |
| def getDebugListener(self): |
| return self._dbg |
| |
| def setDebugListener(self, dbg): |
| self._dbg = dbg |
| |
| dbg = property(getDebugListener, setDebugListener) |
| |
| |
| def consume(self): |
| if self.initialStreamState: |
| self.consumeInitialHiddenTokens() |
| |
| a = self.input.index() |
| t = self.input.LT(1) |
| self.input.consume() |
| b = self.input.index() |
| self._dbg.consumeToken(t) |
| |
| if b > a+1: |
| # then we consumed more than one token; must be off channel tokens |
| for idx in range(a+1, b): |
| self._dbg.consumeHiddenToken(self.input.get(idx)); |
| |
| |
| def consumeInitialHiddenTokens(self): |
| """consume all initial off-channel tokens""" |
| |
| firstOnChannelTokenIndex = self.input.index() |
| for idx in range(firstOnChannelTokenIndex): |
| self._dbg.consumeHiddenToken(self.input.get(idx)) |
| |
| self.initialStreamState = False |
| |
| |
| def LT(self, i): |
| if self.initialStreamState: |
| self.consumeInitialHiddenTokens() |
| |
| t = self.input.LT(i) |
| self._dbg.LT(i, t) |
| return t |
| |
| |
| def LA(self, i): |
| if self.initialStreamState: |
| self.consumeInitialHiddenTokens() |
| |
| t = self.input.LT(i) |
| self._dbg.LT(i, t) |
| return t.type |
| |
| |
| def get(self, i): |
| return self.input.get(i) |
| |
| |
| def index(self): |
| return self.input.index() |
| |
| |
| def mark(self): |
| self.lastMarker = self.input.mark() |
| self._dbg.mark(self.lastMarker) |
| return self.lastMarker |
| |
| |
| def rewind(self, marker=None): |
| self._dbg.rewind(marker) |
| self.input.rewind(marker) |
| |
| |
| def release(self, marker): |
| pass |
| |
| |
| def seek(self, index): |
| # TODO: implement seek in dbg interface |
| # self._dbg.seek(index); |
| self.input.seek(index) |
| |
| |
| def size(self): |
| return self.input.size() |
| |
| |
| def getTokenSource(self): |
| return self.input.getTokenSource() |
| |
| |
| def getSourceName(self): |
| return self.getTokenSource().getSourceName() |
| |
| |
| def toString(self, start=None, stop=None): |
| return self.input.toString(start, stop) |
| |
| |
| class DebugTreeAdaptor(TreeAdaptor): |
| """A TreeAdaptor proxy that fires debugging events to a DebugEventListener |
| delegate and uses the TreeAdaptor delegate to do the actual work. All |
| AST events are triggered by this adaptor; no code gen changes are needed |
| in generated rules. Debugging events are triggered *after* invoking |
| tree adaptor routines. |
| |
| Trees created with actions in rewrite actions like "-> ^(ADD {foo} {bar})" |
| cannot be tracked as they might not use the adaptor to create foo, bar. |
| The debug listener has to deal with tree node IDs for which it did |
| not see a createNode event. A single <unknown> node is sufficient even |
| if it represents a whole tree. |
| """ |
| |
| def __init__(self, dbg, adaptor): |
| self.dbg = dbg |
| self.adaptor = adaptor |
| |
| |
| def createWithPayload(self, payload): |
| if payload.getTokenIndex() < 0: |
| # could be token conjured up during error recovery |
| return self.createFromType(payload.getType(), payload.getText()) |
| |
| node = self.adaptor.createWithPayload(payload) |
| self.dbg.createNode(node, payload) |
| return node |
| |
| def createFromToken(self, tokenType, fromToken, text=None): |
| node = self.adaptor.createFromToken(tokenType, fromToken, text) |
| self.dbg.createNode(node) |
| return node |
| |
| def createFromType(self, tokenType, text): |
| node = self.adaptor.createFromType(tokenType, text) |
| self.dbg.createNode(node) |
| return node |
| |
| |
| def errorNode(self, input, start, stop, exc): |
| node = selfadaptor.errorNode(input, start, stop, exc) |
| if node is not None: |
| dbg.errorNode(node) |
| |
| return node |
| |
| |
| def dupTree(self, tree): |
| t = self.adaptor.dupTree(tree) |
| # walk the tree and emit create and add child events |
| # to simulate what dupTree has done. dupTree does not call this debug |
| # adapter so I must simulate. |
| self.simulateTreeConstruction(t) |
| return t |
| |
| |
| def simulateTreeConstruction(self, t): |
| """^(A B C): emit create A, create B, add child, ...""" |
| self.dbg.createNode(t) |
| for i in range(self.adaptor.getChildCount(t)): |
| child = self.adaptor.getChild(t, i) |
| self.simulateTreeConstruction(child) |
| self.dbg.addChild(t, child) |
| |
| |
| def dupNode(self, treeNode): |
| d = self.adaptor.dupNode(treeNode) |
| self.dbg.createNode(d) |
| return d |
| |
| |
| def nil(self): |
| node = self.adaptor.nil() |
| self.dbg.nilNode(node) |
| return node |
| |
| |
| def isNil(self, tree): |
| return self.adaptor.isNil(tree) |
| |
| |
| def addChild(self, t, child): |
| if isinstance(child, Token): |
| n = self.createWithPayload(child) |
| self.addChild(t, n) |
| |
| else: |
| if t is None or child is None: |
| return |
| |
| self.adaptor.addChild(t, child) |
| self.dbg.addChild(t, child) |
| |
| def becomeRoot(self, newRoot, oldRoot): |
| if isinstance(newRoot, Token): |
| n = self.createWithPayload(newRoot) |
| self.adaptor.becomeRoot(n, oldRoot) |
| else: |
| n = self.adaptor.becomeRoot(newRoot, oldRoot) |
| |
| self.dbg.becomeRoot(newRoot, oldRoot) |
| return n |
| |
| |
| def rulePostProcessing(self, root): |
| return self.adaptor.rulePostProcessing(root) |
| |
| |
| def getType(self, t): |
| return self.adaptor.getType(t) |
| |
| |
| def setType(self, t, type): |
| self.adaptor.setType(t, type) |
| |
| |
| def getText(self, t): |
| return self.adaptor.getText(t) |
| |
| |
| def setText(self, t, text): |
| self.adaptor.setText(t, text) |
| |
| |
| def getToken(self, t): |
| return self.adaptor.getToken(t) |
| |
| |
| def setTokenBoundaries(self, t, startToken, stopToken): |
| self.adaptor.setTokenBoundaries(t, startToken, stopToken) |
| if t is not None and startToken is not None and stopToken is not None: |
| self.dbg.setTokenBoundaries( |
| t, startToken.getTokenIndex(), |
| stopToken.getTokenIndex()) |
| |
| |
| def getTokenStartIndex(self, t): |
| return self.adaptor.getTokenStartIndex(t) |
| |
| |
| def getTokenStopIndex(self, t): |
| return self.adaptor.getTokenStopIndex(t) |
| |
| |
| def getChild(self, t, i): |
| return self.adaptor.getChild(t, i) |
| |
| |
| def setChild(self, t, i, child): |
| self.adaptor.setChild(t, i, child) |
| |
| |
| def deleteChild(self, t, i): |
| return self.adaptor.deleteChild(t, i) |
| |
| |
| def getChildCount(self, t): |
| return self.adaptor.getChildCount(t) |
| |
| |
| def getUniqueID(self, node): |
| return self.adaptor.getUniqueID(node) |
| |
| |
| def getParent(self, t): |
| return self.adaptor.getParent(t) |
| |
| |
| def getChildIndex(self, t): |
| return self.adaptor.getChildIndex(t) |
| |
| |
| def setParent(self, t, parent): |
| self.adaptor.setParent(t, parent) |
| |
| |
| def setChildIndex(self, t, index): |
| self.adaptor.setChildIndex(t, index) |
| |
| |
| def replaceChildren(self, parent, startChildIndex, stopChildIndex, t): |
| self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t) |
| |
| |
| ## support |
| |
| def getDebugListener(self): |
| return dbg |
| |
| def setDebugListener(self, dbg): |
| self.dbg = dbg |
| |
| |
| def getTreeAdaptor(self): |
| return self.adaptor |
| |
| |
| |
| class DebugEventListener(object): |
| """All debugging events that a recognizer can trigger. |
| |
| I did not create a separate AST debugging interface as it would create |
| lots of extra classes and DebugParser has a dbg var defined, which makes |
| it hard to change to ASTDebugEventListener. I looked hard at this issue |
| and it is easier to understand as one monolithic event interface for all |
| possible events. Hopefully, adding ST debugging stuff won't be bad. Leave |
| for future. 4/26/2006. |
| """ |
| |
| # Moved to version 2 for v3.1: added grammar name to enter/exit Rule |
| PROTOCOL_VERSION = "2" |
| |
| def enterRule(self, grammarFileName, ruleName): |
| """The parser has just entered a rule. No decision has been made about |
| which alt is predicted. This is fired AFTER init actions have been |
| executed. Attributes are defined and available etc... |
| The grammarFileName allows composite grammars to jump around among |
| multiple grammar files. |
| """ |
| |
| pass |
| |
| |
| def enterAlt(self, alt): |
| """Because rules can have lots of alternatives, it is very useful to |
| know which alt you are entering. This is 1..n for n alts. |
| """ |
| pass |
| |
| |
| def exitRule(self, grammarFileName, ruleName): |
| """This is the last thing executed before leaving a rule. It is |
| executed even if an exception is thrown. This is triggered after |
| error reporting and recovery have occurred (unless the exception is |
| not caught in this rule). This implies an "exitAlt" event. |
| The grammarFileName allows composite grammars to jump around among |
| multiple grammar files. |
| """ |
| pass |
| |
| |
| def enterSubRule(self, decisionNumber): |
| """Track entry into any (...) subrule other EBNF construct""" |
| pass |
| |
| |
| def exitSubRule(self, decisionNumber): |
| pass |
| |
| |
| def enterDecision(self, decisionNumber, couldBacktrack): |
| """Every decision, fixed k or arbitrary, has an enter/exit event |
| so that a GUI can easily track what LT/consume events are |
| associated with prediction. You will see a single enter/exit |
| subrule but multiple enter/exit decision events, one for each |
| loop iteration. |
| """ |
| pass |
| |
| |
| def exitDecision(self, decisionNumber): |
| pass |
| |
| |
| def consumeToken(self, t): |
| """An input token was consumed; matched by any kind of element. |
| Trigger after the token was matched by things like match(), matchAny(). |
| """ |
| pass |
| |
| |
| def consumeHiddenToken(self, t): |
| """An off-channel input token was consumed. |
| Trigger after the token was matched by things like match(), matchAny(). |
| (unless of course the hidden token is first stuff in the input stream). |
| """ |
| pass |
| |
| |
| def LT(self, i, t): |
| """Somebody (anybody) looked ahead. Note that this actually gets |
| triggered by both LA and LT calls. The debugger will want to know |
| which Token object was examined. Like consumeToken, this indicates |
| what token was seen at that depth. A remote debugger cannot look |
| ahead into a file it doesn't have so LT events must pass the token |
| even if the info is redundant. |
| """ |
| pass |
| |
| |
| def mark(self, marker): |
| """The parser is going to look arbitrarily ahead; mark this location, |
| the token stream's marker is sent in case you need it. |
| """ |
| pass |
| |
| |
| def rewind(self, marker=None): |
| """After an arbitrairly long lookahead as with a cyclic DFA (or with |
| any backtrack), this informs the debugger that stream should be |
| rewound to the position associated with marker. |
| |
| """ |
| pass |
| |
| |
| def beginBacktrack(self, level): |
| pass |
| |
| |
| def endBacktrack(self, level, successful): |
| pass |
| |
| |
| def location(self, line, pos): |
| """To watch a parser move through the grammar, the parser needs to |
| inform the debugger what line/charPos it is passing in the grammar. |
| For now, this does not know how to switch from one grammar to the |
| other and back for island grammars etc... |
| |
| This should also allow breakpoints because the debugger can stop |
| the parser whenever it hits this line/pos. |
| """ |
| pass |
| |
| |
| def recognitionException(self, e): |
| """A recognition exception occurred such as NoViableAltException. I made |
| this a generic event so that I can alter the exception hierachy later |
| without having to alter all the debug objects. |
| |
| Upon error, the stack of enter rule/subrule must be properly unwound. |
| If no viable alt occurs it is within an enter/exit decision, which |
| also must be rewound. Even the rewind for each mark must be unwount. |
| In the Java target this is pretty easy using try/finally, if a bit |
| ugly in the generated code. The rewind is generated in DFA.predict() |
| actually so no code needs to be generated for that. For languages |
| w/o this "finally" feature (C++?), the target implementor will have |
| to build an event stack or something. |
| |
| Across a socket for remote debugging, only the RecognitionException |
| data fields are transmitted. The token object or whatever that |
| caused the problem was the last object referenced by LT. The |
| immediately preceding LT event should hold the unexpected Token or |
| char. |
| |
| Here is a sample event trace for grammar: |
| |
| b : C ({;}A|B) // {;} is there to prevent A|B becoming a set |
| | D |
| ; |
| |
| The sequence for this rule (with no viable alt in the subrule) for |
| input 'c c' (there are 3 tokens) is: |
| |
| commence |
| LT(1) |
| enterRule b |
| location 7 1 |
| enter decision 3 |
| LT(1) |
| exit decision 3 |
| enterAlt1 |
| location 7 5 |
| LT(1) |
| consumeToken [c/<4>,1:0] |
| location 7 7 |
| enterSubRule 2 |
| enter decision 2 |
| LT(1) |
| LT(1) |
| recognitionException NoViableAltException 2 1 2 |
| exit decision 2 |
| exitSubRule 2 |
| beginResync |
| LT(1) |
| consumeToken [c/<4>,1:1] |
| LT(1) |
| endResync |
| LT(-1) |
| exitRule b |
| terminate |
| """ |
| pass |
| |
| |
| def beginResync(self): |
| """Indicates the recognizer is about to consume tokens to resynchronize |
| the parser. Any consume events from here until the recovered event |
| are not part of the parse--they are dead tokens. |
| """ |
| pass |
| |
| |
| def endResync(self): |
| """Indicates that the recognizer has finished consuming tokens in order |
| to resychronize. There may be multiple beginResync/endResync pairs |
| before the recognizer comes out of errorRecovery mode (in which |
| multiple errors are suppressed). This will be useful |
| in a gui where you want to probably grey out tokens that are consumed |
| but not matched to anything in grammar. Anything between |
| a beginResync/endResync pair was tossed out by the parser. |
| """ |
| pass |
| |
| |
| def semanticPredicate(self, result, predicate): |
| """A semantic predicate was evaluate with this result and action text""" |
| pass |
| |
| |
| def commence(self): |
| """Announce that parsing has begun. Not technically useful except for |
| sending events over a socket. A GUI for example will launch a thread |
| to connect and communicate with a remote parser. The thread will want |
| to notify the GUI when a connection is made. ANTLR parsers |
| trigger this upon entry to the first rule (the ruleLevel is used to |
| figure this out). |
| """ |
| pass |
| |
| |
| def terminate(self): |
| """Parsing is over; successfully or not. Mostly useful for telling |
| remote debugging listeners that it's time to quit. When the rule |
| invocation level goes to zero at the end of a rule, we are done |
| parsing. |
| """ |
| pass |
| |
| |
| ## T r e e P a r s i n g |
| |
| def consumeNode(self, t): |
| """Input for a tree parser is an AST, but we know nothing for sure |
| about a node except its type and text (obtained from the adaptor). |
| This is the analog of the consumeToken method. Again, the ID is |
| the hashCode usually of the node so it only works if hashCode is |
| not implemented. If the type is UP or DOWN, then |
| the ID is not really meaningful as it's fixed--there is |
| just one UP node and one DOWN navigation node. |
| """ |
| pass |
| |
| |
| def LT(self, i, t): |
| """The tree parser lookedahead. If the type is UP or DOWN, |
| then the ID is not really meaningful as it's fixed--there is |
| just one UP node and one DOWN navigation node. |
| """ |
| pass |
| |
| |
| |
| ## A S T E v e n t s |
| |
| def nilNode(self, t): |
| """A nil was created (even nil nodes have a unique ID... |
| they are not "null" per se). As of 4/28/2006, this |
| seems to be uniquely triggered when starting a new subtree |
| such as when entering a subrule in automatic mode and when |
| building a tree in rewrite mode. |
| |
| If you are receiving this event over a socket via |
| RemoteDebugEventSocketListener then only t.ID is set. |
| """ |
| pass |
| |
| |
| def errorNode(self, t): |
| """Upon syntax error, recognizers bracket the error with an error node |
| if they are building ASTs. |
| """ |
| pass |
| |
| |
| def createNode(self, node, token=None): |
| """Announce a new node built from token elements such as type etc... |
| |
| If you are receiving this event over a socket via |
| RemoteDebugEventSocketListener then only t.ID, type, text are |
| set. |
| """ |
| pass |
| |
| |
| def becomeRoot(self, newRoot, oldRoot): |
| """Make a node the new root of an existing root. |
| |
| Note: the newRootID parameter is possibly different |
| than the TreeAdaptor.becomeRoot() newRoot parameter. |
| In our case, it will always be the result of calling |
| TreeAdaptor.becomeRoot() and not root_n or whatever. |
| |
| The listener should assume that this event occurs |
| only when the current subrule (or rule) subtree is |
| being reset to newRootID. |
| |
| If you are receiving this event over a socket via |
| RemoteDebugEventSocketListener then only IDs are set. |
| |
| @see antlr3.tree.TreeAdaptor.becomeRoot() |
| """ |
| pass |
| |
| |
| def addChild(self, root, child): |
| """Make childID a child of rootID. |
| |
| If you are receiving this event over a socket via |
| RemoteDebugEventSocketListener then only IDs are set. |
| |
| @see antlr3.tree.TreeAdaptor.addChild() |
| """ |
| pass |
| |
| |
| def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): |
| """Set the token start/stop token index for a subtree root or node. |
| |
| If you are receiving this event over a socket via |
| RemoteDebugEventSocketListener then only t.ID is set. |
| """ |
| pass |
| |
| |
| class BlankDebugEventListener(DebugEventListener): |
| """A blank listener that does nothing; useful for real classes so |
| they don't have to have lots of blank methods and are less |
| sensitive to updates to debug interface. |
| |
| Note: this class is identical to DebugEventListener and exists purely |
| for compatibility with Java. |
| """ |
| pass |
| |
| |
| class TraceDebugEventListener(DebugEventListener): |
| """A listener that simply records text representations of the events. |
| |
| Useful for debugging the debugging facility ;) |
| |
| Subclasses can override the record() method (which defaults to printing to |
| stdout) to record the events in a different way. |
| """ |
| |
| def __init__(self, adaptor=None): |
| super(TraceDebugEventListener, self).__init__() |
| |
| if adaptor is None: |
| adaptor = CommonTreeAdaptor() |
| self.adaptor = adaptor |
| |
| def record(self, event): |
| sys.stdout.write(event + '\n') |
| |
| def enterRule(self, grammarFileName, ruleName): |
| self.record("enterRule "+ruleName) |
| |
| def exitRule(self, grammarFileName, ruleName): |
| self.record("exitRule "+ruleName) |
| |
| def enterSubRule(self, decisionNumber): |
| self.record("enterSubRule") |
| |
| def exitSubRule(self, decisionNumber): |
| self.record("exitSubRule") |
| |
| def location(self, line, pos): |
| self.record("location %s:%s" % (line, pos)) |
| |
| ## Tree parsing stuff |
| |
| def consumeNode(self, t): |
| self.record("consumeNode %s %s %s" % ( |
| self.adaptor.getUniqueID(t), |
| self.adaptor.getText(t), |
| self.adaptor.getType(t))) |
| |
| def LT(self, i, t): |
| self.record("LT %s %s %s %s" % ( |
| i, |
| self.adaptor.getUniqueID(t), |
| self.adaptor.getText(t), |
| self.adaptor.getType(t))) |
| |
| |
| ## AST stuff |
| def nilNode(self, t): |
| self.record("nilNode %s" % self.adaptor.getUniqueID(t)) |
| |
| def createNode(self, t, token=None): |
| if token is None: |
| self.record("create %s: %s, %s" % ( |
| self.adaptor.getUniqueID(t), |
| self.adaptor.getText(t), |
| self.adaptor.getType(t))) |
| |
| else: |
| self.record("create %s: %s" % ( |
| self.adaptor.getUniqueID(t), |
| token.getTokenIndex())) |
| |
| def becomeRoot(self, newRoot, oldRoot): |
| self.record("becomeRoot %s, %s" % ( |
| self.adaptor.getUniqueID(newRoot), |
| self.adaptor.getUniqueID(oldRoot))) |
| |
| def addChild(self, root, child): |
| self.record("addChild %s, %s" % ( |
| self.adaptor.getUniqueID(root), |
| self.adaptor.getUniqueID(child))) |
| |
| def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): |
| self.record("setTokenBoundaries %s, %s, %s" % ( |
| self.adaptor.getUniqueID(t), |
| tokenStartIndex, tokenStopIndex)) |
| |
| |
| class RecordDebugEventListener(TraceDebugEventListener): |
| """A listener that records events as strings in an array.""" |
| |
| def __init__(self, adaptor=None): |
| super(RecordDebugEventListener, self).__init__(adaptor) |
| |
| self.events = [] |
| |
| def record(self, event): |
| self.events.append(event) |
| |
| |
| class DebugEventSocketProxy(DebugEventListener): |
| """A proxy debug event listener that forwards events over a socket to |
| a debugger (or any other listener) using a simple text-based protocol; |
| one event per line. ANTLRWorks listens on server socket with a |
| RemoteDebugEventSocketListener instance. These two objects must therefore |
| be kept in sync. New events must be handled on both sides of socket. |
| """ |
| |
| DEFAULT_DEBUGGER_PORT = 49100 |
| |
| def __init__(self, recognizer, adaptor=None, port=None, |
| debug=None): |
| super(DebugEventSocketProxy, self).__init__() |
| |
| self.grammarFileName = recognizer.getGrammarFileName() |
| |
| # Almost certainly the recognizer will have adaptor set, but |
| # we don't know how to cast it (Parser or TreeParser) to get |
| # the adaptor field. Must be set with a constructor. :( |
| self.adaptor = adaptor |
| |
| self.port = port or self.DEFAULT_DEBUGGER_PORT |
| |
| self.debug = debug |
| |
| self.socket = None |
| self.connection = None |
| self.input = None |
| self.output = None |
| |
| |
| def log(self, msg): |
| if self.debug is not None: |
| self.debug.write(msg + '\n') |
| |
| |
| def handshake(self): |
| if self.socket is None: |
| # create listening socket |
| self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) |
| self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) |
| self.socket.bind(('', self.port)) |
| self.socket.listen(1) |
| self.log("Waiting for incoming connection on port %d" % self.port) |
| |
| # wait for an incoming connection |
| self.connection, addr = self.socket.accept() |
| self.log("Accepted connection from %s:%d" % addr) |
| |
| self.connection.setblocking(1) |
| self.connection.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1) |
| |
| # FIXME(pink): wrap into utf8 encoding stream |
| self.output = self.connection.makefile('w', 0) |
| self.input = self.connection.makefile('r', 0) |
| |
| self.write("ANTLR %s" % self.PROTOCOL_VERSION) |
| self.write("grammar \"%s" % self.grammarFileName) |
| self.ack() |
| |
| |
| def write(self, msg): |
| self.log("> %s" % msg) |
| self.output.write("%s\n" % msg) |
| self.output.flush() |
| |
| |
| def ack(self): |
| t = self.input.readline() |
| self.log("< %s" % t.rstrip()) |
| |
| |
| def transmit(self, event): |
| self.write(event); |
| self.ack(); |
| |
| |
| def commence(self): |
| # don't bother sending event; listener will trigger upon connection |
| pass |
| |
| |
| def terminate(self): |
| self.transmit("terminate") |
| self.output.close() |
| self.input.close() |
| self.connection.close() |
| self.socket.close() |
| |
| |
| def enterRule(self, grammarFileName, ruleName): |
| self.transmit("enterRule\t%s\t%s" % (grammarFileName, ruleName)) |
| |
| |
| def enterAlt(self, alt): |
| self.transmit("enterAlt\t%d" % alt) |
| |
| |
| def exitRule(self, grammarFileName, ruleName): |
| self.transmit("exitRule\t%s\t%s" % (grammarFileName, ruleName)) |
| |
| |
| def enterSubRule(self, decisionNumber): |
| self.transmit("enterSubRule\t%d" % decisionNumber) |
| |
| |
| def exitSubRule(self, decisionNumber): |
| self.transmit("exitSubRule\t%d" % decisionNumber) |
| |
| |
| def enterDecision(self, decisionNumber, couldBacktrack): |
| self.transmit( |
| "enterDecision\t%d\t%d" % (decisionNumber, couldBacktrack)) |
| |
| |
| def exitDecision(self, decisionNumber): |
| self.transmit("exitDecision\t%d" % decisionNumber) |
| |
| |
| def consumeToken(self, t): |
| self.transmit("consumeToken\t%s" % self.serializeToken(t)) |
| |
| |
| def consumeHiddenToken(self, t): |
| self.transmit("consumeHiddenToken\t%s" % self.serializeToken(t)) |
| |
| |
| def LT(self, i, o): |
| if isinstance(o, Tree): |
| return self.LT_tree(i, o) |
| return self.LT_token(i, o) |
| |
| |
| def LT_token(self, i, t): |
| if t is not None: |
| self.transmit("LT\t%d\t%s" % (i, self.serializeToken(t))) |
| |
| |
| def mark(self, i): |
| self.transmit("mark\t%d" % i) |
| |
| |
| def rewind(self, i=None): |
| if i is not None: |
| self.transmit("rewind\t%d" % i) |
| else: |
| self.transmit("rewind") |
| |
| |
| def beginBacktrack(self, level): |
| self.transmit("beginBacktrack\t%d" % level) |
| |
| |
| def endBacktrack(self, level, successful): |
| self.transmit("endBacktrack\t%d\t%s" % ( |
| level, ['0', '1'][bool(successful)])) |
| |
| |
| def location(self, line, pos): |
| self.transmit("location\t%d\t%d" % (line, pos)) |
| |
| |
| def recognitionException(self, exc): |
| self.transmit('\t'.join([ |
| "exception", |
| exc.__class__.__name__, |
| str(int(exc.index)), |
| str(int(exc.line)), |
| str(int(exc.charPositionInLine))])) |
| |
| |
| def beginResync(self): |
| self.transmit("beginResync") |
| |
| |
| def endResync(self): |
| self.transmit("endResync") |
| |
| |
| def semanticPredicate(self, result, predicate): |
| self.transmit('\t'.join([ |
| "semanticPredicate", |
| str(int(result)), |
| self.escapeNewlines(predicate)])) |
| |
| ## A S T P a r s i n g E v e n t s |
| |
| def consumeNode(self, t): |
| FIXME(31) |
| # StringBuffer buf = new StringBuffer(50); |
| # buf.append("consumeNode"); |
| # serializeNode(buf, t); |
| # transmit(buf.toString()); |
| |
| |
| def LT_tree(self, i, t): |
| FIXME(34) |
| # int ID = adaptor.getUniqueID(t); |
| # String text = adaptor.getText(t); |
| # int type = adaptor.getType(t); |
| # StringBuffer buf = new StringBuffer(50); |
| # buf.append("LN\t"); // lookahead node; distinguish from LT in protocol |
| # buf.append(i); |
| # serializeNode(buf, t); |
| # transmit(buf.toString()); |
| |
| |
| def serializeNode(self, buf, t): |
| FIXME(33) |
| # int ID = adaptor.getUniqueID(t); |
| # String text = adaptor.getText(t); |
| # int type = adaptor.getType(t); |
| # buf.append("\t"); |
| # buf.append(ID); |
| # buf.append("\t"); |
| # buf.append(type); |
| # Token token = adaptor.getToken(t); |
| # int line = -1; |
| # int pos = -1; |
| # if ( token!=null ) { |
| # line = token.getLine(); |
| # pos = token.getCharPositionInLine(); |
| # } |
| # buf.append("\t"); |
| # buf.append(line); |
| # buf.append("\t"); |
| # buf.append(pos); |
| # int tokenIndex = adaptor.getTokenStartIndex(t); |
| # buf.append("\t"); |
| # buf.append(tokenIndex); |
| # serializeText(buf, text); |
| |
| |
| ## A S T E v e n t s |
| |
| def nilNode(self, t): |
| self.transmit("nilNode\t%d" % self.adaptor.getUniqueID(t)) |
| |
| |
| def errorNode(self, t): |
| self.transmit("errorNode\t%d\t%d\t\"%s" % ( |
| self.adaptor.getUniqueID(t), |
| Token.INVALID_TOKEN_TYPE, |
| self.escapeNewlines(t.toString()))) |
| |
| |
| |
| def createNode(self, node, token=None): |
| if token is not None: |
| self.transmit("createNode\t%d\t%d" % ( |
| self.adaptor.getUniqueID(node), |
| token.getTokenIndex())) |
| |
| else: |
| self.transmit("createNodeFromTokenElements\t%d\t%d\t\"%s" % ( |
| self.adaptor.getUniqueID(node), |
| self.adaptor.getType(node), |
| self.adaptor.getText(node))) |
| |
| |
| def becomeRoot(self, newRoot, oldRoot): |
| self.transmit("becomeRoot\t%d\t%d" % ( |
| self.adaptor.getUniqueID(newRoot), |
| self.adaptor.getUniqueID(oldRoot))) |
| |
| |
| def addChild(self, root, child): |
| self.transmit("addChild\t%d\t%d" % ( |
| self.adaptor.getUniqueID(root), |
| self.adaptor.getUniqueID(child))) |
| |
| |
| def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex): |
| self.transmit("setTokenBoundaries\t%d\t%d\t%d" % ( |
| self.adaptor.getUniqueID(t), |
| tokenStartIndex, tokenStopIndex)) |
| |
| |
| |
| ## support |
| |
| def setTreeAdaptor(self, adaptor): |
| self.adaptor = adaptor |
| |
| def getTreeAdaptor(self): |
| return self.adaptor |
| |
| |
| def serializeToken(self, t): |
| buf = [str(int(t.getTokenIndex())), |
| str(int(t.getType())), |
| str(int(t.getChannel())), |
| str(int(t.getLine() or 0)), |
| str(int(t.getCharPositionInLine() or 0)), |
| '\"' + self.escapeNewlines(t.getText())] |
| return '\t'.join(buf) |
| |
| |
| def escapeNewlines(self, txt): |
| if txt is None: |
| return '' |
| |
| txt = txt.replace("%","%25") # escape all escape char ;) |
| txt = txt.replace("\n","%0A") # escape \n |
| txt = txt.replace("\r","%0D") # escape \r |
| return txt |