| # a glorified C pre-processor parser |
| |
| import sys, re, string |
| from utils import * |
| from defaults import * |
| |
# debug switches: set any of these to True to trace the corresponding
# stage of the preprocessor on stdout
debugTokens = False
debugDirectiveTokenizer = False
debugLineParsing = False
debugCppExpr = False
debugOptimIf01 = False
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### C P P T O K E N S ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
| # the list of supported C-preprocessor tokens |
| # plus a couple of C tokens as well |
# pseudo tokens marking the end of a line / of the whole input
tokEOF = "\0"
tokLN = "\n"
# preprocessor-specific operators
tokSTRINGIFY = "#"
tokCONCAT = "##"
# multi-character C operators (see cppLongSymbols below)
tokLOGICAND = "&&"
tokLOGICOR = "||"
tokSHL = "<<"
tokSHR = ">>"
tokEQUAL = "=="
tokNEQUAL = "!="
tokLT = "<"
tokLTE = "<="
tokGT = ">"
tokGTE = ">="
tokELLIPSIS = "..."
tokSPACE = " "
tokDEFINED = "defined"
# single-character C operators and punctuation
tokLPAREN = "("
tokRPAREN = ")"
tokNOT = "!"
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
# token classes whose 'value' differs from their 'id'
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"
| |
class Token:
    """a simple class to hold information about a given token.
    each token has a position in the source code, as well as
    an 'id' and a 'value'. the id is a string that identifies
    the token's class, while the value is the string of the
    original token itself.

    for example, the tokenizer concatenates a series of spaces
    and tabs as a single tokSPACE id, whose value is the original
    spaces+tabs sequence."""

    def __init__(self):
        self.id = None      # one of the tokXXX constants above
        self.value = None   # original source text of the token
        self.lineno = 0     # source line of the token
        self.colno = 0      # source column of the token

    def set(self,id,val=None):
        """set the token's id, and optionally its value. when no value
        is given, the id doubles as the value. always returns None so
        callers can write 'return tok.set(...)'"""
        self.id = id
        # BUGFIX: compare against None explicitly so that an explicit
        # falsy value (e.g. the empty string) is not silently replaced
        # by the id, as the old truthiness test did
        if val is not None:
            self.value = val
        else:
            self.value = id
        return None

    def copyFrom(self,src):
        """copy all fields from another Token object"""
        self.id = src.id
        self.value = src.value
        self.lineno = src.lineno
        self.colno = src.colno

    def __repr__(self):
        if self.id == tokIDENT:
            return "(ident %s)" % self.value
        if self.id == tokNUMBER:
            return "(number %s)" % self.value
        if self.id == tokSTRING:
            return "(string '%s')" % self.value
        if self.id == tokLN:
            return "<LN>"
        if self.id == tokEOF:
            return "<EOF>"
        if self.id == tokSPACE and self.value == "\\":
            # this corresponds to a trailing \ that was transformed into a tokSPACE
            return "<\\>"

        return self.id

    def __str__(self):
        if self.id == tokIDENT:
            return self.value
        if self.id == tokNUMBER:
            return self.value
        if self.id == tokSTRING:
            return self.value
        if self.id == tokEOF:
            return "<EOF>"
        if self.id == tokSPACE:
            if self.value == "\\": # trailing \
                return "\\\n"
            else:
                return self.value

        return self.id
| |
class BadExpectedToken(Exception):
    """raised when the parser meets a token other than the expected one.
    the message is printed immediately (preserved legacy behavior) and
    is also passed to Exception so that str(e) carries it."""
    def __init__(self,msg):
        # BUGFIX: the message used to be printed only and never handed to
        # Exception, leaving str(e) empty for anyone catching this.
        # print(msg) is the call form, valid in both Python 2 and 3.
        Exception.__init__(self, msg)
        print(msg)
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### C P P T O K E N C U R S O R ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
class TokenCursor:
    """a small class to iterate over a list of Token objects"""

    def __init__(self,tokens):
        self.tokens = tokens
        self.n = 0                 # current read position
        self.count = len(tokens)   # total number of tokens

    def set(self,n):
        """set the current position, clamped into [0, count]"""
        if n < 0:
            n = 0
        if n > self.count:
            n = self.count
        self.n = n

    def peekId(self):
        """retrieve the id of the current token, or None past the end"""
        if (self.n >= self.count):
            return None
        return self.tokens[self.n].id

    def peek(self):
        """retrieve the current token, or None past the end.
        does not change position"""
        if (self.n >= self.count):
            return None
        return self.tokens[self.n]

    def skip(self):
        """increase current token position"""
        if (self.n < self.count):
            self.n += 1

    def skipSpaces(self):
        """skip over all space tokens, this includes tokSPACE and tokLN"""
        while 1:
            tok = self.peekId()
            if tok != tokSPACE and tok != tokLN:
                break
            self.skip()

    def skipIfId(self,id):
        """skip an optional token"""
        if self.peekId() == id:
            self.skip()

    def expectId(self,id):
        """raise an exception if the current token hasn't a given id.
        otherwise skip over it"""
        tok = self.peek()
        # BUGFIX: peek() returns None at the end of the list; report a
        # proper error instead of crashing with an AttributeError.
        # the raise call form is valid in both Python 2 and 3.
        if tok is None:
            raise BadExpectedToken("EOF: '%s' expected, received end of input" % id)
        if tok.id != id:
            raise BadExpectedToken("%d:%d: '%s' expected, received '%s'" % (tok.lineno, tok.colno, id, tok.id))
        self.skip()

    def remain(self):
        """return the list of remaining tokens"""
        return self.tokens[self.n:]
| |
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### C P P T O K E N I Z E R ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
# list of long symbols, i.e. those that take more than one characters
# (nextRealToken matches these by comparing the first character, then
# the rest of the symbol against the remaining line text)
cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\
                   tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ]
| |
class CppTokenizer:
    """an abstract class used to convert some input text into a list
    of tokens. real implementations follow and differ in the format
    of the input text only"""

    def __init__(self):
        """initialize a new CppTokenizer object"""
        self.eof = False  # end of file reached ?
        self.text = None  # content of current line, with final \n stripped
        self.line = 0  # number of current line
        self.pos = 0  # current character position in current line
        self.len = 0  # length of current line text
        self.held = Token()

    def setLineText(self,line):
        """set the content of the (next) current line. should be called
        by fillLineText() in derived classes"""
        self.text = line
        self.len = len(line)
        self.pos = 0

    def fillLineText(self):
        """refresh the content of 'line' with a new line of input"""
        # to be overriden
        self.eof = True

    def markPos(self,tok):
        """mark the position of the current token in the source file"""
        if self.eof or self.pos > self.len:
            tok.lineno = self.line + 1
            tok.colno = 0
        else:
            tok.lineno = self.line
            tok.colno = self.pos

    def peekChar(self):
        """return the character under the cursor without moving it.
        returns tokLN at end-of-line and tokEOF at end-of-input"""
        if self.eof:
            return tokEOF

        if self.pos > self.len:
            # past the end-of-line marker: fetch the next line
            self.pos = 0
            self.line += 1
            self.fillLineText()
            if self.eof:
                return tokEOF

        if self.pos == self.len:
            return tokLN
        else:
            return self.text[self.pos]

    def peekNChar(self,n):
        """try to peek the next n chars on the same line; returns None
        if fewer than n characters remain on the line"""
        if self.pos + n > self.len:
            return None
        return self.text[self.pos:self.pos+n]

    def skipChar(self):
        """increment the character cursor position"""
        if not self.eof:
            self.pos += 1

    def skipNChars(self,n):
        """skip the next n characters"""
        if self.pos + n <= self.len:
            self.pos += n
        else:
            while n > 0:
                self.skipChar()
                n -= 1

    def nextChar(self):
        """retrieve the character at the current cursor position, then skip it"""
        result = self.peekChar()
        self.skipChar()
        return result

    def getEscape(self):
        """try to get all characters after a backslash (\\), consuming the
        digits of octal (\\0NNN), hex (\\xNN) and unicode (\\uNNNN) escapes"""
        result = self.nextChar()
        if result == "0":
            # octal number ?
            num = self.peekNChar(3)
            if num != None:
                isOctal = True
                for d in num:
                    if not d in "01234567":
                        isOctal = False
                        break
                if isOctal:
                    result += num
                    self.skipNChars(3)
        elif result == "x" or result == "X":
            # hex number ?
            num = self.peekNChar(2)
            if num != None:
                isHex = True
                for d in num:
                    # BUGFIX: the digit '9' was missing from this set, so
                    # hex escapes containing a 9 (e.g. \x9f) were rejected
                    if not d in "0123456789abcdefABCDEF":
                        isHex = False
                        break
                if isHex:
                    result += num
                    self.skipNChars(2)
        elif result == "u" or result == "U":
            # unicode char ?
            num = self.peekNChar(4)
            if num != None:
                isHex = True
                for d in num:
                    # BUGFIX: same missing '9' as in the hex case above
                    if not d in "0123456789abcdefABCDEF":
                        isHex = False
                        break
                if isHex:
                    result += num
                    self.skipNChars(4)

        return result

    def nextRealToken(self,tok):
        """return next CPP token, used internally by nextToken()"""
        c = self.nextChar()
        if c == tokEOF or c == tokLN:
            return tok.set(c)

        if c == '/':
            c = self.peekChar()
            if c == '/': # C++ comment line
                self.skipChar()
                while 1:
                    c = self.nextChar()
                    if c == tokEOF or c == tokLN:
                        break
                return tok.set(tokLN)
            if c == '*': # C comment start
                self.skipChar()
                value = "/*"
                prev_c = None
                while 1:
                    c = self.nextChar()
                    if c == tokEOF:
                        # unterminated comment: report what we have
                        return tok.set(tokEOF,value)
                    if c == '/' and prev_c == '*':
                        break
                    prev_c = c
                    value += c

                value += "/"
                # a comment is reported as a single space token whose
                # value is the original comment text
                return tok.set(tokSPACE,value)
            c = '/'

        if c.isspace():
            # concatenate consecutive spaces/tabs into one tokSPACE
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or not c2.isspace():
                    break
                c += c2
                self.skipChar()
            return tok.set(tokSPACE,c)

        if c == '\\':
            if debugTokens:
                print("nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar()))
            if self.peekChar() == tokLN: # trailing \
                # eat the tokLN
                self.skipChar()
                # we replace a trailing \ by a tokSPACE whose value is
                # simply "\\". this allows us to detect them later when
                # needed.
                return tok.set(tokSPACE,"\\")
            else:
                # treat as a single token here ?
                c += self.getEscape()
                return tok.set(c)

        if c == "'": # chars
            c2 = self.nextChar()
            c += c2
            if c2 == '\\':
                c += self.getEscape()

            while 1:
                c2 = self.nextChar()
                if c2 == tokEOF:
                    break
                c += c2
                if c2 == "'":
                    break

            return tok.set(tokSTRING, c)

        if c == '"': # strings
            quote = 0   # set after a backslash, to skip escaped quotes
            while 1:
                c2 = self.nextChar()
                if c2 == tokEOF:
                    return tok.set(tokSTRING,c)

                c += c2
                if not quote:
                    if c2 == '"':
                        return tok.set(tokSTRING,c)
                    if c2 == "\\":
                        quote = 1
                else:
                    quote = 0

        if c >= "0" and c <= "9": # integers ?
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
                    break
                c += c2
                self.skipChar()
            return tok.set(tokNUMBER,c)

        if c.isalnum() or c == "_": # identifiers ?
            while 1:
                c2 = self.peekChar()
                if c2 == tokLN or (not c2.isalnum() and c2 != "_"):
                    break
                c += c2
                self.skipChar()
            if c == tokDEFINED:
                return tok.set(tokDEFINED)
            else:
                return tok.set(tokIDENT,c)

        # check special symbols
        for sk in cppLongSymbols:
            if c == sk[0]:
                sklen = len(sk[1:])
                if self.pos + sklen <= self.len and \
                   self.text[self.pos:self.pos+sklen] == sk[1:]:
                    self.pos += sklen
                    return tok.set(sk)

        return tok.set(c)

    def nextToken(self,tok):
        """return the next token from the input text. this function
        really updates 'tok', and does not return a new one"""
        self.markPos(tok)
        self.nextRealToken(tok)

    def getToken(self):
        """allocate and return a fresh Token holding the next token"""
        tok = Token()
        self.nextToken(tok)
        if debugTokens:
            print("getTokens: %s" % repr(tok))
        return tok

    def toTokenList(self):
        """convert the input text of a CppTokenizer into a direct
        list of token objects. tokEOF is stripped from the result"""
        result = []
        while 1:
            tok = Token()
            self.nextToken(tok)
            if tok.id == tokEOF:
                break
            result.append(tok)
        return result
| |
class CppLineTokenizer(CppTokenizer):
    """a CppTokenizer derived class that accepts a single line of text as input"""

    def __init__(self,line,lineno=1):
        """feed one line of text (without its trailing newline) to the tokenizer"""
        CppTokenizer.__init__(self)
        self.setLineText(line)
        self.line = lineno
| |
| |
class CppLinesTokenizer(CppTokenizer):
    """a CppTokenizer derived class that accepts a list of text lines as input.
    the lines must not have a trailing \n"""

    def __init__(self,lines=None,lineno=1):
        """initialize a CppLinesTokenizer. you can later add lines using addLines()"""
        CppTokenizer.__init__(self)
        # BUGFIX: the default used to be the mutable 'lines=[]'; addLine()
        # then appended to one list silently shared by every instance
        # constructed without an explicit 'lines' argument
        if lines is None:
            lines = []
        self.line = lineno
        self.lines = lines
        self.index = 0       # next line of self.lines to feed to the tokenizer
        self.count = len(lines)

        if self.count > 0:
            self.fillLineText()
        else:
            self.eof = True

    def addLine(self,line):
        """add a line to a CppLinesTokenizer. this can be done after tokenization
        happens"""
        if self.count == 0:
            # first line ever: make it the current line directly
            self.setLineText(line)
            self.index = 1
        self.lines.append(line)
        self.count += 1
        self.eof = False

    def fillLineText(self):
        """feed the next stored line to the tokenizer, or flag end of input"""
        if self.index < self.count:
            self.setLineText(self.lines[self.index])
            self.index += 1
        else:
            self.eof = True
| |
| |
class CppFileTokenizer(CppTokenizer):
    """a CppTokenizer derived class that reads its lines from a file object"""

    def __init__(self,file,lineno=1):
        """initialize from an open file object; lines are read lazily"""
        CppTokenizer.__init__(self)
        self.file = file
        self.line = lineno

    def fillLineText(self):
        """read the next line from the file, dropping any trailing LF / CRLF"""
        line = self.file.readline()
        if not line:
            # readline() returns an empty string only at end of file
            self.eof = True
            return
        if line.endswith('\n'):
            line = line[:-1]
            if line.endswith("\r"):
                line = line[:-1]
        self.setLineText(line)
| |
| # Unit testing |
| # |
class CppTokenizerTester:
    """a class used to test CppTokenizer classes"""

    def __init__(self,tokenizer=None):
        self.tokenizer = tokenizer
        self.token = Token()   # reused for every expect() call

    def setTokenizer(self,tokenizer):
        """select the tokenizer the following expectations run against"""
        self.tokenizer = tokenizer

    def expect(self,id):
        """fetch the next token and check its id. for identifier and
        number tokens, 'id' may also be the expected token value"""
        self.tokenizer.nextToken(self.token)
        tokid = self.token.id
        if tokid == id:
            return
        if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER):
            return
        # NOTE: the exception-call raise form is valid in both Python 2
        # and Python 3, unlike the old 'raise X, msg' form used before
        raise BadExpectedToken("### BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id))

    def expectToken(self,id,line,col):
        """check the next token's id as well as its recorded position"""
        self.expect(id)
        if self.token.lineno != line:
            raise BadExpectedToken("### BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line))
        if self.token.colno != col:
            raise BadExpectedToken("### BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col))

    def expectTokenVal(self,id,value,line,col):
        """check the next token's id, value and position"""
        self.expectToken(id,line,col)
        if self.token.value != value:
            raise BadExpectedToken("### BAD VALUE: '%s' expecting '%s'" % (self.token.value,value))

    def expectList(self,list):
        """run expect() over a list of expected token ids"""
        for item in list:
            self.expect(item)
| |
def test_CppTokenizer():
    """run the CppLineTokenizer / CppLinesTokenizer classes over a set of
    known inputs. returns True on success, raises BadExpectedToken otherwise"""
    print "running CppTokenizer tests"
    tester = CppTokenizerTester()

    # mix of directives, identifiers, operators and parentheses on one line
    tester.setTokenizer( CppLineTokenizer("#an/example  && (01923_xy)") )
    tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \
                       tokRPAREN, tokLN, tokEOF] )

    # 'defined' must come out as its own token kind
    tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") )
    tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE,
                        tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] )

    # a multi-line C comment collapses into a single space token
    tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) )
    tester.expectList( [ tokSPACE, tokLN, tokEOF ] )

    tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) )
    tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] )

    # line/column positions must be tracked across lines
    tester.setTokenizer( CppLinesTokenizer( ["first second", "  third"] ) )
    tester.expectToken( "first", 1, 0 )
    tester.expectToken( tokSPACE, 1, 5 )
    tester.expectToken( "second", 1, 6 )
    tester.expectToken( tokLN, 1, 12 )
    tester.expectToken( tokSPACE, 2, 0 )
    tester.expectToken( "third", 2, 2 )

    # a C comment spanning two lines keeps its original text as value
    tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) )
    tester.expectList( [ "boo", tokSPACE ] )
    tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 )
    tester.expectList( [ tokLN, tokEOF ] )

    # a trailing backslash becomes a tokSPACE whose value is "\\"
    tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) )
    tester.expectToken( "an", 1, 0 )
    tester.expectToken( tokSPACE, 1, 2 )
    tester.expectTokenVal( tokSPACE, "\\", 1, 3 )
    tester.expectToken( tokSPACE, 2, 0 )
    tester.expectToken( "example", 2, 1 )
    tester.expectToken( tokLN, 2, 8 )

    return True
| |
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### C P P E X P R E S S I O N S ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
| # Cpp expressions are modeled by tuples of the form (op,arg) or (op,arg1,arg2), etc.. |
| # op is an "operator" string |
| |
class Expr:
    """a class used to model a CPP expression"""
    opInteger = "int"
    opIdent = "ident"
    opCall = "call"
    opDefined = "defined"
    opTest = "?"
    opLogicNot = "!"
    opNot = "~"
    opNeg = "[-]"
    opUnaryPlus = "[+]"
    opAdd = "+"
    opSub = "-"
    opMul = "*"
    opDiv = "/"
    opMod = "%"
    opAnd = "&"
    opOr = "|"
    opXor = "^"
    opLogicAnd = "&&"
    opLogicOr = "||"
    opEqual = "=="
    opNotEqual = "!="
    opLess = "<"
    opLessEq = "<="
    opGreater = ">"
    opGreaterEq = ">="
    opShl = "<<"
    opShr = ">>"

    unaries = [ opLogicNot, opNot, opNeg, opUnaryPlus ]
    # BUGFIX: opShl/opShr were missing from this list even though they
    # have entries in the precedence table below
    binaries = [ opAdd, opSub, opMul, opDiv, opMod, opAnd, opOr, opXor, opLogicAnd, opLogicOr,
                 opEqual, opNotEqual, opLess, opLessEq, opGreater, opGreaterEq, opShl, opShr ]

    # binding priority of each operator; higher binds tighter.
    # operators absent from the table get a very high default (see precedence())
    precedences = {
        opTest: 0,
        opLogicOr: 1,
        # BUGFIX: this entry was a duplicate 'opLogicNot: 2' key, which
        # left opLogicAnd ("&&") without a precedence at all
        opLogicAnd: 2,
        opOr : 3,
        opXor: 4,
        opAnd: 5,
        opEqual: 6, opNotEqual: 6,
        opLess:7, opLessEq:7, opGreater:7, opGreaterEq:7,
        opShl:8, opShr:8,
        opAdd:9, opSub:9,
        opMul:10, opDiv:10, opMod:10,
        opLogicNot:11,
        opNot: 12,
        }

    def __init__(self,op):
        self.op = op

    def __repr__(self):
        return "(%s)" % self.op

    def __str__(self):
        return "operator(%s)" % self.op

    def precedence(self):
        """return the precedence of a given operator"""
        return Expr.precedences.get(self.op, 1000)

    def isUnary(self):
        return self.op in Expr.unaries

    def isBinary(self):
        return self.op in Expr.binaries

    def isDefined(self):
        # BUGFIX: this used the bare name 'opDefined' (a NameError at
        # runtime — it is a class attribute) and identity comparison
        return self.op == Expr.opDefined

    def toInt(self):
        """return the integer value of a given expression. only valid for integer expressions
        will return None otherwise"""
        return None
| |
class IntExpr(Expr):
    """an Expr node holding an integer literal, kept as its source string"""
    def __init__(self,value):
        # BUGFIX: this used the bare name 'opInteger', a NameError at
        # runtime — it is a class attribute of Expr, not a global
        Expr.__init__(self, Expr.opInteger)
        self.arg = value

    def __repr__(self):
        return "(int %s)" % self.arg

    def __str__(self):
        return self.arg

    def toInt(self):
        """return the literal's value as an int, ignoring U/L suffixes"""
        s = self.arg # string value
        # get rid of U or L suffixes
        while len(s) > 0 and s[-1] in "LUlu":
            s = s[:-1]
        # string.atoi is the historical Python 2 spelling of int(s, 10)
        return string.atoi(s)
| |
class IdentExpr(Expr):
    """an Expr node holding a plain identifier (macro name)"""
    def __init__(self,name):
        # BUGFIX: this used the bare name 'opIdent', a NameError at
        # runtime — it is a class attribute of Expr, not a global
        Expr.__init__(self, Expr.opIdent)
        self.name = name

    def __repr__(self):
        return "(ident %s)" % self.name

    def __str__(self):
        return self.name
| |
class CallExpr(Expr):
    """an Expr node modeling a function-macro invocation:
    a macro name plus a list of parameter expressions"""
    def __init__(self,funcname,params):
        # BUGFIX: this used the bare name 'opCall', a NameError at
        # runtime — it is a class attribute of Expr, not a global
        Expr.__init__(self, Expr.opCall)
        self.funcname = funcname
        self.params = params

    def __repr__(self):
        result = "(call %s [" % self.funcname
        comma = ""
        for param in self.params:
            result += "%s%s" % (comma, repr(param))
            comma = ","
        result += "])"
        return result

    def __str__(self):
        result = "%s(" % self.funcname
        comma = ""
        for param in self.params:
            result += "%s%s" % (comma, str(param))
            comma = ","

        result += ")"
        return result
| |
class TestExpr(Expr):
    """an Expr node modeling the C ternary operator 'cond ? iftrue : iffalse'"""
    def __init__(self,cond,iftrue,iffalse):
        # BUGFIX: this used the bare name 'opTest', a NameError at
        # runtime — it is a class attribute of Expr, not a global
        Expr.__init__(self, Expr.opTest)
        self.cond = cond
        self.iftrue = iftrue
        self.iffalse = iffalse

    def __repr__(self):
        return "(?: %s %s %s)" % (repr(self.cond),repr(self.iftrue),repr(self.iffalse))

    def __str__(self):
        return "(%s) ? (%s) : (%s)" % (self.cond, self.iftrue, self.iffalse)
| |
class SingleArgExpr(Expr):
    """base class for expression nodes that hold exactly one sub-expression"""

    def __init__(self,op,arg):
        Expr.__init__(self,op)
        self.arg = arg

    def __repr__(self):
        pieces = (self.op, repr(self.arg))
        return "(%s %s)" % pieces
| |
class DefinedExpr(SingleArgExpr):
    """an Expr node modeling a 'defined(MACRO)' clause"""
    def __init__(self,op,macroname=None):
        # BUGFIX: the original called SingleArgExpr.__init__(self.opDefined,
        # macroname), dropping 'self' entirely and passing the op string in
        # its place, which can never work. additionally, the only visible
        # caller (ExprParser.is_defined) constructs this with a single
        # argument — DefinedExpr(macroname) — so accept that form too.
        if macroname is None:
            macroname = op
        SingleArgExpr.__init__(self, Expr.opDefined, macroname)

    def __str__(self):
        return "defined(%s)" % self.arg
| |
| |
class UnaryExpr(SingleArgExpr):
    """a unary operator applied to a single sub-expression"""

    def __init__(self,op,arg,opstr=None):
        """'opstr' is the display string for the operator; defaults to 'op'"""
        SingleArgExpr.__init__(self,op,arg)
        self.opstr = opstr or op

    def __str__(self):
        # parenthesize the operand when it binds more loosely than we do
        rendered = str(self.arg)
        if self.arg.precedence() < self.precedence():
            return "%s(%s)" % (self.opstr, rendered)
        return "%s%s" % (self.opstr, rendered)
| |
class TwoArgExpr(Expr):
    """base class for expression nodes that hold exactly two sub-expressions"""

    def __init__(self,op,arg1,arg2):
        Expr.__init__(self,op)
        self.arg1 = arg1
        self.arg2 = arg2

    def __repr__(self):
        pieces = (self.op, repr(self.arg1), repr(self.arg2))
        return "(%s %s %s)" % pieces
| |
class BinaryExpr(TwoArgExpr):
    """a binary operator applied to two sub-expressions"""

    def __init__(self,op,arg1,arg2,opstr=None):
        """'opstr' is the display string for the operator; defaults to 'op'"""
        TwoArgExpr.__init__(self,op,arg1,arg2)
        self.opstr = opstr or op

    def __str__(self):
        # wrap either operand in parentheses when it binds more loosely
        # than this operator does
        self_prec = self.precedence()

        def render(arg):
            text = str(arg)
            if arg.precedence() < self_prec:
                return "(%s)" % text
            return text

        return "%s %s %s" % (render(self.arg1), self.opstr, render(self.arg2))
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### C P P E X P R E S S I O N P A R S E R ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
| |
class ExprParser:
    """a class used to convert a list of tokens into a cpp Expr object.

    NOTE(review): this parser looks unfinished — parse() is a stub,
    is_call_or_ident() is empty, and is_defined() calls a self.throw()
    method that is not defined on this class. only local, clearly-wrong
    constructs are repaired here."""

    re_octal = re.compile(r"\s*\(0[0-7]+\).*")
    re_decimal = re.compile(r"\s*\(\d+[ulUL]*\).*")
    re_hexadecimal = re.compile(r"\s*\(0[xX][0-9a-fA-F]*\).*")

    def __init__(self,tokens):
        self.tok = tokens        # list of Token objects
        self.n = len(self.tok)
        self.i = 0               # current read position

    def mark(self):
        """return the current position, for a later release()"""
        return self.i

    def release(self,pos):
        """restore a position previously returned by mark()"""
        self.i = pos

    def peekId(self):
        """return the id of the current token, or None at end of input"""
        if self.i < self.n:
            return self.tok[self.i].id
        return None

    def peek(self):
        """return the current token, or None at end of input"""
        if self.i < self.n:
            return self.tok[self.i]
        return None

    def skip(self):
        """advance to the next token"""
        if self.i < self.n:
            self.i += 1

    def skipOptional(self,id):
        """skip the current token only when it carries a given id"""
        if self.i < self.n and self.tok[self.i].id == id:
            self.i += 1

    def skipSpaces(self):
        """skip over space and newline tokens"""
        i = self.i
        n = self.n
        tok = self.tok
        # BUGFIX: this compared the Token objects themselves against the
        # tokSPACE/tokLN id strings, which is never true; compare the ids
        while i < n and (tok[i].id == tokSPACE or tok[i].id == tokLN):
            i += 1
        self.i = i

    # all the isXXX functions returns a (expr,nextpos) pair if a match is found
    # or None if not

    def is_integer(self):
        """try to parse an integer literal at the current position.
        returns an (IntExpr, nextpos) pair, or None on mismatch"""
        id = self.tok[self.i].id
        c = id[0]
        if c < '0' or c > '9':
            return None

        m = ExprParser.re_octal.match(id)
        if m:
            return (IntExpr(id), m.end(1))

        m = ExprParser.re_decimal.match(id)
        if m:
            return (IntExpr(id), m.end(1))

        # BUGFIX: the compiled pattern itself was being called
        # ('re_hexadecimal(id)'), which raises TypeError; .match() was intended
        m = ExprParser.re_hexadecimal.match(id)
        if m:
            return (IntExpr(id), m.end(1))

        return None

    def is_defined(self):
        """try to parse a 'defined MACRO' / 'defined(MACRO)' clause.
        returns a (DefinedExpr, nextpos) pair, or None on mismatch"""
        id = self.tok[self.i].id
        if id != "defined":
            return None

        pos = self.mark()

        use_paren = 0
        if self.peekId() == tokLPAREN:
            self.skip()
            use_paren = 1

        if self.peekId() != tokIDENT:
            # NOTE(review): self.throw() is not defined on this class;
            # this path raises AttributeError if ever reached — verify
            self.throw( BadExpectedToken, "identifier expected")

        macroname = self.peek().value
        self.skip()
        if use_paren:
            self.skipSpaces()
            if self.peekId() != tokRPAREN:
                self.throw( BadExpectedToken, "missing right-paren after 'defined' directive")
            self.skip()

        i = self.i
        return (DefinedExpr(macroname),i+1)

    def is_call_or_ident(self):
        """unimplemented: would parse a macro call or plain identifier"""
        pass

    def parse(self, i):
        """unimplemented parser entry point; always returns None"""
        return None
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### C P P E X P R E S S I O N S ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
class CppInvalidExpression(Exception):
    """an exception raised when an invalid/unsupported cpp expression is detected"""
    # raised via CppExpr.throw(), which prints the diagnostic itself and
    # raises the exception class without arguments
    pass
| |
class CppExpr:
    """a class that models the condition of #if directives into
    an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2)
    where "op" is a string describing the operation"""

    # operator sets recognized by the recursive-descent parser below
    unaries  = [ "!", "~" ]
    binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=" ]
    # binding priority per operator; higher binds tighter.
    # NOTE(review): is_binary() below never consults this table — see the note there
    precedences = { "||": 1,
                    "&&": 2,
                    "|": 3,
                    "^": 4,
                    "&": 5,
                    "==":6, "!=":6,
                    "<":7, "<=":7, ">":7, ">=":7,
                    "<<":8, ">>":8,
                    "+":9, "-":9,
                    "*":10, "/":10, "%":10,
                    "!":11, "~":12
                    }

    def __init__(self, tokens):
        """initialize a CppExpr. 'tokens' must be a CppToken list"""
        self.tok = tokens
        self.n = len(tokens)
        if debugCppExpr:
            print "CppExpr: trying to parse %s" % repr(tokens)
        expr = self.is_expr(0)
        if debugCppExpr:
            print "CppExpr: got " + repr(expr)
        # NOTE(review): is_expr() can return None for an empty/unparsable
        # token list, in which case the line below raises TypeError — verify
        self.expr = expr[0]

    # matches a run of identifier/digit/underscore characters
    re_cpp_constant = re.compile(r"((\d|\w|_)+)")

    def throw(self,exception,i,msg):
        """print 'msg' prefixed with the position of token index 'i', then
        raise 'exception' (the exception class itself, with no arguments)"""
        if i < self.n:
            tok = self.tok[i]
            print "%d:%d: %s" % (tok.lineno,tok.colno,msg)
        else:
            print "EOF: %s" % msg
        raise exception

    def skip_spaces(self,i):
        """skip spaces in input token list"""
        while i < self.n:
            t = self.tok[i]
            if t.id != tokSPACE and t.id != tokLN:
                break
            i += 1
        return i
| |
| def expectId(self,i,id): |
| """check that a given token id is at the current position, then skip over it""" |
| i = self.skip_spaces(i) |
| if i >= self.n or self.tok[i].id != id: |
| self.throw(BadExpectedToken,i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[i].id)) |
| return i+1 |
| |
| def expectIdent(self,i): |
| i = self.skip_spaces(i) |
| if i >= self.n or self.tok[i].id != tokIDENT: |
| self.throw(BadExpectedToken,i,"### expecting identifier in expression, got '%s'" % (id, self.tok[i].id)) |
| return i+1 |
| |
    # the is_xxxxx function returns either None or a pair (e,nextpos)
    # where 'e' is an expression tuple (e.g. (op,arg)) and 'nextpos' is
    # the corresponding next position in the input token list
    #

    def is_decimal(self,i):
        """try to parse a decimal literal (with optional U/L suffixes).
        returns (("int", value), nextpos) or None"""
        v = self.tok[i].value[:]
        # strip any combination of trailing U/L suffix characters
        while len(v) > 0 and v[-1] in "ULul":
            v = v[:-1]
        for digit in v:
            if not digit.isdigit():
                return None

        # for an integer expression tuple, the argument
        # is simply the value as an integer
        val = string.atoi(v)
        return ("int", val), i+1

    def is_hexadecimal(self,i):
        """try to parse a hexadecimal literal (with optional U/L suffixes).
        returns (("hex", value), nextpos) or None"""
        v = self.tok[i].value[:]
        while len(v) > 0 and v[-1] in "ULul":
            v = v[:-1]
        if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"):
            for digit in v[2:]:
                if not digit in "0123456789abcdefABCDEF":
                    return None

            # for an hex expression tuple, the argument
            # is the value as an integer
            val = int(v[2:], 16)
            return ("hex", val), i+1

        return None

    def is_integer(self,i):
        """try to parse an integer literal: decimal first, then hexadecimal.
        returns an expression pair or None"""
        if self.tok[i].id != tokNUMBER:
            return None

        c = self.is_decimal(i)
        if c: return c

        c = self.is_hexadecimal(i)
        if c: return c

        return None
| |
| def is_number(self,i): |
| t = self.tok[i] |
| if t.id == tokMINUS and i+1 < self.n: |
| c = self.is_integer(i+1) |
| if c: |
| e, i2 = c |
| op, val = e |
| return (op, -val), i2 |
| if t.id == tokPLUS and i+1 < self.n: |
| c = self.is_integer(i+1) |
| if c: return c |
| |
| return self.is_integer(i) |
| |
| |
    def is_alnum(self,i):
        """test wether a given token is alpha-numeric.
        NOTE(review): this matches against the token *id*, not its value;
        identifier tokens carry the id '<ident>', which this regexp does
        not match from position 0 — confirm which token kinds this is
        actually meant to accept"""
        i = self.skip_spaces(i)
        if i >= self.n:
            return None
        t = self.tok[i]
        m = CppExpr.re_cpp_constant.match(t.id)
        if m:
            #print "... alnum '%s'" % m.group(1)
            r = m.group(1)
            return ("ident", r), i+1
        return None
| |
    def is_defined(self,i):
        """try to parse 'defined MACRO' or 'defined(MACRO)'.
        returns (("defined", macroname), nextpos) or None"""
        t = self.tok[i]
        if t.id != tokDEFINED:
            return None

        # we have the defined keyword, check the rest
        i = self.skip_spaces(i+1)
        use_parens = 0
        if i < self.n and self.tok[i].id == tokLPAREN:
            use_parens = 1
            i = self.skip_spaces(i+1)

        if i >= self.n:
            # NOTE(review): CppConstantExpected is not defined anywhere in
            # this file; presumably it comes from 'from utils import *' —
            # verify, otherwise this path raises NameError instead
            self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name or left paren")

        t = self.tok[i]
        if t.id != tokIDENT:
            self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name")

        i += 1
        if use_parens:
            i = self.expectId(i,tokRPAREN)

        return ("defined",t.value), i
| |
| |
    def is_call_or_ident(self,i):
        """try to parse either a macro call 'NAME(p1, p2, ...)' or a plain
        identifier. returns (("call",(name,params)), nextpos),
        (("ident",name), nextpos) or None. each parameter is kept as a raw
        sub-list of tokens, with surrounding spaces stripped"""
        i = self.skip_spaces(i)
        if i >= self.n:
            return None

        t = self.tok[i]
        if t.id != tokIDENT:
            return None

        name = t.value

        i = self.skip_spaces(i+1)
        if i >= self.n or self.tok[i].id != tokLPAREN:
            # no parameter list: plain identifier
            return ("ident", name), i

        params = []
        depth = 1      # nesting level of parentheses inside the call
        i += 1
        j = i          # start index of the current parameter
        while i < self.n:
            id = self.tok[i].id
            if id == tokLPAREN:
                depth += 1
            elif depth == 1 and (id == tokCOMMA or id == tokRPAREN):
                # parameter boundary: strip leading spaces...
                while j < i and self.tok[j].id == tokSPACE:
                    j += 1
                k = i
                # ...and trailing spaces from the parameter's tokens
                while k > j and self.tok[k-1].id == tokSPACE:
                    k -= 1
                param = self.tok[j:k]
                params.append( param )
                if id == tokRPAREN:
                    break
                j = i+1
            elif id == tokRPAREN:
                depth -= 1
            i += 1

        if i >= self.n:
            # unterminated parameter list
            return None

        return ("call", (name, params)), i+1
| |
| def is_token(self,i,token): |
| i = self.skip_spaces(i) |
| if i >= self.n or self.tok[i].id != token: |
| return None |
| return token, i+1 |
| |
| |
    def is_value(self,i):
        """try to parse a primary value: string literal, number, defined()
        clause, macro call, identifier, or a parenthesized sub-expression"""
        t = self.tok[i]
        if t.id == tokSTRING:
            return ("string", t.value), i+1

        c = self.is_number(i)
        if c: return c

        c = self.is_defined(i)
        if c: return c

        c = self.is_call_or_ident(i)
        if c: return c

        i = self.skip_spaces(i)
        if i >= self.n or self.tok[i].id != tokLPAREN:
            return None

        # find the matching closing parenthesis
        popcount = 1
        i2 = i+1
        while i2 < self.n:
            t = self.tok[i2]
            if t.id == tokLPAREN:
                popcount += 1
            elif t.id == tokRPAREN:
                popcount -= 1
                if popcount == 0:
                    break
            i2 += 1

        if popcount != 0:
            self.throw(CppInvalidExpression, i, "expression missing closing parenthesis")

        if debugCppExpr:
            print "CppExpr: trying to parse sub-expression %s" % repr(self.tok[i+1:i2])
        # temporarily clamp the parse window to the sub-expression,
        # restoring self.n afterwards
        oldcount = self.n
        self.n = i2
        c = self.is_expr(i+1)
        self.n = oldcount
        if not c:
            self.throw(CppInvalidExpression, i, "invalid expression within parenthesis")

        e, i = c
        return e, i2+1
| |
    def is_unary(self,i):
        """parse a unary expression at token index 'i': zero or more
        unary operators (from CppExpr.unaries) applied to a value.
        returns (node, next-index) or None; raises CppInvalidExpression
        when a unary operator has no operand."""
        i = self.skip_spaces(i)
        if i >= self.n:
            return None

        t = self.tok[i]
        if t.id in CppExpr.unaries:
            c = self.is_unary(i+1)
            if not c:
                self.throw(CppInvalidExpression, i, "%s operator must be followed by value" % t.id)
            e, i = c
            return (t.id, e), i

        return self.is_value(i)
| |
    def is_binary(self,i):
        """parse a binary expression: a unary expression optionally
        followed by a binary operator and another binary expression.

        NOTE: all binary operators are treated with equal precedence and
        parsed right-associatively; parenthesized sub-expressions get
        their own token range via is_value(). when a parsed value is
        followed by a token that is not a binary operator, this returns
        None (the whole parse fails) rather than the left operand."""
        i = self.skip_spaces(i)
        if i >= self.n:
            return None

        c = self.is_unary(i)
        if not c:
            return None

        e1, i2 = c
        i2 = self.skip_spaces(i2)
        if i2 >= self.n:
            return c

        t = self.tok[i2]
        if t.id in CppExpr.binaries:
            c = self.is_binary(i2+1)
            if not c:
                self.throw(CppInvalidExpression, i,"### %s operator must be followed by value" % t.id )
            e2, i3 = c
            return (t.id, e1, e2), i3

        return None
| |
    def is_expr(self,i):
        """parse a complete expression starting at token index 'i'."""
        return self.is_binary(i)
| |
| def dump_node(self,e): |
| op = e[0] |
| line = "(" + op |
| if op == "int": |
| line += " %d)" % e[1] |
| elif op == "hex": |
| line += " 0x%x)" % e[1] |
| elif op == "ident": |
| line += " %s)" % e[1] |
| elif op == "defined": |
| line += " %s)" % e[1] |
| elif op == "call": |
| arg = e[1] |
| line += " %s [" % arg[0] |
| prefix = "" |
| for param in arg[1]: |
| par = "" |
| for tok in param: |
| par += str(tok) |
| line += "%s%s" % (prefix, par) |
| prefix = "," |
| line += "])" |
| elif op in CppExpr.unaries: |
| line += " %s)" % self.dump_node(e[1]) |
| elif op in CppExpr.binaries: |
| line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2])) |
| else: |
| line += " ?%s)" % repr(e[1]) |
| |
| return line |
| |
    def __repr__(self):
        """debug representation: the parse tree as an s-expression string."""
        return self.dump_node(self.expr)
| |
| def source_node(self,e): |
| op = e[0] |
| if op == "int": |
| return "%d" % e[1] |
| if op == "hex": |
| return "0x%x" % e[1] |
| if op == "ident": |
| # XXX: should try to expand |
| return e[1] |
| if op == "defined": |
| return "defined(%s)" % e[1] |
| |
| prec = CppExpr.precedences.get(op,1000) |
| arg = e[1] |
| if op in CppExpr.unaries: |
| arg_src = self.source_node(arg) |
| arg_op = arg[0] |
| arg_prec = CppExpr.precedences.get(arg[0],1000) |
| if arg_prec < prec: |
| return "!(" + arg_src + ")" |
| else: |
| return "!" + arg_src |
| if op in CppExpr.binaries: |
| arg2 = e[2] |
| arg1_op = arg[0] |
| arg2_op = arg2[0] |
| arg1_src = self.source_node(arg) |
| arg2_src = self.source_node(arg2) |
| if CppExpr.precedences.get(arg1_op,1000) < prec: |
| arg1_src = "(%s)" % arg1_src |
| if CppExpr.precedences.get(arg2_op,1000) < prec: |
| arg2_src = "(%s)" % arg2_src |
| |
| return "%s %s %s" % (arg1_src, op, arg2_src) |
| return "???" |
| |
    def __str__(self):
        """the expression as C source text."""
        return self.source_node(self.expr)
| |
| def int_node(self,e): |
| if e[0] == "int": |
| return e[1] |
| elif e[1] == "hex": |
| return int(e[1],16) |
| else: |
| return None |
| |
    def toInt(self):
        """return the expression's integer value when it is an immediate
        constant, or None otherwise."""
        return self.int_node(self.expr)
| |
| def optimize_node(self,e,macros={}): |
| op = e[0] |
| if op == "defined": |
| name = e[1] |
| if macros.has_key(name): |
| if macros[name] == kCppUndefinedMacro: |
| return ("int", 0) |
| else: |
| return ("int", 1) |
| |
| if kernel_remove_config_macros and name.startswith("CONFIG_"): |
| return ("int", 0) |
| |
| elif op == "!": |
| op, v = e |
| v = self.optimize_node(v, macros) |
| if v[0] == "int": |
| if v[1] == 0: |
| return ("int", 1) |
| else: |
| return ("int", 0) |
| |
| elif op == "&&": |
| op, l, r = e |
| l = self.optimize_node(l, macros) |
| r = self.optimize_node(r, macros) |
| li = self.int_node(l) |
| ri = self.int_node(r) |
| if li != None: |
| if li == 0: |
| return ("int", 0) |
| else: |
| return r |
| |
| elif op == "||": |
| op, l, r = e |
| l = self.optimize_node(l, macros) |
| r = self.optimize_node(r, macros) |
| li = self.int_node(l) |
| ri = self.int_node(r) |
| if li != None: |
| if li == 0: |
| return r |
| else: |
| return ("int", 1) |
| elif ri != None: |
| if ri == 0: |
| return l |
| else: |
| return ("int", 1) |
| return e |
| |
    def optimize(self,macros={}):
        """simplify the expression in place, given a dict of known macros."""
        self.expr = self.optimize_node(self.expr,macros)
| |
| def removePrefixedNode(self,e,prefix,names): |
| op = e[0] |
| if op == "defined": |
| name = e[1] |
| if name.startswith(prefix): |
| if names.has_key[name] and names[name] == "y": |
| return ("int", 1) |
| else: |
| return ("int", 0) |
| |
| elif op in CppExpr.unaries: |
| op, v = e |
| v = self.removePrefixedNode(v,prefix,names) |
| return (op, v) |
| elif op in CppExpr.binaries: |
| op, v1, v2 = e |
| v1 = self.removePrefixedNode(v1,prefix,names) |
| v2 = self.removePrefixedNode(v2,prefix,names) |
| return (op, v1, v2) |
| elif op == "call": |
| func, params = e[1] |
| params2 = [] |
| for param in params: |
| params2.append( self.removePrefixedNode(param,prefix,names) ) |
| return (op, (func, params2)) |
| |
| return e |
| |
    def removePrefixed(self,prefix,names={}):
        """replace defined(<prefix>XXX) tests in place: 1 when the macro is
        enabled ("y") in 'names', 0 otherwise."""
        self.expr = self.removePrefixedNode(self.expr,prefix,names)
| |
| def is_equal_node(self,e1,e2): |
| if e1[0] != e2[0] or len(e1) != len(e2): |
| return False |
| |
| op = e1[0] |
| if op == "int" or op == "hex" or op == "!" or op == "defined": |
| return e1[0] == e2[0] |
| |
| return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2]) |
| |
    def is_equal(self,other):
        """return True iff this expression's parse tree equals 'other's."""
        return self.is_equal_node(self.expr,other.expr)
| |
def test_cpp_expr(expr, expected):
    """parse 'expr' and check that the parse tree's repr equals 'expected';
    prints a KO line on mismatch, stays silent on success."""
    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
    #print repr(e.expr)
    s1 = repr(e)
    if s1 != expected:
        print "KO: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
    else:
        #print "OK: expression '%s'" % expr
        pass
| |
def test_cpp_expr_optim(expr, expected, macros={}):
    """parse 'expr', optimize it with the known 'macros' dict, and check
    that the optimized tree's repr equals 'expected'."""
    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
    e.optimize(macros)

    s1 = repr(e)
    if s1 != expected:
        print "KO: optimized expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
    else:
        #print "OK: optmized expression '%s'" % expr
        pass
| |
def test_cpp_expr_source(expr, expected):
    """parse 'expr' and check that the regenerated C source (str()) equals
    'expected'."""
    e = CppExpr( CppLineTokenizer( expr ).toTokenList() )
    s1 = str(e)
    if s1 != expected:
        print "KO: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected)
    else:
        #print "OK: source expression '%s'" % expr
        pass
| |
def test_CppExpr():
    """unit tests for CppExpr: parsing, optimization and source generation."""
    print "testing CppExpr"
    # parsing
    test_cpp_expr( "0", "(int 0)" )
    test_cpp_expr( "1", "(int 1)" )
    test_cpp_expr( "1 && 1", "(&& (int 1) (int 1))" )
    test_cpp_expr( "1 && 0", "(&& (int 1) (int 0))" )
    test_cpp_expr( "EXAMPLE", "(ident EXAMPLE)" )
    test_cpp_expr( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
    test_cpp_expr( "defined(EXAMPLE)", "(defined EXAMPLE)" )
    test_cpp_expr( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
    test_cpp_expr( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
    test_cpp_expr( "FOO(BAR)", "(call FOO [BAR])" )

    # optimization with/without known macro definitions
    test_cpp_expr_optim( "0", "(int 0)" )
    test_cpp_expr_optim( "1", "(int 1)" )
    test_cpp_expr_optim( "1 && 1", "(int 1)" )
    test_cpp_expr_optim( "1 && 0", "(int 0)" )
    test_cpp_expr_optim( "0 && 1", "(int 0)" )
    test_cpp_expr_optim( "0 && 0", "(int 0)" )
    test_cpp_expr_optim( "1 || 1", "(int 1)" )
    test_cpp_expr_optim( "1 || 0", "(int 1)" )
    test_cpp_expr_optim( "0 || 1", "(int 1)" )
    test_cpp_expr_optim( "0 || 0", "(int 0)" )
    test_cpp_expr_optim( "EXAMPLE", "(ident EXAMPLE)" )
    test_cpp_expr_optim( "EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))" )
    test_cpp_expr_optim( "defined(EXAMPLE)", "(defined EXAMPLE)" )
    test_cpp_expr_optim( "defined(EXAMPLE)", "(int 1)", { "EXAMPLE": "XOWOE" } )
    test_cpp_expr_optim( "defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro} )
    test_cpp_expr_optim( "!defined(EXAMPLE)", "(! (defined EXAMPLE))" )
    test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 0)", { "EXAMPLE" : "XOWOE" } )
    test_cpp_expr_optim( "!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro } )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))" )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "ABC" : "1" } )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 1)", { "BINGO" : "1" } )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(defined ABC)", { "BINGO" : kCppUndefinedMacro } )
    test_cpp_expr_optim( "defined(ABC) || defined(BINGO)", "(int 0)", { "ABC" : kCppUndefinedMacro, "BINGO" : kCppUndefinedMacro } )

    # source regeneration
    test_cpp_expr_source( "0", "0" )
    test_cpp_expr_source( "1", "1" )
    test_cpp_expr_source( "1 && 1", "1 && 1" )
    test_cpp_expr_source( "1 && 0", "1 && 0" )
    test_cpp_expr_source( "0 && 1", "0 && 1" )
    test_cpp_expr_source( "0 && 0", "0 && 0" )
    test_cpp_expr_source( "1 || 1", "1 || 1" )
    test_cpp_expr_source( "1 || 0", "1 || 0" )
    test_cpp_expr_source( "0 || 1", "0 || 1" )
    test_cpp_expr_source( "0 || 0", "0 || 0" )
    test_cpp_expr_source( "EXAMPLE", "EXAMPLE" )
    test_cpp_expr_source( "EXAMPLE - 3", "EXAMPLE - 3" )
    test_cpp_expr_source( "defined(EXAMPLE)", "defined(EXAMPLE)" )
    test_cpp_expr_source( "defined EXAMPLE", "defined(EXAMPLE)" )
| |
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### C P P B L O C K ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
class Block:
    """a class used to model a block of input source text. there are two block types:
    - directive blocks: contain the tokens of a single pre-processor directive (e.g. #if)
    - text blocks, contain the tokens of non-directive blocks

    the cpp parser class below will transform an input source file into a list of Block
    objects (grouped in a BlockList object for convenience)"""

    def __init__(self,tokens,directive=None,lineno=0):
        """initialize a new block, if 'directive' is None, this is a text block
        NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)'
        and '#ifndef MACRO' into '#if !defined(MACRO)'"""
        if directive == "ifdef":
            # prepend a 'defined' token: "#ifdef MACRO" -> "#if defined MACRO"
            tok = Token()
            tok.set(tokDEFINED)
            tokens = [ tok ] + tokens
            directive = "if"

        elif directive == "ifndef":
            # prepend '!' and 'defined': "#ifndef MACRO" -> "#if !defined MACRO"
            tok1 = Token()
            tok2 = Token()
            tok1.set(tokNOT)
            tok2.set(tokDEFINED)
            tokens = [ tok1, tok2 ] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        if lineno > 0:
            self.lineno = lineno
        else:
            # default to the line number of the block's first token
            self.lineno = self.tokens[0].lineno

        if self.isIf():
            # pre-parse the conditional expression of #if-like directives
            self.expr = CppExpr( self.tokens )

    def isDirective(self):
        """returns True iff this is a directive block"""
        return self.directive != None

    def isConditional(self):
        """returns True iff this is a conditional directive block
        NOTE: 'ifdef'/'ifndef' are listed for completeness; __init__
        rewrites them into plain 'if' directives"""
        return self.directive in ["if","ifdef","ifndef","else","elif","endif"]

    def isDefine(self):
        """returns the macro name in a #define directive, or None otherwise"""
        if self.directive != "define":
            return None

        return self.tokens[0].value

    def isIf(self):
        """returns True iff this is an #if-like directive block"""
        return self.directive in ["if","ifdef","ifndef","elif"]

    def isInclude(self):
        """checks wether this is a #include directive. if true, then returns the
        corresponding file name (with brackets or double-qoutes). None otherwise"""
        if self.directive != "include":
            return None

        #print "iii " + repr(self.tokens)
        if self.tokens[0].id == tokSTRING:
            # a double-quote include, that's easy
            return self.tokens[0].value

        # we only want the bracket part, not any comments or junk after it
        if self.tokens[0].id == "<":
            i = 0
            tok = self.tokens
            n = len(tok)
            while i < n and tok[i].id != ">":
                i += 1

            if i >= n:
                # no closing '>': not a well-formed bracket include
                return None

            return string.join([ str(x) for x in tok[:i+1] ],"")

        else:
            return None

    def removeWhiteSpace(self):
        # Remove trailing whitespace and empty lines
        # All whitespace is also contracted to a single space
        # (directive blocks are left untouched)
        if self.directive != None:
            return

        tokens = []
        line = 0 # index of line start
        space = -1 # index of first space, or -1
        ii = 0
        nn = len(self.tokens)
        while ii < nn:
            tok = self.tokens[ii]

            # If we find a space, record its position if this is the first
            # one the line start or the previous character. Don't append
            # anything to tokens array yet though.
            if tok.id == tokSPACE:
                if space < 0:
                    space = ii
                ii += 1
                continue

            # If this is a line space, ignore the spaces we found previously
            # on the line, and remove empty lines.
            if tok.id == tokLN:
                old_line = line
                old_space = space
                #print "N line=%d space=%d ii=%d" % (line, space, ii)
                ii += 1
                line = ii
                space = -1
                if old_space == old_line: # line only contains spaces
                    #print "-s"
                    continue
                if ii-1 == old_line: # line is empty
                    #print "-e"
                    continue
                tokens.append(tok)
                continue

            # Other token, append any space range if any, converting each
            # one to a single space character, then append the token.
            if space >= 0:
                jj = space
                space = -1
                while jj < ii:
                    tok2 = self.tokens[jj]
                    tok2.value = " "
                    tokens.append(tok2)
                    jj += 1

            tokens.append(tok)
            ii += 1

        self.tokens = tokens

    def writeWithWarning(self,out,warning,left_count,repeat_count):
        """write this block to the 'out' stream, decrementing 'left_count'
        per emitted line and re-emitting the 'warning' text (then resetting
        the counter to 'repeat_count') whenever it reaches zero.
        returns the updated line counter."""
        # removeWhiteSpace() will sometimes creates non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if not self.directive and self.tokens == []:
            return left_count

        if self.directive:
            out.write(str(self).rstrip() + "\n")
            left_count -= 1
            if left_count == 0:
                out.write(warning)
                left_count = repeat_count

        else:
            for tok in self.tokens:
                out.write(str(tok))
                if tok.id == tokLN:
                    left_count -= 1
                    if left_count == 0:
                        out.write(warning)
                        left_count = repeat_count

        return left_count


    def __repr__(self):
        """generate the representation of a given block"""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """generate the string representation of a given block"""
        if self.directive:
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)
            else:
                result = "#%s" % self.directive
                if len(self.tokens):
                    result += " "
                for tok in self.tokens:
                    result += str(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += str(tok)

        return result
| |
class BlockList:
    """a convenience class used to hold and process a list of blocks returned by
    the cpp parser"""
    def __init__(self,blocks):
        self.blocks = blocks

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self,n):
        return self.blocks[n]

    def __repr__(self):
        return repr(self.blocks)

    def __str__(self):
        result = ""
        for b in self.blocks:
            result += str(b)
            if b.isDirective():
                # each directive ends its own line, without trailing spaces
                result = result.rstrip() + '\n'
        return result

    def optimizeIf01(self):
        """remove the code between #if 0 .. #endif in a BlockList"""
        self.blocks = optimize_if01(self.blocks)

    def optimizeMacros(self, macros):
        """remove known defined and undefined macros from a BlockList"""
        for b in self.blocks:
            if b.isIf():
                b.expr.optimize(macros)

    def removeMacroDefines(self,macros):
        """remove known macro definitions from a BlockList"""
        self.blocks = remove_macro_defines(self.blocks,macros)

    def removePrefixed(self,prefix,names):
        """replace defined(<prefix>XXX) tests in every conditional block,
        according to the 'names' enable map (see CppExpr.removePrefixed)"""
        for b in self.blocks:
            if b.isIf():
                b.expr.removePrefixed(prefix,names)

    def removeWhiteSpace(self):
        """contract whitespace and drop empty lines in every text block"""
        for b in self.blocks:
            b.removeWhiteSpace()

    def optimizeAll(self,macros):
        """fold known macros in conditionals, then remove #if 0 sections"""
        self.optimizeMacros(macros)
        self.optimizeIf01()
        return

    def findIncludes(self):
        """return the list of included files in a BlockList"""
        result = []
        for b in self.blocks:
            i = b.isInclude()
            if i:
                result.append(i)

        return result


    def write(self,out):
        """write the whole block list to the 'out' stream"""
        out.write(str(self))

    def writeWithWarning(self,out,warning,repeat_count):
        """write the block list to 'out', re-emitting the 'warning' text
        every 'repeat_count' output lines"""
        left_count = repeat_count
        for b in self.blocks:
            left_count = b.writeWithWarning(out,warning,left_count,repeat_count)

    def removeComments(self):
        """collapse each space token's value (which may carry comment text)
        to a single space character"""
        for b in self.blocks:
            for tok in b.tokens:
                if tok.id == tokSPACE:
                    tok.value = " "

    def removeVarsAndFuncs(self,knownStatics=set()):
        """remove all extern and static declarations corresponding
        to variable and function declarations. we only accept typedefs
        and enum/structs/union declarations.

        however, we keep the definitions corresponding to the set
        of known static inline functions in the set 'knownStatics',
        which is useful for optimized byteorder swap functions and
        stuff like that.
        """
        # state = 0 => normal (i.e. LN + spaces)
        # state = 1 => typedef/struct encountered, ends with ";"
        # state = 2 => var declaration encountered, ends with ";"
        # state = 3 => func declaration encountered, ends with "}"
        state = 0
        depth = 0
        blocks2 = []
        skipTokens = False
        for b in self.blocks:
            if b.isDirective():
                blocks2.append(b)
            else:
                n = len(b.tokens)
                i = 0
                if skipTokens:
                    first = n
                else:
                    first = 0
                while i < n:
                    tok = b.tokens[i]
                    tokid = tok.id
                    # If we are not looking for the start of a new
                    # type/var/func, then skip over tokens until
                    # we find our terminator, managing the depth of
                    # accolades as we go.
                    if state > 0:
                        terminator = False
                        if tokid == '{':
                            depth += 1
                        elif tokid == '}':
                            if depth > 0:
                                depth -= 1
                            if (depth == 0) and (state == 3):
                                terminator = True
                        elif tokid == ';' and depth == 0:
                            terminator = True

                        if terminator:
                            # we found the terminator
                            state = 0
                            if skipTokens:
                                skipTokens = False
                                first = i+1

                        i = i+1
                        continue

                    # We are looking for the start of a new type/func/var
                    # ignore whitespace
                    if tokid in [tokLN, tokSPACE]:
                        i = i+1
                        continue

                    # Is it a new type definition, then start recording it
                    if tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]:
                        #print "$$$ keep type declr" + repr(b.tokens[i:])
                        state = 1
                        i = i+1
                        continue

                    # Is it a variable or function definition. If so, first
                    # try to determine which type it is, and also extract
                    # its name.
                    #
                    # We're going to parse the next tokens of the same block
                    # until we find a semi-column or a left parenthesis.
                    #
                    # The semi-column corresponds to a variable definition,
                    # the left-parenthesis to a function definition.
                    #
                    # We also assume that the var/func name is the last
                    # identifier before the terminator.
                    #
                    j = i+1
                    ident = ""
                    while j < n:
                        tokid = b.tokens[j].id
                        if tokid == '(': # a function declaration
                            state = 3
                            break
                        elif tokid == ';': # a variable declaration
                            state = 2
                            break
                        if tokid == tokIDENT:
                            ident = b.tokens[j].value
                        j += 1

                    if j >= n:
                        # This can only happen when the declaration
                        # does not end on the current block (e.g. with
                        # a directive mixed inside it.
                        #
                        # We will treat it as malformed because
                        # it's very hard to recover from this case
                        # without making our parser much more
                        # complex.
                        #
                        #print "### skip unterminated static '%s'" % ident
                        break

                    if ident in knownStatics:
                        #print "### keep var/func '%s': %s" % (ident,repr(b.tokens[i:j]))
                        pass
                    else:
                        # We're going to skip the tokens for this declaration
                        #print "### skip variable /func'%s': %s" % (ident,repr(b.tokens[i:j]))
                        if i > first:
                            blocks2.append( Block(b.tokens[first:i]))
                        skipTokens = True
                        first = n

                    i = i+1

                if i > first:
                    #print "### final '%s'" % repr(b.tokens[first:i])
                    blocks2.append( Block(b.tokens[first:i]) )

        self.blocks = blocks2

    def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"):
        """insert your standard issue disclaimer that this is an
        auto-generated file, etc.."""
        tokens = CppLineTokenizer( disclaimer ).toTokenList()
        tokens = tokens[:-1] # remove trailing tokLN
        self.blocks = [ Block(tokens) ] + self.blocks

    def replaceTokens(self,replacements=dict()):
        """replace identifier tokens in all text blocks according to the
        given dict (old name -> new name)"""
        for b in self.blocks:
            if not b.isDirective():
                for tok in b.tokens:
                    if tok.id == tokIDENT:
                        if tok.value in replacements:
                            tok.value = replacements[tok.value]
| |
class BlockParser:
    """a class used to convert an input source file into a BlockList object"""

    def __init__(self,tokzer=None):
        """initialize a block parser. the input source is provided through a Tokenizer
        object"""
        self.reset(tokzer)

    def reset(self,tokzer):
        # NOTE(review): self.state appears unused — getBlocks() keeps its
        # own local 'state' variable; confirm before removing
        self.state = 1
        self.tokzer = tokzer

    def getBlocks(self,tokzer=None):
        """tokenize and parse the input source, return a BlockList object
        NOTE: empty and line-numbering directives are ignored and removed
        from the result. as a consequence, it is possible to have
        two successive text blocks in the result"""
        # state 0 => in source code
        # state 1 => in source code, after a LN
        # state 2 => in source code, after LN then some space
        state = 1
        lastLN = 0
        current = []
        blocks = []

        if tokzer == None:
            tokzer = self.tokzer

        while 1:
            tok = tokzer.getToken()
            if tok.id == tokEOF:
                break

            if tok.id == tokLN:
                state = 1
                current.append(tok)
                # remember where the last complete line ended
                lastLN = len(current)

            elif tok.id == tokSPACE:
                if state == 1:
                    state = 2
                current.append(tok)

            elif tok.id == "#":
                if state > 0:
                    # this is the start of a directive

                    if lastLN > 0:
                        # record previous tokens as text block
                        block = Block(current[:lastLN])
                        blocks.append(block)
                        lastLN = 0

                    current = []

                    # skip spaces after the #
                    while 1:
                        tok = tokzer.getToken()
                        if tok.id != tokSPACE:
                            break

                    if tok.id != tokIDENT:
                        # empty or line-numbering, ignore it
                        if tok.id != tokLN and tok.id != tokEOF:
                            while 1:
                                tok = tokzer.getToken()
                                if tok.id == tokLN or tok.id == tokEOF:
                                    break
                        continue

                    directive = tok.value
                    lineno = tok.lineno

                    # skip spaces
                    tok = tokzer.getToken()
                    while tok.id == tokSPACE:
                        tok = tokzer.getToken()

                    # then record tokens until LN
                    dirtokens = []
                    while tok.id != tokLN and tok.id != tokEOF:
                        dirtokens.append(tok)
                        tok = tokzer.getToken()

                    block = Block(dirtokens,directive,lineno)
                    blocks.append(block)
                    state = 1

            else:
                state = 0
                current.append(tok)

        if len(current) > 0:
            # flush any trailing text as a final block
            block = Block(current)
            blocks.append(block)

        return BlockList(blocks)

    def parse(self,tokzer):
        """parse the given tokenizer's input, return a BlockList object"""
        return self.getBlocks( tokzer )

    def parseLines(self,lines):
        """parse a list of text lines into a BlockList object"""
        return self.getBlocks( CppLinesTokenizer(lines) )

    def parseFile(self,path):
        """parse a file into a BlockList object"""
        file = open(path, "rt")
        result = self.getBlocks( CppFileTokenizer(file) )
        file.close()
        return result
| |
| |
def test_block_parsing(lines,expected):
    """parse 'lines' into blocks and verify that each block's string form
    matches the corresponding entry of 'expected'; raises BadExpectedToken
    on any mismatch (count or content)."""
    blocks = BlockParser().parse( CppLinesTokenizer(lines) )
    if len(blocks) != len(expected):
        raise BadExpectedToken("parser.buildBlocks returned '%s' expecting '%s'"
                               % (str(blocks), repr(expected)))
    for n in range(len(blocks)):
        if str(blocks[n]) != expected[n]:
            raise BadExpectedToken("parser.buildBlocks()[%d] is '%s', expecting '%s'"
                                   % (n, str(blocks[n]), expected[n]))
| |
def test_BlockParser():
    """unit tests for BlockParser: text blocks, empty directives, and
    comment/space handling around '#' directives."""
    test_block_parsing(["#error hello"],["#error hello"])
    test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ])
    test_block_parsing([ "foo", " # ", "bar" ], [ "foo\n","bar\n" ])
    test_block_parsing(\
        [ "foo", " # ", " # /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ],
        [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] )
| |
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### B L O C K L I S T O P T I M I Z A T I O N ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
def remove_macro_defines( blocks, excludedMacros=set() ):
    """return a new block list with the #define directives of the macros
    listed in 'excludedMacros' filtered out; all other blocks are kept."""
    kept = []
    for blk in blocks:
        name = blk.isDefine()
        # non-#define blocks have name == None and are always kept
        if name is None or name not in excludedMacros:
            kept.append(blk)
    return kept
| |
def find_matching_endif( blocks, i ):
    """scan 'blocks' forward from index 'i' for the directive that closes
    the currently-open conditional: the matching #endif, or an #else/#elif
    at the same nesting level. returns its index, or len(blocks) when the
    conditional is unterminated."""
    count = len(blocks)
    level = 1
    for pos in range(i, count):
        blk = blocks[pos]
        if not blk.isDirective():
            continue
        d = blk.directive
        if d in ("if", "ifndef", "ifdef"):
            level += 1
        elif level == 1 and d in ("else", "elif"):
            return pos
        elif d == "endif":
            level -= 1
            if level == 0:
                return pos
    return count
| |
def optimize_if01( blocks ):
    """remove the code between #if 0 .. #endif in a list of CppBlocks"""
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # copy the run of non-#if blocks [i, j) unchanged
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno))
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r == None:
            # not a constant conditional: keep the directive untouched
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            j = find_matching_endif( blocks, j+1 )
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir = blocks[j].directive
            if dir == "endif":
                D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno))
                i = j + 1
            elif dir == "else":
                # convert 'else' into 'if 1'
                D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno))
                blocks[j].directive = "if"
                blocks[j].expr = CppExpr( CppLineTokenizer("1").toTokenList() )
                i = j
            elif dir == "elif":
                # convert 'elif' into 'if'
                D2("convert 'if 0' .. 'elif' into 'if'")
                blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif( blocks, j+1 )
        if k >= n:
            # unterminated #if 1, finish here
            D2("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        dir = blocks[k].directive
        if dir == "endif":
            D2("convert 'if 1' .. 'endif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            i = k+1
        elif dir == "else":
            # convert 'else' into 'if 0'
            D2("convert 'if 1' .. 'else' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            blocks[k].directive = "if"
            blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
            i = k
        elif dir == "elif":
            # convert 'elif' into 'if 0'
            D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno))
            result += optimize_if01(blocks[j+1:k])
            blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() )
            i = k
    return result
| |
def test_optimizeAll():
    """end-to-end test of BlockList.optimizeAll(): #if 0/1 sections and
    their #else branches must be folded away correctly."""
    text = """\
#if 1
#define  GOOD_1
#endif
#if 0
#define  BAD_2
#define  BAD_3
#endif

#if 1
#define  GOOD_2
#else
#define  BAD_4
#endif

#if 0
#define  BAD_5
#else
#define  GOOD_3
#endif

#if 0
#if 1
#define BAD_6
#endif
#endif\
"""

    expected = """\
#define GOOD_1

#define GOOD_2

#define GOOD_3

"""

    print "running test_BlockList.optimizeAll"
    out = StringOutput()
    lines = string.split(text, '\n')
    list = BlockParser().parse( CppLinesTokenizer(lines) )
    #D_setlevel(2)
    list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} )
    #print repr(list)
    list.write(out)
    if out.get() != expected:
        print "KO: macro optimization failed\n"
        print "<<<< expecting '",
        print expected,
        print "'\n>>>> result '"
        print out.get(),
        print "'\n----"
| |
| |
| ##################################################################################### |
| ##################################################################################### |
| ##### ##### |
| ##### ##### |
| ##### ##### |
| ##################################################################################### |
| ##################################################################################### |
| |
def runUnitTests():
    """run all unit tests for this program"""
    print "running unit tests"
    # test_CppTokenizer is defined earlier in this file
    test_CppTokenizer()
    test_CppExpr()
    test_optimizeAll()
    test_BlockParser()
    print "OK"
| |
# entry point: running this module directly executes its unit tests
if __name__ == "__main__":
    runUnitTests()