| //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This code rewrites include invocations into their expansions. This gives you |
| // a file with all included files merged into it. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "clang/Rewrite/Frontend/Rewriters.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "clang/Frontend/PreprocessorOutputOptions.h" |
| #include "clang/Lex/Preprocessor.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| using namespace clang; |
| using namespace llvm; |
| |
| namespace { |
| |
| class InclusionRewriter : public PPCallbacks { |
| /// Information about which #includes were actually performed, |
| /// created by preprocessor callbacks. |
| struct FileChange { |
| SourceLocation From; |
| FileID Id; |
| SrcMgr::CharacteristicKind FileType; |
| FileChange(SourceLocation From) : From(From) { |
| } |
| }; |
| Preprocessor &PP; ///< Used to find inclusion directives. |
| SourceManager &SM; ///< Used to read and manage source files. |
| raw_ostream &OS; ///< The destination stream for rewritten contents. |
| bool ShowLineMarkers; ///< Show #line markers. |
| bool UseLineDirective; ///< Use of line directives or line markers. |
| typedef std::map<unsigned, FileChange> FileChangeMap; |
| FileChangeMap FileChanges; ///< Tracks which files were included where. |
| /// Used transitively for building up the FileChanges mapping over the |
| /// various \c PPCallbacks callbacks. |
| FileChangeMap::iterator LastInsertedFileChange; |
| public: |
| InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); |
| bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); |
| private: |
| virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, |
| SrcMgr::CharacteristicKind FileType, |
| FileID PrevFID); |
| virtual void FileSkipped(const FileEntry &ParentFile, |
| const Token &FilenameTok, |
| SrcMgr::CharacteristicKind FileType); |
| virtual void InclusionDirective(SourceLocation HashLoc, |
| const Token &IncludeTok, |
| StringRef FileName, |
| bool IsAngled, |
| CharSourceRange FilenameRange, |
| const FileEntry *File, |
| StringRef SearchPath, |
| StringRef RelativePath, |
| const Module *Imported); |
| void WriteLineInfo(const char *Filename, int Line, |
| SrcMgr::CharacteristicKind FileType, |
| StringRef EOL, StringRef Extra = StringRef()); |
| void OutputContentUpTo(const MemoryBuffer &FromFile, |
| unsigned &WriteFrom, unsigned WriteTo, |
| StringRef EOL, int &lines, |
| bool EnsureNewline = false); |
| void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, |
| const MemoryBuffer &FromFile, StringRef EOL, |
| unsigned &NextToWrite, int &Lines); |
| const FileChange *FindFileChangeLocation(SourceLocation Loc) const; |
| StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); |
| }; |
| |
| } // end anonymous namespace |
| |
| /// Initializes an InclusionRewriter with a \p PP source and \p OS destination. |
| InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, |
| bool ShowLineMarkers) |
| : PP(PP), SM(PP.getSourceManager()), OS(OS), |
| ShowLineMarkers(ShowLineMarkers), |
| LastInsertedFileChange(FileChanges.end()) { |
| // If we're in microsoft mode, use normal #line instead of line markers. |
| UseLineDirective = PP.getLangOpts().MicrosoftExt; |
| } |
| |
| /// Write appropriate line information as either #line directives or GNU line |
| /// markers depending on what mode we're in, including the \p Filename and |
| /// \p Line we are located at, using the specified \p EOL line separator, and |
| /// any \p Extra context specifiers in GNU line directives. |
| void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, |
| SrcMgr::CharacteristicKind FileType, |
| StringRef EOL, StringRef Extra) { |
| if (!ShowLineMarkers) |
| return; |
| if (UseLineDirective) { |
| OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; |
| } else { |
| // Use GNU linemarkers as described here: |
| // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html |
| OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; |
| if (!Extra.empty()) |
| OS << Extra; |
| if (FileType == SrcMgr::C_System) |
| // "`3' This indicates that the following text comes from a system header |
| // file, so certain warnings should be suppressed." |
| OS << " 3"; |
| else if (FileType == SrcMgr::C_ExternCSystem) |
| // as above for `3', plus "`4' This indicates that the following text |
| // should be treated as being wrapped in an implicit extern "C" block." |
| OS << " 3 4"; |
| } |
| OS << EOL; |
| } |
| |
| /// FileChanged - Whenever the preprocessor enters or exits a #include file |
| /// it invokes this handler. |
| void InclusionRewriter::FileChanged(SourceLocation Loc, |
| FileChangeReason Reason, |
| SrcMgr::CharacteristicKind NewFileType, |
| FileID) { |
| if (Reason != EnterFile) |
| return; |
| if (LastInsertedFileChange == FileChanges.end()) |
| // we didn't reach this file (eg: the main file) via an inclusion directive |
| return; |
| LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); |
| LastInsertedFileChange->second.FileType = NewFileType; |
| LastInsertedFileChange = FileChanges.end(); |
| } |
| |
| /// Called whenever an inclusion is skipped due to canonical header protection |
| /// macros. |
| void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, |
| const Token &/*FilenameTok*/, |
| SrcMgr::CharacteristicKind /*FileType*/) { |
| assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " |
| "found via an inclusion directive, was skipped"); |
| FileChanges.erase(LastInsertedFileChange); |
| LastInsertedFileChange = FileChanges.end(); |
| } |
| |
| /// This should be called whenever the preprocessor encounters include |
| /// directives. It does not say whether the file has been included, but it |
| /// provides more information about the directive (hash location instead |
| /// of location inside the included file). It is assumed that the matching |
| /// FileChanged() or FileSkipped() is called after this. |
| void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, |
| const Token &/*IncludeTok*/, |
| StringRef /*FileName*/, |
| bool /*IsAngled*/, |
| CharSourceRange /*FilenameRange*/, |
| const FileEntry * /*File*/, |
| StringRef /*SearchPath*/, |
| StringRef /*RelativePath*/, |
| const Module * /*Imported*/) { |
| assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " |
| "directive was found before the previous one was processed"); |
| std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert( |
| std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc))); |
| assert(p.second && "Unexpected revisitation of the same include directive"); |
| LastInsertedFileChange = p.first; |
| } |
| |
| /// Simple lookup for a SourceLocation (specifically one denoting the hash in |
| /// an inclusion directive) in the map of inclusion information, FileChanges. |
| const InclusionRewriter::FileChange * |
| InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { |
| FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); |
| if (I != FileChanges.end()) |
| return &I->second; |
| return NULL; |
| } |
| |
| /// Detect the likely line ending style of \p FromFile by examining the first |
| /// newline found within it. |
| static StringRef DetectEOL(const MemoryBuffer &FromFile) { |
| // detect what line endings the file uses, so that added content does not mix |
| // the style |
| const char *Pos = strchr(FromFile.getBufferStart(), '\n'); |
| if (Pos == NULL) |
| return "\n"; |
| if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') |
| return "\n\r"; |
| if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') |
| return "\r\n"; |
| return "\n"; |
| } |
| |
| /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at |
| /// \p WriteTo - 1. |
| void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, |
| unsigned &WriteFrom, unsigned WriteTo, |
| StringRef EOL, int &Line, |
| bool EnsureNewline) { |
| if (WriteTo <= WriteFrom) |
| return; |
| OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); |
| // count lines manually, it's faster than getPresumedLoc() |
| Line += std::count(FromFile.getBufferStart() + WriteFrom, |
| FromFile.getBufferStart() + WriteTo, '\n'); |
| if (EnsureNewline) { |
| char LastChar = FromFile.getBufferStart()[WriteTo - 1]; |
| if (LastChar != '\n' && LastChar != '\r') |
| OS << EOL; |
| } |
| WriteFrom = WriteTo; |
| } |
| |
| /// Print characters from \p FromFile starting at \p NextToWrite up until the |
| /// inclusion directive at \p StartToken, then print out the inclusion |
| /// inclusion directive disabled by a #if directive, updating \p NextToWrite |
| /// and \p Line to track the number of source lines visited and the progress |
| /// through the \p FromFile buffer. |
| void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, |
| const Token &StartToken, |
| const MemoryBuffer &FromFile, |
| StringRef EOL, |
| unsigned &NextToWrite, int &Line) { |
| OutputContentUpTo(FromFile, NextToWrite, |
| SM.getFileOffset(StartToken.getLocation()), EOL, Line); |
| Token DirectiveToken; |
| do { |
| DirectiveLex.LexFromRawLexer(DirectiveToken); |
| } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); |
| OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; |
| OutputContentUpTo(FromFile, NextToWrite, |
| SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), |
| EOL, Line); |
| OS << "#endif /* expanded by -frewrite-includes */" << EOL; |
| } |
| |
| /// Find the next identifier in the pragma directive specified by \p RawToken. |
| StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, |
| Token &RawToken) { |
| RawLex.LexFromRawLexer(RawToken); |
| if (RawToken.is(tok::raw_identifier)) |
| PP.LookUpIdentifierInfo(RawToken); |
| if (RawToken.is(tok::identifier)) |
| return RawToken.getIdentifierInfo()->getName(); |
| return StringRef(); |
| } |
| |
| /// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it |
| /// and including content of included files recursively. |
| bool InclusionRewriter::Process(FileID FileId, |
| SrcMgr::CharacteristicKind FileType) |
| { |
| bool Invalid; |
| const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); |
| if (Invalid) // invalid inclusion |
| return true; |
| const char *FileName = FromFile.getBufferIdentifier(); |
| Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); |
| RawLex.SetCommentRetentionState(false); |
| |
| StringRef EOL = DetectEOL(FromFile); |
| |
| // Per the GNU docs: "1" indicates the start of a new file. |
| WriteLineInfo(FileName, 1, FileType, EOL, " 1"); |
| |
| if (SM.getFileIDSize(FileId) == 0) |
| return true; |
| |
| // The next byte to be copied from the source file |
| unsigned NextToWrite = 0; |
| int Line = 1; // The current input file line number. |
| |
| Token RawToken; |
| RawLex.LexFromRawLexer(RawToken); |
| |
| // TODO: Consider adding a switch that strips possibly unimportant content, |
| // such as comments, to reduce the size of repro files. |
| while (RawToken.isNot(tok::eof)) { |
| if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { |
| RawLex.setParsingPreprocessorDirective(true); |
| Token HashToken = RawToken; |
| RawLex.LexFromRawLexer(RawToken); |
| if (RawToken.is(tok::raw_identifier)) |
| PP.LookUpIdentifierInfo(RawToken); |
| if (RawToken.is(tok::identifier)) { |
| switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { |
| case tok::pp_include: |
| case tok::pp_include_next: |
| case tok::pp_import: { |
| CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, |
| Line); |
| if (const FileChange *Change = FindFileChangeLocation( |
| HashToken.getLocation())) { |
| // now include and recursively process the file |
| if (Process(Change->Id, Change->FileType)) |
| // and set lineinfo back to this file, if the nested one was |
| // actually included |
| // `2' indicates returning to a file (after having included |
| // another file. |
| WriteLineInfo(FileName, Line, FileType, EOL, " 2"); |
| } else |
| // fix up lineinfo (since commented out directive changed line |
| // numbers) for inclusions that were skipped due to header guards |
| WriteLineInfo(FileName, Line, FileType, EOL); |
| break; |
| } |
| case tok::pp_pragma: { |
| StringRef Identifier = NextIdentifierName(RawLex, RawToken); |
| if (Identifier == "clang" || Identifier == "GCC") { |
| if (NextIdentifierName(RawLex, RawToken) == "system_header") { |
| // keep the directive in, commented out |
| CommentOutDirective(RawLex, HashToken, FromFile, EOL, |
| NextToWrite, Line); |
| // update our own type |
| FileType = SM.getFileCharacteristic(RawToken.getLocation()); |
| WriteLineInfo(FileName, Line, FileType, EOL); |
| } |
| } else if (Identifier == "once") { |
| // keep the directive in, commented out |
| CommentOutDirective(RawLex, HashToken, FromFile, EOL, |
| NextToWrite, Line); |
| WriteLineInfo(FileName, Line, FileType, EOL); |
| } |
| break; |
| } |
| default: |
| break; |
| } |
| } |
| RawLex.setParsingPreprocessorDirective(false); |
| } |
| RawLex.LexFromRawLexer(RawToken); |
| } |
| OutputContentUpTo(FromFile, NextToWrite, |
| SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line, |
| /*EnsureNewline*/true); |
| return true; |
| } |
| |
| /// InclusionRewriterInInput - Implement -frewrite-includes mode. |
| void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, |
| const PreprocessorOutputOptions &Opts) { |
| SourceManager &SM = PP.getSourceManager(); |
| InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, |
| Opts.ShowLineMarkers); |
| PP.addPPCallbacks(Rewrite); |
| |
| // First let the preprocessor process the entire file and call callbacks. |
| // Callbacks will record which #include's were actually performed. |
| PP.EnterMainSourceFile(); |
| Token Tok; |
| // Only preprocessor directives matter here, so disable macro expansion |
| // everywhere else as an optimization. |
| // TODO: It would be even faster if the preprocessor could be switched |
| // to a mode where it would parse only preprocessor directives and comments, |
| // nothing else matters for parsing or processing. |
| PP.SetMacroExpansionOnlyInDirectives(); |
| do { |
| PP.Lex(Tok); |
| } while (Tok.isNot(tok::eof)); |
| Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); |
| OS->flush(); |
| } |