From 95dd17848383315050a5d64051bd695d0a653c51 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Mon, 6 Nov 2023 07:13:05 +0000 Subject: [PATCH] [clang] Change representation of CurLexerKind (#70381) Previous representation used an enumeration combined with a switch to dispatch to the appropriate lexer. Use a function pointer so that the dispatching is just an indirect call, which is actually better because lexing is a costly task compared to a function call. This also makes the code slightly cleaner; speedups on the compile-time tracker are consistent and range from -0.05% to -0.20% for NewPM-O0-g, see https://llvm-compile-time-tracker.com/compare.php?from=f9906508bc4f05d3950e2219b4c56f6c078a61ef&to=608c85ec1283638db949d73e062bcc3355001ce4&stat=instructions:u Considering just the preprocessing task, preprocessing the sqlite amalgamation takes 0.6% fewer instructions (according to valgrind --tool=callgrind) --------- Co-authored-by: serge-sans-paille Co-authored-by: cor3ntin --- clang/include/clang/Lex/Preprocessor.h | 44 +++++++++++----- clang/lib/Lex/PPCaching.cpp | 6 +-- clang/lib/Lex/PPLexerChange.cpp | 18 +++---- clang/lib/Lex/Preprocessor.cpp | 63 +++++------------------ clang/utils/ClangVisualizers/clang.natvis | 2 +- 5 files changed, 57 insertions(+), 76 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 4a99447e757c6a..4ec21a8b6be2c8 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -751,13 +751,8 @@ class Preprocessor { std::unique_ptr CurTokenLexer; /// The kind of lexer we're currently working with. - enum CurLexerKind { - CLK_Lexer, - CLK_TokenLexer, - CLK_CachingLexer, - CLK_DependencyDirectivesLexer, - CLK_LexAfterModuleImport - } CurLexerKind = CLK_Lexer; + typedef bool (*LexerCallback)(Preprocessor &, Token &); + LexerCallback CurLexerCallback = &CLK_Lexer; /// If the current lexer is for a submodule that is being built, this /// is that submodule. 
@@ -767,7 +762,7 @@ class Preprocessor { /// \#included, and macros currently being expanded from, not counting /// CurLexer/CurTokenLexer. struct IncludeStackInfo { - enum CurLexerKind CurLexerKind; + LexerCallback CurLexerCallback; Module *TheSubmodule; std::unique_ptr TheLexer; PreprocessorLexer *ThePPLexer; @@ -776,12 +771,12 @@ class Preprocessor { // The following constructors are completely useless copies of the default // versions, only needed to pacify MSVC. - IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, + IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule, std::unique_ptr &&TheLexer, PreprocessorLexer *ThePPLexer, std::unique_ptr &&TheTokenLexer, ConstSearchDirIterator TheDirLookup) - : CurLexerKind(std::move(CurLexerKind)), + : CurLexerCallback(std::move(CurLexerCallback)), TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), ThePPLexer(std::move(ThePPLexer)), TheTokenLexer(std::move(TheTokenLexer)), @@ -1901,7 +1896,7 @@ class Preprocessor { /// Determine whether it's possible for a future call to Lex to produce an /// annotation token created by a previous call to EnterAnnotationToken. 
bool mightHavePendingAnnotationTokens() { - return CurLexerKind != CLK_Lexer; + return CurLexerCallback != CLK_Lexer; } /// Update the current token to represent the provided @@ -2430,8 +2425,9 @@ class Preprocessor { friend void TokenLexer::ExpandFunctionArguments(); void PushIncludeMacroStack() { - assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); - IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule, + assert(CurLexerCallback != CLK_CachingLexer && + "cannot push a caching lexer"); + IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule, std::move(CurLexer), CurPPLexer, std::move(CurTokenLexer), CurDirLookup); CurPPLexer = nullptr; @@ -2443,7 +2439,7 @@ class Preprocessor { CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); CurDirLookup = IncludeMacroStack.back().TheDirLookup; CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; - CurLexerKind = IncludeMacroStack.back().CurLexerKind; + CurLexerCallback = IncludeMacroStack.back().CurLexerCallback; IncludeMacroStack.pop_back(); } @@ -2899,6 +2895,26 @@ class Preprocessor { /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + +private: + /// Helper functions to forward lexing to the actual lexer. They all share the + /// same signature. 
+ static bool CLK_Lexer(Preprocessor &P, Token &Result) { + return P.CurLexer->Lex(Result); + } + static bool CLK_TokenLexer(Preprocessor &P, Token &Result) { + return P.CurTokenLexer->Lex(Result); + } + static bool CLK_CachingLexer(Preprocessor &P, Token &Result) { + P.CachingLex(Result); + return true; + } + static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) { + return P.CurLexer->LexDependencyDirectiveToken(Result); + } + static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) { + return P.LexAfterModuleImport(Result); + } }; /// Abstract base class that describes a handler that will receive diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp index e05e52ba9bb536..f38ff62ebf437c 100644 --- a/clang/lib/Lex/PPCaching.cpp +++ b/clang/lib/Lex/PPCaching.cpp @@ -88,7 +88,7 @@ void Preprocessor::EnterCachingLexMode() { "entered caching lex mode while lexing something else"); if (InCachingLexMode()) { - assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind"); + assert(CurLexerCallback == CLK_CachingLexer && "Unexpected lexer kind"); return; } @@ -96,9 +96,9 @@ void Preprocessor::EnterCachingLexMode() { } void Preprocessor::EnterCachingLexModeUnchecked() { - assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode"); + assert(CurLexerCallback != CLK_CachingLexer && "already in caching lex mode"); PushIncludeMacroStack(); - CurLexerKind = CLK_CachingLexer; + CurLexerCallback = CLK_CachingLexer; } diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index b8575e1adfc5b3..3b1b6df1dbae4e 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -122,10 +122,10 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, CurPPLexer = TheLexer; CurDirLookup = CurDir; CurLexerSubmodule = nullptr; - if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = TheLexer->isDependencyDirectivesLexer() - ? 
CLK_DependencyDirectivesLexer - : CLK_Lexer; + if (CurLexerCallback != CLK_LexAfterModuleImport) + CurLexerCallback = TheLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; // Notify the client, if desired, that we are in a new source file. if (Callbacks && !CurLexer->Is_PragmaLexer) { @@ -161,8 +161,8 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_TokenLexer; + if (CurLexerCallback != CLK_LexAfterModuleImport) + CurLexerCallback = CLK_TokenLexer; } /// EnterTokenStream - Add a "macro" context to the top of the include stack, @@ -180,7 +180,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject) { - if (CurLexerKind == CLK_CachingLexer) { + if (CurLexerCallback == CLK_CachingLexer) { if (CachedLexPos < CachedTokens.size()) { assert(IsReinject && "new tokens in the middle of cached stream"); // We're entering tokens into the middle of our cached token stream. 
We @@ -216,8 +216,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_TokenLexer; + if (CurLexerCallback != CLK_LexAfterModuleImport) + CurLexerCallback = CLK_TokenLexer; } /// Compute the relative path that names the given file relative to diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 45c0f848da6604..64f54c6fc6382f 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -382,13 +382,13 @@ StringRef Preprocessor::getLastMacroWithSpelling( void Preprocessor::recomputeCurLexerKind() { if (CurLexer) - CurLexerKind = CurLexer->isDependencyDirectivesLexer() - ? CLK_DependencyDirectivesLexer - : CLK_Lexer; + CurLexerCallback = CurLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; else if (CurTokenLexer) - CurLexerKind = CLK_TokenLexer; + CurLexerCallback = CLK_TokenLexer; else - CurLexerKind = CLK_CachingLexer; + CurLexerCallback = CLK_CachingLexer; } bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File, @@ -643,23 +643,7 @@ void Preprocessor::SkipTokensWhileUsingPCH() { while (true) { bool InPredefines = (CurLexer && CurLexer->getFileID() == getPredefinesFileID()); - switch (CurLexerKind) { - case CLK_Lexer: - CurLexer->Lex(Tok); - break; - case CLK_TokenLexer: - CurTokenLexer->Lex(Tok); - break; - case CLK_CachingLexer: - CachingLex(Tok); - break; - case CLK_DependencyDirectivesLexer: - CurLexer->LexDependencyDirectiveToken(Tok); - break; - case CLK_LexAfterModuleImport: - LexAfterModuleImport(Tok); - break; - } + CurLexerCallback(*this, Tok); if (Tok.is(tok::eof) && !InPredefines) { ReachedMainFileEOF = true; break; @@ -868,12 +852,12 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { Identifier.is(tok::kw_import)) && !InMacroArgs && !DisableMacroExpansion && 
(getLangOpts().Modules || getLangOpts().DebuggerSupport) && - CurLexerKind != CLK_CachingLexer) { + CurLexerCallback != CLK_CachingLexer) { ModuleImportLoc = Identifier.getLocation(); NamedModuleImportPath.clear(); IsAtImport = true; ModuleImportExpectsIdentifier = true; - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; } return true; } @@ -882,27 +866,8 @@ void Preprocessor::Lex(Token &Result) { ++LexLevel; // We loop here until a lex function returns a token; this avoids recursion. - bool ReturnedToken; - do { - switch (CurLexerKind) { - case CLK_Lexer: - ReturnedToken = CurLexer->Lex(Result); - break; - case CLK_TokenLexer: - ReturnedToken = CurTokenLexer->Lex(Result); - break; - case CLK_CachingLexer: - CachingLex(Result); - ReturnedToken = true; - break; - case CLK_DependencyDirectivesLexer: - ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result); - break; - case CLK_LexAfterModuleImport: - ReturnedToken = LexAfterModuleImport(Result); - break; - } - } while (!ReturnedToken); + while (!CurLexerCallback(*this, Result)) + ; if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure) return; @@ -968,7 +933,7 @@ void Preprocessor::Lex(Token &Result) { NamedModuleImportPath.clear(); IsAtImport = false; ModuleImportExpectsIdentifier = true; - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; } break; } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { @@ -1186,7 +1151,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { Name += ":"; NamedModuleImportPath.push_back( {getIdentifierInfo(Name), Result.getLocation()}); - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; return true; } } else { @@ -1286,7 +1251,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { NamedModuleImportPath.push_back( std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); ModuleImportExpectsIdentifier = false; - 
CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; return true; } @@ -1295,7 +1260,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { // attribute-specifier-seq here under the Standard C++ Modules.) if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { ModuleImportExpectsIdentifier = true; - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; return true; } diff --git a/clang/utils/ClangVisualizers/clang.natvis b/clang/utils/ClangVisualizers/clang.natvis index 2d1ad16797f817..9faaa8a8bba8cf 100644 --- a/clang/utils/ClangVisualizers/clang.natvis +++ b/clang/utils/ClangVisualizers/clang.natvis @@ -817,7 +817,7 @@ For later versions of Visual Studio, no setup is required--> {IncludeMacroStack._Mypair._Myval2._Mylast - 1,na} {CurLexer._Mypair._Myval2,na} Expanding Macro: {CurTokenLexer._Mypair._Myval2,na} - + {this,view(cached)}