clang 22.0.0git
HeaderIncludes.cpp
Go to the documentation of this file.
1//===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "clang/Lex/Lexer.h"
12#include "llvm/Support/FormatVariadic.h"
13#include "llvm/Support/Path.h"
14#include <optional>
15
16namespace clang {
17namespace tooling {
18namespace {
19
20LangOptions createLangOpts() {
21 LangOptions LangOpts;
22 LangOpts.CPlusPlus = 1;
23 LangOpts.CPlusPlus11 = 1;
24 LangOpts.CPlusPlus14 = 1;
25 LangOpts.LineComment = 1;
26 LangOpts.CXXOperatorNames = 1;
27 LangOpts.Bool = 1;
28 LangOpts.ObjC = 1;
29 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
30 LangOpts.DeclSpecKeyword = 1; // To get __declspec.
31 LangOpts.WChar = 1; // To get wchar_t
32 return LangOpts;
33}
34
35// Create a new lexer on the given \p Code and calls \p Callback with the
36// created source manager and lexer. \p Callback must be a callable object that
37// could be invoked with (const SourceManager &, Lexer &). This function returns
38// whatever \p Callback returns.
39template <typename F>
40auto withLexer(StringRef FileName, StringRef Code, const IncludeStyle &Style,
41 F &&Callback)
42 -> std::invoke_result_t<F, const SourceManager &, Lexer &> {
43 SourceManagerForFile VirtualSM(FileName, Code);
44 SourceManager &SM = VirtualSM.get();
45 LangOptions LangOpts = createLangOpts();
46 Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM,
47 LangOpts);
48 return std::invoke(std::forward<F>(Callback), std::as_const(SM), Lex);
49}
50
51// Returns the offset after skipping a sequence of tokens, matched by \p
52// GetOffsetAfterSequence, from the start of the code.
53// \p GetOffsetAfterSequence should be a function that matches a sequence of
54// tokens and returns an offset after the sequence.
55unsigned getOffsetAfterTokenSequence(
56 StringRef FileName, StringRef Code, const IncludeStyle &Style,
57 llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
58 GetOffsetAfterSequence) {
59 return withLexer(FileName, Code, Style,
60 [&](const SourceManager &SM, Lexer &Lex) {
61 Token Tok;
62 // Get the first token.
63 Lex.LexFromRawLexer(Tok);
64 return GetOffsetAfterSequence(SM, Lex, Tok);
65 });
66}
67
68// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
69// \p Tok will be the token after this directive; otherwise, it can be any token
70// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
71// (second) raw_identifier name is checked.
72bool checkAndConsumeDirectiveWithName(
73 Lexer &Lex, StringRef Name, Token &Tok,
74 std::optional<StringRef> RawIDName = std::nullopt) {
75 bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
76 Tok.is(tok::raw_identifier) &&
77 Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
78 Tok.is(tok::raw_identifier) &&
79 (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
80 if (Matched)
81 Lex.LexFromRawLexer(Tok);
82 return Matched;
83}
84
85void skipComments(Lexer &Lex, Token &Tok) {
86 while (Tok.is(tok::comment))
87 if (Lex.LexFromRawLexer(Tok))
88 return;
89}
90
91bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex,
92 Token &Tok) {
93 bool Matched = Tok.is(tok::raw_identifier) &&
94 Tok.getRawIdentifier() == "module" &&
95 !Lex.LexFromRawLexer(Tok) && Tok.is(tok::semi) &&
96 !Lex.LexFromRawLexer(Tok);
97 return Matched;
98}
99
100// Determines the minimum offset into the file where we want to insert header
101// includes. This will be put (when available):
102// - after `#pragma once`
103// - after header guards (`#ifdef` and `#define`)
104// - after opening global module (`module;`)
105// - after any comments at the start of the file or immediately following one of
106// the above constructs
107unsigned getMinHeaderInsertionOffset(StringRef FileName, StringRef Code,
108 const IncludeStyle &Style) {
109 // \p Consume returns location after header guard or 0 if no header guard is
110 // found.
111 auto ConsumeHeaderGuardAndComment =
112 [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
113 Token Tok)>
114 Consume) {
115 return getOffsetAfterTokenSequence(
116 FileName, Code, Style,
117 [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
118 skipComments(Lex, Tok);
119 unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
120 return std::max(InitialOffset, Consume(SM, Lex, Tok));
121 });
122 };
123
124 auto ModuleDecl = ConsumeHeaderGuardAndComment(
125 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
126 if (checkAndConsumeModuleDecl(SM, Lex, Tok)) {
127 skipComments(Lex, Tok);
128 return SM.getFileOffset(Tok.getLocation());
129 }
130 return 0;
131 });
132
133 auto HeaderAndPPOffset = std::max(
134 // #ifndef/#define
135 ConsumeHeaderGuardAndComment(
136 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
137 if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
138 skipComments(Lex, Tok);
139 if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
140 Tok.isAtStartOfLine())
141 return SM.getFileOffset(Tok.getLocation());
142 }
143 return 0;
144 }),
145 // #pragma once
146 ConsumeHeaderGuardAndComment(
147 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
148 if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
149 StringRef("once")))
150 return SM.getFileOffset(Tok.getLocation());
151 return 0;
152 }));
153 return std::max(HeaderAndPPOffset, ModuleDecl);
154}
155
156// Check if a sequence of tokens is like
157// "#include ("header.h" | <header.h>)".
158// If it is, \p Tok will be the token after this directive; otherwise, it can be
159// any token after the given \p Tok (including \p Tok).
160bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
161 auto Matched = [&]() {
162 Lex.LexFromRawLexer(Tok);
163 return true;
164 };
165 if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
166 Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
167 if (Lex.LexFromRawLexer(Tok))
168 return false;
169 if (Tok.is(tok::string_literal))
170 return Matched();
171 if (Tok.is(tok::less)) {
172 while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
173 }
174 if (Tok.is(tok::greater))
175 return Matched();
176 }
177 }
178 return false;
179}
180
181// Returns the offset of the last #include directive after which a new
182// #include can be inserted. This ignores #include's after the #include block(s)
183// in the beginning of a file to avoid inserting headers into code sections
184// where new #include's should not be added by default.
185// These code sections include:
186// - raw string literals (containing #include).
187// - #if blocks.
188// - Special #include's among declarations (e.g. functions).
189//
190// If no #include after which a new #include can be inserted, this returns the
191// offset after skipping all comments from the start of the code.
192// Inserting after an #include is not allowed if it comes after code that is not
193// #include (e.g. pre-processing directive that is not #include, declarations).
194unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
195 const IncludeStyle &Style) {
196 return getOffsetAfterTokenSequence(
197 FileName, Code, Style,
198 [](const SourceManager &SM, Lexer &Lex, Token Tok) {
199 skipComments(Lex, Tok);
200 unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
201 while (checkAndConsumeInclusiveDirective(Lex, Tok))
202 MaxOffset = SM.getFileOffset(Tok.getLocation());
203 return MaxOffset;
204 });
205}
206
207// Check whether the first declaration in the code is a C++20 module
208// declaration, and it is not preceded by any preprocessor directives.
209bool isFirstDeclModuleDecl(StringRef FileName, StringRef Code,
210 const IncludeStyle &Style) {
211 return withLexer(
212 FileName, Code, Style, [](const SourceManager &SM, Lexer &Lex) {
213 // Let the lexer skip any comments and whitespaces for us.
214 Lex.SetKeepWhitespaceMode(false);
215 Lex.SetCommentRetentionState(false);
216
217 Token tok;
218 if (Lex.LexFromRawLexer(tok))
219 return false;
220
221 // A module declaration is made up of the following token sequence:
222 // export? module <ident> ('.' <ident>)* <partition> <attr> ;
223 //
224 // For convenience, we don't actually lex the whole declaration -- it's
225 // enough to distinguish a module declaration to just ensure an <ident>
226 // is following the "module" keyword.
227
228 // Lex the optional "export" keyword.
229 if (tok.is(tok::raw_identifier) && tok.getRawIdentifier() == "export") {
230 if (Lex.LexFromRawLexer(tok))
231 return false;
232 }
233
234 // Lex the "module" keyword.
235 if (!tok.is(tok::raw_identifier) ||
236 tok.getRawIdentifier() != "module" || Lex.LexFromRawLexer(tok))
237 return false;
238
239 // Make sure an identifier follows the "module" keyword.
240 return tok.is(tok::raw_identifier);
241 });
242}
243
244inline StringRef trimInclude(StringRef IncludeName) {
245 return IncludeName.trim("\"<>");
246}
247
248const char IncludeRegexPattern[] =
249 R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
250
251// The filename of Path excluding extension.
252// Used to match implementation with headers, this differs from sys::path::stem:
253// - in names with multiple dots (foo.cu.cc) it terminates at the *first*
254// - an empty stem is never returned: /foo/.bar.x => .bar
255// - we don't bother to handle . and .. specially
256StringRef matchingStem(llvm::StringRef Path) {
257 StringRef Name = llvm::sys::path::filename(Path);
258 return Name.substr(0, Name.find('.', 1));
259}
260
261} // anonymous namespace
262
264 StringRef FileName)
265 : Style(Style), FileName(FileName) {
266 for (const auto &Category : Style.IncludeCategories) {
267 CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive
268 ? llvm::Regex::NoFlags
269 : llvm::Regex::IgnoreCase);
270 }
271 IsMainFile = FileName.ends_with(".c") || FileName.ends_with(".cc") ||
272 FileName.ends_with(".cpp") || FileName.ends_with(".c++") ||
273 FileName.ends_with(".cxx") || FileName.ends_with(".m") ||
274 FileName.ends_with(".mm");
275 if (!Style.IncludeIsMainSourceRegex.empty()) {
276 llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
277 IsMainFile |= MainFileRegex.match(FileName);
278 }
279}
280
282 bool CheckMainHeader) const {
283 int Ret = INT_MAX;
284 for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
285 if (CategoryRegexs[i].match(IncludeName)) {
286 Ret = Style.IncludeCategories[i].Priority;
287 break;
288 }
289 if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
290 Ret = 0;
291 return Ret;
292}
293
295 bool CheckMainHeader) const {
296 int Ret = INT_MAX;
297 for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
298 if (CategoryRegexs[i].match(IncludeName)) {
299 Ret = Style.IncludeCategories[i].SortPriority;
300 if (Ret == 0)
301 Ret = Style.IncludeCategories[i].Priority;
302 break;
303 }
304 if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
305 Ret = 0;
306 return Ret;
307}
308bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
309 switch (Style.MainIncludeChar) {
310 case IncludeStyle::MICD_Quote:
311 if (!IncludeName.starts_with("\""))
312 return false;
313 break;
315 if (!IncludeName.starts_with("<"))
316 return false;
317 break;
318 case IncludeStyle::MICD_Any:
319 break;
320 }
321
322 IncludeName =
323 IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <>
324 // Not matchingStem: implementation files may have compound extensions but
325 // headers may not.
326 StringRef HeaderStem = llvm::sys::path::stem(IncludeName);
327 StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc
328 StringRef MatchingFileStem = matchingStem(FileName); // foo for foo.cu.cc
329 // main-header examples:
330 // 1) foo.h => foo.cc
331 // 2) foo.h => foo.cu.cc
332 // 3) foo.proto.h => foo.proto.cc
333 //
334 // non-main-header examples:
335 // 1) foo.h => bar.cc
336 // 2) foo.proto.h => foo.cc
337 StringRef Matching;
338 if (MatchingFileStem.starts_with_insensitive(HeaderStem))
339 Matching = MatchingFileStem; // example 1), 2)
340 else if (FileStem.equals_insensitive(HeaderStem))
341 Matching = FileStem; // example 3)
342 if (!Matching.empty()) {
343 llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
344 llvm::Regex::IgnoreCase);
345 if (MainIncludeRegex.match(Matching))
346 return true;
347 }
348 return false;
349}
350
351const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
352
353HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
354 const IncludeStyle &Style)
355 : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
356 MinInsertOffset(getMinHeaderInsertionOffset(FileName, Code, Style)),
357 MaxInsertOffset(MinInsertOffset +
358 getMaxHeaderInsertionOffset(
359 FileName, Code.drop_front(MinInsertOffset), Style)),
360 MainIncludeFound(false),
361 ShouldInsertGlobalModuleFragmentDecl(
362 isFirstDeclModuleDecl(FileName, Code, Style)),
363 Categories(Style, FileName) {
364 // Add 0 for main header and INT_MAX for headers that are not in any
365 // category.
366 Priorities = {0, INT_MAX};
367 for (const auto &Category : Style.IncludeCategories)
368 Priorities.insert(Category.Priority);
370 Code.drop_front(MinInsertOffset).split(Lines, "\n");
371
372 unsigned Offset = MinInsertOffset;
373 unsigned NextLineOffset;
375 for (auto Line : Lines) {
376 NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
377 if (IncludeRegex.match(Line, &Matches)) {
378 // If this is the last line without trailing newline, we need to make
379 // sure we don't delete across the file boundary.
380 addExistingInclude(
381 Include(Matches[2],
383 Offset, std::min(Line.size() + 1, Code.size() - Offset)),
384 Matches[1] == "import" ? tooling::IncludeDirective::Import
386 NextLineOffset);
387 }
388 Offset = NextLineOffset;
389 }
390
391 // Populate CategoryEndOfssets:
392 // - Ensure that CategoryEndOffset[Highest] is always populated.
393 // - If CategoryEndOffset[Priority] isn't set, use the next higher value
394 // that is set, up to CategoryEndOffset[Highest].
395 auto Highest = Priorities.begin();
396 auto [It, Inserted] = CategoryEndOffsets.try_emplace(*Highest);
397 if (Inserted)
398 It->second = FirstIncludeOffset >= 0 ? FirstIncludeOffset : MinInsertOffset;
399 // By this point, CategoryEndOffset[Highest] is always set appropriately:
400 // - to an appropriate location before/after existing #includes, or
401 // - to right after the header guard, or
402 // - to the beginning of the file.
403 for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
404 if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
405 CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
406}
407
408// \p Offset: the start of the line following this include directive.
409void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
410 unsigned NextLineOffset) {
411 auto &Incs = ExistingIncludes[trimInclude(IncludeToAdd.Name)];
412 Incs.push_back(std::move(IncludeToAdd));
413 auto &CurInclude = Incs.back();
414 // The header name with quotes or angle brackets.
415 // Only record the offset of current #include if we can insert after it.
416 if (CurInclude.R.getOffset() <= MaxInsertOffset) {
417 int Priority = Categories.getIncludePriority(
418 CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound);
419 if (Priority == 0)
420 MainIncludeFound = true;
421 CategoryEndOffsets[Priority] = NextLineOffset;
422 IncludesByPriority[Priority].push_back(&CurInclude);
423 if (FirstIncludeOffset < 0)
424 FirstIncludeOffset = CurInclude.R.getOffset();
425 }
426}
427
428std::optional<tooling::Replacement>
429HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
431 assert(IncludeName == trimInclude(IncludeName));
432 // If a <header> ("header") already exists in code, "header" (<header>) with
433 // different quotation and/or directive will still be inserted.
434 // FIXME: figure out if this is the best behavior.
435 auto It = ExistingIncludes.find(IncludeName);
436 if (It != ExistingIncludes.end()) {
437 for (const auto &Inc : It->second)
438 if (Inc.Directive == Directive &&
439 ((IsAngled && StringRef(Inc.Name).starts_with("<")) ||
440 (!IsAngled && StringRef(Inc.Name).starts_with("\""))))
441 return std::nullopt;
442 }
443 std::string Quoted =
444 std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName));
445 StringRef QuotedName = Quoted;
446 int Priority = Categories.getIncludePriority(
447 QuotedName, /*CheckMainHeader=*/!MainIncludeFound);
448 auto CatOffset = CategoryEndOffsets.find(Priority);
449 assert(CatOffset != CategoryEndOffsets.end());
450 unsigned InsertOffset = CatOffset->second; // Fall back offset
451 auto Iter = IncludesByPriority.find(Priority);
452 if (Iter != IncludesByPriority.end()) {
453 for (const auto *Inc : Iter->second) {
454 if (QuotedName < Inc->Name) {
455 InsertOffset = Inc->R.getOffset();
456 break;
457 }
458 }
459 }
460 assert(InsertOffset <= Code.size());
461 llvm::StringRef DirectiveSpelling =
462 Directive == IncludeDirective::Include ? "include" : "import";
463 std::string NewInclude =
464 llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName);
465 // When inserting headers at end of the code, also append '\n' to the code
466 // if it does not end with '\n'.
467 // FIXME: when inserting multiple #includes at the end of code, only one
468 // newline should be added.
469 if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
470 NewInclude = "\n" + NewInclude;
471 if (ShouldInsertGlobalModuleFragmentDecl)
472 NewInclude = "module;\n" + NewInclude;
473 return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
474}
475
477 bool IsAngled) const {
478 assert(IncludeName == trimInclude(IncludeName));
480 auto Iter = ExistingIncludes.find(IncludeName);
481 if (Iter == ExistingIncludes.end())
482 return Result;
483 for (const auto &Inc : Iter->second) {
484 if ((IsAngled && StringRef(Inc.Name).starts_with("\"")) ||
485 (!IsAngled && StringRef(Inc.Name).starts_with("<")))
486 continue;
487 llvm::Error Err = Result.add(tooling::Replacement(
488 FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
489 if (Err) {
490 auto ErrMsg = "Unexpected conflicts in #include deletions: " +
491 llvm::toString(std::move(Err));
492 llvm_unreachable(ErrMsg.c_str());
493 }
494 }
495 return Result;
496}
497
498} // namespace tooling
499} // namespace clang
Token Tok
The Token.
#define SM(sm)
Defines the SourceManager interface.
VerifyDiagnosticConsumer::Directive Directive
tooling::Replacements remove(llvm::StringRef Header, bool IsAngled) const
Removes all existing includes and imports of Header quoted with <> if IsAngled is true or "" if IsAng...
static const llvm::Regex IncludeRegex
HeaderIncludes(llvm::StringRef FileName, llvm::StringRef Code, const IncludeStyle &Style)
std::optional< tooling::Replacement > insert(llvm::StringRef Header, bool IsAngled, IncludeDirective Directive) const
Inserts an include or import directive of Header into the code.
int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) const
Returns the priority of the category which IncludeName belongs to.
IncludeCategoryManager(const IncludeStyle &Style, StringRef FileName)
int getSortIncludePriority(StringRef IncludeName, bool CheckMainHeader) const
A source range independent of the SourceManager.
Definition Replacement.h:44
A text replacement.
Definition Replacement.h:83
Maintains a set of replacements that are conflict-free.
#define INT_MAX
Definition limits.h:50
SmallVector< BoundNodes, 1 > match(MatcherT Matcher, const NodeT &Node, ASTContext &Context)
Returns the results of matching Matcher on Node.
The JSON file list parser is used to communicate input to InstallAPI.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
@ Result
The result type of a method or function.
Definition TypeBase.h:905
for(const auto &A :T->param_types())
int const char * function
Definition c++config.h:31
#define false
Definition stdbool.h:26
Style for sorting and grouping C++ include directives.
MICD_AngleBracket
Regular expressions denoting the different #include categories used for ordering #includes.