clang 22.0.0git
HeaderIncludes.cpp
Go to the documentation of this file.
1//===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "clang/Lex/Lexer.h"
12#include "llvm/Support/FormatVariadic.h"
13#include "llvm/Support/Path.h"
14#include <optional>
15
16namespace clang {
17namespace tooling {
18namespace {
19
20LangOptions createLangOpts() {
21 LangOptions LangOpts;
22 LangOpts.CPlusPlus = 1;
23 LangOpts.CPlusPlus11 = 1;
24 LangOpts.CPlusPlus14 = 1;
25 LangOpts.LineComment = 1;
26 LangOpts.CXXOperatorNames = 1;
27 LangOpts.Bool = 1;
28 LangOpts.ObjC = 1;
29 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
30 LangOpts.DeclSpecKeyword = 1; // To get __declspec.
31 LangOpts.WChar = 1; // To get wchar_t
32 return LangOpts;
33}
34
35// Returns the offset after skipping a sequence of tokens, matched by \p
36// GetOffsetAfterSequence, from the start of the code.
37// \p GetOffsetAfterSequence should be a function that matches a sequence of
38// tokens and returns an offset after the sequence.
39unsigned getOffsetAfterTokenSequence(
40 StringRef FileName, StringRef Code, const IncludeStyle &Style,
41 llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
42 GetOffsetAfterSequence) {
43 SourceManagerForFile VirtualSM(FileName, Code);
44 SourceManager &SM = VirtualSM.get();
45 LangOptions LangOpts = createLangOpts();
46 Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM,
47 LangOpts);
48 Token Tok;
49 // Get the first token.
50 Lex.LexFromRawLexer(Tok);
51 return GetOffsetAfterSequence(SM, Lex, Tok);
52}
53
54// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
55// \p Tok will be the token after this directive; otherwise, it can be any token
56// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
57// (second) raw_identifier name is checked.
58bool checkAndConsumeDirectiveWithName(
59 Lexer &Lex, StringRef Name, Token &Tok,
60 std::optional<StringRef> RawIDName = std::nullopt) {
61 bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
62 Tok.is(tok::raw_identifier) &&
63 Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
64 Tok.is(tok::raw_identifier) &&
65 (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
66 if (Matched)
67 Lex.LexFromRawLexer(Tok);
68 return Matched;
69}
70
71void skipComments(Lexer &Lex, Token &Tok) {
72 while (Tok.is(tok::comment))
73 if (Lex.LexFromRawLexer(Tok))
74 return;
75}
76
77bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex,
78 Token &Tok) {
79 bool Matched = Tok.is(tok::raw_identifier) &&
80 Tok.getRawIdentifier() == "module" &&
81 !Lex.LexFromRawLexer(Tok) && Tok.is(tok::semi) &&
82 !Lex.LexFromRawLexer(Tok);
83 return Matched;
84}
85
86// Determines the minimum offset into the file where we want to insert header
87// includes. This will be put (when available):
88// - after `#pragma once`
89// - after header guards (`#ifdef` and `#define`)
90// - after opening global module (`module;`)
91// - after any comments at the start of the file or immediately following one of
92// the above constructs
93unsigned getMinHeaderInsertionOffset(StringRef FileName, StringRef Code,
94 const IncludeStyle &Style) {
95 // \p Consume returns location after header guard or 0 if no header guard is
96 // found.
97 auto ConsumeHeaderGuardAndComment =
98 [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
99 Token Tok)>
100 Consume) {
101 return getOffsetAfterTokenSequence(
102 FileName, Code, Style,
103 [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
104 skipComments(Lex, Tok);
105 unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
106 return std::max(InitialOffset, Consume(SM, Lex, Tok));
107 });
108 };
109
110 auto ModuleDecl = ConsumeHeaderGuardAndComment(
111 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
112 if (checkAndConsumeModuleDecl(SM, Lex, Tok)) {
113 skipComments(Lex, Tok);
114 return SM.getFileOffset(Tok.getLocation());
115 }
116 return 0;
117 });
118
119 auto HeaderAndPPOffset = std::max(
120 // #ifndef/#define
121 ConsumeHeaderGuardAndComment(
122 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
123 if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
124 skipComments(Lex, Tok);
125 if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
126 Tok.isAtStartOfLine())
127 return SM.getFileOffset(Tok.getLocation());
128 }
129 return 0;
130 }),
131 // #pragma once
132 ConsumeHeaderGuardAndComment(
133 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
134 if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
135 StringRef("once")))
136 return SM.getFileOffset(Tok.getLocation());
137 return 0;
138 }));
139 return std::max(HeaderAndPPOffset, ModuleDecl);
140}
141
142// Check if a sequence of tokens is like
143// "#include ("header.h" | <header.h>)".
144// If it is, \p Tok will be the token after this directive; otherwise, it can be
145// any token after the given \p Tok (including \p Tok).
146bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
147 auto Matched = [&]() {
148 Lex.LexFromRawLexer(Tok);
149 return true;
150 };
151 if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
152 Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
153 if (Lex.LexFromRawLexer(Tok))
154 return false;
155 if (Tok.is(tok::string_literal))
156 return Matched();
157 if (Tok.is(tok::less)) {
158 while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
159 }
160 if (Tok.is(tok::greater))
161 return Matched();
162 }
163 }
164 return false;
165}
166
167// Returns the offset of the last #include directive after which a new
168// #include can be inserted. This ignores #include's after the #include block(s)
169// in the beginning of a file to avoid inserting headers into code sections
170// where new #include's should not be added by default.
171// These code sections include:
172// - raw string literals (containing #include).
173// - #if blocks.
174// - Special #include's among declarations (e.g. functions).
175//
176// If no #include after which a new #include can be inserted, this returns the
177// offset after skipping all comments from the start of the code.
178// Inserting after an #include is not allowed if it comes after code that is not
179// #include (e.g. pre-processing directive that is not #include, declarations).
180unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
181 const IncludeStyle &Style) {
182 return getOffsetAfterTokenSequence(
183 FileName, Code, Style,
184 [](const SourceManager &SM, Lexer &Lex, Token Tok) {
185 skipComments(Lex, Tok);
186 unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
187 while (checkAndConsumeInclusiveDirective(Lex, Tok))
188 MaxOffset = SM.getFileOffset(Tok.getLocation());
189 return MaxOffset;
190 });
191}
192
193inline StringRef trimInclude(StringRef IncludeName) {
194 return IncludeName.trim("\"<>");
195}
196
// Matches a single `#include` / `#import` line.
// - capture group 1: the directive keyword (`import` or `include`)
// - capture group 2: the header name including its `"..."` or `<...>`
//   delimiters
constexpr char IncludeRegexPattern[] =
    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
199
200// The filename of Path excluding extension.
201// Used to match implementation with headers, this differs from sys::path::stem:
202// - in names with multiple dots (foo.cu.cc) it terminates at the *first*
203// - an empty stem is never returned: /foo/.bar.x => .bar
204// - we don't bother to handle . and .. specially
205StringRef matchingStem(llvm::StringRef Path) {
206 StringRef Name = llvm::sys::path::filename(Path);
207 return Name.substr(0, Name.find('.', 1));
208}
209
210} // anonymous namespace
211
213 StringRef FileName)
214 : Style(Style), FileName(FileName) {
215 for (const auto &Category : Style.IncludeCategories) {
216 CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive
217 ? llvm::Regex::NoFlags
218 : llvm::Regex::IgnoreCase);
219 }
220 IsMainFile = FileName.ends_with(".c") || FileName.ends_with(".cc") ||
221 FileName.ends_with(".cpp") || FileName.ends_with(".c++") ||
222 FileName.ends_with(".cxx") || FileName.ends_with(".m") ||
223 FileName.ends_with(".mm");
224 if (!Style.IncludeIsMainSourceRegex.empty()) {
225 llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
226 IsMainFile |= MainFileRegex.match(FileName);
227 }
228}
229
231 bool CheckMainHeader) const {
232 int Ret = INT_MAX;
233 for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
234 if (CategoryRegexs[i].match(IncludeName)) {
235 Ret = Style.IncludeCategories[i].Priority;
236 break;
237 }
238 if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
239 Ret = 0;
240 return Ret;
241}
242
244 bool CheckMainHeader) const {
245 int Ret = INT_MAX;
246 for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
247 if (CategoryRegexs[i].match(IncludeName)) {
248 Ret = Style.IncludeCategories[i].SortPriority;
249 if (Ret == 0)
250 Ret = Style.IncludeCategories[i].Priority;
251 break;
252 }
253 if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
254 Ret = 0;
255 return Ret;
256}
257bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
258 switch (Style.MainIncludeChar) {
259 case IncludeStyle::MICD_Quote:
260 if (!IncludeName.starts_with("\""))
261 return false;
262 break;
264 if (!IncludeName.starts_with("<"))
265 return false;
266 break;
267 case IncludeStyle::MICD_Any:
268 break;
269 }
270
271 IncludeName =
272 IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <>
273 // Not matchingStem: implementation files may have compound extensions but
274 // headers may not.
275 StringRef HeaderStem = llvm::sys::path::stem(IncludeName);
276 StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc
277 StringRef MatchingFileStem = matchingStem(FileName); // foo for foo.cu.cc
278 // main-header examples:
279 // 1) foo.h => foo.cc
280 // 2) foo.h => foo.cu.cc
281 // 3) foo.proto.h => foo.proto.cc
282 //
283 // non-main-header examples:
284 // 1) foo.h => bar.cc
285 // 2) foo.proto.h => foo.cc
286 StringRef Matching;
287 if (MatchingFileStem.starts_with_insensitive(HeaderStem))
288 Matching = MatchingFileStem; // example 1), 2)
289 else if (FileStem.equals_insensitive(HeaderStem))
290 Matching = FileStem; // example 3)
291 if (!Matching.empty()) {
292 llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
293 llvm::Regex::IgnoreCase);
294 if (MainIncludeRegex.match(Matching))
295 return true;
296 }
297 return false;
298}
299
300const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
301
302HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
303 const IncludeStyle &Style)
304 : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
305 MinInsertOffset(getMinHeaderInsertionOffset(FileName, Code, Style)),
306 MaxInsertOffset(MinInsertOffset +
307 getMaxHeaderInsertionOffset(
308 FileName, Code.drop_front(MinInsertOffset), Style)),
309 MainIncludeFound(false), Categories(Style, FileName) {
310 // Add 0 for main header and INT_MAX for headers that are not in any
311 // category.
312 Priorities = {0, INT_MAX};
313 for (const auto &Category : Style.IncludeCategories)
314 Priorities.insert(Category.Priority);
316 Code.drop_front(MinInsertOffset).split(Lines, "\n");
317
318 unsigned Offset = MinInsertOffset;
319 unsigned NextLineOffset;
321 for (auto Line : Lines) {
322 NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
323 if (IncludeRegex.match(Line, &Matches)) {
324 // If this is the last line without trailing newline, we need to make
325 // sure we don't delete across the file boundary.
326 addExistingInclude(
327 Include(Matches[2],
329 Offset, std::min(Line.size() + 1, Code.size() - Offset)),
330 Matches[1] == "import" ? tooling::IncludeDirective::Import
332 NextLineOffset);
333 }
334 Offset = NextLineOffset;
335 }
336
337 // Populate CategoryEndOfssets:
338 // - Ensure that CategoryEndOffset[Highest] is always populated.
339 // - If CategoryEndOffset[Priority] isn't set, use the next higher value
340 // that is set, up to CategoryEndOffset[Highest].
341 auto Highest = Priorities.begin();
342 auto [It, Inserted] = CategoryEndOffsets.try_emplace(*Highest);
343 if (Inserted)
344 It->second = FirstIncludeOffset >= 0 ? FirstIncludeOffset : MinInsertOffset;
345 // By this point, CategoryEndOffset[Highest] is always set appropriately:
346 // - to an appropriate location before/after existing #includes, or
347 // - to right after the header guard, or
348 // - to the beginning of the file.
349 for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
350 if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
351 CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
352}
353
354// \p Offset: the start of the line following this include directive.
355void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
356 unsigned NextLineOffset) {
357 auto &Incs = ExistingIncludes[trimInclude(IncludeToAdd.Name)];
358 Incs.push_back(std::move(IncludeToAdd));
359 auto &CurInclude = Incs.back();
360 // The header name with quotes or angle brackets.
361 // Only record the offset of current #include if we can insert after it.
362 if (CurInclude.R.getOffset() <= MaxInsertOffset) {
363 int Priority = Categories.getIncludePriority(
364 CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound);
365 if (Priority == 0)
366 MainIncludeFound = true;
367 CategoryEndOffsets[Priority] = NextLineOffset;
368 IncludesByPriority[Priority].push_back(&CurInclude);
369 if (FirstIncludeOffset < 0)
370 FirstIncludeOffset = CurInclude.R.getOffset();
371 }
372}
373
374std::optional<tooling::Replacement>
375HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
377 assert(IncludeName == trimInclude(IncludeName));
378 // If a <header> ("header") already exists in code, "header" (<header>) with
379 // different quotation and/or directive will still be inserted.
380 // FIXME: figure out if this is the best behavior.
381 auto It = ExistingIncludes.find(IncludeName);
382 if (It != ExistingIncludes.end()) {
383 for (const auto &Inc : It->second)
384 if (Inc.Directive == Directive &&
385 ((IsAngled && StringRef(Inc.Name).starts_with("<")) ||
386 (!IsAngled && StringRef(Inc.Name).starts_with("\""))))
387 return std::nullopt;
388 }
389 std::string Quoted =
390 std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName));
391 StringRef QuotedName = Quoted;
392 int Priority = Categories.getIncludePriority(
393 QuotedName, /*CheckMainHeader=*/!MainIncludeFound);
394 auto CatOffset = CategoryEndOffsets.find(Priority);
395 assert(CatOffset != CategoryEndOffsets.end());
396 unsigned InsertOffset = CatOffset->second; // Fall back offset
397 auto Iter = IncludesByPriority.find(Priority);
398 if (Iter != IncludesByPriority.end()) {
399 for (const auto *Inc : Iter->second) {
400 if (QuotedName < Inc->Name) {
401 InsertOffset = Inc->R.getOffset();
402 break;
403 }
404 }
405 }
406 assert(InsertOffset <= Code.size());
407 llvm::StringRef DirectiveSpelling =
408 Directive == IncludeDirective::Include ? "include" : "import";
409 std::string NewInclude =
410 llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName);
411 // When inserting headers at end of the code, also append '\n' to the code
412 // if it does not end with '\n'.
413 // FIXME: when inserting multiple #includes at the end of code, only one
414 // newline should be added.
415 if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
416 NewInclude = "\n" + NewInclude;
417 return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
418}
419
421 bool IsAngled) const {
422 assert(IncludeName == trimInclude(IncludeName));
424 auto Iter = ExistingIncludes.find(IncludeName);
425 if (Iter == ExistingIncludes.end())
426 return Result;
427 for (const auto &Inc : Iter->second) {
428 if ((IsAngled && StringRef(Inc.Name).starts_with("\"")) ||
429 (!IsAngled && StringRef(Inc.Name).starts_with("<")))
430 continue;
431 llvm::Error Err = Result.add(tooling::Replacement(
432 FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
433 if (Err) {
434 auto ErrMsg = "Unexpected conflicts in #include deletions: " +
435 llvm::toString(std::move(Err));
436 llvm_unreachable(ErrMsg.c_str());
437 }
438 }
439 return Result;
440}
441
442} // namespace tooling
443} // namespace clang
Token Tok
The Token.
#define SM(sm)
Defines the SourceManager interface.
VerifyDiagnosticConsumer::Directive Directive
tooling::Replacements remove(llvm::StringRef Header, bool IsAngled) const
Removes all existing includes and imports of Header quoted with <> if IsAngled is true or "" if IsAng...
static const llvm::Regex IncludeRegex
HeaderIncludes(llvm::StringRef FileName, llvm::StringRef Code, const IncludeStyle &Style)
std::optional< tooling::Replacement > insert(llvm::StringRef Header, bool IsAngled, IncludeDirective Directive) const
Inserts an include or import directive of Header into the code.
int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) const
Returns the priority of the category which IncludeName belongs to.
IncludeCategoryManager(const IncludeStyle &Style, StringRef FileName)
int getSortIncludePriority(StringRef IncludeName, bool CheckMainHeader) const
A source range independent of the SourceManager.
Definition Replacement.h:44
A text replacement.
Definition Replacement.h:83
Maintains a set of replacements that are conflict-free.
#define INT_MAX
Definition limits.h:50
SmallVector< BoundNodes, 1 > match(MatcherT Matcher, const NodeT &Node, ASTContext &Context)
Returns the results of matching Matcher on Node.
The JSON file list parser is used to communicate input to InstallAPI.
if(T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast< Expr * >(T -> getSizeExpr())))
@ Result
The result type of a method or function.
Definition TypeBase.h:905
for(const auto &A :T->param_types())
int const char * function
Definition c++config.h:31
#define false
Definition stdbool.h:26
Style for sorting and grouping C++ include directives.
MICD_AngleBracket
Regular expressions denoting the different #include categories used for ordering #includes.