clang 19.0.0git
TokenAnalyzer.cpp
Go to the documentation of this file.
1//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an abstract TokenAnalyzer and associated helper
11/// classes. TokenAnalyzer can be extended to generate replacements based on
12/// an annotated and pre-processed token stream.
13///
14//===----------------------------------------------------------------------===//
15
16#include "TokenAnalyzer.h"
17
18#define DEBUG_TYPE "format-formatter"
19
20namespace clang {
21namespace format {
22
23// FIXME: Instead of printing the diagnostic we should store it and have a
24// better way to return errors through the format APIs.
26public:
28 const Diagnostic &Info) override {
29 if (DiagLevel == DiagnosticsEngine::Fatal) {
30 Fatal = true;
32 Info.FormatDiagnostic(Message);
33 llvm::errs() << Message << "\n";
34 }
35 }
36
37 bool fatalError() const { return Fatal; }
38
39private:
40 bool Fatal = false;
41};
42
/// Factory for an Environment over an in-memory buffer.
///
/// Constructs the Environment from \p Code, \p FileName and the three start
/// columns, converts every entry of \p Ranges into a CharSourceRange measured
/// from the start of the file, and then probes the buffer once.
///
/// \returns the new Environment, or nullptr if the diagnostic consumer
/// reported a fatal error.
///
/// NOTE(review): the declaration of `Diags` (embedded line 49) is missing from
/// this listing; it is presumably a function-local FatalDiagnosticConsumer --
/// confirm against the upstream file.
43std::unique_ptr<Environment>
44Environment::make(StringRef Code, StringRef FileName,
45 ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
46 unsigned NextStartColumn, unsigned LastStartColumn) {
47 auto Env = std::make_unique<Environment>(Code, FileName, FirstStartColumn,
48 NextStartColumn, LastStartColumn);
// Route diagnostics to Diags without transferring ownership of it.
// NOTE(review): if Diags is a local, the diagnostics engine still holds this
// non-owning pointer after make() returns -- confirm that is intended.
50 Env->SM.getDiagnostics().setClient(&Diags, /*ShouldOwnClient=*/false);
51 SourceLocation StartOfFile = Env->SM.getLocForStartOfFile(Env->ID);
// Translate each (offset, length) range into a [Start, End) character range
// anchored at the start of the file.
52 for (const tooling::Range &Range : Ranges) {
53 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
54 SourceLocation End = Start.getLocWithOffset(Range.getLength());
55 Env->CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
56 }
57 // Validate that we can get the buffer data without a fatal error.
// The returned buffer is deliberately ignored; only the fatal-error flag that
// this call may set on Diags is inspected.
58 Env->SM.getBufferData(Env->ID);
59 if (Diags.fatalError())
60 return nullptr;
61 return Env;
62}
63
/// Builds an Environment around a SourceManagerForFile created from
/// \p FileName and \p Code.
///
/// SM and ID are both derived from VirtualSM in the initializer list, so this
/// relies on VirtualSM being declared before them in the class (the class
/// declaration is not visible in this listing). The three start columns are
/// stored unchanged, and the constructor body is empty.
64Environment::Environment(StringRef Code, StringRef FileName,
65 unsigned FirstStartColumn, unsigned NextStartColumn,
66 unsigned LastStartColumn)
67 : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()),
68 ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn),
69 NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {}
70
72 : Style(Style), Env(Env),
73 AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
74 UnwrappedLines(1),
75 Encoding(encoding::detectEncoding(
76 Env.getSourceManager().getBufferData(Env.getFileID()))) {
77 LLVM_DEBUG(
78 llvm::dbgs() << "File encoding: "
79 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
80 << "\n");
81 LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
82 << "\n");
83}
84
/// Lexes and parses the input, then calls analyze() once per run of unwrapped
/// lines, merging the replacements and summing the penalties.
///
/// \param SkipAnnotation when true, Annotator.annotate() is not called on the
/// lines before they are passed to analyze().
/// \returns the merged replacements and the total penalty; if a replacement
/// cannot be merged, an empty Replacements and a penalty of 0.
///
/// NOTE(review): several statements are missing from this listing (embedded
/// lines 87, 89-91, 95 and 104), including the declarations of `Result`,
/// `IdentTable`, `Lex`, `Parser` and `AnnotatedLines` -- consult the upstream
/// file before changing this function.
85std::pair<tooling::Replacements, unsigned>
86TokenAnalyzer::process(bool SkipAnnotation) {
88 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
92 IdentTable);
93 ArrayRef<FormatToken *> Toks(Lex.lex());
// Copy the lexer's token pointers into a separate vector, which is what gets
// passed on below.
94 SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end());
96 Env.getFirstStartColumn(), Tokens, *this,
97 Allocator, IdentTable);
// presumably parse() fills UnwrappedLines through the callbacks on *this --
// TODO confirm against the parser.
98 Parser.parse();
// The final run is expected to be empty; the loop below stops before it.
99 assert(UnwrappedLines.back().empty());
100 unsigned Penalty = 0;
101 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
102 const auto &Lines = UnwrappedLines[Run];
103 LLVM_DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
105 AnnotatedLines.reserve(Lines.size());
106
107 TokenAnnotator Annotator(Style, Lex.getKeywords());
// Each AnnotatedLine is allocated with new here and released by the delete
// loop further down.
108 for (const UnwrappedLine &Line : Lines) {
109 AnnotatedLines.push_back(new AnnotatedLine(Line));
110 if (!SkipAnnotation)
111 Annotator.annotate(*AnnotatedLines.back());
112 }
113
114 std::pair<tooling::Replacements, unsigned> RunResult =
115 analyze(Annotator, AnnotatedLines, Lex);
116
117 LLVM_DEBUG({
118 llvm::dbgs() << "Replacements for run " << Run << ":\n";
119 for (const tooling::Replacement &Fix : RunResult.first)
120 llvm::dbgs() << Fix.toString() << "\n";
121 });
// The lines are freed before the replacements are merged, so the early return
// below does not skip this cleanup.
122 for (AnnotatedLine *Line : AnnotatedLines)
123 delete Line;
124
125 Penalty += RunResult.second;
126 for (const auto &R : RunResult.first) {
127 auto Err = Result.add(R);
128 // FIXME: better error handling here. For now, simply return an empty
129 // Replacements to indicate failure.
130 if (Err) {
131 llvm::errs() << llvm::toString(std::move(Err)) << "\n";
132 return {tooling::Replacements(), 0};
133 }
134 }
135 }
136 return {Result, Penalty};
137}
138
140 assert(!UnwrappedLines.empty());
141 UnwrappedLines.back().push_back(TheLine);
142}
143
146}
147
148} // end namespace format
149} // end namespace clang
static char ID
Definition: Arena.cpp:183
#define SM(sm)
Definition: Cuda.cpp:82
const Environment & Env
Definition: HTMLLogger.cpp:148
This file declares an abstract TokenAnalyzer, and associated helper classes.
static CharSourceRange getCharRange(SourceRange R)
Abstract interface, implemented by clients of the front-end, which formats and prints fully processed...
Definition: Diagnostic.h:1745
A little helper class (which is basically a smart pointer that forwards info from DiagnosticsEngine) ...
Definition: Diagnostic.h:1571
void FormatDiagnostic(SmallVectorImpl< char > &OutStr) const
Format this diagnostic into a string, substituting the formal arguments into the %0 slots.
Definition: Diagnostic.cpp:791
Level
The level of the diagnostic, after it has been through mapping.
Definition: Diagnostic.h:195
Implements an efficient mapping from strings to IdentifierInfo nodes.
Parser - This implements a parser for the C family of languages.
Definition: Parser.h:54
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
SourceManager and necessary dependencies (e.g.
SourceManager & getSourceManager() const
Definition: TokenAnalyzer.h:38
Environment(StringRef Code, StringRef FileName, unsigned FirstStartColumn=0, unsigned NextStartColumn=0, unsigned LastStartColumn=0)
static std::unique_ptr< Environment > make(StringRef Code, StringRef FileName, ArrayRef< tooling::Range > Ranges, unsigned FirstStartColumn=0, unsigned NextStartColumn=0, unsigned LastStartColumn=0)
unsigned getFirstStartColumn() const
Definition: TokenAnalyzer.h:44
void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) override
Handle this diagnostic, reporting it to the user or capturing it to a log as needed.
const AdditionalKeywords & getKeywords()
ArrayRef< FormatToken * > lex()
encoding::Encoding Encoding
virtual std::pair< tooling::Replacements, unsigned > analyze(TokenAnnotator &Annotator, SmallVectorImpl< AnnotatedLine * > &AnnotatedLines, FormatTokenLexer &Tokens)=0
const Environment & Env
Definition: TokenAnalyzer.h:96
SmallVector< SmallVector< UnwrappedLine, 16 >, 2 > UnwrappedLines
Definition: TokenAnalyzer.h:99
TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
std::pair< tooling::Replacements, unsigned > process(bool SkipAnnotation=false)
void consumeUnwrappedLine(const UnwrappedLine &TheLine) override
Determines extra information about the tokens comprising an UnwrappedLine.
void annotate(AnnotatedLine &Line)
A source range independent of the SourceManager.
Definition: Replacement.h:44
A text replacement.
Definition: Replacement.h:83
std::string toString() const
Returns a human readable string representation.
Definition: Replacement.cpp:87
Maintains a set of replacements that are conflict-free.
Definition: Replacement.h:212
LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())
Returns the LangOpts that the formatter expects you to set.
Definition: Format.cpp:3815
StringRef getLanguageName(FormatStyle::LanguageKind Language)
Definition: Format.h:5271
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:55
LanguageKind Language
Language, this format style is targeted at.
Definition: Format.h:3125
An unwrapped line is a sequence of Token, that we would like to put on a single line if there was no ...