clang 19.0.0git
MacroArgs.cpp
Go to the documentation of this file.
1//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the MacroArgs interface.
10//
11//===----------------------------------------------------------------------===//
12
13#include "clang/Lex/MacroArgs.h"
15#include "clang/Lex/MacroInfo.h"
17#include "llvm/ADT/SmallString.h"
18#include "llvm/Support/SaveAndRestore.h"
19#include <algorithm>
20
21using namespace clang;
22
23/// MacroArgs ctor function - This destroys the vector passed in.
25 ArrayRef<Token> UnexpArgTokens,
26 bool VarargsElided, Preprocessor &PP) {
27 assert(MI->isFunctionLike() &&
28 "Can't have args for an object-like macro!");
29 MacroArgs **ResultEnt = nullptr;
30 unsigned ClosestMatch = ~0U;
31
32 // See if we have an entry with a big enough argument list to reuse on the
33 // free list. If so, reuse it.
34 for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
35 Entry = &(*Entry)->ArgCache) {
36 if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
37 (*Entry)->NumUnexpArgTokens < ClosestMatch) {
38 ResultEnt = Entry;
39
40 // If we have an exact match, use it.
41 if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
42 break;
43 // Otherwise, use the best fit.
44 ClosestMatch = (*Entry)->NumUnexpArgTokens;
45 }
46 }
48 if (!ResultEnt) {
49 // Allocate memory for a MacroArgs object with the lexer tokens at the end,
50 // and construct the MacroArgs object.
51 Result = new (
52 llvm::safe_malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size())))
53 MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
54 } else {
55 Result = *ResultEnt;
56 // Unlink this node from the preprocessors singly linked list.
57 *ResultEnt = Result->ArgCache;
58 Result->NumUnexpArgTokens = UnexpArgTokens.size();
59 Result->VarargsElided = VarargsElided;
60 Result->NumMacroArgs = MI->getNumParams();
61 }
62
63 // Copy the actual unexpanded tokens to immediately after the result ptr.
64 if (!UnexpArgTokens.empty()) {
65 static_assert(std::is_trivial_v<Token>,
66 "assume trivial copyability if copying into the "
67 "uninitialized array (as opposed to reusing a cached "
68 "MacroArgs)");
69 std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
70 Result->getTrailingObjects<Token>());
71 }
72
73 return Result;
74}
75
76/// destroy - Destroy and deallocate the memory for this object.
77///
79 // Don't clear PreExpArgTokens, just clear the entries. Clearing the entries
80 // would deallocate the element vectors.
81 for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
82 PreExpArgTokens[i].clear();
83
84 // Add this to the preprocessor's free list.
85 ArgCache = PP.MacroArgCache;
86 PP.MacroArgCache = this;
87}
88
89/// deallocate - This should only be called by the Preprocessor when managing
90/// its freelist.
92 MacroArgs *Next = ArgCache;
93
94 // Run the dtor to deallocate the vectors.
95 this->~MacroArgs();
96 // Release the memory for the object.
97 static_assert(std::is_trivially_destructible_v<Token>,
98 "assume trivially destructible and forego destructors");
99 free(this);
100
101 return Next;
102}
103
104
105/// getArgLength - Given a pointer to an expanded or unexpanded argument,
106/// return the number of tokens, not counting the EOF, that make up the
107/// argument.
108unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
109 unsigned NumArgTokens = 0;
110 for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
111 ++NumArgTokens;
112 return NumArgTokens;
113}
114
115
116/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
117///
118const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
119
120 assert(Arg < getNumMacroArguments() && "Invalid arg #");
121 // The unexpanded argument tokens start immediately after the MacroArgs object
122 // in memory.
123 const Token *Start = getTrailingObjects<Token>();
124 const Token *Result = Start;
125
126 // Scan to find Arg.
127 for (; Arg; ++Result) {
128 assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
129 if (Result->is(tok::eof))
130 --Arg;
131 }
132 assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
133 return Result;
134}
135
137 Preprocessor &PP) {
138 if (!MI->isVariadic())
139 return false;
140 const int VariadicArgIndex = getNumMacroArguments() - 1;
141 return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof);
142}
143
144/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
145/// by pre-expansion, return false. Otherwise, conservatively return true.
147 Preprocessor &PP) const {
148 // If there are no identifiers in the argument list, or if the identifiers are
149 // known to not be macros, pre-expansion won't modify it.
150 for (; ArgTok->isNot(tok::eof); ++ArgTok)
151 if (IdentifierInfo *II = ArgTok->getIdentifierInfo())
152 if (II->hasMacroDefinition())
153 // Return true even though the macro could be a function-like macro
154 // without a following '(' token, or could be disabled, or not visible.
155 return true;
156 return false;
157}
158
159/// getPreExpArgument - Return the pre-expanded form of the specified
160/// argument.
161const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg,
162 Preprocessor &PP) {
163 assert(Arg < getNumMacroArguments() && "Invalid argument number!");
164
165 // If we have already computed this, return it.
166 if (PreExpArgTokens.size() < getNumMacroArguments())
167 PreExpArgTokens.resize(getNumMacroArguments());
168
169 std::vector<Token> &Result = PreExpArgTokens[Arg];
170 if (!Result.empty()) return Result;
171
172 SaveAndRestore PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);
173
174 const Token *AT = getUnexpArgument(Arg);
175 unsigned NumToks = getArgLength(AT)+1; // Include the EOF.
176
177 // Otherwise, we have to pre-expand this argument, populating Result. To do
178 // this, we set up a fake TokenLexer to lex from the unexpanded argument
179 // list. With this installed, we lex expanded tokens until we hit the EOF
180 // token at the end of the unexp list.
181 PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
182 false /*owns tokens*/, false /*is reinject*/);
183
184 // Lex all of the macro-expanded tokens into Result.
185 do {
186 Result.push_back(Token());
187 Token &Tok = Result.back();
188 PP.Lex(Tok);
189 } while (Result.back().isNot(tok::eof));
190
191 // Pop the token stream off the top of the stack. We know that the internal
192 // pointer inside of it is to the "end" of the token stream, but the stack
193 // will not otherwise be popped until the next token is lexed. The problem is
194 // that the token may be lexed sometime after the vector of tokens itself is
195 // destroyed, which would be badness.
196 if (PP.InCachingLexMode())
197 PP.ExitCachingLexMode();
199 return Result;
200}
201
202
203/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
204/// tokens into the literal string token that should be produced by the C #
205/// preprocessor operator. If Charify is true, then it should be turned into
206/// a character literal for the Microsoft charize (#@) extension.
207///
209 Preprocessor &PP, bool Charify,
210 SourceLocation ExpansionLocStart,
211 SourceLocation ExpansionLocEnd) {
212 Token Tok;
213 Tok.startToken();
214 Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
215
216 const Token *ArgTokStart = ArgToks;
217
218 // Stringify all the tokens.
220 Result += "\"";
221
222 bool isFirst = true;
223 for (; ArgToks->isNot(tok::eof); ++ArgToks) {
224 const Token &Tok = *ArgToks;
225 if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
226 Result += ' ';
227 isFirst = false;
228
229 // If this is a string or character constant, escape the token as specified
230 // by 6.10.3.2p2.
231 if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
232 Tok.is(tok::char_constant) || // 'x'
233 Tok.is(tok::wide_char_constant) || // L'x'.
234 Tok.is(tok::utf8_char_constant) || // u8'x'.
235 Tok.is(tok::utf16_char_constant) || // u'x'.
236 Tok.is(tok::utf32_char_constant)) { // U'x'.
237 bool Invalid = false;
238 std::string TokStr = PP.getSpelling(Tok, &Invalid);
239 if (!Invalid) {
240 std::string Str = Lexer::Stringify(TokStr);
241 Result.append(Str.begin(), Str.end());
242 }
243 } else if (Tok.is(tok::code_completion)) {
245 } else {
246 // Otherwise, just append the token. Do some gymnastics to get the token
247 // in place and avoid copies where possible.
248 unsigned CurStrLen = Result.size();
249 Result.resize(CurStrLen+Tok.getLength());
250 const char *BufPtr = Result.data() + CurStrLen;
251 bool Invalid = false;
252 unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid);
253
254 if (!Invalid) {
255 // If getSpelling returned a pointer to an already uniqued version of
256 // the string instead of filling in BufPtr, memcpy it onto our string.
257 if (ActualTokLen && BufPtr != &Result[CurStrLen])
258 memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
259
260 // If the token was dirty, the spelling may be shorter than the token.
261 if (ActualTokLen != Tok.getLength())
262 Result.resize(CurStrLen+ActualTokLen);
263 }
264 }
265 }
266
267 // If the last character of the string is a \, and if it isn't escaped, this
268 // is an invalid string literal, diagnose it as specified in C99.
269 if (Result.back() == '\\') {
270 // Count the number of consecutive \ characters. If even, then they are
271 // just escaped backslashes, otherwise it's an error.
272 unsigned FirstNonSlash = Result.size()-2;
273 // Guaranteed to find the starting " if nothing else.
274 while (Result[FirstNonSlash] == '\\')
275 --FirstNonSlash;
276 if ((Result.size()-1-FirstNonSlash) & 1) {
277 // Diagnose errors for things like: #define F(X) #X / F(\‍)
278 PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
279 Result.pop_back(); // remove one of the \'s.
280 }
281 }
282 Result += '"';
283
284 // If this is the charify operation and the result is not a legal character
285 // constant, diagnose it.
286 if (Charify) {
287 // First step, turn double quotes into single quotes:
288 Result[0] = '\'';
289 Result[Result.size()-1] = '\'';
290
291 // Check for bogus character.
292 bool isBad = false;
293 if (Result.size() == 3)
294 isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
295 else
296 isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x'
297
298 if (isBad) {
299 PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
300 Result = "' '"; // Use something arbitrary, but legal.
301 }
302 }
303
304 PP.CreateString(Result, Tok,
305 ExpansionLocStart, ExpansionLocEnd);
306 return Tok;
307}
Defines the clang::MacroInfo and clang::MacroDirective classes.
Defines the clang::Preprocessor interface.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
One of these records is kept for each identifier that is lexed.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Definition: Lexer.cpp:310
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
Definition: MacroArgs.h:30
const Token * getUnexpArgument(unsigned Arg) const
getUnexpArgument - Return a pointer to the first token of the unexpanded token list for the specified...
Definition: MacroArgs.cpp:118
MacroArgs * deallocate()
deallocate - This should only be called by the Preprocessor when managing its freelist.
Definition: MacroArgs.cpp:91
const std::vector< Token > & getPreExpArgument(unsigned Arg, Preprocessor &PP)
getPreExpArgument - Return the pre-expanded form of the specified argument.
Definition: MacroArgs.cpp:161
static MacroArgs * create(const MacroInfo *MI, ArrayRef< Token > UnexpArgTokens, bool VarargsElided, Preprocessor &PP)
MacroArgs ctor function - Create a new MacroArgs object with the specified macro and argument info.
Definition: MacroArgs.cpp:24
static unsigned getArgLength(const Token *ArgPtr)
getArgLength - Given a pointer to an expanded or unexpanded argument, return the number of tokens,...
Definition: MacroArgs.cpp:108
bool ArgNeedsPreexpansion(const Token *ArgTok, Preprocessor &PP) const
ArgNeedsPreexpansion - If we can prove that the argument won't be affected by pre-expansion,...
Definition: MacroArgs.cpp:146
bool invokedWithVariadicArgument(const MacroInfo *const MI, Preprocessor &PP)
Returns true if the macro was defined with a variadic (ellipsis) parameter AND was invoked with at le...
Definition: MacroArgs.cpp:136
unsigned getNumMacroArguments() const
getNumMacroArguments - Return the number of arguments the invoked macro expects.
Definition: MacroArgs.h:95
static Token StringifyArgument(const Token *ArgToks, Preprocessor &PP, bool Charify, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd)
StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of tokens into the literal string...
Definition: MacroArgs.cpp:208
void destroy(Preprocessor &PP)
destroy - Destroy and deallocate the memory for this object.
Definition: MacroArgs.cpp:78
Encapsulates the data about a macro definition (e.g.
Definition: MacroInfo.h:39
bool isFunctionLike() const
Definition: MacroInfo.h:201
unsigned getNumParams() const
Definition: MacroInfo.h:184
bool isVariadic() const
Definition: MacroInfo.h:209
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:128
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
void Lex(Token &Result)
Lex the next token for this preprocessor.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
void RemoveTopOfLexerStack()
Pop the current lexer/macro exp off the top of the lexer stack.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
Token - This structure provides full information about a lexed token.
Definition: Token.h:36
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:187
unsigned getLength() const
Definition: Token.h:135
void setKind(tok::TokenKind K)
Definition: Token.h:95
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
Definition: Token.h:99
tok::TokenKind getKind() const
Definition: Token.h:94
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
Definition: Token.h:276
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
Definition: Token.h:280
bool isNot(tok::TokenKind K) const
Definition: Token.h:100
void startToken()
Reset all flags to cleared.
Definition: Token.h:177
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
Definition: TokenKinds.h:89
The JSON file list parser is used to communicate input to InstallAPI.
@ Result
The result type of a method or function.