15 #include "clang/Basic/LangOptions.h"
16 #include "clang/Basic/SourceLocation.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Driver/Types.h"
20 #include "clang/Format/Format.h"
21 #include "clang/Lex/Lexer.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "clang/Lex/Token.h"
24 #include "clang/Tooling/Core/Replacement.h"
25 #include "clang/Tooling/Syntax/Tokens.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/BitVector.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/ADT/StringRef.h"
32 #include "llvm/Support/Compiler.h"
33 #include "llvm/Support/Errc.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/LineIterator.h"
37 #include "llvm/Support/MemoryBuffer.h"
38 #include "llvm/Support/Path.h"
39 #include "llvm/Support/VirtualFileSystem.h"
40 #include "llvm/Support/xxhash.h"
61 template <
typename Callback>
63 bool LoggedInvalid =
false;
66 for (
size_t I = 0; I < U8.size();) {
67 unsigned char C =
static_cast<unsigned char>(U8[I]);
68 if (LLVM_LIKELY(!(
C & 0x80))) {
75 size_t UTF8Length = llvm::countLeadingOnes(
C);
78 if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) {
80 elog(
"File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8));
94 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
131 llvm_unreachable(
"unsupported encoding");
134 if (Result > U8.size()) {
167 llvm_unreachable(
"unsupported encoding");
173 bool AllowColumnsBeyondLineLength) {
175 return error(llvm::errc::invalid_argument,
176 "Line value can't be negative ({0})", P.
line);
178 return error(llvm::errc::invalid_argument,
179 "Character value can't be negative ({0})", P.
character);
180 size_t StartOfLine = 0;
181 for (
int I = 0; I != P.
line; ++I) {
182 size_t NextNL =
Code.find(
'\n', StartOfLine);
183 if (NextNL == llvm::StringRef::npos)
184 return error(llvm::errc::invalid_argument,
185 "Line value is out of range ({0})", P.
line);
186 StartOfLine = NextNL + 1;
189 Code.substr(StartOfLine).take_until([](
char C) {
return C ==
'\n'; });
194 if (!Valid && !AllowColumnsBeyondLineLength)
195 return error(llvm::errc::invalid_argument,
196 "{0} offset {1} is invalid for line {2}",
lspEncoding(),
198 return StartOfLine + ByteInLine;
203 llvm::StringRef Before =
Code.substr(0,
Offset);
204 int Lines = Before.count(
'\n');
205 size_t PrevNL = Before.rfind(
'\n');
206 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
217 std::tie(FID,
Offset) = SM.getDecomposedSpellingLoc(
Loc);
219 P.
line =
static_cast<int>(SM.getLineNumber(FID,
Offset)) - 1;
220 bool Invalid =
false;
221 llvm::StringRef
Code = SM.getBufferData(FID, &Invalid);
223 auto ColumnInBytes = SM.getColumnNumber(FID,
Offset) - 1;
224 auto LineSoFar =
Code.substr(
Offset - ColumnInBytes, ColumnInBytes);
233 auto Spelling = SM.getDecomposedSpellingLoc(
Loc);
234 StringRef SpellingFile = SM.getSLocEntry(Spelling.first).getFile().getName();
235 if (SpellingFile ==
"<scratch space>")
237 if (SpellingFile ==
"<built-in>")
239 return !SM.isWrittenInCommandLineFile(
240 SM.getComposedLoc(Spelling.first, Spelling.second));
245 if (!R.getBegin().isValid() || !R.getEnd().isValid())
249 size_t BeginOffset = 0;
250 std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
253 size_t EndOffset = 0;
254 std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());
256 return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
260 assert(SM.getLocForEndOfFile(IncludedFile).isFileID());
261 FileID IncludingFile;
263 std::tie(IncludingFile,
Offset) =
264 SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile));
265 bool Invalid =
false;
266 llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid);
268 return SourceLocation();
272 assert(
Offset < Buf.size());
275 return SM.getComposedLoc(IncludingFile,
Offset);
277 return SourceLocation();
291 if (TheTok.is(tok::greatergreater))
293 return TheTok.getLength();
298 const SourceManager &SM,
301 return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0);
306 const SourceManager &SM,
308 return EndLoc.getLocWithOffset(
315 if (!
Range.isTokenRange())
317 return Range.getAsRange();
323 const SourceManager &SM,
325 SourceLocation Begin =
326 SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin())
334 return SourceRange(Begin, End);
343 if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd()))
346 llvm::DenseMap<FileID, SourceLocation> BeginExpansions;
347 for (SourceLocation Begin = R.getBegin(); Begin.isValid();
348 Begin = Begin.isFileID()
350 : SM.getImmediateExpansionRange(Begin).getBegin()) {
351 BeginExpansions[SM.getFileID(Begin)] = Begin;
355 for (SourceLocation End = R.getEnd(); End.isValid();
360 auto It = BeginExpansions.find(SM.getFileID(End));
361 if (It != BeginExpansions.end()) {
362 if (SM.getFileOffset(It->second) > SM.getFileOffset(End))
363 return SourceLocation();
364 return {It->second, End};
367 return SourceRange();
391 const SourceManager &SM,
393 SourceRange FileRange =
Loc;
394 while (!FileRange.getBegin().isFileID()) {
395 if (SM.isMacroArgExpansion(FileRange.getBegin())) {
397 SM.getImmediateSpellingLoc(FileRange.getBegin()),
398 SM.getImmediateSpellingLoc(FileRange.getEnd()), SM,
LangOpts);
399 assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
401 SourceRange ExpansionRangeForBegin =
403 SourceRange ExpansionRangeForEnd =
405 if (ExpansionRangeForBegin.isInvalid() ||
406 ExpansionRangeForEnd.isInvalid())
407 return SourceRange();
408 assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
409 ExpansionRangeForEnd.getBegin()) &&
410 "Both Expansion ranges should be in same file.");
411 FileRange =
unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
421 FileID FID = SM.getFileID(SM.getExpansionLoc(
Loc));
422 return FID == SM.getMainFileID() || FID == SM.getPreambleFileID();
440 Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
449 auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin()));
452 size_t BeginOffset = SM.getFileOffset(R.getBegin());
453 size_t EndOffset = SM.getFileOffset(R.getEnd());
454 return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
459 llvm::StringRef
Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer();
463 return Offset.takeError();
464 return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*
Offset);
476 if (
B.start <
A.start)
485 llvm::StringRef Before =
Code.substr(0,
Offset);
486 int Lines = Before.count(
'\n');
487 size_t PrevNL = Before.rfind(
'\n');
488 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
493 size_t Pos = QName.rfind(
"::");
494 if (
Pos == llvm::StringRef::npos)
495 return {llvm::StringRef(), QName};
496 return {QName.substr(0,
Pos + 2), QName.substr(
Pos + 2)};
500 const tooling::Replacement &R) {
501 Range ReplacementRange = {
504 return {ReplacementRange, std::string(R.getReplacementText())};
508 const tooling::Replacements &Repls) {
509 std::vector<TextEdit> Edits;
510 for (
const auto &R : Repls)
520 llvm::SmallString<128> FilePath = F->getName();
521 if (!llvm::sys::path::is_absolute(FilePath)) {
523 SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
525 elog(
"Could not turn relative path '{0}' to absolute: {1}", FilePath,
542 if (
auto Dir =
SourceMgr.getFileManager().getDirectory(
543 llvm::sys::path::parent_path(FilePath))) {
544 llvm::SmallString<128> RealPath;
545 llvm::StringRef DirName =
SourceMgr.getFileManager().getCanonicalName(*Dir);
546 llvm::sys::path::append(RealPath, DirName,
547 llvm::sys::path::filename(FilePath));
548 return RealPath.str().str();
551 return FilePath.str().str();
555 const LangOptions &L) {
559 Result.newText =
FixIt.CodeToInsert;
564 uint64_t Hash{llvm::xxHash64(Content)};
566 for (
unsigned I = 0; I < Result.size(); ++I) {
567 Result[I] = uint8_t(Hash);
573 std::optional<FileDigest>
digestFile(
const SourceManager &SM, FileID FID) {
574 bool Invalid =
false;
575 llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
582 llvm::StringRef Content,
584 auto Style = format::getStyle(format::DefaultFormatStyle, File,
585 format::DefaultFallbackStyle, Content,
586 TFS.
view(std::nullopt).get());
588 log(
"getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
590 return format::getLLVMStyle();
595 llvm::Expected<tooling::Replacements>
598 auto CleanReplaces = cleanupAroundReplacements(
Code, Replaces, Style);
600 return CleanReplaces;
601 return formatReplacements(
Code, std::move(*CleanReplaces), Style);
606 llvm::function_ref<
void(
const syntax::Token &,
const SourceManager &SM)>
609 std::string NullTerminatedCode =
Code.str();
610 SourceManagerForFile FileSM(
"mock_file_name.cpp", NullTerminatedCode);
611 auto &SM = FileSM.get();
612 for (
const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM,
LangOpts))
618 llvm::StringMap<unsigned> Identifiers;
619 auto LangOpt = format::getFormattingLangOpts(Style);
620 lex(Content, LangOpt, [&](
const syntax::Token &Tok,
const SourceManager &SM) {
621 if (Tok.kind() == tok::identifier)
622 ++Identifiers[Tok.text(SM)];
624 else if (
const auto *
Keyword = tok::getKeywordSpelling(Tok.kind()))
631 llvm::StringRef Content,
633 std::vector<Range> Ranges;
635 [&](
const syntax::Token &Tok,
const SourceManager &SM) {
636 if (Tok.kind() != tok::identifier || Tok.text(SM) !=
Identifier)
645 clang::IdentifierTable KeywordsTable(
LangOpts);
646 return KeywordsTable.find(NewName) != KeywordsTable.end();
650 struct NamespaceEvent {
661 void parseNamespaceEvents(llvm::StringRef
Code,
const LangOptions &
LangOpts,
662 llvm::function_ref<
void(NamespaceEvent)>
Callback) {
665 std::vector<std::string> Enclosing;
667 llvm::BitVector BraceStack;
679 NamespaceEvent
Event;
680 lex(
Code,
LangOpts, [&](
const syntax::Token &Tok,
const SourceManager &SM) {
682 switch (Tok.kind()) {
684 State = State == Default ? Using : Default;
686 case tok::kw_namespace:
689 State = UsingNamespace;
699 case tok::identifier:
704 case UsingNamespaceName:
705 NSName.append(Tok.text(SM).str());
706 State = UsingNamespaceName;
712 NSName.append(Tok.text(SM).str());
713 State = NamespaceName;
721 case tok::coloncolon:
728 case UsingNamespaceName:
730 State = UsingNamespaceName;
734 State = NamespaceName;
745 if (State == NamespaceName) {
747 BraceStack.push_back(
true);
748 Enclosing.push_back(NSName);
749 Event.Trigger = NamespaceEvent::BeginNamespace;
755 BraceStack.push_back(
false);
762 if (!BraceStack.empty()) {
763 if (BraceStack.back()) {
765 Enclosing.pop_back();
766 Event.Trigger = NamespaceEvent::EndNamespace;
770 BraceStack.pop_back();
774 if (State == UsingNamespaceName) {
776 Event.Trigger = NamespaceEvent::UsingDirective;
777 Event.Payload = std::move(NSName);
790 llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) {
791 llvm::SmallVector<llvm::StringRef>
Results;
792 Results.push_back(NS.take_front(0));
793 NS.split(
Results,
"::", -1,
false);
794 for (llvm::StringRef &R :
Results)
795 R = NS.take_front(R.end() - NS.begin());
805 llvm::StringMap<llvm::StringSet<>> UsingDirectives;
808 llvm::StringRef NS =
Event.Payload;
809 switch (
Event.Trigger) {
810 case NamespaceEvent::BeginNamespace:
811 case NamespaceEvent::EndNamespace:
812 Current = std::move(Event.Payload);
814 case NamespaceEvent::UsingDirective:
815 if (NS.consume_front(
"::"))
816 UsingDirectives[Current].insert(NS);
818 for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
819 if (Enclosing.empty())
820 UsingDirectives[Current].insert(NS);
822 UsingDirectives[Current].insert((Enclosing +
"::" + NS).str());
829 std::vector<std::string> Found;
830 for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
831 Found.push_back(std::string(Enclosing));
832 auto It = UsingDirectives.find(Enclosing);
833 if (It != UsingDirectives.end())
834 for (
const auto &Used : It->second)
835 Found.push_back(std::string(Used.getKey()));
838 llvm::sort(Found, [&](
const std::string &LHS,
const std::string &RHS) {
845 Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
853 static constexpr
int MinWordLength = 4;
855 std::vector<CharRole> Roles(Content.size());
858 llvm::StringSet<> Result;
859 llvm::SmallString<256>
Word;
861 if (
Word.size() >= MinWordLength) {
863 C = llvm::toLower(
C);
868 for (
unsigned I = 0; I < Content.size(); ++I) {
874 Word.push_back(Content[I]);
888 llvm::StringRef After) {
890 if (Before.endswith(
"`") && After.startswith(
"`"))
893 if (Before.endswith(
"::") || After.startswith(
"::"))
898 Before = Before.take_back(100);
899 auto Pos = Before.find_last_of(
"\\@");
900 if (
Pos != llvm::StringRef::npos) {
901 llvm::StringRef
Tag = Before.substr(
Pos + 1).rtrim(
' ');
902 if (
Tag ==
"p" ||
Tag ==
"c" ||
Tag ==
"class" ||
Tag ==
"tparam" ||
903 Tag ==
"param" ||
Tag ==
"param[in]" ||
Tag ==
"param[out]" ||
904 Tag ==
"param[in,out]" ||
Tag ==
"retval" ||
Tag ==
"throw" ||
905 Tag ==
"throws" ||
Tag ==
"link")
911 if (
Word.contains(
'_')) {
918 bool HasLower =
Word.find_if(clang::isLowercase) != StringRef::npos;
919 bool HasUpper =
Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
920 if (HasLower && HasUpper) {
927 std::optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
928 const syntax::TokenBuffer &TB,
930 const auto &SM = TB.sourceManager();
931 auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
932 for (
const auto &T : Touching) {
934 if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) {
936 Result.Location = T.location();
937 Result.Text = T.text(SM);
938 Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
939 Result.PartOfSpelledToken = &T;
940 Result.SpelledToken = &T;
942 TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
943 if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
944 Result.ExpandedToken = &Expanded.front();
950 std::tie(File,
Offset) = SM.getDecomposedLoc(SpelledLoc);
951 bool Invalid =
false;
952 llvm::StringRef
Code = SM.getBufferData(File, &Invalid);
956 while (
B > 0 && isAsciiIdentifierContinue(
Code[
B - 1]))
958 while (
E <
Code.size() && isAsciiIdentifierContinue(
Code[
E]))
964 Result.Location = SM.getComposedLoc(File,
B);
965 Result.Text =
Code.slice(
B,
E);
966 Result.LikelyIdentifier =
969 tok::isAnyIdentifier(
970 IdentifierTable(
LangOpts).get(Result.Text).getTokenID());
971 for (
const auto &T : Touching)
972 if (T.location() <= Result.Location)
973 Result.PartOfSpelledToken = &T;
982 assert(
Loc.isFileID());
983 const auto &SM =
PP.getSourceManager();
984 IdentifierInfo *IdentifierInfo =
PP.getIdentifierInfo(
SpelledTok.text(SM));
985 if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
996 FileID FID = SM.getFileID(
Loc);
997 assert(
Loc != SM.getLocForEndOfFile(FID));
998 SourceLocation JustAfterToken =
Loc.getLocWithOffset(1);
1000 PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo();
1001 if (!MacroInfo && SM.getLocForStartOfFile(FID) !=
Loc) {
1002 SourceLocation JustBeforeToken =
Loc.getLocWithOffset(-1);
1003 MacroInfo =
PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken)
1007 return std::nullopt;
1010 IdentifierInfo->getName(), MacroInfo,
1014 llvm::Expected<std::string> Edit::apply()
const {
1015 return tooling::applyAllReplacements(InitialCode, Replacements);
1018 std::vector<TextEdit> Edit::asTextEdits()
const {
1022 bool Edit::canApplyTo(llvm::StringRef
Code)
const {
1025 auto LHS = llvm::MemoryBuffer::getMemBuffer(
Code);
1026 llvm::line_iterator LHSIt(*LHS,
false);
1028 auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode);
1029 llvm::line_iterator RHSIt(*RHS,
false);
1035 while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) {
1036 if (*LHSIt != *RHSIt)
1045 while (!LHSIt.is_at_eof()) {
1046 if (!LHSIt->empty())
1050 while (!RHSIt.is_at_eof()) {
1051 if (!RHSIt->empty())
1060 E.Replacements = std::move(*NewEdits);
1062 return NewEdits.takeError();
1063 return llvm::Error::success();
1088 if (!Contents.empty() && Contents.back() ==
'\n')
1090 if (
Pos.character != 0)
1092 if (
Pos.line != llvm::count(Contents,
'\n') + 1)
1094 log(
"Editor sent invalid change coordinates, inferring newline at EOF");
1095 Contents.push_back(
'\n');
1096 consumeError(Err.takeError());
1097 Err = Contents.size();
1102 if (!Change.
range) {
1103 Contents = Change.
text;
1104 return llvm::Error::success();
1108 llvm::Expected<size_t> StartIndex =
positionToOffset(Contents, Start,
false);
1111 return StartIndex.takeError();
1117 return EndIndex.takeError();
1119 if (*EndIndex < *StartIndex)
1120 return error(llvm::errc::invalid_argument,
1121 "Range's end position ({0}) is before start position ({1})",
1131 ssize_t ComputedRangeLength =
1132 lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex));
1135 return error(llvm::errc::invalid_argument,
1136 "Change's rangeLength ({0}) doesn't match the "
1137 "computed range length ({1}).",
1140 Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.
text);
1142 return llvm::Error::success();
1146 llvm::StringRef FullyQualifiedName,
1150 std::vector<std::string> Enclosing = {
""};
1157 if (
Event.Trigger == NamespaceEvent::UsingDirective)
1161 Event.Payload.append(
"::");
1163 std::string CurrentNamespace;
1164 if (
Event.Trigger == NamespaceEvent::BeginNamespace) {
1165 Enclosing.emplace_back(std::move(Event.Payload));
1166 CurrentNamespace = Enclosing.back();
1169 ++Event.Pos.character;
1173 CurrentNamespace = std::move(Enclosing.back());
1174 Enclosing.pop_back();
1175 assert(Enclosing.back() == Event.Payload);
1179 if (!FullyQualifiedName.startswith(CurrentNamespace))
1184 ER.EligiblePoints.clear();
1185 ER.EnclosingNamespace = CurrentNamespace;
1199 std::optional<LangOptions>
LangOpts) {
1204 namespace types = clang::driver::types;
1205 auto Lang = types::lookupTypeForExtension(
1206 llvm::sys::path::extension(
FileName).substr(1));
1207 return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang);
1214 auto FID = SM.getFileID(
Loc);
1216 static const char *ProtoHeaderComment =
1217 "// Generated by the protocol buffer compiler. DO NOT EDIT!";
1219 return SM.getBufferData(FID).startswith(ProtoHeaderComment);