clang API Documentation

SourceManager.cpp
Go to the documentation of this file.
00001 //===--- SourceManager.cpp - Track and cache source files -----------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 //  This file implements the SourceManager interface.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "clang/Basic/SourceManager.h"
00015 #include "clang/Basic/SourceManagerInternals.h"
00016 #include "clang/Basic/Diagnostic.h"
00017 #include "clang/Basic/FileManager.h"
00018 #include "llvm/ADT/StringSwitch.h"
00019 #include "llvm/ADT/Optional.h"
00020 #include "llvm/ADT/STLExtras.h"
00021 #include "llvm/Support/Compiler.h"
00022 #include "llvm/Support/MemoryBuffer.h"
00023 #include "llvm/Support/raw_ostream.h"
00024 #include "llvm/Support/Path.h"
00025 #include "llvm/Support/Capacity.h"
00026 #include <algorithm>
00027 #include <string>
00028 #include <cstring>
00029 #include <sys/stat.h>
00030 
00031 using namespace clang;
00032 using namespace SrcMgr;
00033 using llvm::MemoryBuffer;
00034 
00035 //===----------------------------------------------------------------------===//
00036 // SourceManager Helper Classes
00037 //===----------------------------------------------------------------------===//
00038 
00039 ContentCache::~ContentCache() {
00040   if (shouldFreeBuffer())
00041     delete Buffer.getPointer();
00042 }
00043 
00044 /// getSizeBytesMapped - Returns the number of bytes actually mapped for this
00045 /// ContentCache. This can be 0 if the MemBuffer was not actually expanded.
00046 unsigned ContentCache::getSizeBytesMapped() const {
00047   return Buffer.getPointer() ? Buffer.getPointer()->getBufferSize() : 0;
00048 }
00049 
00050 /// Returns the kind of memory used to back the memory buffer for
00051 /// this content cache.  This is used for performance analysis.
00052 llvm::MemoryBuffer::BufferKind ContentCache::getMemoryBufferKind() const {
00053   assert(Buffer.getPointer());
00054 
00055   // Should be unreachable, but keep for sanity.
00056   if (!Buffer.getPointer())
00057     return llvm::MemoryBuffer::MemoryBuffer_Malloc;
00058   
00059   const llvm::MemoryBuffer *buf = Buffer.getPointer();
00060   return buf->getBufferKind();
00061 }
00062 
00063 /// getSize - Returns the size of the content encapsulated by this ContentCache.
00064 ///  This can be the size of the source file or the size of an arbitrary
00065 ///  scratch buffer.  If the ContentCache encapsulates a source file, that
00066 ///  file is not lazily brought in from disk to satisfy this query.
00067 unsigned ContentCache::getSize() const {
00068   return Buffer.getPointer() ? (unsigned) Buffer.getPointer()->getBufferSize()
00069                              : (unsigned) ContentsEntry->getSize();
00070 }
00071 
00072 void ContentCache::replaceBuffer(const llvm::MemoryBuffer *B,
00073                                  bool DoNotFree) {
00074   if (B && B == Buffer.getPointer()) {
00075     assert(0 && "Replacing with the same buffer");
00076     Buffer.setInt(DoNotFree? DoNotFreeFlag : 0);
00077     return;
00078   }
00079   
00080   if (shouldFreeBuffer())
00081     delete Buffer.getPointer();
00082   Buffer.setPointer(B);
00083   Buffer.setInt(DoNotFree? DoNotFreeFlag : 0);
00084 }
00085 
00086 const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag,
00087                                                   const SourceManager &SM,
00088                                                   SourceLocation Loc,
00089                                                   bool *Invalid) const {
00090   // Lazily create the Buffer for ContentCaches that wrap files.  If we already
00091   // computed it, just return what we have.
00092   if (Buffer.getPointer() || ContentsEntry == 0) {
00093     if (Invalid)
00094       *Invalid = isBufferInvalid();
00095     
00096     return Buffer.getPointer();
00097   }    
00098 
00099   std::string ErrorStr;
00100   Buffer.setPointer(SM.getFileManager().getBufferForFile(ContentsEntry, &ErrorStr));
00101 
00102   // If we were unable to open the file, then we are in an inconsistent
00103   // situation where the content cache referenced a file which no longer
00104   // exists. Most likely, we were using a stat cache with an invalid entry but
00105   // the file could also have been removed during processing. Since we can't
00106   // really deal with this situation, just create an empty buffer.
00107   //
00108   // FIXME: This is definitely not ideal, but our immediate clients can't
00109   // currently handle returning a null entry here. Ideally we should detect
00110   // that we are in an inconsistent situation and error out as quickly as
00111   // possible.
00112   if (!Buffer.getPointer()) {
00113     const StringRef FillStr("<<<MISSING SOURCE FILE>>>\n");
00114     Buffer.setPointer(MemoryBuffer::getNewMemBuffer(ContentsEntry->getSize(), 
00115                                                     "<invalid>"));
00116     char *Ptr = const_cast<char*>(Buffer.getPointer()->getBufferStart());
00117     for (unsigned i = 0, e = ContentsEntry->getSize(); i != e; ++i)
00118       Ptr[i] = FillStr[i % FillStr.size()];
00119 
00120     if (Diag.isDiagnosticInFlight())
00121       Diag.SetDelayedDiagnostic(diag::err_cannot_open_file, 
00122                                 ContentsEntry->getName(), ErrorStr);
00123     else 
00124       Diag.Report(Loc, diag::err_cannot_open_file)
00125         << ContentsEntry->getName() << ErrorStr;
00126 
00127     Buffer.setInt(Buffer.getInt() | InvalidFlag);
00128     
00129     if (Invalid) *Invalid = true;
00130     return Buffer.getPointer();
00131   }
00132   
00133   // Check that the file's size is the same as in the file entry (which may
00134   // have come from a stat cache).
00135   if (getRawBuffer()->getBufferSize() != (size_t)ContentsEntry->getSize()) {
00136     if (Diag.isDiagnosticInFlight())
00137       Diag.SetDelayedDiagnostic(diag::err_file_modified,
00138                                 ContentsEntry->getName());
00139     else
00140       Diag.Report(Loc, diag::err_file_modified)
00141         << ContentsEntry->getName();
00142 
00143     Buffer.setInt(Buffer.getInt() | InvalidFlag);
00144     if (Invalid) *Invalid = true;
00145     return Buffer.getPointer();
00146   }
00147 
00148   // If the buffer is valid, check to see if it has a UTF Byte Order Mark
00149   // (BOM).  We only support UTF-8 with and without a BOM right now.  See
00150   // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
00151   StringRef BufStr = Buffer.getPointer()->getBuffer();
00152   const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
00153     .StartsWith("\xFE\xFF", "UTF-16 (BE)")
00154     .StartsWith("\xFF\xFE", "UTF-16 (LE)")
00155     .StartsWith("\x00\x00\xFE\xFF", "UTF-32 (BE)")
00156     .StartsWith("\xFF\xFE\x00\x00", "UTF-32 (LE)")
00157     .StartsWith("\x2B\x2F\x76", "UTF-7")
00158     .StartsWith("\xF7\x64\x4C", "UTF-1")
00159     .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
00160     .StartsWith("\x0E\xFE\xFF", "SDSU")
00161     .StartsWith("\xFB\xEE\x28", "BOCU-1")
00162     .StartsWith("\x84\x31\x95\x33", "GB-18030")
00163     .Default(0);
00164 
00165   if (InvalidBOM) {
00166     Diag.Report(Loc, diag::err_unsupported_bom)
00167       << InvalidBOM << ContentsEntry->getName();
00168     Buffer.setInt(Buffer.getInt() | InvalidFlag);
00169   }
00170   
00171   if (Invalid)
00172     *Invalid = isBufferInvalid();
00173   
00174   return Buffer.getPointer();
00175 }
00176 
00177 unsigned LineTableInfo::getLineTableFilenameID(StringRef Name) {
00178   // Look up the filename in the string table, returning the pre-existing value
00179   // if it exists.
00180   llvm::StringMapEntry<unsigned> &Entry =
00181     FilenameIDs.GetOrCreateValue(Name, ~0U);
00182   if (Entry.getValue() != ~0U)
00183     return Entry.getValue();
00184 
00185   // Otherwise, assign this the next available ID.
00186   Entry.setValue(FilenamesByID.size());
00187   FilenamesByID.push_back(&Entry);
00188   return FilenamesByID.size()-1;
00189 }
00190 
00191 /// AddLineNote - Add a line note to the line table that indicates that there
00192 /// is a #line at the specified FID/Offset location which changes the presumed
00193 /// location to LineNo/FilenameID.
00194 void LineTableInfo::AddLineNote(int FID, unsigned Offset,
00195                                 unsigned LineNo, int FilenameID) {
00196   std::vector<LineEntry> &Entries = LineEntries[FID];
00197 
00198   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
00199          "Adding line entries out of order!");
00200 
00201   SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
00202   unsigned IncludeOffset = 0;
00203 
00204   if (!Entries.empty()) {
00205     // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
00206     // that we are still in "foo.h".
00207     if (FilenameID == -1)
00208       FilenameID = Entries.back().FilenameID;
00209 
00210     // If we are after a line marker that switched us to system header mode, or
00211     // that set #include information, preserve it.
00212     Kind = Entries.back().FileKind;
00213     IncludeOffset = Entries.back().IncludeOffset;
00214   }
00215 
00216   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
00217                                    IncludeOffset));
00218 }
00219 
00220 /// AddLineNote This is the same as the previous version of AddLineNote, but is
00221 /// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
00222 /// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
00223 /// this is a file exit.  FileKind specifies whether this is a system header or
00224 /// extern C system header.
00225 void LineTableInfo::AddLineNote(int FID, unsigned Offset,
00226                                 unsigned LineNo, int FilenameID,
00227                                 unsigned EntryExit,
00228                                 SrcMgr::CharacteristicKind FileKind) {
00229   assert(FilenameID != -1 && "Unspecified filename should use other accessor");
00230 
00231   std::vector<LineEntry> &Entries = LineEntries[FID];
00232 
00233   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
00234          "Adding line entries out of order!");
00235 
00236   unsigned IncludeOffset = 0;
00237   if (EntryExit == 0) {  // No #include stack change.
00238     IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
00239   } else if (EntryExit == 1) {
00240     IncludeOffset = Offset-1;
00241   } else if (EntryExit == 2) {
00242     assert(!Entries.empty() && Entries.back().IncludeOffset &&
00243        "PPDirectives should have caught case when popping empty include stack");
00244 
00245     // Get the include loc of the last entries' include loc as our include loc.
00246     IncludeOffset = 0;
00247     if (const LineEntry *PrevEntry =
00248           FindNearestLineEntry(FID, Entries.back().IncludeOffset))
00249       IncludeOffset = PrevEntry->IncludeOffset;
00250   }
00251 
00252   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
00253                                    IncludeOffset));
00254 }
00255 
00256 
00257 /// FindNearestLineEntry - Find the line entry nearest to FID that is before
00258 /// it.  If there is no line entry before Offset in FID, return null.
00259 const LineEntry *LineTableInfo::FindNearestLineEntry(int FID,
00260                                                      unsigned Offset) {
00261   const std::vector<LineEntry> &Entries = LineEntries[FID];
00262   assert(!Entries.empty() && "No #line entries for this FID after all!");
00263 
00264   // It is very common for the query to be after the last #line, check this
00265   // first.
00266   if (Entries.back().FileOffset <= Offset)
00267     return &Entries.back();
00268 
00269   // Do a binary search to find the maximal element that is still before Offset.
00270   std::vector<LineEntry>::const_iterator I =
00271     std::upper_bound(Entries.begin(), Entries.end(), Offset);
00272   if (I == Entries.begin()) return 0;
00273   return &*--I;
00274 }
00275 
00276 /// \brief Add a new line entry that has already been encoded into
00277 /// the internal representation of the line table.
00278 void LineTableInfo::AddEntry(int FID,
00279                              const std::vector<LineEntry> &Entries) {
00280   LineEntries[FID] = Entries;
00281 }
00282 
00283 /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
00284 ///
00285 unsigned SourceManager::getLineTableFilenameID(StringRef Name) {
00286   if (LineTable == 0)
00287     LineTable = new LineTableInfo();
00288   return LineTable->getLineTableFilenameID(Name);
00289 }
00290 
00291 
00292 /// AddLineNote - Add a line note to the line table for the FileID and offset
00293 /// specified by Loc.  If FilenameID is -1, it is considered to be
00294 /// unspecified.
00295 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
00296                                 int FilenameID) {
00297   std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
00298 
00299   bool Invalid = false;
00300   const SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid);
00301   if (!Entry.isFile() || Invalid)
00302     return;
00303   
00304   const SrcMgr::FileInfo &FileInfo = Entry.getFile();
00305 
00306   // Remember that this file has #line directives now if it doesn't already.
00307   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
00308 
00309   if (LineTable == 0)
00310     LineTable = new LineTableInfo();
00311   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
00312 }
00313 
00314 /// AddLineNote - Add a GNU line marker to the line table.
00315 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
00316                                 int FilenameID, bool IsFileEntry,
00317                                 bool IsFileExit, bool IsSystemHeader,
00318                                 bool IsExternCHeader) {
00319   // If there is no filename and no flags, this is treated just like a #line,
00320   // which does not change the flags of the previous line marker.
00321   if (FilenameID == -1) {
00322     assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
00323            "Can't set flags without setting the filename!");
00324     return AddLineNote(Loc, LineNo, FilenameID);
00325   }
00326 
00327   std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
00328 
00329   bool Invalid = false;
00330   const SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid);
00331   if (!Entry.isFile() || Invalid)
00332     return;
00333   
00334   const SrcMgr::FileInfo &FileInfo = Entry.getFile();
00335 
00336   // Remember that this file has #line directives now if it doesn't already.
00337   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
00338 
00339   if (LineTable == 0)
00340     LineTable = new LineTableInfo();
00341 
00342   SrcMgr::CharacteristicKind FileKind;
00343   if (IsExternCHeader)
00344     FileKind = SrcMgr::C_ExternCSystem;
00345   else if (IsSystemHeader)
00346     FileKind = SrcMgr::C_System;
00347   else
00348     FileKind = SrcMgr::C_User;
00349 
00350   unsigned EntryExit = 0;
00351   if (IsFileEntry)
00352     EntryExit = 1;
00353   else if (IsFileExit)
00354     EntryExit = 2;
00355 
00356   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
00357                          EntryExit, FileKind);
00358 }
00359 
00360 LineTableInfo &SourceManager::getLineTable() {
00361   if (LineTable == 0)
00362     LineTable = new LineTableInfo();
00363   return *LineTable;
00364 }
00365 
00366 //===----------------------------------------------------------------------===//
00367 // Private 'Create' methods.
00368 //===----------------------------------------------------------------------===//
00369 
00370 SourceManager::SourceManager(DiagnosticsEngine &Diag, FileManager &FileMgr)
00371   : Diag(Diag), FileMgr(FileMgr), OverridenFilesKeepOriginalName(true),
00372     ExternalSLocEntries(0), LineTable(0), NumLinearScans(0),
00373     NumBinaryProbes(0), FakeBufferForRecovery(0),
00374     FakeContentCacheForRecovery(0) {
00375   clearIDTables();
00376   Diag.setSourceManager(this);
00377 }
00378 
00379 SourceManager::~SourceManager() {
00380   delete LineTable;
00381 
00382   // Delete FileEntry objects corresponding to content caches.  Since the actual
00383   // content cache objects are bump pointer allocated, we just have to run the
00384   // dtors, but we call the deallocate method for completeness.
00385   for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
00386     if (MemBufferInfos[i]) {
00387       MemBufferInfos[i]->~ContentCache();
00388       ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
00389     }
00390   }
00391   for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
00392        I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
00393     if (I->second) {
00394       I->second->~ContentCache();
00395       ContentCacheAlloc.Deallocate(I->second);
00396     }
00397   }
00398   
00399   delete FakeBufferForRecovery;
00400   delete FakeContentCacheForRecovery;
00401 
00402   for (llvm::DenseMap<FileID, MacroArgsMap *>::iterator
00403          I = MacroArgsCacheMap.begin(),E = MacroArgsCacheMap.end(); I!=E; ++I) {
00404     delete I->second;
00405   }
00406 }
00407 
00408 void SourceManager::clearIDTables() {
00409   MainFileID = FileID();
00410   LocalSLocEntryTable.clear();
00411   LoadedSLocEntryTable.clear();
00412   SLocEntryLoaded.clear();
00413   LastLineNoFileIDQuery = FileID();
00414   LastLineNoContentCache = 0;
00415   LastFileIDLookup = FileID();
00416 
00417   if (LineTable)
00418     LineTable->clear();
00419 
00420   // Use up FileID #0 as an invalid expansion.
00421   NextLocalOffset = 0;
00422   CurrentLoadedOffset = MaxLoadedOffset;
00423   createExpansionLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
00424 }
00425 
00426 /// getOrCreateContentCache - Create or return a cached ContentCache for the
00427 /// specified file.
00428 const ContentCache *
00429 SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
00430   assert(FileEnt && "Didn't specify a file entry to use?");
00431 
00432   // Do we already have information about this file?
00433   ContentCache *&Entry = FileInfos[FileEnt];
00434   if (Entry) return Entry;
00435 
00436   // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
00437   // so that FileInfo can use the low 3 bits of the pointer for its own
00438   // nefarious purposes.
00439   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
00440   EntryAlign = std::max(8U, EntryAlign);
00441   Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
00442 
00443   if (OverriddenFilesInfo) {
00444     // If the file contents are overridden with contents from another file,
00445     // pass that file to ContentCache.
00446     llvm::DenseMap<const FileEntry *, const FileEntry *>::iterator
00447         overI = OverriddenFilesInfo->OverriddenFiles.find(FileEnt);
00448     if (overI == OverriddenFilesInfo->OverriddenFiles.end())
00449       new (Entry) ContentCache(FileEnt);
00450     else
00451       new (Entry) ContentCache(OverridenFilesKeepOriginalName ? FileEnt
00452                                                               : overI->second,
00453                                overI->second);
00454   } else {
00455     new (Entry) ContentCache(FileEnt);
00456   }
00457 
00458   return Entry;
00459 }
00460 
00461 
00462 /// createMemBufferContentCache - Create a new ContentCache for the specified
00463 ///  memory buffer.  This does no caching.
00464 const ContentCache*
00465 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
00466   // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
00467   // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
00468   // the pointer for its own nefarious purposes.
00469   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
00470   EntryAlign = std::max(8U, EntryAlign);
00471   ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
00472   new (Entry) ContentCache();
00473   MemBufferInfos.push_back(Entry);
00474   Entry->setBuffer(Buffer);
00475   return Entry;
00476 }
00477 
00478 const SrcMgr::SLocEntry &SourceManager::loadSLocEntry(unsigned Index,
00479                                                       bool *Invalid) const {
00480   assert(!SLocEntryLoaded[Index]);
00481   if (ExternalSLocEntries->ReadSLocEntry(-(static_cast<int>(Index) + 2))) {
00482     if (Invalid)
00483       *Invalid = true;
00484     // If the file of the SLocEntry changed we could still have loaded it.
00485     if (!SLocEntryLoaded[Index]) {
00486       // Try to recover; create a SLocEntry so the rest of clang can handle it.
00487       LoadedSLocEntryTable[Index] = SLocEntry::get(0,
00488                                  FileInfo::get(SourceLocation(),
00489                                                getFakeContentCacheForRecovery(),
00490                                                SrcMgr::C_User));
00491     }
00492   }
00493 
00494   return LoadedSLocEntryTable[Index];
00495 }
00496 
00497 std::pair<int, unsigned>
00498 SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
00499                                          unsigned TotalSize) {
00500   assert(ExternalSLocEntries && "Don't have an external sloc source");
00501   LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
00502   SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
00503   CurrentLoadedOffset -= TotalSize;
00504   assert(CurrentLoadedOffset >= NextLocalOffset && "Out of source locations");
00505   int ID = LoadedSLocEntryTable.size();
00506   return std::make_pair(-ID - 1, CurrentLoadedOffset);
00507 }
00508 
00509 /// \brief As part of recovering from missing or changed content, produce a
00510 /// fake, non-empty buffer.
00511 const llvm::MemoryBuffer *SourceManager::getFakeBufferForRecovery() const {
00512   if (!FakeBufferForRecovery)
00513     FakeBufferForRecovery
00514       = llvm::MemoryBuffer::getMemBuffer("<<<INVALID BUFFER>>");
00515   
00516   return FakeBufferForRecovery;
00517 }
00518 
00519 /// \brief As part of recovering from missing or changed content, produce a
00520 /// fake content cache.
00521 const SrcMgr::ContentCache *
00522 SourceManager::getFakeContentCacheForRecovery() const {
00523   if (!FakeContentCacheForRecovery) {
00524     FakeContentCacheForRecovery = new ContentCache();
00525     FakeContentCacheForRecovery->replaceBuffer(getFakeBufferForRecovery(),
00526                                                /*DoNotFree=*/true);
00527   }
00528   return FakeContentCacheForRecovery;
00529 }
00530 
00531 //===----------------------------------------------------------------------===//
00532 // Methods to create new FileID's and macro expansions.
00533 //===----------------------------------------------------------------------===//
00534 
00535 /// createFileID - Create a new FileID for the specified ContentCache and
00536 /// include position.  This works regardless of whether the ContentCache
00537 /// corresponds to a file or some other input source.
00538 FileID SourceManager::createFileID(const ContentCache *File,
00539                                    SourceLocation IncludePos,
00540                                    SrcMgr::CharacteristicKind FileCharacter,
00541                                    int LoadedID, unsigned LoadedOffset) {
00542   if (LoadedID < 0) {
00543     assert(LoadedID != -1 && "Loading sentinel FileID");
00544     unsigned Index = unsigned(-LoadedID) - 2;
00545     assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
00546     assert(!SLocEntryLoaded[Index] && "FileID already loaded");
00547     LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset,
00548         FileInfo::get(IncludePos, File, FileCharacter));
00549     SLocEntryLoaded[Index] = true;
00550     return FileID::get(LoadedID);
00551   }
00552   LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset,
00553                                                FileInfo::get(IncludePos, File,
00554                                                              FileCharacter)));
00555   unsigned FileSize = File->getSize();
00556   assert(NextLocalOffset + FileSize + 1 > NextLocalOffset &&
00557          NextLocalOffset + FileSize + 1 <= CurrentLoadedOffset &&
00558          "Ran out of source locations!");
00559   // We do a +1 here because we want a SourceLocation that means "the end of the
00560   // file", e.g. for the "no newline at the end of the file" diagnostic.
00561   NextLocalOffset += FileSize + 1;
00562 
00563   // Set LastFileIDLookup to the newly created file.  The next getFileID call is
00564   // almost guaranteed to be from that file.
00565   FileID FID = FileID::get(LocalSLocEntryTable.size()-1);
00566   return LastFileIDLookup = FID;
00567 }
00568 
00569 SourceLocation
00570 SourceManager::createMacroArgExpansionLoc(SourceLocation SpellingLoc,
00571                                           SourceLocation ExpansionLoc,
00572                                           unsigned TokLength) {
00573   ExpansionInfo Info = ExpansionInfo::createForMacroArg(SpellingLoc,
00574                                                         ExpansionLoc);
00575   return createExpansionLocImpl(Info, TokLength);
00576 }
00577 
00578 SourceLocation
00579 SourceManager::createExpansionLoc(SourceLocation SpellingLoc,
00580                                   SourceLocation ExpansionLocStart,
00581                                   SourceLocation ExpansionLocEnd,
00582                                   unsigned TokLength,
00583                                   int LoadedID,
00584                                   unsigned LoadedOffset) {
00585   ExpansionInfo Info = ExpansionInfo::create(SpellingLoc, ExpansionLocStart,
00586                                              ExpansionLocEnd);
00587   return createExpansionLocImpl(Info, TokLength, LoadedID, LoadedOffset);
00588 }
00589 
00590 SourceLocation
00591 SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
00592                                       unsigned TokLength,
00593                                       int LoadedID,
00594                                       unsigned LoadedOffset) {
00595   if (LoadedID < 0) {
00596     assert(LoadedID != -1 && "Loading sentinel FileID");
00597     unsigned Index = unsigned(-LoadedID) - 2;
00598     assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
00599     assert(!SLocEntryLoaded[Index] && "FileID already loaded");
00600     LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
00601     SLocEntryLoaded[Index] = true;
00602     return SourceLocation::getMacroLoc(LoadedOffset);
00603   }
00604   LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
00605   assert(NextLocalOffset + TokLength + 1 > NextLocalOffset &&
00606          NextLocalOffset + TokLength + 1 <= CurrentLoadedOffset &&
00607          "Ran out of source locations!");
00608   // See createFileID for that +1.
00609   NextLocalOffset += TokLength + 1;
00610   return SourceLocation::getMacroLoc(NextLocalOffset - (TokLength + 1));
00611 }
00612 
00613 const llvm::MemoryBuffer *
00614 SourceManager::getMemoryBufferForFile(const FileEntry *File,
00615                                       bool *Invalid) {
00616   const SrcMgr::ContentCache *IR = getOrCreateContentCache(File);
00617   assert(IR && "getOrCreateContentCache() cannot return NULL");
00618   return IR->getBuffer(Diag, *this, SourceLocation(), Invalid);
00619 }
00620 
00621 void SourceManager::overrideFileContents(const FileEntry *SourceFile,
00622                                          const llvm::MemoryBuffer *Buffer,
00623                                          bool DoNotFree) {
00624   const SrcMgr::ContentCache *IR = getOrCreateContentCache(SourceFile);
00625   assert(IR && "getOrCreateContentCache() cannot return NULL");
00626 
00627   const_cast<SrcMgr::ContentCache *>(IR)->replaceBuffer(Buffer, DoNotFree);
00628   const_cast<SrcMgr::ContentCache *>(IR)->BufferOverridden = true;
00629 
00630   getOverriddenFilesInfo().OverriddenFilesWithBuffer.insert(SourceFile);
00631 }
00632 
00633 void SourceManager::overrideFileContents(const FileEntry *SourceFile,
00634                                          const FileEntry *NewFile) {
00635   assert(SourceFile->getSize() == NewFile->getSize() &&
00636          "Different sizes, use the FileManager to create a virtual file with "
00637          "the correct size");
00638   assert(FileInfos.count(SourceFile) == 0 &&
00639          "This function should be called at the initialization stage, before "
00640          "any parsing occurs.");
00641   getOverriddenFilesInfo().OverriddenFiles[SourceFile] = NewFile;
00642 }
00643 
00644 void SourceManager::disableFileContentsOverride(const FileEntry *File) {
00645   if (!isFileOverridden(File))
00646     return;
00647 
00648   const SrcMgr::ContentCache *IR = getOrCreateContentCache(File);
00649   const_cast<SrcMgr::ContentCache *>(IR)->replaceBuffer(0);
00650   const_cast<SrcMgr::ContentCache *>(IR)->ContentsEntry = IR->OrigEntry;
00651 
00652   assert(OverriddenFilesInfo);
00653   OverriddenFilesInfo->OverriddenFiles.erase(File);
00654   OverriddenFilesInfo->OverriddenFilesWithBuffer.erase(File);
00655 }
00656 
00657 StringRef SourceManager::getBufferData(FileID FID, bool *Invalid) const {
00658   bool MyInvalid = false;
00659   const SLocEntry &SLoc = getSLocEntry(FID, &MyInvalid);
00660   if (!SLoc.isFile() || MyInvalid) {
00661     if (Invalid) 
00662       *Invalid = true;
00663     return "<<<<<INVALID SOURCE LOCATION>>>>>";
00664   }
00665   
00666   const llvm::MemoryBuffer *Buf
00667     = SLoc.getFile().getContentCache()->getBuffer(Diag, *this, SourceLocation(), 
00668                                                   &MyInvalid);
00669   if (Invalid)
00670     *Invalid = MyInvalid;
00671 
00672   if (MyInvalid)
00673     return "<<<<<INVALID SOURCE LOCATION>>>>>";
00674   
00675   return Buf->getBuffer();
00676 }
00677 
00678 //===----------------------------------------------------------------------===//
00679 // SourceLocation manipulation methods.
00680 //===----------------------------------------------------------------------===//
00681 
00682 /// \brief Return the FileID for a SourceLocation.
00683 ///
00684 /// This is the cache-miss path of getFileID. Not as hot as that function, but
00685 /// still very important. It is responsible for finding the entry in the
00686 /// SLocEntry tables that contains the specified location.
00687 FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
00688   if (!SLocOffset)
00689     return FileID::get(0);
00690 
00691   // Now it is time to search for the correct file. See where the SLocOffset
00692   // sits in the global view and consult local or loaded buffers for it.
00693   if (SLocOffset < NextLocalOffset)
00694     return getFileIDLocal(SLocOffset);
00695   return getFileIDLoaded(SLocOffset);
00696 }
00697 
00698 /// \brief Return the FileID for a SourceLocation with a low offset.
00699 ///
00700 /// This function knows that the SourceLocation is in a local buffer, not a
00701 /// loaded one.
00702 FileID SourceManager::getFileIDLocal(unsigned SLocOffset) const {
00703   assert(SLocOffset < NextLocalOffset && "Bad function choice");
00704 
00705   // After the first and second level caches, I see two common sorts of
00706   // behavior: 1) a lot of searched FileID's are "near" the cached file
00707   // location or are "near" the cached expansion location. 2) others are just
00708   // completely random and may be a very long way away.
00709   //
00710   // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
00711   // then we fall back to a less cache efficient, but more scalable, binary
00712   // search to find the location.
00713 
00714   // See if this is near the file point - worst case we start scanning from the
00715   // most newly created FileID.
00716   std::vector<SrcMgr::SLocEntry>::const_iterator I;
00717 
00718   if (LastFileIDLookup.ID < 0 ||
00719       LocalSLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
00720     // Neither loc prunes our search.
00721     I = LocalSLocEntryTable.end();
00722   } else {
00723     // Perhaps it is near the file point.
00724     I = LocalSLocEntryTable.begin()+LastFileIDLookup.ID;
00725   }
00726 
00727   // Find the FileID that contains this.  "I" is an iterator that points to a
00728   // FileID whose offset is known to be larger than SLocOffset.
00729   unsigned NumProbes = 0;
00730   while (1) {
00731     --I;
00732     if (I->getOffset() <= SLocOffset) {
00733       FileID Res = FileID::get(int(I - LocalSLocEntryTable.begin()));
00734 
00735       // If this isn't an expansion, remember it.  We have good locality across
00736       // FileID lookups.
00737       if (!I->isExpansion())
00738         LastFileIDLookup = Res;
00739       NumLinearScans += NumProbes+1;
00740       return Res;
00741     }
00742     if (++NumProbes == 8)
00743       break;
00744   }
00745 
00746   // Convert "I" back into an index.  We know that it is an entry whose index is
00747   // larger than the offset we are looking for.
00748   unsigned GreaterIndex = I - LocalSLocEntryTable.begin();
00749   // LessIndex - This is the lower bound of the range that we're searching.
00750   // We know that the offset corresponding to the FileID is is less than
00751   // SLocOffset.
00752   unsigned LessIndex = 0;
00753   NumProbes = 0;
00754   while (1) {
00755     bool Invalid = false;
00756     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
00757     unsigned MidOffset = getLocalSLocEntry(MiddleIndex, &Invalid).getOffset();
00758     if (Invalid)
00759       return FileID::get(0);
00760     
00761     ++NumProbes;
00762 
00763     // If the offset of the midpoint is too large, chop the high side of the
00764     // range to the midpoint.
00765     if (MidOffset > SLocOffset) {
00766       GreaterIndex = MiddleIndex;
00767       continue;
00768     }
00769 
00770     // If the middle index contains the value, succeed and return.
00771     // FIXME: This could be made faster by using a function that's aware of
00772     // being in the local area.
00773     if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
00774       FileID Res = FileID::get(MiddleIndex);
00775 
00776       // If this isn't a macro expansion, remember it.  We have good locality
00777       // across FileID lookups.
00778       if (!LocalSLocEntryTable[MiddleIndex].isExpansion())
00779         LastFileIDLookup = Res;
00780       NumBinaryProbes += NumProbes;
00781       return Res;
00782     }
00783 
00784     // Otherwise, move the low-side up to the middle index.
00785     LessIndex = MiddleIndex;
00786   }
00787 }
00788 
00789 /// \brief Return the FileID for a SourceLocation with a high offset.
00790 ///
00791 /// This function knows that the SourceLocation is in a loaded buffer, not a
00792 /// local one.
00793 FileID SourceManager::getFileIDLoaded(unsigned SLocOffset) const {
00794   // Sanity checking, otherwise a bug may lead to hanging in release build.
00795   if (SLocOffset < CurrentLoadedOffset) {
00796     assert(0 && "Invalid SLocOffset or bad function choice");
00797     return FileID();
00798   }
00799 
00800   // Essentially the same as the local case, but the loaded array is sorted
00801   // in the other direction.
00802 
00803   // First do a linear scan from the last lookup position, if possible.
00804   unsigned I;
00805   int LastID = LastFileIDLookup.ID;
00806   if (LastID >= 0 || getLoadedSLocEntryByID(LastID).getOffset() < SLocOffset)
00807     I = 0;
00808   else
00809     I = (-LastID - 2) + 1;
00810 
00811   unsigned NumProbes;
00812   for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++I) {
00813     // Make sure the entry is loaded!
00814     const SrcMgr::SLocEntry &E = getLoadedSLocEntry(I);
00815     if (E.getOffset() <= SLocOffset) {
00816       FileID Res = FileID::get(-int(I) - 2);
00817 
00818       if (!E.isExpansion())
00819         LastFileIDLookup = Res;
00820       NumLinearScans += NumProbes + 1;
00821       return Res;
00822     }
00823   }
00824 
00825   // Linear scan failed. Do the binary search. Note the reverse sorting of the
00826   // table: GreaterIndex is the one where the offset is greater, which is
00827   // actually a lower index!
00828   unsigned GreaterIndex = I;
00829   unsigned LessIndex = LoadedSLocEntryTable.size();
00830   NumProbes = 0;
00831   while (1) {
00832     ++NumProbes;
00833     unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
00834     const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex);
00835 
00836     ++NumProbes;
00837 
00838     if (E.getOffset() > SLocOffset) {
00839       GreaterIndex = MiddleIndex;
00840       continue;
00841     }
00842 
00843     if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
00844       FileID Res = FileID::get(-int(MiddleIndex) - 2);
00845       if (!E.isExpansion())
00846         LastFileIDLookup = Res;
00847       NumBinaryProbes += NumProbes;
00848       return Res;
00849     }
00850 
00851     LessIndex = MiddleIndex;
00852   }
00853 }
00854 
00855 SourceLocation SourceManager::
00856 getExpansionLocSlowCase(SourceLocation Loc) const {
00857   do {
00858     // Note: If Loc indicates an offset into a token that came from a macro
00859     // expansion (e.g. the 5th character of the token) we do not want to add
00860     // this offset when going to the expansion location.  The expansion
00861     // location is the macro invocation, which the offset has nothing to do
00862     // with.  This is unlike when we get the spelling loc, because the offset
00863     // directly correspond to the token whose spelling we're inspecting.
00864     Loc = getSLocEntry(getFileID(Loc)).getExpansion().getExpansionLocStart();
00865   } while (!Loc.isFileID());
00866 
00867   return Loc;
00868 }
00869 
00870 SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
00871   do {
00872     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
00873     Loc = getSLocEntry(LocInfo.first).getExpansion().getSpellingLoc();
00874     Loc = Loc.getLocWithOffset(LocInfo.second);
00875   } while (!Loc.isFileID());
00876   return Loc;
00877 }
00878 
00879 SourceLocation SourceManager::getFileLocSlowCase(SourceLocation Loc) const {
00880   do {
00881     if (isMacroArgExpansion(Loc))
00882       Loc = getImmediateSpellingLoc(Loc);
00883     else
00884       Loc = getImmediateExpansionRange(Loc).first;
00885   } while (!Loc.isFileID());
00886   return Loc;
00887 }
00888 
00889 
00890 std::pair<FileID, unsigned>
00891 SourceManager::getDecomposedExpansionLocSlowCase(
00892                                              const SrcMgr::SLocEntry *E) const {
00893   // If this is an expansion record, walk through all the expansion points.
00894   FileID FID;
00895   SourceLocation Loc;
00896   unsigned Offset;
00897   do {
00898     Loc = E->getExpansion().getExpansionLocStart();
00899 
00900     FID = getFileID(Loc);
00901     E = &getSLocEntry(FID);
00902     Offset = Loc.getOffset()-E->getOffset();
00903   } while (!Loc.isFileID());
00904 
00905   return std::make_pair(FID, Offset);
00906 }
00907 
00908 std::pair<FileID, unsigned>
00909 SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
00910                                                 unsigned Offset) const {
00911   // If this is an expansion record, walk through all the expansion points.
00912   FileID FID;
00913   SourceLocation Loc;
00914   do {
00915     Loc = E->getExpansion().getSpellingLoc();
00916     Loc = Loc.getLocWithOffset(Offset);
00917 
00918     FID = getFileID(Loc);
00919     E = &getSLocEntry(FID);
00920     Offset = Loc.getOffset()-E->getOffset();
00921   } while (!Loc.isFileID());
00922 
00923   return std::make_pair(FID, Offset);
00924 }
00925 
00926 /// getImmediateSpellingLoc - Given a SourceLocation object, return the
00927 /// spelling location referenced by the ID.  This is the first level down
00928 /// towards the place where the characters that make up the lexed token can be
00929 /// found.  This should not generally be used by clients.
00930 SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
00931   if (Loc.isFileID()) return Loc;
00932   std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
00933   Loc = getSLocEntry(LocInfo.first).getExpansion().getSpellingLoc();
00934   return Loc.getLocWithOffset(LocInfo.second);
00935 }
00936 
00937 
00938 /// getImmediateExpansionRange - Loc is required to be an expansion location.
00939 /// Return the start/end of the expansion information.
00940 std::pair<SourceLocation,SourceLocation>
00941 SourceManager::getImmediateExpansionRange(SourceLocation Loc) const {
00942   assert(Loc.isMacroID() && "Not a macro expansion loc!");
00943   const ExpansionInfo &Expansion = getSLocEntry(getFileID(Loc)).getExpansion();
00944   return Expansion.getExpansionLocRange();
00945 }
00946 
00947 /// getExpansionRange - Given a SourceLocation object, return the range of
00948 /// tokens covered by the expansion in the ultimate file.
00949 std::pair<SourceLocation,SourceLocation>
00950 SourceManager::getExpansionRange(SourceLocation Loc) const {
00951   if (Loc.isFileID()) return std::make_pair(Loc, Loc);
00952 
00953   std::pair<SourceLocation,SourceLocation> Res =
00954     getImmediateExpansionRange(Loc);
00955 
00956   // Fully resolve the start and end locations to their ultimate expansion
00957   // points.
00958   while (!Res.first.isFileID())
00959     Res.first = getImmediateExpansionRange(Res.first).first;
00960   while (!Res.second.isFileID())
00961     Res.second = getImmediateExpansionRange(Res.second).second;
00962   return Res;
00963 }
00964 
00965 bool SourceManager::isMacroArgExpansion(SourceLocation Loc) const {
00966   if (!Loc.isMacroID()) return false;
00967 
00968   FileID FID = getFileID(Loc);
00969   const SrcMgr::SLocEntry *E = &getSLocEntry(FID);
00970   const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
00971   return Expansion.isMacroArgExpansion();
00972 }
00973 
00974 
00975 //===----------------------------------------------------------------------===//
00976 // Queries about the code at a SourceLocation.
00977 //===----------------------------------------------------------------------===//
00978 
00979 /// getCharacterData - Return a pointer to the start of the specified location
00980 /// in the appropriate MemoryBuffer.
00981 const char *SourceManager::getCharacterData(SourceLocation SL,
00982                                             bool *Invalid) const {
00983   // Note that this is a hot function in the getSpelling() path, which is
00984   // heavily used by -E mode.
00985   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
00986 
00987   // Note that calling 'getBuffer()' may lazily page in a source file.
00988   bool CharDataInvalid = false;
00989   const SLocEntry &Entry = getSLocEntry(LocInfo.first, &CharDataInvalid);
00990   if (CharDataInvalid || !Entry.isFile()) {
00991     if (Invalid)
00992       *Invalid = true;
00993     
00994     return "<<<<INVALID BUFFER>>>>";
00995   }
00996   const llvm::MemoryBuffer *Buffer
00997     = Entry.getFile().getContentCache()
00998                   ->getBuffer(Diag, *this, SourceLocation(), &CharDataInvalid);
00999   if (Invalid)
01000     *Invalid = CharDataInvalid;
01001   return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second);
01002 }
01003 
01004 
01005 /// getColumnNumber - Return the column # for the specified file position.
01006 /// this is significantly cheaper to compute than the line number.
01007 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
01008                                         bool *Invalid) const {
01009   bool MyInvalid = false;
01010   const llvm::MemoryBuffer *MemBuf = getBuffer(FID, &MyInvalid);
01011   if (Invalid)
01012     *Invalid = MyInvalid;
01013 
01014   if (MyInvalid)
01015     return 1;
01016 
01017   if (FilePos >= MemBuf->getBufferSize()) {
01018     if (Invalid)
01019       *Invalid = MyInvalid;
01020     return 1;
01021   }
01022 
01023   const char *Buf = MemBuf->getBufferStart();
01024   unsigned LineStart = FilePos;
01025   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
01026     --LineStart;
01027   return FilePos-LineStart+1;
01028 }
01029 
01030 // isInvalid - Return the result of calling loc.isInvalid(), and
01031 // if Invalid is not null, set its value to same.
01032 static bool isInvalid(SourceLocation Loc, bool *Invalid) {
01033   bool MyInvalid = Loc.isInvalid();
01034   if (Invalid)
01035     *Invalid = MyInvalid;
01036   return MyInvalid;
01037 }
01038 
01039 unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc,
01040                                                 bool *Invalid) const {
01041   if (isInvalid(Loc, Invalid)) return 0;
01042   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
01043   return getColumnNumber(LocInfo.first, LocInfo.second, Invalid);
01044 }
01045 
01046 unsigned SourceManager::getExpansionColumnNumber(SourceLocation Loc,
01047                                                  bool *Invalid) const {
01048   if (isInvalid(Loc, Invalid)) return 0;
01049   std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
01050   return getColumnNumber(LocInfo.first, LocInfo.second, Invalid);
01051 }
01052 
01053 unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc,
01054                                                 bool *Invalid) const {
01055   if (isInvalid(Loc, Invalid)) return 0;
01056   return getPresumedLoc(Loc).getColumn();
01057 }
01058 
01059 #ifdef __SSE2__
01060 #include <emmintrin.h>
01061 #endif
01062 
01063 static LLVM_ATTRIBUTE_NOINLINE void
01064 ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
01065                    llvm::BumpPtrAllocator &Alloc,
01066                    const SourceManager &SM, bool &Invalid);
01067 static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
01068                                llvm::BumpPtrAllocator &Alloc,
01069                                const SourceManager &SM, bool &Invalid) {
01070   // Note that calling 'getBuffer()' may lazily page in the file.
01071   const MemoryBuffer *Buffer = FI->getBuffer(Diag, SM, SourceLocation(),
01072                                              &Invalid);
01073   if (Invalid)
01074     return;
01075 
01076   // Find the file offsets of all of the *physical* source lines.  This does
01077   // not look at trigraphs, escaped newlines, or anything else tricky.
01078   SmallVector<unsigned, 256> LineOffsets;
01079 
01080   // Line #1 starts at char 0.
01081   LineOffsets.push_back(0);
01082 
01083   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
01084   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
01085   unsigned Offs = 0;
01086   while (1) {
01087     // Skip over the contents of the line.
01088     const unsigned char *NextBuf = (const unsigned char *)Buf;
01089 
01090 #ifdef __SSE2__
01091     // Try to skip to the next newline using SSE instructions. This is very
01092     // performance sensitive for programs with lots of diagnostics and in -E
01093     // mode.
01094     __m128i CRs = _mm_set1_epi8('\r');
01095     __m128i LFs = _mm_set1_epi8('\n');
01096 
01097     // First fix up the alignment to 16 bytes.
01098     while (((uintptr_t)NextBuf & 0xF) != 0) {
01099       if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0')
01100         goto FoundSpecialChar;
01101       ++NextBuf;
01102     }
01103 
01104     // Scan 16 byte chunks for '\r' and '\n'. Ignore '\0'.
01105     while (NextBuf+16 <= End) {
01106       __m128i Chunk = *(__m128i*)NextBuf;
01107       __m128i Cmp = _mm_or_si128(_mm_cmpeq_epi8(Chunk, CRs),
01108                                  _mm_cmpeq_epi8(Chunk, LFs));
01109       unsigned Mask = _mm_movemask_epi8(Cmp);
01110 
01111       // If we found a newline, adjust the pointer and jump to the handling code.
01112       if (Mask != 0) {
01113         NextBuf += llvm::CountTrailingZeros_32(Mask);
01114         goto FoundSpecialChar;
01115       }
01116       NextBuf += 16;
01117     }
01118 #endif
01119 
01120     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
01121       ++NextBuf;
01122 
01123 #ifdef __SSE2__
01124 FoundSpecialChar:
01125 #endif
01126     Offs += NextBuf-Buf;
01127     Buf = NextBuf;
01128 
01129     if (Buf[0] == '\n' || Buf[0] == '\r') {
01130       // If this is \n\r or \r\n, skip both characters.
01131       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
01132         ++Offs, ++Buf;
01133       ++Offs, ++Buf;
01134       LineOffsets.push_back(Offs);
01135     } else {
01136       // Otherwise, this is a null.  If end of file, exit.
01137       if (Buf == End) break;
01138       // Otherwise, skip the null.
01139       ++Offs, ++Buf;
01140     }
01141   }
01142 
01143   // Copy the offsets into the FileInfo structure.
01144   FI->NumLines = LineOffsets.size();
01145   FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
01146   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
01147 }
01148 
01149 /// getLineNumber - Given a SourceLocation, return the spelling line number
01150 /// for the position indicated.  This requires building and caching a table of
01151 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
01152 /// about to emit a diagnostic.
01153 unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos, 
01154                                       bool *Invalid) const {
01155   if (FID.isInvalid()) {
01156     if (Invalid)
01157       *Invalid = true;
01158     return 1;
01159   }
01160 
01161   ContentCache *Content;
01162   if (LastLineNoFileIDQuery == FID)
01163     Content = LastLineNoContentCache;
01164   else {
01165     bool MyInvalid = false;
01166     const SLocEntry &Entry = getSLocEntry(FID, &MyInvalid);
01167     if (MyInvalid || !Entry.isFile()) {
01168       if (Invalid)
01169         *Invalid = true;
01170       return 1;
01171     }
01172     
01173     Content = const_cast<ContentCache*>(Entry.getFile().getContentCache());
01174   }
01175   
01176   // If this is the first use of line information for this buffer, compute the
01177   /// SourceLineCache for it on demand.
01178   if (Content->SourceLineCache == 0) {
01179     bool MyInvalid = false;
01180     ComputeLineNumbers(Diag, Content, ContentCacheAlloc, *this, MyInvalid);
01181     if (Invalid)
01182       *Invalid = MyInvalid;
01183     if (MyInvalid)
01184       return 1;
01185   } else if (Invalid)
01186     *Invalid = false;
01187 
01188   // Okay, we know we have a line number table.  Do a binary search to find the
01189   // line number that this character position lands on.
01190   unsigned *SourceLineCache = Content->SourceLineCache;
01191   unsigned *SourceLineCacheStart = SourceLineCache;
01192   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
01193 
01194   unsigned QueriedFilePos = FilePos+1;
01195 
01196   // FIXME: I would like to be convinced that this code is worth being as
01197   // complicated as it is, binary search isn't that slow.
01198   //
01199   // If it is worth being optimized, then in my opinion it could be more
01200   // performant, simpler, and more obviously correct by just "galloping" outward
01201   // from the queried file position. In fact, this could be incorporated into a
01202   // generic algorithm such as lower_bound_with_hint.
01203   //
01204   // If someone gives me a test case where this matters, and I will do it! - DWD
01205 
01206   // If the previous query was to the same file, we know both the file pos from
01207   // that query and the line number returned.  This allows us to narrow the
01208   // search space from the entire file to something near the match.
01209   if (LastLineNoFileIDQuery == FID) {
01210     if (QueriedFilePos >= LastLineNoFilePos) {
01211       // FIXME: Potential overflow?
01212       SourceLineCache = SourceLineCache+LastLineNoResult-1;
01213 
01214       // The query is likely to be nearby the previous one.  Here we check to
01215       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
01216       // where big comment blocks and vertical whitespace eat up lines but
01217       // contribute no tokens.
01218       if (SourceLineCache+5 < SourceLineCacheEnd) {
01219         if (SourceLineCache[5] > QueriedFilePos)
01220           SourceLineCacheEnd = SourceLineCache+5;
01221         else if (SourceLineCache+10 < SourceLineCacheEnd) {
01222           if (SourceLineCache[10] > QueriedFilePos)
01223             SourceLineCacheEnd = SourceLineCache+10;
01224           else if (SourceLineCache+20 < SourceLineCacheEnd) {
01225             if (SourceLineCache[20] > QueriedFilePos)
01226               SourceLineCacheEnd = SourceLineCache+20;
01227           }
01228         }
01229       }
01230     } else {
01231       if (LastLineNoResult < Content->NumLines)
01232         SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
01233     }
01234   }
01235 
01236   // If the spread is large, do a "radix" test as our initial guess, based on
01237   // the assumption that lines average to approximately the same length.
01238   // NOTE: This is currently disabled, as it does not appear to be profitable in
01239   // initial measurements.
01240   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
01241     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
01242 
01243     // Take a stab at guessing where it is.
01244     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
01245 
01246     // Check for -10 and +10 lines.
01247     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
01248     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
01249 
01250     // If the computed lower bound is less than the query location, move it in.
01251     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
01252         SourceLineCacheStart[LowerBound] < QueriedFilePos)
01253       SourceLineCache = SourceLineCacheStart+LowerBound;
01254 
01255     // If the computed upper bound is greater than the query location, move it.
01256     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
01257         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
01258       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
01259   }
01260 
01261   unsigned *Pos
01262     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
01263   unsigned LineNo = Pos-SourceLineCacheStart;
01264 
01265   LastLineNoFileIDQuery = FID;
01266   LastLineNoContentCache = Content;
01267   LastLineNoFilePos = QueriedFilePos;
01268   LastLineNoResult = LineNo;
01269   return LineNo;
01270 }
01271 
01272 unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc, 
01273                                               bool *Invalid) const {
01274   if (isInvalid(Loc, Invalid)) return 0;
01275   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
01276   return getLineNumber(LocInfo.first, LocInfo.second);
01277 }
01278 unsigned SourceManager::getExpansionLineNumber(SourceLocation Loc,
01279                                                bool *Invalid) const {
01280   if (isInvalid(Loc, Invalid)) return 0;
01281   std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
01282   return getLineNumber(LocInfo.first, LocInfo.second);
01283 }
01284 unsigned SourceManager::getPresumedLineNumber(SourceLocation Loc,
01285                                               bool *Invalid) const {
01286   if (isInvalid(Loc, Invalid)) return 0;
01287   return getPresumedLoc(Loc).getLine();
01288 }
01289 
01290 /// getFileCharacteristic - return the file characteristic of the specified
01291 /// source location, indicating whether this is a normal file, a system
01292 /// header, or an "implicit extern C" system header.
01293 ///
01294 /// This state can be modified with flags on GNU linemarker directives like:
01295 ///   # 4 "foo.h" 3
01296 /// which changes all source locations in the current file after that to be
01297 /// considered to be from a system header.
01298 SrcMgr::CharacteristicKind
01299 SourceManager::getFileCharacteristic(SourceLocation Loc) const {
01300   assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
01301   std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
01302   bool Invalid = false;
01303   const SLocEntry &SEntry = getSLocEntry(LocInfo.first, &Invalid);
01304   if (Invalid || !SEntry.isFile())
01305     return C_User;
01306   
01307   const SrcMgr::FileInfo &FI = SEntry.getFile();
01308 
01309   // If there are no #line directives in this file, just return the whole-file
01310   // state.
01311   if (!FI.hasLineDirectives())
01312     return FI.getFileCharacteristic();
01313 
01314   assert(LineTable && "Can't have linetable entries without a LineTable!");
01315   // See if there is a #line directive before the location.
01316   const LineEntry *Entry =
01317     LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
01318 
01319   // If this is before the first line marker, use the file characteristic.
01320   if (!Entry)
01321     return FI.getFileCharacteristic();
01322 
01323   return Entry->FileKind;
01324 }
01325 
01326 /// Return the filename or buffer identifier of the buffer the location is in.
01327 /// Note that this name does not respect #line directives.  Use getPresumedLoc
01328 /// for normal clients.
01329 const char *SourceManager::getBufferName(SourceLocation Loc, 
01330                                          bool *Invalid) const {
01331   if (isInvalid(Loc, Invalid)) return "<invalid loc>";
01332 
01333   return getBuffer(getFileID(Loc), Invalid)->getBufferIdentifier();
01334 }
01335 
01336 
01337 /// getPresumedLoc - This method returns the "presumed" location of a
01338 /// SourceLocation specifies.  A "presumed location" can be modified by #line
01339 /// or GNU line marker directives.  This provides a view on the data that a
01340 /// user should see in diagnostics, for example.
01341 ///
01342 /// Note that a presumed location is always given as the expansion point of an
01343 /// expansion location, not at the spelling location.
01344 PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
01345   if (Loc.isInvalid()) return PresumedLoc();
01346 
01347   // Presumed locations are always for expansion points.
01348   std::pair<FileID, unsigned> LocInfo = getDecomposedExpansionLoc(Loc);
01349 
01350   bool Invalid = false;
01351   const SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid);
01352   if (Invalid || !Entry.isFile())
01353     return PresumedLoc();
01354   
01355   const SrcMgr::FileInfo &FI = Entry.getFile();
01356   const SrcMgr::ContentCache *C = FI.getContentCache();
01357 
01358   // To get the source name, first consult the FileEntry (if one exists)
01359   // before the MemBuffer as this will avoid unnecessarily paging in the
01360   // MemBuffer.
01361   const char *Filename;
01362   if (C->OrigEntry)
01363     Filename = C->OrigEntry->getName();
01364   else
01365     Filename = C->getBuffer(Diag, *this)->getBufferIdentifier();
01366 
01367   unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second, &Invalid);
01368   if (Invalid)
01369     return PresumedLoc();
01370   unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid);
01371   if (Invalid)
01372     return PresumedLoc();
01373   
01374   SourceLocation IncludeLoc = FI.getIncludeLoc();
01375 
01376   // If we have #line directives in this file, update and overwrite the physical
01377   // location info if appropriate.
01378   if (FI.hasLineDirectives()) {
01379     assert(LineTable && "Can't have linetable entries without a LineTable!");
01380     // See if there is a #line directive before this.  If so, get it.
01381     if (const LineEntry *Entry =
01382           LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
01383       // If the LineEntry indicates a filename, use it.
01384       if (Entry->FilenameID != -1)
01385         Filename = LineTable->getFilename(Entry->FilenameID);
01386 
01387       // Use the line number specified by the LineEntry.  This line number may
01388       // be multiple lines down from the line entry.  Add the difference in
01389       // physical line numbers from the query point and the line marker to the
01390       // total.
01391       unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
01392       LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
01393 
01394       // Note that column numbers are not molested by line markers.
01395 
01396       // Handle virtual #include manipulation.
01397       if (Entry->IncludeOffset) {
01398         IncludeLoc = getLocForStartOfFile(LocInfo.first);
01399         IncludeLoc = IncludeLoc.getLocWithOffset(Entry->IncludeOffset);
01400       }
01401     }
01402   }
01403 
01404   return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
01405 }
01406 
01407 /// \brief The size of the SLocEnty that \arg FID represents.
01408 unsigned SourceManager::getFileIDSize(FileID FID) const {
01409   bool Invalid = false;
01410   const SrcMgr::SLocEntry &Entry = getSLocEntry(FID, &Invalid);
01411   if (Invalid)
01412     return 0;
01413 
01414   int ID = FID.ID;
01415   unsigned NextOffset;
01416   if ((ID > 0 && unsigned(ID+1) == local_sloc_entry_size()))
01417     NextOffset = getNextLocalOffset();
01418   else if (ID+1 == -1)
01419     NextOffset = MaxLoadedOffset;
01420   else
01421     NextOffset = getSLocEntry(FileID::get(ID+1)).getOffset();
01422 
01423   return NextOffset - Entry.getOffset() - 1;
01424 }
01425 
01426 //===----------------------------------------------------------------------===//
01427 // Other miscellaneous methods.
01428 //===----------------------------------------------------------------------===//
01429 
01430 /// \brief Retrieve the inode for the given file entry, if possible.
01431 ///
01432 /// This routine involves a system call, and therefore should only be used
01433 /// in non-performance-critical code.
01434 static llvm::Optional<ino_t> getActualFileInode(const FileEntry *File) {
01435   if (!File)
01436     return llvm::Optional<ino_t>();
01437   
01438   struct stat StatBuf;
01439   if (::stat(File->getName(), &StatBuf))
01440     return llvm::Optional<ino_t>();
01441     
01442   return StatBuf.st_ino;
01443 }
01444 
01445 /// \brief Get the source location for the given file:line:col triplet.
01446 ///
01447 /// If the source file is included multiple times, the source location will
01448 /// be based upon an arbitrary inclusion.
01449 SourceLocation SourceManager::translateFileLineCol(const FileEntry *SourceFile,
01450                                                   unsigned Line,
01451                                                   unsigned Col) const {
01452   assert(SourceFile && "Null source file!");
01453   assert(Line && Col && "Line and column should start from 1!");
01454 
01455   FileID FirstFID = translateFile(SourceFile);
01456   return translateLineCol(FirstFID, Line, Col);
01457 }
01458 
01459 /// \brief Get the FileID for the given file.
01460 ///
01461 /// If the source file is included multiple times, the FileID will be the
01462 /// first inclusion.
01463 FileID SourceManager::translateFile(const FileEntry *SourceFile) const {
01464   assert(SourceFile && "Null source file!");
01465 
01466   // Find the first file ID that corresponds to the given file.
01467   FileID FirstFID;
01468 
01469   // First, check the main file ID, since it is common to look for a
01470   // location in the main file.
01471   llvm::Optional<ino_t> SourceFileInode;
01472   llvm::Optional<StringRef> SourceFileName;
01473   if (!MainFileID.isInvalid()) {
01474     bool Invalid = false;
01475     const SLocEntry &MainSLoc = getSLocEntry(MainFileID, &Invalid);
01476     if (Invalid)
01477       return FileID();
01478     
01479     if (MainSLoc.isFile()) {
01480       const ContentCache *MainContentCache
01481         = MainSLoc.getFile().getContentCache();
01482       if (!MainContentCache) {
01483         // Can't do anything
01484       } else if (MainContentCache->OrigEntry == SourceFile) {
01485         FirstFID = MainFileID;
01486       } else {
01487         // Fall back: check whether we have the same base name and inode
01488         // as the main file.
01489         const FileEntry *MainFile = MainContentCache->OrigEntry;
01490         SourceFileName = llvm::sys::path::filename(SourceFile->getName());
01491         if (*SourceFileName == llvm::sys::path::filename(MainFile->getName())) {
01492           SourceFileInode = getActualFileInode(SourceFile);
01493           if (SourceFileInode) {
01494             if (llvm::Optional<ino_t> MainFileInode 
01495                                                = getActualFileInode(MainFile)) {
01496               if (*SourceFileInode == *MainFileInode) {
01497                 FirstFID = MainFileID;
01498                 SourceFile = MainFile;
01499               }
01500             }
01501           }
01502         }
01503       }
01504     }
01505   }
01506 
01507   if (FirstFID.isInvalid()) {
01508     // The location we're looking for isn't in the main file; look
01509     // through all of the local source locations.
01510     for (unsigned I = 0, N = local_sloc_entry_size(); I != N; ++I) {
01511       bool Invalid = false;
01512       const SLocEntry &SLoc = getLocalSLocEntry(I, &Invalid);
01513       if (Invalid)
01514         return FileID();
01515       
01516       if (SLoc.isFile() && 
01517           SLoc.getFile().getContentCache() &&
01518           SLoc.getFile().getContentCache()->OrigEntry == SourceFile) {
01519         FirstFID = FileID::get(I);
01520         break;
01521       }
01522     }
01523     // If that still didn't help, try the modules.
01524     if (FirstFID.isInvalid()) {
01525       for (unsigned I = 0, N = loaded_sloc_entry_size(); I != N; ++I) {
01526         const SLocEntry &SLoc = getLoadedSLocEntry(I);
01527         if (SLoc.isFile() && 
01528             SLoc.getFile().getContentCache() &&
01529             SLoc.getFile().getContentCache()->OrigEntry == SourceFile) {
01530           FirstFID = FileID::get(-int(I) - 2);
01531           break;
01532         }
01533       }
01534     }
01535   }
01536 
01537   // If we haven't found what we want yet, try again, but this time stat()
01538   // each of the files in case the files have changed since we originally 
01539   // parsed the file. 
01540   if (FirstFID.isInvalid() &&
01541       (SourceFileName || 
01542        (SourceFileName = llvm::sys::path::filename(SourceFile->getName()))) &&
01543       (SourceFileInode ||
01544        (SourceFileInode = getActualFileInode(SourceFile)))) {
01545     bool Invalid = false;
01546     for (unsigned I = 0, N = local_sloc_entry_size(); I != N; ++I) {
01547       FileID IFileID;
01548       IFileID.ID = I;
01549       const SLocEntry &SLoc = getSLocEntry(IFileID, &Invalid);
01550       if (Invalid)
01551         return FileID();
01552       
01553       if (SLoc.isFile()) { 
01554         const ContentCache *FileContentCache 
01555           = SLoc.getFile().getContentCache();
01556       const FileEntry *Entry =FileContentCache? FileContentCache->OrigEntry : 0;
01557         if (Entry && 
01558             *SourceFileName == llvm::sys::path::filename(Entry->getName())) {
01559           if (llvm::Optional<ino_t> EntryInode = getActualFileInode(Entry)) {
01560             if (*SourceFileInode == *EntryInode) {
01561               FirstFID = FileID::get(I);
01562               SourceFile = Entry;
01563               break;
01564             }
01565           }
01566         }
01567       }
01568     }      
01569   }
01570   
01571   return FirstFID;
01572 }
01573 
01574 /// \brief Get the source location in \arg FID for the given line:col.
01575 /// Returns null location if \arg FID is not a file SLocEntry.
01576 SourceLocation SourceManager::translateLineCol(FileID FID,
01577                                                unsigned Line,
01578                                                unsigned Col) const {
01579   if (FID.isInvalid())
01580     return SourceLocation();
01581 
01582   bool Invalid = false;
01583   const SLocEntry &Entry = getSLocEntry(FID, &Invalid);
01584   if (Invalid)
01585     return SourceLocation();
01586   
01587   if (!Entry.isFile())
01588     return SourceLocation();
01589 
01590   SourceLocation FileLoc = SourceLocation::getFileLoc(Entry.getOffset());
01591 
01592   if (Line == 1 && Col == 1)
01593     return FileLoc;
01594 
01595   ContentCache *Content
01596     = const_cast<ContentCache *>(Entry.getFile().getContentCache());
01597   if (!Content)
01598     return SourceLocation();
01599     
01600   // If this is the first use of line information for this buffer, compute the
01601   // SourceLineCache for it on demand.
01602   if (Content->SourceLineCache == 0) {
01603     bool MyInvalid = false;
01604     ComputeLineNumbers(Diag, Content, ContentCacheAlloc, *this, MyInvalid);
01605     if (MyInvalid)
01606       return SourceLocation();
01607   }
01608 
01609   if (Line > Content->NumLines) {
01610     unsigned Size = Content->getBuffer(Diag, *this)->getBufferSize();
01611     if (Size > 0)
01612       --Size;
01613     return FileLoc.getLocWithOffset(Size);
01614   }
01615 
01616   const llvm::MemoryBuffer *Buffer = Content->getBuffer(Diag, *this);
01617   unsigned FilePos = Content->SourceLineCache[Line - 1];
01618   const char *Buf = Buffer->getBufferStart() + FilePos;
01619   unsigned BufLength = Buffer->getBufferSize() - FilePos;
01620   if (BufLength == 0)
01621     return FileLoc.getLocWithOffset(FilePos);
01622 
01623   unsigned i = 0;
01624 
01625   // Check that the given column is valid.
01626   while (i < BufLength-1 && i < Col-1 && Buf[i] != '\n' && Buf[i] != '\r')
01627     ++i;
01628   if (i < Col-1)
01629     return FileLoc.getLocWithOffset(FilePos + i);
01630 
01631   return FileLoc.getLocWithOffset(FilePos + Col - 1);
01632 }
01633 
01634 /// \brief Compute a map of macro argument chunks to their expanded source
01635 /// location. Chunks that are not part of a macro argument will map to an
01636 /// invalid source location. e.g. if a file contains one macro argument at
01637 /// offset 100 with length 10, this is how the map will be formed:
01638 ///     0   -> SourceLocation()
01639 ///     100 -> Expanded macro arg location
01640 ///     110 -> SourceLocation()
01641 void SourceManager::computeMacroArgsCache(MacroArgsMap *&CachePtr,
01642                                           FileID FID) const {
01643   assert(!FID.isInvalid());
01644   assert(!CachePtr);
01645 
01646   CachePtr = new MacroArgsMap();
01647   MacroArgsMap &MacroArgsCache = *CachePtr;
01648   // Initially no macro argument chunk is present.
01649   MacroArgsCache.insert(std::make_pair(0, SourceLocation()));
01650 
01651   int ID = FID.ID;
01652   while (1) {
01653     ++ID;
01654     // Stop if there are no more FileIDs to check.
01655     if (ID > 0) {
01656       if (unsigned(ID) >= local_sloc_entry_size())
01657         return;
01658     } else if (ID == -1) {
01659       return;
01660     }
01661 
01662     const SrcMgr::SLocEntry &Entry = getSLocEntryByID(ID);
01663     if (Entry.isFile()) {
01664       SourceLocation IncludeLoc = Entry.getFile().getIncludeLoc();
01665       if (IncludeLoc.isInvalid())
01666         continue;
01667       if (!isInFileID(IncludeLoc, FID))
01668         return; // No more files/macros that may be "contained" in this file.
01669 
01670       // Skip the files/macros of the #include'd file, we only care about macros
01671       // that lexed macro arguments from our file.
01672       if (Entry.getFile().NumCreatedFIDs)
01673         ID += Entry.getFile().NumCreatedFIDs - 1/*because of next ++ID*/;
01674       continue;
01675     }
01676 
01677     const ExpansionInfo &ExpInfo = Entry.getExpansion();
01678 
01679     if (ExpInfo.getExpansionLocStart().isFileID()) {
01680       if (!isInFileID(ExpInfo.getExpansionLocStart(), FID))
01681         return; // No more files/macros that may be "contained" in this file.
01682     }
01683 
01684     if (!ExpInfo.isMacroArgExpansion())
01685       continue;
01686 
01687     SourceLocation SpellLoc = ExpInfo.getSpellingLoc();
01688     while (!SpellLoc.isFileID()) {
01689       std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(SpellLoc);
01690       const ExpansionInfo &Info = getSLocEntry(LocInfo.first).getExpansion();
01691       if (!Info.isMacroArgExpansion())
01692         break;
01693       SpellLoc = Info.getSpellingLoc().getLocWithOffset(LocInfo.second);
01694     }
01695     if (!SpellLoc.isFileID())
01696       continue;
01697     
01698     unsigned BeginOffs;
01699     if (!isInFileID(SpellLoc, FID, &BeginOffs))
01700       continue;
01701 
01702     unsigned EndOffs = BeginOffs + getFileIDSize(FileID::get(ID));
01703 
01704     // Add a new chunk for this macro argument. A previous macro argument chunk
01705     // may have been lexed again, so e.g. if the map is
01706     //     0   -> SourceLocation()
01707     //     100 -> Expanded loc #1
01708     //     110 -> SourceLocation()
01709     // and we found a new macro FileID that lexed from offet 105 with length 3,
01710     // the new map will be:
01711     //     0   -> SourceLocation()
01712     //     100 -> Expanded loc #1
01713     //     105 -> Expanded loc #2
01714     //     108 -> Expanded loc #1
01715     //     110 -> SourceLocation()
01716     //
01717     // Since re-lexed macro chunks will always be the same size or less of
01718     // previous chunks, we only need to find where the ending of the new macro
01719     // chunk is mapped to and update the map with new begin/end mappings.
01720 
01721     MacroArgsMap::iterator I = MacroArgsCache.upper_bound(EndOffs);
01722     --I;
01723     SourceLocation EndOffsMappedLoc = I->second;
01724     MacroArgsCache[BeginOffs] = SourceLocation::getMacroLoc(Entry.getOffset());
01725     MacroArgsCache[EndOffs] = EndOffsMappedLoc;
01726   }
01727 }
01728 
01729 /// \brief If \arg Loc points inside a function macro argument, the returned
01730 /// location will be the macro location in which the argument was expanded.
01731 /// If a macro argument is used multiple times, the expanded location will
01732 /// be at the first expansion of the argument.
01733 /// e.g.
01734 ///   MY_MACRO(foo);
01735 ///             ^
01736 /// Passing a file location pointing at 'foo', will yield a macro location
01737 /// where 'foo' was expanded into.
01738 SourceLocation
01739 SourceManager::getMacroArgExpandedLocation(SourceLocation Loc) const {
01740   if (Loc.isInvalid() || !Loc.isFileID())
01741     return Loc;
01742 
01743   FileID FID;
01744   unsigned Offset;
01745   llvm::tie(FID, Offset) = getDecomposedLoc(Loc);
01746   if (FID.isInvalid())
01747     return Loc;
01748 
01749   MacroArgsMap *&MacroArgsCache = MacroArgsCacheMap[FID];
01750   if (!MacroArgsCache)
01751     computeMacroArgsCache(MacroArgsCache, FID);
01752 
01753   assert(!MacroArgsCache->empty());
01754   MacroArgsMap::iterator I = MacroArgsCache->upper_bound(Offset);
01755   --I;
01756   
01757   unsigned MacroArgBeginOffs = I->first;
01758   SourceLocation MacroArgExpandedLoc = I->second;
01759   if (MacroArgExpandedLoc.isValid())
01760     return MacroArgExpandedLoc.getLocWithOffset(Offset - MacroArgBeginOffs);
01761 
01762   return Loc;
01763 }
01764 
01765 /// Given a decomposed source location, move it up the include/expansion stack
01766 /// to the parent source location.  If this is possible, return the decomposed
01767 /// version of the parent in Loc and return false.  If Loc is the top-level
01768 /// entry, return true and don't modify it.
01769 static bool MoveUpIncludeHierarchy(std::pair<FileID, unsigned> &Loc,
01770                                    const SourceManager &SM) {
01771   SourceLocation UpperLoc;
01772   const SrcMgr::SLocEntry &Entry = SM.getSLocEntry(Loc.first);
01773   if (Entry.isExpansion())
01774     UpperLoc = Entry.getExpansion().getExpansionLocStart();
01775   else
01776     UpperLoc = Entry.getFile().getIncludeLoc();
01777   
01778   if (UpperLoc.isInvalid())
01779     return true; // We reached the top.
01780   
01781   Loc = SM.getDecomposedLoc(UpperLoc);
01782   return false;
01783 }
01784   
01785 
01786 /// \brief Determines the order of 2 source locations in the translation unit.
01787 ///
01788 /// \returns true if LHS source location comes before RHS, false otherwise.
01789 bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
01790                                               SourceLocation RHS) const {
01791   assert(LHS.isValid() && RHS.isValid() && "Passed invalid source location!");
01792   if (LHS == RHS)
01793     return false;
01794 
01795   std::pair<FileID, unsigned> LOffs = getDecomposedLoc(LHS);
01796   std::pair<FileID, unsigned> ROffs = getDecomposedLoc(RHS);
01797 
01798   // If the source locations are in the same file, just compare offsets.
01799   if (LOffs.first == ROffs.first)
01800     return LOffs.second < ROffs.second;
01801 
01802   // If we are comparing a source location with multiple locations in the same
01803   // file, we get a big win by caching the result.
01804   if (IsBeforeInTUCache.isCacheValid(LOffs.first, ROffs.first))
01805     return IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second);
01806 
01807   // Okay, we missed in the cache, start updating the cache for this query.
01808   IsBeforeInTUCache.setQueryFIDs(LOffs.first, ROffs.first,
01809                           /*isLFIDBeforeRFID=*/LOffs.first.ID < ROffs.first.ID);
01810 
01811   // We need to find the common ancestor. The only way of doing this is to
01812   // build the complete include chain for one and then walking up the chain
01813   // of the other looking for a match.
01814   // We use a map from FileID to Offset to store the chain. Easier than writing
01815   // a custom set hash info that only depends on the first part of a pair.
01816   typedef llvm::DenseMap<FileID, unsigned> LocSet;
01817   LocSet LChain;
01818   do {
01819     LChain.insert(LOffs);
01820     // We catch the case where LOffs is in a file included by ROffs and
01821     // quit early. The other way round unfortunately remains suboptimal.
01822   } while (LOffs.first != ROffs.first && !MoveUpIncludeHierarchy(LOffs, *this));
01823   LocSet::iterator I;
01824   while((I = LChain.find(ROffs.first)) == LChain.end()) {
01825     if (MoveUpIncludeHierarchy(ROffs, *this))
01826       break; // Met at topmost file.
01827   }
01828   if (I != LChain.end())
01829     LOffs = *I;
01830 
01831   // If we exited because we found a nearest common ancestor, compare the
01832   // locations within the common file and cache them.
01833   if (LOffs.first == ROffs.first) {
01834     IsBeforeInTUCache.setCommonLoc(LOffs.first, LOffs.second, ROffs.second);
01835     return IsBeforeInTUCache.getCachedResult(LOffs.second, ROffs.second);
01836   }
01837 
01838   // This can happen if a location is in a built-ins buffer.
01839   // But see PR5662.
01840   // Clear the lookup cache, it depends on a common location.
01841   IsBeforeInTUCache.clear();
01842   bool LIsBuiltins = strcmp("<built-in>",
01843                             getBuffer(LOffs.first)->getBufferIdentifier()) == 0;
01844   bool RIsBuiltins = strcmp("<built-in>",
01845                             getBuffer(ROffs.first)->getBufferIdentifier()) == 0;
01846   // built-in is before non-built-in
01847   if (LIsBuiltins != RIsBuiltins)
01848     return LIsBuiltins;
01849   assert(LIsBuiltins && RIsBuiltins &&
01850          "Non-built-in locations must be rooted in the main file");
01851   // Both are in built-in buffers, but from different files. We just claim that
01852   // lower IDs come first.
01853   return LOffs.first < ROffs.first;
01854 }
01855 
01856 /// PrintStats - Print statistics to stderr.
01857 ///
01858 void SourceManager::PrintStats() const {
01859   llvm::errs() << "\n*** Source Manager Stats:\n";
01860   llvm::errs() << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
01861                << " mem buffers mapped.\n";
01862   llvm::errs() << LocalSLocEntryTable.size() << " local SLocEntry's allocated ("
01863                << llvm::capacity_in_bytes(LocalSLocEntryTable)
01864                << " bytes of capacity), "
01865                << NextLocalOffset << "B of Sloc address space used.\n";
01866   llvm::errs() << LoadedSLocEntryTable.size()
01867                << " loaded SLocEntries allocated, "
01868                << MaxLoadedOffset - CurrentLoadedOffset
01869                << "B of Sloc address space used.\n";
01870   
01871   unsigned NumLineNumsComputed = 0;
01872   unsigned NumFileBytesMapped = 0;
01873   for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
01874     NumLineNumsComputed += I->second->SourceLineCache != 0;
01875     NumFileBytesMapped  += I->second->getSizeBytesMapped();
01876   }
01877   unsigned NumMacroArgsComputed = MacroArgsCacheMap.size();
01878 
01879   llvm::errs() << NumFileBytesMapped << " bytes of files mapped, "
01880                << NumLineNumsComputed << " files with line #'s computed, "
01881                << NumMacroArgsComputed << " files with macro args computed.\n";
01882   llvm::errs() << "FileID scans: " << NumLinearScans << " linear, "
01883                << NumBinaryProbes << " binary.\n";
01884 }
01885 
01886 ExternalSLocEntrySource::~ExternalSLocEntrySource() { }
01887 
01888 /// Return the amount of memory used by memory buffers, breaking down
01889 /// by heap-backed versus mmap'ed memory.
01890 SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const {
01891   size_t malloc_bytes = 0;
01892   size_t mmap_bytes = 0;
01893   
01894   for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i)
01895     if (size_t sized_mapped = MemBufferInfos[i]->getSizeBytesMapped())
01896       switch (MemBufferInfos[i]->getMemoryBufferKind()) {
01897         case llvm::MemoryBuffer::MemoryBuffer_MMap:
01898           mmap_bytes += sized_mapped;
01899           break;
01900         case llvm::MemoryBuffer::MemoryBuffer_Malloc:
01901           malloc_bytes += sized_mapped;
01902           break;
01903       }
01904   
01905   return MemoryBufferSizes(malloc_bytes, mmap_bytes);
01906 }
01907 
01908 size_t SourceManager::getDataStructureSizes() const {
01909   size_t size = llvm::capacity_in_bytes(MemBufferInfos)
01910     + llvm::capacity_in_bytes(LocalSLocEntryTable)
01911     + llvm::capacity_in_bytes(LoadedSLocEntryTable)
01912     + llvm::capacity_in_bytes(SLocEntryLoaded)
01913     + llvm::capacity_in_bytes(FileInfos);
01914   
01915   if (OverriddenFilesInfo)
01916     size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);
01917 
01918   return size;
01919 }