1//===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is the code that manages TBAA information and defines the TBAA policy
10// for the optimizer to use. Relevant standards text includes:
11//
12//   C99 6.5p7
13//   C++ [basic.lval] (p10 in n3126, p15 in some earlier versions)
14//
15//===----------------------------------------------------------------------===//
16
17#include "CodeGenTBAA.h"
18#include "clang/AST/ASTContext.h"
19#include "clang/AST/Attr.h"
20#include "clang/AST/Mangle.h"
21#include "clang/AST/RecordLayout.h"
22#include "clang/Basic/CodeGenOptions.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/LLVMContext.h"
26#include "llvm/IR/Metadata.h"
27#include "llvm/IR/Module.h"
28#include "llvm/IR/Type.h"
29using namespace clang;
30using namespace CodeGen;
31
32CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::Module &M,
33                         const CodeGenOptions &CGO,
34                         const LangOptions &Features, MangleContext &MContext)
35  : Context(Ctx), Module(M), CodeGenOpts(CGO),
36    Features(Features), MContext(MContext), MDHelper(M.getContext()),
37    Root(nullptr), Char(nullptr)
38{}
39
40CodeGenTBAA::~CodeGenTBAA() {
41}
42
43llvm::MDNode *CodeGenTBAA::getRoot() {
44  // Define the root of the tree. This identifies the tree, so that
45  // if our LLVM IR is linked with LLVM IR from a different front-end
46  // (or a different version of this front-end), their TBAA trees will
47  // remain distinct, and the optimizer will treat them conservatively.
48  if (!Root) {
49    if (Features.CPlusPlus)
50      Root = MDHelper.createTBAARoot("Simple C++ TBAA");
51    else
52      Root = MDHelper.createTBAARoot("Simple C/C++ TBAA");
53  }
54
55  return Root;
56}
57
58llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name,
59                                                llvm::MDNode *Parent,
60                                                uint64_t Size) {
61  if (CodeGenOpts.NewStructPathTBAA) {
62    llvm::Metadata *Id = MDHelper.createString(Name);
63    return MDHelper.createTBAATypeNode(Parent, Size, Id);
64  }
65  return MDHelper.createTBAAScalarTypeNode(Name, Parent);
66}
67
68llvm::MDNode *CodeGenTBAA::getChar() {
69  // Define the root of the tree for user-accessible memory. C and C++
70  // give special powers to char and certain similar types. However,
71  // these special powers only cover user-accessible memory, and doesn't
72  // include things like vtables.
73  if (!Char)
74    Char = createScalarTypeNode("omnipotent char", getRoot(), /* Size= */ 1);
75
76  return Char;
77}
78
79static bool TypeHasMayAlias(QualType QTy) {
80  // Tagged types have declarations, and therefore may have attributes.
81  if (auto *TD = QTy->getAsTagDecl())
82    if (TD->hasAttr<MayAliasAttr>())
83      return true;
84
85  // Also look for may_alias as a declaration attribute on a typedef.
86  // FIXME: We should follow GCC and model may_alias as a type attribute
87  // rather than as a declaration attribute.
88  while (auto *TT = QTy->getAs<TypedefType>()) {
89    if (TT->getDecl()->hasAttr<MayAliasAttr>())
90      return true;
91    QTy = TT->desugar();
92  }
93  return false;
94}
95
96/// Check if the given type is a valid base type to be used in access tags.
97static bool isValidBaseType(QualType QTy) {
98  if (QTy->isReferenceType())
99    return false;
100  if (const RecordType *TTy = QTy->getAs<RecordType>()) {
101    const RecordDecl *RD = TTy->getDecl()->getDefinition();
102    // Incomplete types are not valid base access types.
103    if (!RD)
104      return false;
105    if (RD->hasFlexibleArrayMember())
106      return false;
107    // RD can be struct, union, class, interface or enum.
108    // For now, we only handle struct and class.
109    if (RD->isStruct() || RD->isClass())
110      return true;
111  }
112  return false;
113}
114
115llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
116  uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
117
118  // Handle builtin types.
119  if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) {
120    switch (BTy->getKind()) {
121    // Character types are special and can alias anything.
122    // In C++, this technically only includes "char" and "unsigned char",
123    // and not "signed char". In C, it includes all three. For now,
124    // the risk of exploiting this detail in C++ seems likely to outweigh
125    // the benefit.
126    case BuiltinType::Char_U:
127    case BuiltinType::Char_S:
128    case BuiltinType::UChar:
129    case BuiltinType::SChar:
130      return getChar();
131
132    // Unsigned types can alias their corresponding signed types.
133    case BuiltinType::UShort:
134      return getTypeInfo(Context.ShortTy);
135    case BuiltinType::UInt:
136      return getTypeInfo(Context.IntTy);
137    case BuiltinType::ULong:
138      return getTypeInfo(Context.LongTy);
139    case BuiltinType::ULongLong:
140      return getTypeInfo(Context.LongLongTy);
141    case BuiltinType::UInt128:
142      return getTypeInfo(Context.Int128Ty);
143
144    case BuiltinType::UShortFract:
145      return getTypeInfo(Context.ShortFractTy);
146    case BuiltinType::UFract:
147      return getTypeInfo(Context.FractTy);
148    case BuiltinType::ULongFract:
149      return getTypeInfo(Context.LongFractTy);
150
151    case BuiltinType::SatUShortFract:
152      return getTypeInfo(Context.SatShortFractTy);
153    case BuiltinType::SatUFract:
154      return getTypeInfo(Context.SatFractTy);
155    case BuiltinType::SatULongFract:
156      return getTypeInfo(Context.SatLongFractTy);
157
158    case BuiltinType::UShortAccum:
159      return getTypeInfo(Context.ShortAccumTy);
160    case BuiltinType::UAccum:
161      return getTypeInfo(Context.AccumTy);
162    case BuiltinType::ULongAccum:
163      return getTypeInfo(Context.LongAccumTy);
164
165    case BuiltinType::SatUShortAccum:
166      return getTypeInfo(Context.SatShortAccumTy);
167    case BuiltinType::SatUAccum:
168      return getTypeInfo(Context.SatAccumTy);
169    case BuiltinType::SatULongAccum:
170      return getTypeInfo(Context.SatLongAccumTy);
171
172    // Treat all other builtin types as distinct types. This includes
173    // treating wchar_t, char16_t, and char32_t as distinct from their
174    // "underlying types".
175    default:
176      return createScalarTypeNode(BTy->getName(Features), getChar(), Size);
177    }
178  }
179
180  // C++1z [basic.lval]p10: "If a program attempts to access the stored value of
181  // an object through a glvalue of other than one of the following types the
182  // behavior is undefined: [...] a char, unsigned char, or std::byte type."
183  if (Ty->isStdByteType())
184    return getChar();
185
186  // Handle pointers and references.
187  // TODO: Implement C++'s type "similarity" and consider dis-"similar"
188  // pointers distinct.
189  if (Ty->isPointerType() || Ty->isReferenceType())
190    return createScalarTypeNode("any pointer", getChar(), Size);
191
192  // Accesses to arrays are accesses to objects of their element types.
193  if (CodeGenOpts.NewStructPathTBAA && Ty->isArrayType())
194    return getTypeInfo(cast<ArrayType>(Ty)->getElementType());
195
196  // Enum types are distinct types. In C++ they have "underlying types",
197  // however they aren't related for TBAA.
198  if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) {
199    // In C++ mode, types have linkage, so we can rely on the ODR and
200    // on their mangled names, if they're external.
201    // TODO: Is there a way to get a program-wide unique name for a
202    // decl with local linkage or no linkage?
203    if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible())
204      return getChar();
205
206    SmallString<256> OutName;
207    llvm::raw_svector_ostream Out(OutName);
208    MContext.mangleTypeName(QualType(ETy, 0), Out);
209    return createScalarTypeNode(OutName, getChar(), Size);
210  }
211
212  if (const auto *EIT = dyn_cast<BitIntType>(Ty)) {
213    SmallString<256> OutName;
214    llvm::raw_svector_ostream Out(OutName);
215    // Don't specify signed/unsigned since integer types can alias despite sign
216    // differences.
217    Out << "_BitInt(" << EIT->getNumBits() << ')';
218    return createScalarTypeNode(OutName, getChar(), Size);
219  }
220
221  // For now, handle any other kind of type conservatively.
222  return getChar();
223}
224
225llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
226  // At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
227  if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
228    return nullptr;
229
230  // If the type has the may_alias attribute (even on a typedef), it is
231  // effectively in the general char alias class.
232  if (TypeHasMayAlias(QTy))
233    return getChar();
234
235  // We need this function to not fall back to returning the "omnipotent char"
236  // type node for aggregate and union types. Otherwise, any dereference of an
237  // aggregate will result into the may-alias access descriptor, meaning all
238  // subsequent accesses to direct and indirect members of that aggregate will
239  // be considered may-alias too.
240  // TODO: Combine getTypeInfo() and getBaseTypeInfo() into a single function.
241  if (isValidBaseType(QTy))
242    return getBaseTypeInfo(QTy);
243
244  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
245  if (llvm::MDNode *N = MetadataCache[Ty])
246    return N;
247
248  // Note that the following helper call is allowed to add new nodes to the
249  // cache, which invalidates all its previously obtained iterators. So we
250  // first generate the node for the type and then add that node to the cache.
251  llvm::MDNode *TypeNode = getTypeInfoHelper(Ty);
252  return MetadataCache[Ty] = TypeNode;
253}
254
255TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) {
256  // Pointee values may have incomplete types, but they shall never be
257  // dereferenced.
258  if (AccessType->isIncompleteType())
259    return TBAAAccessInfo::getIncompleteInfo();
260
261  if (TypeHasMayAlias(AccessType))
262    return TBAAAccessInfo::getMayAliasInfo();
263
264  uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity();
265  return TBAAAccessInfo(getTypeInfo(AccessType), Size);
266}
267
268TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
269  llvm::DataLayout DL(&Module);
270  unsigned Size = DL.getPointerTypeSize(VTablePtrType);
271  return TBAAAccessInfo(createScalarTypeNode("vtable pointer", getRoot(), Size),
272                        Size);
273}
274
275bool
276CodeGenTBAA::CollectFields(uint64_t BaseOffset,
277                           QualType QTy,
278                           SmallVectorImpl<llvm::MDBuilder::TBAAStructField> &
279                             Fields,
280                           bool MayAlias) {
281  /* Things not handled yet include: C++ base classes, bitfields, */
282
283  if (const RecordType *TTy = QTy->getAs<RecordType>()) {
284    const RecordDecl *RD = TTy->getDecl()->getDefinition();
285    if (RD->hasFlexibleArrayMember())
286      return false;
287
288    // TODO: Handle C++ base classes.
289    if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(RD))
290      if (Decl->bases_begin() != Decl->bases_end())
291        return false;
292
293    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
294
295    unsigned idx = 0;
296    for (RecordDecl::field_iterator i = RD->field_begin(),
297         e = RD->field_end(); i != e; ++i, ++idx) {
298      if ((*i)->isZeroSize(Context) || (*i)->isUnnamedBitfield())
299        continue;
300      uint64_t Offset = BaseOffset +
301                        Layout.getFieldOffset(idx) / Context.getCharWidth();
302      QualType FieldQTy = i->getType();
303      if (!CollectFields(Offset, FieldQTy, Fields,
304                         MayAlias || TypeHasMayAlias(FieldQTy)))
305        return false;
306    }
307    return true;
308  }
309
310  /* Otherwise, treat whatever it is as a field. */
311  uint64_t Offset = BaseOffset;
312  uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity();
313  llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy);
314  llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
315  Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
316  return true;
317}
318
319llvm::MDNode *
320CodeGenTBAA::getTBAAStructInfo(QualType QTy) {
321  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
322
323  if (llvm::MDNode *N = StructMetadataCache[Ty])
324    return N;
325
326  SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields;
327  if (CollectFields(0, QTy, Fields, TypeHasMayAlias(QTy)))
328    return MDHelper.createTBAAStructNode(Fields);
329
330  // For now, handle any other kind of type conservatively.
331  return StructMetadataCache[Ty] = nullptr;
332}
333
334llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
335  if (auto *TTy = dyn_cast<RecordType>(Ty)) {
336    const RecordDecl *RD = TTy->getDecl()->getDefinition();
337    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
338    using TBAAStructField = llvm::MDBuilder::TBAAStructField;
339    SmallVector<TBAAStructField, 4> Fields;
340    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
341      // Handle C++ base classes. Non-virtual bases can treated a kind of
342      // field. Virtual bases are more complex and omitted, but avoid an
343      // incomplete view for NewStructPathTBAA.
344      if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0)
345        return BaseTypeMetadataCache[Ty] = nullptr;
346      for (const CXXBaseSpecifier &B : CXXRD->bases()) {
347        if (B.isVirtual())
348          continue;
349        QualType BaseQTy = B.getType();
350        const CXXRecordDecl *BaseRD = BaseQTy->getAsCXXRecordDecl();
351        if (BaseRD->isEmpty())
352          continue;
353        llvm::MDNode *TypeNode = isValidBaseType(BaseQTy)
354                                     ? getBaseTypeInfo(BaseQTy)
355                                     : getTypeInfo(BaseQTy);
356        if (!TypeNode)
357          return BaseTypeMetadataCache[Ty] = nullptr;
358        uint64_t Offset = Layout.getBaseClassOffset(BaseRD).getQuantity();
359        uint64_t Size =
360            Context.getASTRecordLayout(BaseRD).getDataSize().getQuantity();
361        Fields.push_back(
362            llvm::MDBuilder::TBAAStructField(Offset, Size, TypeNode));
363      }
364      // The order in which base class subobjects are allocated is unspecified,
365      // so may differ from declaration order. In particular, Itanium ABI will
366      // allocate a primary base first.
367      // Since we exclude empty subobjects, the objects are not overlapping and
368      // their offsets are unique.
369      llvm::sort(Fields,
370                 [](const TBAAStructField &A, const TBAAStructField &B) {
371                   return A.Offset < B.Offset;
372                 });
373    }
374    for (FieldDecl *Field : RD->fields()) {
375      if (Field->isZeroSize(Context) || Field->isUnnamedBitfield())
376        continue;
377      QualType FieldQTy = Field->getType();
378      llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ?
379          getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
380      if (!TypeNode)
381        return BaseTypeMetadataCache[Ty] = nullptr;
382
383      uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex());
384      uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity();
385      uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity();
386      Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size,
387                                                        TypeNode));
388    }
389
390    SmallString<256> OutName;
391    if (Features.CPlusPlus) {
392      // Don't use the mangler for C code.
393      llvm::raw_svector_ostream Out(OutName);
394      MContext.mangleTypeName(QualType(Ty, 0), Out);
395    } else {
396      OutName = RD->getName();
397    }
398
399    if (CodeGenOpts.NewStructPathTBAA) {
400      llvm::MDNode *Parent = getChar();
401      uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
402      llvm::Metadata *Id = MDHelper.createString(OutName);
403      return MDHelper.createTBAATypeNode(Parent, Size, Id, Fields);
404    }
405
406    // Create the struct type node with a vector of pairs (offset, type).
407    SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes;
408    for (const auto &Field : Fields)
409        OffsetsAndTypes.push_back(std::make_pair(Field.Type, Field.Offset));
410    return MDHelper.createTBAAStructTypeNode(OutName, OffsetsAndTypes);
411  }
412
413  return nullptr;
414}
415
416llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
417  if (!isValidBaseType(QTy))
418    return nullptr;
419
420  const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
421  if (llvm::MDNode *N = BaseTypeMetadataCache[Ty])
422    return N;
423
424  // Note that the following helper call is allowed to add new nodes to the
425  // cache, which invalidates all its previously obtained iterators. So we
426  // first generate the node for the type and then add that node to the cache.
427  llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
428  return BaseTypeMetadataCache[Ty] = TypeNode;
429}
430
431llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
432  assert(!Info.isIncomplete() && "Access to an object of an incomplete type!");
433
434  if (Info.isMayAlias())
435    Info = TBAAAccessInfo(getChar(), Info.Size);
436
437  if (!Info.AccessType)
438    return nullptr;
439
440  if (!CodeGenOpts.StructPathTBAA)
441    Info = TBAAAccessInfo(Info.AccessType, Info.Size);
442
443  llvm::MDNode *&N = AccessTagMetadataCache[Info];
444  if (N)
445    return N;
446
447  if (!Info.BaseType) {
448    Info.BaseType = Info.AccessType;
449    assert(!Info.Offset && "Nonzero offset for an access with no base type!");
450  }
451  if (CodeGenOpts.NewStructPathTBAA) {
452    return N = MDHelper.createTBAAAccessTag(Info.BaseType, Info.AccessType,
453                                            Info.Offset, Info.Size);
454  }
455  return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType,
456                                              Info.Offset);
457}
458
459TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
460                                                 TBAAAccessInfo TargetInfo) {
461  if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias())
462    return TBAAAccessInfo::getMayAliasInfo();
463  return TargetInfo;
464}
465
466TBAAAccessInfo
467CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
468                                                 TBAAAccessInfo InfoB) {
469  if (InfoA == InfoB)
470    return InfoA;
471
472  if (!InfoA || !InfoB)
473    return TBAAAccessInfo();
474
475  if (InfoA.isMayAlias() || InfoB.isMayAlias())
476    return TBAAAccessInfo::getMayAliasInfo();
477
478  // TODO: Implement the rest of the logic here. For example, two accesses
479  // with same final access types result in an access to an object of that final
480  // access type regardless of their base types.
481  return TBAAAccessInfo::getMayAliasInfo();
482}
483
484TBAAAccessInfo
485CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
486                                            TBAAAccessInfo SrcInfo) {
487  if (DestInfo == SrcInfo)
488    return DestInfo;
489
490  if (!DestInfo || !SrcInfo)
491    return TBAAAccessInfo();
492
493  if (DestInfo.isMayAlias() || SrcInfo.isMayAlias())
494    return TBAAAccessInfo::getMayAliasInfo();
495
496  // TODO: Implement the rest of the logic here. For example, two accesses
497  // with same final access types result in an access to an object of that final
498  // access type regardless of their base types.
499  return TBAAAccessInfo::getMayAliasInfo();
500}
501