Skip to content

Commit 8ed1d81

Browse files
committed
[asan] Skip promotable allocas to improve performance at -O0
Currently, the ASan executables built with -O0 are unnecessarily slow. The main reason is that the ASan instrumentation pass inserts redundant checks around promotable allocas. These allocas do not get instrumented under -O1 because they get converted to virtual registers by mem2reg. With this patch, the ASan instrumentation pass will only instrument non-promotable allocas, giving us a speedup of 39% on a collection of benchmarks with -O0. (There is no measurable speedup at -O1.) llvm-svn: 230724
1 parent 2249049 commit 8ed1d81

File tree

8 files changed

+105
-36
lines changed

8 files changed

+105
-36
lines changed

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 51 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "llvm/Transforms/Utils/Cloning.h"
5050
#include "llvm/Transforms/Utils/Local.h"
5151
#include "llvm/Transforms/Utils/ModuleUtils.h"
52+
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
5253
#include <algorithm>
5354
#include <string>
5455
#include <system_error>
@@ -165,6 +166,9 @@ static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
165166
cl::init("__asan_"));
166167
static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas",
167168
cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(false));
169+
static cl::opt<bool> ClSkipPromotableAllocas("asan-skip-promotable-allocas",
170+
cl::desc("Do not instrument promotable allocas"),
171+
cl::Hidden, cl::init(true));
168172

169173
// These flags allow to change the shadow mapping.
170174
// The shadow mapping looks like
@@ -372,6 +376,17 @@ struct AddressSanitizer : public FunctionPass {
372376
void getAnalysisUsage(AnalysisUsage &AU) const override {
373377
AU.addRequired<DominatorTreeWrapperPass>();
374378
}
379+
uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
380+
Type *Ty = AI->getAllocatedType();
381+
uint64_t SizeInBytes = DL->getTypeAllocSize(Ty);
382+
return SizeInBytes;
383+
}
384+
/// Check if we want (and can) handle this alloca.
385+
bool isInterestingAlloca(AllocaInst &AI) const;
386+
/// If it is an interesting memory access, return the PointerOperand
387+
/// and set IsWrite/Alignment. Otherwise return nullptr.
388+
Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
389+
unsigned *Alignment) const;
375390
void instrumentMop(Instruction *I, bool UseCalls);
376391
void instrumentPointerComparisonOrSubtraction(Instruction *I);
377392
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
@@ -599,7 +614,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
599614

600615
/// \brief Collect Alloca instructions we want (and can) handle.
601616
void visitAllocaInst(AllocaInst &AI) {
602-
if (!isInterestingAlloca(AI)) return;
617+
if (!ASan.isInterestingAlloca(AI)) return;
603618

604619
StackAlignment = std::max(StackAlignment, AI.getAlignment());
605620
if (isDynamicAlloca(AI))
@@ -653,19 +668,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
653668
bool isDynamicAlloca(AllocaInst &AI) const {
654669
return AI.isArrayAllocation() || !AI.isStaticAlloca();
655670
}
656-
657-
// Check if we want (and can) handle this alloca.
658-
bool isInterestingAlloca(AllocaInst &AI) const {
659-
return (AI.getAllocatedType()->isSized() &&
660-
// alloca() may be called with 0 size, ignore it.
661-
getAllocaSizeInBytes(&AI) > 0);
662-
}
663-
664-
uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
665-
Type *Ty = AI->getAllocatedType();
666-
uint64_t SizeInBytes = ASan.DL->getTypeAllocSize(Ty);
667-
return SizeInBytes;
668-
}
669671
/// Finds alloca where the value comes from.
670672
AllocaInst *findAllocaForValue(Value *V);
671673
void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
@@ -775,38 +777,56 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
775777
MI->eraseFromParent();
776778
}
777779

778-
// If I is an interesting memory access, return the PointerOperand
779-
// and set IsWrite/Alignment. Otherwise return nullptr.
780-
static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
781-
unsigned *Alignment) {
780+
/// Check if we want (and can) handle this alloca.
781+
bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const {
782+
return (AI.getAllocatedType()->isSized() &&
783+
// alloca() may be called with 0 size, ignore it.
784+
getAllocaSizeInBytes(&AI) > 0 &&
785+
// We are only interested in allocas not promotable to registers.
786+
// Promotable allocas are common under -O0.
787+
(!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)));
788+
}
789+
790+
/// If I is an interesting memory access, return the PointerOperand
791+
/// and set IsWrite/Alignment. Otherwise return nullptr.
792+
Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
793+
bool *IsWrite,
794+
unsigned *Alignment) const {
782795
// Skip memory accesses inserted by another instrumentation.
783796
if (I->getMetadata("nosanitize"))
784797
return nullptr;
798+
799+
Value *PtrOperand = nullptr;
785800
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
786801
if (!ClInstrumentReads) return nullptr;
787802
*IsWrite = false;
788803
*Alignment = LI->getAlignment();
789-
return LI->getPointerOperand();
790-
}
791-
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
804+
PtrOperand = LI->getPointerOperand();
805+
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
792806
if (!ClInstrumentWrites) return nullptr;
793807
*IsWrite = true;
794808
*Alignment = SI->getAlignment();
795-
return SI->getPointerOperand();
796-
}
797-
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
809+
PtrOperand = SI->getPointerOperand();
810+
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
798811
if (!ClInstrumentAtomics) return nullptr;
799812
*IsWrite = true;
800813
*Alignment = 0;
801-
return RMW->getPointerOperand();
802-
}
803-
if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
814+
PtrOperand = RMW->getPointerOperand();
815+
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
804816
if (!ClInstrumentAtomics) return nullptr;
805817
*IsWrite = true;
806818
*Alignment = 0;
807-
return XCHG->getPointerOperand();
819+
PtrOperand = XCHG->getPointerOperand();
808820
}
809-
return nullptr;
821+
822+
// Treat memory accesses to promotable allocas as non-interesting since they
823+
// will not cause memory violations. This greatly speeds up the instrumented
824+
// executable at -O0.
825+
if (ClSkipPromotableAllocas)
826+
if (auto AI = dyn_cast_or_null<AllocaInst>(PtrOperand))
827+
return isInterestingAlloca(*AI) ? AI : nullptr;
828+
829+
return PtrOperand;
810830
}
811831

812832
static bool isPointerOperand(Value *V) {
@@ -1665,7 +1685,7 @@ void FunctionStackPoisoner::poisonStack() {
16651685
SVD.reserve(AllocaVec.size());
16661686
for (AllocaInst *AI : AllocaVec) {
16671687
ASanStackVariableDescription D = { AI->getName().data(),
1668-
getAllocaSizeInBytes(AI),
1688+
ASan.getAllocaSizeInBytes(AI),
16691689
AI->getAlignment(), AI, 0};
16701690
SVD.push_back(D);
16711691
}
@@ -1856,7 +1876,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
18561876
AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
18571877
if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
18581878
// We're interested only in allocas we can handle.
1859-
return isInterestingAlloca(*AI) ? AI : nullptr;
1879+
return ASan.isInterestingAlloca(*AI) ? AI : nullptr;
18601880
// See if we've already calculated (or started to calculate) alloca for a
18611881
// given value.
18621882
AllocaForValueMapTy::iterator I = AllocaForValue.find(V);

llvm/test/Instrumentation/AddressSanitizer/debug_info.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ define i32 @_Z3zzzi(i32 %p) nounwind uwtable sanitize_address {
1010
entry:
1111
%p.addr = alloca i32, align 4
1212
%r = alloca i32, align 4
13-
store i32 %p, i32* %p.addr, align 4
13+
store volatile i32 %p, i32* %p.addr, align 4
1414
call void @llvm.dbg.declare(metadata i32* %p.addr, metadata !10, metadata !{!"0x102"}), !dbg !11
1515
call void @llvm.dbg.declare(metadata i32* %r, metadata !12, metadata !{!"0x102"}), !dbg !14
1616
%0 = load i32* %p.addr, align 4, !dbg !14
1717
%add = add nsw i32 %0, 1, !dbg !14
18-
store i32 %add, i32* %r, align 4, !dbg !14
18+
store volatile i32 %add, i32* %r, align 4, !dbg !14
1919
%1 = load i32* %r, align 4, !dbg !15
2020
ret i32 %1, !dbg !15
2121
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=1 -S | FileCheck %s --check-prefix=CHECK
2+
3+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
4+
target triple = "x86_64-apple-macosx10.10.0"
5+
6+
define i32 @test_promotable_allocas() sanitize_address {
7+
entry:
8+
; CHECK: %0 = alloca i32, align 4
9+
; CHECK: store i32 0, i32* %0, align 4
10+
; CHECK: %1 = load i32* %0, align 4
11+
; CHECK: ret i32 %1
12+
13+
; CHECK-NOT: __asan_stack_malloc_0
14+
; CHECK-NOT: icmp
15+
; CHECK-NOT: call void @__asan_report_store4
16+
17+
%0 = alloca i32, align 4
18+
store i32 0, i32* %0, align 4
19+
%1 = load i32* %0, align 4
20+
ret i32 %1
21+
}

llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@ entry:
1515
; CHECK-NOALLOCA-NOT: store i32 -875836469
1616
%0 = alloca i32, align 4
1717
%1 = alloca i8*
18-
store i32 %len, i32* %0, align 4
18+
store volatile i32 %len, i32* %0, align 4
1919
%2 = load i32* %0, align 4
2020
%3 = zext i32 %2 to i64
2121
%4 = alloca i8, i64 %3, align 32
22+
store volatile i8 0, i8* %4
2223
ret void
2324
}
2425

llvm/test/Instrumentation/AddressSanitizer/lifetime-uar.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ entry:
1717
; Memory is unpoisoned at llvm.lifetime.start
1818
; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 1)
1919

20-
store i32 0, i32* %retval
21-
store i8 0, i8* %c, align 1
20+
store volatile i32 0, i32* %retval
21+
store volatile i8 0, i8* %c, align 1
2222

2323
call void @llvm.lifetime.end(i64 1, i8* %c)
2424
; Memory is poisoned at llvm.lifetime.end

llvm/test/Instrumentation/AddressSanitizer/lifetime.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ entry:
1212
%i = alloca i32, align 4
1313
%i.ptr = bitcast i32* %i to i8*
1414
call void @llvm.lifetime.start(i64 -1, i8* %i.ptr)
15+
store volatile i8 0, i8* %i.ptr
1516
call void @llvm.lifetime.end(i64 -1, i8* %i.ptr)
1617

1718
; Check that lifetime with no size are ignored.
@@ -30,6 +31,7 @@ define void @lifetime() sanitize_address {
3031
%i = alloca i32, align 4
3132
%i.ptr = bitcast i32* %i to i8*
3233
call void @llvm.lifetime.start(i64 3, i8* %i.ptr)
34+
store volatile i8 0, i8* %i.ptr
3335
; Memory is unpoisoned at llvm.lifetime.start
3436
; CHECK: %[[VAR:[^ ]*]] = ptrtoint i32* %{{[^ ]+}} to i64
3537
; CHECK-NEXT: call void @__asan_unpoison_stack_memory(i64 %[[VAR]], i64 3)
@@ -43,12 +45,14 @@ define void @lifetime() sanitize_address {
4345
%arr = alloca [10 x i32], align 16
4446
%arr.ptr = bitcast [10 x i32]* %arr to i8*
4547
call void @llvm.lifetime.start(i64 40, i8* %arr.ptr)
48+
store volatile i8 0, i8* %arr.ptr
4649
; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 40)
4750
call void @llvm.lifetime.end(i64 40, i8* %arr.ptr)
4851
; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 40)
4952

5053
; One more lifetime start/end for the same variable %i.
5154
call void @llvm.lifetime.start(i64 4, i8* %i.ptr)
55+
store volatile i8 0, i8* %i.ptr
5256
; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 4)
5357
call void @llvm.lifetime.end(i64 4, i8* %i.ptr)
5458
; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 4)
@@ -68,6 +72,7 @@ entry:
6872
%i = alloca i64, align 4
6973
%i.ptr = bitcast i64* %i to i8*
7074
call void @llvm.lifetime.start(i64 8, i8* %i.ptr)
75+
store volatile i8 0, i8* %i.ptr
7176
; CHECK: __asan_unpoison_stack_memory
7277
br i1 %x, label %bb0, label %bb1
7378

llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ entry:
2626
; CHECK: ret void
2727

2828
%XXX = alloca [20 x i8], align 1
29+
%arr.ptr = bitcast [20 x i8]* %XXX to i8*
30+
store volatile i8 0, i8* %arr.ptr
2931
ret void
3032
}
3133

@@ -37,6 +39,8 @@ entry:
3739
; CHECK: ret void
3840

3941
%XXX = alloca [20 x i8], align 1
42+
%arr.ptr = bitcast [20 x i8]* %XXX to i8*
43+
store volatile i8 0, i8* %arr.ptr
4044
call void asm sideeffect "mov %%rbx, %%rcx", "~{dirflag},~{fpsr},~{flags}"() nounwind
4145
ret void
4246
}

llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ entry:
2626
%XXX = alloca [10 x i8], align 1
2727
%YYY = alloca [20 x i8], align 1
2828
%ZZZ = alloca [30 x i8], align 1
29+
%arr1.ptr = bitcast [10 x i8]* %XXX to i8*
30+
store volatile i8 0, i8* %arr1.ptr
31+
%arr2.ptr = bitcast [20 x i8]* %YYY to i8*
32+
store volatile i8 0, i8* %arr2.ptr
33+
%arr3.ptr = bitcast [30 x i8]* %ZZZ to i8*
34+
store volatile i8 0, i8* %arr3.ptr
2935
ret void
3036
}
3137

@@ -41,6 +47,12 @@ entry:
4147
%AAA = alloca [5 x i8], align 1
4248
%BBB = alloca [55 x i8], align 1
4349
%CCC = alloca [555 x i8], align 1
50+
%arr1.ptr = bitcast [5 x i8]* %AAA to i8*
51+
store volatile i8 0, i8* %arr1.ptr
52+
%arr2.ptr = bitcast [55 x i8]* %BBB to i8*
53+
store volatile i8 0, i8* %arr2.ptr
54+
%arr3.ptr = bitcast [555 x i8]* %CCC to i8*
55+
store volatile i8 0, i8* %arr3.ptr
4456
ret void
4557
}
4658

@@ -57,5 +69,11 @@ entry:
5769
%AAA = alloca [128 x i8], align 16
5870
%BBB = alloca [128 x i8], align 64
5971
%CCC = alloca [128 x i8], align 256
72+
%arr1.ptr = bitcast [128 x i8]* %AAA to i8*
73+
store volatile i8 0, i8* %arr1.ptr
74+
%arr2.ptr = bitcast [128 x i8]* %BBB to i8*
75+
store volatile i8 0, i8* %arr2.ptr
76+
%arr3.ptr = bitcast [128 x i8]* %CCC to i8*
77+
store volatile i8 0, i8* %arr3.ptr
6078
ret void
6179
}

0 commit comments

Comments
 (0)