| //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file contains an optimization for div and rem on architectures that |
| // execute short instructions significantly faster than longer instructions. |
| // For example, on Intel Atom 32-bit divides are slow enough that during |
| // runtime it is profitable to check the value of the operands, and if they are |
| // positive and less than 256 use an unsigned 8-bit divide. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #define DEBUG_TYPE "bypass-slow-division" |
| #include "llvm/Transforms/Utils/BypassSlowDivision.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instructions.h" |
| |
| using namespace llvm; |
| |
| namespace { |
| struct DivOpInfo { |
| bool SignedOp; |
| Value *Dividend; |
| Value *Divisor; |
| |
| DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) |
| : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} |
| }; |
| |
| struct DivPhiNodes { |
| PHINode *Quotient; |
| PHINode *Remainder; |
| |
| DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) |
| : Quotient(InQuotient), Remainder(InRemainder) {} |
| }; |
| } |
| |
| namespace llvm { |
| template<> |
| struct DenseMapInfo<DivOpInfo> { |
| static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { |
| return Val1.SignedOp == Val2.SignedOp && |
| Val1.Dividend == Val2.Dividend && |
| Val1.Divisor == Val2.Divisor; |
| } |
| |
| static DivOpInfo getEmptyKey() { |
| return DivOpInfo(false, 0, 0); |
| } |
| |
| static DivOpInfo getTombstoneKey() { |
| return DivOpInfo(true, 0, 0); |
| } |
| |
| static unsigned getHashValue(const DivOpInfo &Val) { |
| return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ |
| reinterpret_cast<uintptr_t>(Val.Divisor)) ^ |
| (unsigned)Val.SignedOp; |
| } |
| }; |
| |
| typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; |
| } |
| |
| // insertFastDiv - Substitutes the div/rem instruction with code that checks the |
| // value of the operands and uses a shorter-faster div/rem instruction when |
| // possible and the longer-slower div/rem instruction otherwise. |
| static bool insertFastDiv(Function &F, |
| Function::iterator &I, |
| BasicBlock::iterator &J, |
| IntegerType *BypassType, |
| bool UseDivOp, |
| bool UseSignedOp, |
| DivCacheTy &PerBBDivCache) { |
| // Get instruction operands |
| Instruction *Instr = J; |
| Value *Dividend = Instr->getOperand(0); |
| Value *Divisor = Instr->getOperand(1); |
| |
| if (isa<ConstantInt>(Divisor) || |
| (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { |
| // Operations with immediate values should have |
| // been solved and replaced during compile time. |
| return false; |
| } |
| |
| // Basic Block is split before divide |
| BasicBlock *MainBB = I; |
| BasicBlock *SuccessorBB = I->splitBasicBlock(J); |
| ++I; //advance iterator I to successorBB |
| |
| // Add new basic block for slow divide operation |
| BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", |
| MainBB->getParent(), SuccessorBB); |
| SlowBB->moveBefore(SuccessorBB); |
| IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); |
| Value *SlowQuotientV; |
| Value *SlowRemainderV; |
| if (UseSignedOp) { |
| SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); |
| SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); |
| } else { |
| SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); |
| SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); |
| } |
| SlowBuilder.CreateBr(SuccessorBB); |
| |
| // Add new basic block for fast divide operation |
| BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", |
| MainBB->getParent(), SuccessorBB); |
| FastBB->moveBefore(SlowBB); |
| IRBuilder<> FastBuilder(FastBB, FastBB->begin()); |
| Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, |
| BypassType); |
| Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, |
| BypassType); |
| |
| // udiv/urem because optimization only handles positive numbers |
| Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, |
| ShortDivisorV); |
| Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, |
| ShortDivisorV); |
| Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, |
| ShortQuotientV, |
| Dividend->getType()); |
| Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, |
| ShortRemainderV, |
| Dividend->getType()); |
| FastBuilder.CreateBr(SuccessorBB); |
| |
| // Phi nodes for result of div and rem |
| IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); |
| PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); |
| QuoPhi->addIncoming(SlowQuotientV, SlowBB); |
| QuoPhi->addIncoming(FastQuotientV, FastBB); |
| PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); |
| RemPhi->addIncoming(SlowRemainderV, SlowBB); |
| RemPhi->addIncoming(FastRemainderV, FastBB); |
| |
| // Replace Instr with appropriate phi node |
| if (UseDivOp) |
| Instr->replaceAllUsesWith(QuoPhi); |
| else |
| Instr->replaceAllUsesWith(RemPhi); |
| Instr->eraseFromParent(); |
| |
| // Combine operands into a single value with OR for value testing below |
| MainBB->getInstList().back().eraseFromParent(); |
| IRBuilder<> MainBuilder(MainBB, MainBB->end()); |
| Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); |
| |
| // BitMask is inverted to check if the operands are |
| // larger than the bypass type |
| uint64_t BitMask = ~BypassType->getBitMask(); |
| Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); |
| |
| // Compare operand values and branch |
| Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); |
| Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); |
| MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); |
| |
| // point iterator J at first instruction of successorBB |
| J = I->begin(); |
| |
| // Cache phi nodes to be used later in place of other instances |
| // of div or rem with the same sign, dividend, and divisor |
| DivOpInfo Key(UseSignedOp, Dividend, Divisor); |
| DivPhiNodes Value(QuoPhi, RemPhi); |
| PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); |
| return true; |
| } |
| |
| // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if |
| // operands and operation are identical. Otherwise call insertFastDiv to perform |
| // the optimization and cache the resulting dividend and remainder. |
| static bool reuseOrInsertFastDiv(Function &F, |
| Function::iterator &I, |
| BasicBlock::iterator &J, |
| IntegerType *BypassType, |
| bool UseDivOp, |
| bool UseSignedOp, |
| DivCacheTy &PerBBDivCache) { |
| // Get instruction operands |
| Instruction *Instr = J; |
| DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); |
| DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); |
| |
| if (CacheI == PerBBDivCache.end()) { |
| // If previous instance does not exist, insert fast div |
| return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, |
| PerBBDivCache); |
| } |
| |
| // Replace operation value with previously generated phi node |
| DivPhiNodes &Value = CacheI->second; |
| if (UseDivOp) { |
| // Replace all uses of div instruction with quotient phi node |
| J->replaceAllUsesWith(Value.Quotient); |
| } else { |
| // Replace all uses of rem instruction with remainder phi node |
| J->replaceAllUsesWith(Value.Remainder); |
| } |
| |
| // Advance to next operation |
| ++J; |
| |
| // Remove redundant operation |
| Instr->eraseFromParent(); |
| return true; |
| } |
| |
| // bypassSlowDivision - This optimization identifies DIV instructions that can |
| // be profitably bypassed and carried out with a shorter, faster divide. |
| bool llvm::bypassSlowDivision(Function &F, |
| Function::iterator &I, |
| const DenseMap<unsigned int, unsigned int> &BypassWidths) { |
| DivCacheTy DivCache; |
| |
| bool MadeChange = false; |
| for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { |
| |
| // Get instruction details |
| unsigned Opcode = J->getOpcode(); |
| bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; |
| bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; |
| bool UseSignedOp = Opcode == Instruction::SDiv || |
| Opcode == Instruction::SRem; |
| |
| // Only optimize div or rem ops |
| if (!UseDivOp && !UseRemOp) |
| continue; |
| |
| // Skip division on vector types, only optimize integer instructions |
| if (!J->getType()->isIntegerTy()) |
| continue; |
| |
| // Get bitwidth of div/rem instruction |
| IntegerType *T = cast<IntegerType>(J->getType()); |
| unsigned int bitwidth = T->getBitWidth(); |
| |
| // Continue if bitwidth is not bypassed |
| DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); |
| if (BI == BypassWidths.end()) |
| continue; |
| |
| // Get type for div/rem instruction with bypass bitwidth |
| IntegerType *BT = IntegerType::get(J->getContext(), BI->second); |
| |
| MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, |
| UseSignedOp, DivCache); |
| } |
| |
| return MadeChange; |
| } |