From 113902e9fba5f4baf3de3c6ac0241d49ffdfa55c Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Thu, 8 Apr 2010 18:47:09 +0000
Subject: [PATCH] Add a -lint pass which checks for common sources of undefined or likely unintended behavior.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100798 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/Lint.h |  52 ++++++
 include/llvm/LinkAllPasses.h |   2 +
 lib/Analysis/Lint.cpp        | 368 +++++++++++++++++++++++++++++++++++++++++++
 test/Other/lint.ll           |  31 ++++
 4 files changed, 453 insertions(+)
 create mode 100644 include/llvm/Analysis/Lint.h
 create mode 100644 lib/Analysis/Lint.cpp
 create mode 100644 test/Other/lint.ll

diff --git a/include/llvm/Analysis/Lint.h b/include/llvm/Analysis/Lint.h
new file mode 100644
index 00000000000..2f0136608d3
--- /dev/null
+++ b/include/llvm/Analysis/Lint.h
@@ -0,0 +1,52 @@
+//===-- llvm/Analysis/Lint.h - LLVM IR Lint ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines lint interfaces that can be used for some sanity checking
+// of input to the system, and for checking that transformations
+// haven't done something bad. In contrast to the Verifier, the Lint checker
+// checks for undefined behavior or constructions with likely unintended
+// behavior.
+//
+// To see what specifically is checked, look at Lint.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LINT_H
+#define LLVM_ANALYSIS_LINT_H
+
+#include <string>
+
+namespace llvm {
+
+class FunctionPass;
+class Module;
+class Function;
+
+/// @brief Create a lint pass.
+///
+/// Check a module or function.
+FunctionPass *createLintPass();
+
+/// @brief Check a module.
+///
+/// This should only be used for debugging, because it plays games with
+/// PassManagers and stuff.
+void lintModule(
+  const Module &M,            ///< The module to be checked
+  std::string *ErrorInfo = 0  ///< Information about failures.
+);
+
+// lintFunction - Check a function.
+void lintFunction(
+  const Function &F  ///< The function to be checked
+);
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index ae538518ceb..f6ba39ccc50 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/PointerTracking.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/Lint.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
@@ -137,6 +138,7 @@ namespace {
       (void) llvm::createGEPSplitterPass();
       (void) llvm::createSCCVNPass();
       (void) llvm::createABCDPass();
+      (void) llvm::createLintPass();
 
       (void)new llvm::IntervalPartition();
       (void)new llvm::FindUsedTypes();
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
new file mode 100644
index 00000000000..19945179f4d
--- /dev/null
+++ b/lib/Analysis/Lint.cpp
@@ -0,0 +1,368 @@
+//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass statically checks for common and easily-identified constructs
+// which produce undefined or likely unintended behavior in LLVM IR.
+//
+// It is not a guarantee of correctness, in two ways. First, it isn't
+// comprehensive. There are checks which could be done statically which are
+// not yet implemented. Some of these are indicated by TODO comments, but
+// those aren't comprehensive either. Second, many conditions cannot be
+// checked statically. This pass does no dynamic instrumentation, so it
+// can't check for all possible problems.
+//
+// Another limitation is that it assumes all code will be executed. A store
+// through a null pointer in a basic block which is never reached is harmless,
+// but this pass will warn about it anyway.
+//
+// Optimization passes may make conditions that this pass checks for more or
+// less obvious. If an optimization pass appears to be introducing a warning,
+// it may be that the optimization pass is merely exposing an existing
+// condition in the code.
+//
+// This code may be run before instcombine. In many cases, instcombine checks
+// for the same kinds of things and turns instructions with undefined behavior
+// into unreachable (or equivalent). Because of this, this pass makes some
+// effort to look through bitcasts and so on.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class Lint : public FunctionPass, public InstVisitor<Lint> {
+    friend class InstVisitor<Lint>;
+
+    void visitCallSite(CallSite CS);
+    void visitMemoryReference(Instruction &I, Value *Ptr, unsigned Align,
+                              const Type *Ty);
+
+    void visitInstruction(Instruction &I);
+    void visitCallInst(CallInst &I);
+    void visitInvokeInst(InvokeInst &I);
+    void visitReturnInst(ReturnInst &I);
+    void visitLoadInst(LoadInst &I);
+    void visitStoreInst(StoreInst &I);
+    void visitSDiv(BinaryOperator &I);
+    void visitUDiv(BinaryOperator &I);
+    void visitSRem(BinaryOperator &I);
+    void visitURem(BinaryOperator &I);
+    void visitAllocaInst(AllocaInst &I);
+    void visitVAArgInst(VAArgInst &I);
+    void visitIndirectBrInst(IndirectBrInst &I);
+
+  public:
+    Module *Mod;
+    AliasAnalysis *AA;
+    TargetData *TD;
+
+    std::string Messages;
+    raw_string_ostream MessagesStr;
+
+    static char ID; // Pass identification, replacement for typeid
+    Lint() : FunctionPass(&ID), MessagesStr(Messages) {}
+
+    virtual bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<AliasAnalysis>();
+    }
+    virtual void print(raw_ostream &O, const Module *M) const {}
+
+    void WriteValue(const Value *V) {
+      if (!V) return;
+      if (isa<Instruction>(V)) {
+        MessagesStr << *V << '\n';
+      } else {
+        WriteAsOperand(MessagesStr, V, true, Mod);
+        MessagesStr << '\n';
+      }
+    }
+
+    void WriteType(const Type *T) {
+      if (!T) return;
+      MessagesStr << ' ';
+      WriteTypeSymbolic(MessagesStr, T, Mod);
+    }
+
+    // CheckFailed - A check failed, so print out the condition and the message
+    // that failed. This provides a nice place to put a breakpoint if you want
+    // to see why something is not correct.
+    void CheckFailed(const Twine &Message,
+                     const Value *V1 = 0, const Value *V2 = 0,
+                     const Value *V3 = 0, const Value *V4 = 0) {
+      MessagesStr << Message.str() << "\n";
+      WriteValue(V1);
+      WriteValue(V2);
+      WriteValue(V3);
+      WriteValue(V4);
+    }
+
+    void CheckFailed(const Twine &Message, const Value *V1,
+                     const Type *T2, const Value *V3 = 0) {
+      MessagesStr << Message.str() << "\n";
+      WriteValue(V1);
+      WriteType(T2);
+      WriteValue(V3);
+    }
+
+    void CheckFailed(const Twine &Message, const Type *T1,
+                     const Type *T2 = 0, const Type *T3 = 0) {
+      MessagesStr << Message.str() << "\n";
+      WriteType(T1);
+      WriteType(T2);
+      WriteType(T3);
+    }
+  };
+}
+
+char Lint::ID = 0;
+static RegisterPass<Lint>
+X("lint", "Statically lint-checks LLVM IR", false, true);
+
+// Assert - We know that cond should be true, if not print an error message.
+#define Assert(C, M) \
+    do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+    do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+    do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+    do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+    do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+// Lint::runOnFunction - This is the main Analysis entry point for a
+// function.
+//
+bool Lint::runOnFunction(Function &F) {
+  Mod = F.getParent();
+  AA = &getAnalysis<AliasAnalysis>();
+  TD = getAnalysisIfAvailable<TargetData>();
+  visit(F);
+  dbgs() << MessagesStr.str();
+  return false;
+}
+
+void Lint::visitInstruction(Instruction &I) {
+}
+
+void Lint::visitCallSite(CallSite CS) {
+  Instruction &I = *CS.getInstruction();
+  Value *Callee = CS.getCalledValue();
+
+  // TODO: Check function alignment?
+  visitMemoryReference(I, Callee, 0, 0);
+
+  if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
+    Assert1(CS.getCallingConv() == F->getCallingConv(),
+            "Caller and callee calling convention differ", &I);
+
+    const FunctionType *FT = F->getFunctionType();
+    unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+
+    Assert1(FT->isVarArg() ?
+              FT->getNumParams() <= NumActualArgs :
+              FT->getNumParams() == NumActualArgs,
+            "Call argument count mismatches callee argument count", &I);
+
+    // TODO: Check argument types (in case the callee was casted)
+
+    // TODO: Check ABI-significant attributes.
+
+    // TODO: Check noalias attribute.
+
+    // TODO: Check sret attribute.
+  }
+
+  // TODO: Check the "tail" keyword constraints.
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+    switch (II->getIntrinsicID()) {
+    default: break;
+
+    // TODO: Check more intrinsics
+
+    case Intrinsic::memcpy: {
+      MemCpyInst *MCI = cast<MemCpyInst>(&I);
+      visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0);
+      visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0);
+
+      unsigned Size = 0;
+      if (const ConstantInt *Len =
+            dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts()))
+        if (Len->getValue().isIntN(32))
+          Size = Len->getValue().getZExtValue();
+      Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+              AliasAnalysis::MustAlias,
+              "memcpy source and destination overlap", &I);
+      break;
+    }
+    case Intrinsic::memmove: {
+      MemMoveInst *MMI = cast<MemMoveInst>(&I);
+      visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0);
+      visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0);
+      break;
+    }
+    case Intrinsic::memset: {
+      MemSetInst *MSI = cast<MemSetInst>(&I);
+      visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0);
+      break;
+    }
+
+    case Intrinsic::vastart:
+      visitMemoryReference(I, CS.getArgument(0), 0, 0);
+      break;
+    case Intrinsic::vacopy:
+      visitMemoryReference(I, CS.getArgument(0), 0, 0);
+      visitMemoryReference(I, CS.getArgument(1), 0, 0);
+      break;
+    case Intrinsic::vaend:
+      visitMemoryReference(I, CS.getArgument(0), 0, 0);
+      break;
+
+    case Intrinsic::stackrestore:
+      visitMemoryReference(I, CS.getArgument(0), 0, 0);
+      break;
+    }
+}
+
+void Lint::visitCallInst(CallInst &I) {
+  return visitCallSite(&I);
+}
+
+void Lint::visitInvokeInst(InvokeInst &I) {
+  return visitCallSite(&I);
+}
+
+void Lint::visitReturnInst(ReturnInst &I) {
+  Function *F = I.getParent()->getParent();
+  Assert1(!F->doesNotReturn(),
+          "Return statement in function with noreturn attribute", &I);
+}
+
+// TODO: Add a length argument and check that the reference is in bounds
+// TODO: Add read/write/execute flags and check for writing to read-only
+//       memory or jumping to suspicious writeable memory
+void Lint::visitMemoryReference(Instruction &I,
+                                Value *Ptr, unsigned Align, const Type *Ty) {
+  Assert1(!isa<ConstantPointerNull>(Ptr->getUnderlyingObject()),
+          "Null pointer dereference", &I);
+  Assert1(!isa<UndefValue>(Ptr->getUnderlyingObject()),
+          "Undef pointer dereference", &I);
+
+  if (TD) {
+    if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty);
+
+    if (Align != 0) {
+      unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
+      APInt Mask = APInt::getAllOnesValue(BitWidth),
+                   KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD);
+      Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
+              "Memory reference address is misaligned", &I);
+    }
+  }
+}
+
+void Lint::visitLoadInst(LoadInst &I) {
+  visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getType());
+}
+
+void Lint::visitStoreInst(StoreInst &I) {
+  visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(),
+                       I.getOperand(0)->getType());
+}
+
+static bool isZero(Value *V, TargetData *TD) {
+  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+  APInt Mask = APInt::getAllOnesValue(BitWidth),
+               KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+  return KnownZero.isAllOnesValue();
+}
+
+void Lint::visitSDiv(BinaryOperator &I) {
+  Assert1(!isZero(I.getOperand(1), TD), "Division by zero", &I);
+}
+
+void Lint::visitUDiv(BinaryOperator &I) {
+  Assert1(!isZero(I.getOperand(1), TD), "Division by zero", &I);
+}
+
+void Lint::visitSRem(BinaryOperator &I) {
+  Assert1(!isZero(I.getOperand(1), TD), "Division by zero", &I);
+}
+
+void Lint::visitURem(BinaryOperator &I) {
+  Assert1(!isZero(I.getOperand(1), TD), "Division by zero", &I);
+}
+
+void Lint::visitAllocaInst(AllocaInst &I) {
+  if (isa<ConstantInt>(I.getArraySize()))
+    // This isn't undefined behavior, it's just an obvious pessimization.
+    Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+            "Static alloca outside of entry block", &I);
+}
+
+void Lint::visitVAArgInst(VAArgInst &I) {
+  visitMemoryReference(I, I.getOperand(0), 0, 0);
+}
+
+void Lint::visitIndirectBrInst(IndirectBrInst &I) {
+  visitMemoryReference(I, I.getAddress(), 0, 0);
+}
+
+//===----------------------------------------------------------------------===//
+//  Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createLintPass() {
+  return new Lint();
+}
+
+/// lintFunction - Check a function for errors, printing messages on stderr.
+///
+void llvm::lintFunction(const Function &f) {
+  Function &F = const_cast<Function&>(f);
+  assert(!F.isDeclaration() && "Cannot lint external functions");
+
+  FunctionPassManager FPM(F.getParent());
+  Lint *V = new Lint();
+  FPM.add(V);
+  FPM.run(F);
+}
+
+/// lintModule - Check a module for errors, printing messages on stderr.
+/// If ErrorInfo is non-null, the collected messages are also stored in it.
+///
+void llvm::lintModule(const Module &M, std::string *ErrorInfo) {
+  PassManager PM;
+  Lint *V = new Lint();
+  PM.add(V);
+  PM.run(const_cast<Module&>(M));
+
+  if (ErrorInfo)
+    *ErrorInfo = V->MessagesStr.str();
+}
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
new file mode 100644
index 00000000000..6ccaa6fd4c4
--- /dev/null
+++ b/test/Other/lint.ll
@@ -0,0 +1,31 @@
+; RUN: opt -lint -disable-output < %s |& FileCheck %s
+target datalayout = "e-p:64:64:64"
+
+declare fastcc void @bar()
+
+define i32 @foo() noreturn {
+; CHECK: Caller and callee calling convention differ
+  call void @bar()
+; CHECK: Null pointer dereference
+  store i32 0, i32* null
+; CHECK: Null pointer dereference
+  %t = load i32* null
+; CHECK: Memory reference address is misaligned
+  %x = inttoptr i32 1 to i32*
+  load i32* %x, align 4
+; CHECK: Division by zero
+  %sd = sdiv i32 2, 0
+; CHECK: Division by zero
+  %ud = udiv i32 2, 0
+; CHECK: Division by zero
+  %sr = srem i32 2, 0
+; CHECK: Division by zero
+  %ur = urem i32 2, 0
+  br label %next
+
+next:
+; CHECK: Static alloca outside of entry block
+  %a = alloca i32
+; CHECK: Return statement in function with noreturn attribute
+  ret i32 0
+}
-- 
2.11.0