return 8 * getTypeStoreSize(Ty);
}
+ /// Returns true if no extra padding bits are needed when storing the
+ /// specified type.
+ ///
+ /// For example, returns false for i19 that has a 24-bit store size.
+ bool typeSizeEqualsStoreSize(Type *Ty) const {
+ return getTypeSizeInBits(Ty) == getTypeStoreSizeInBits(Ty);
+ }
+
/// Returns the offset in bytes between successive objects of the
/// specified type, including alignment padding.
///
auto *Earlier = dyn_cast<StoreInst>(DepWrite);
auto *Later = dyn_cast<StoreInst>(Inst);
if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(
+ Earlier->getValueOperand()->getType()) &&
Later && isa<ConstantInt>(Later->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(
+ Later->getValueOperand()->getType()) &&
memoryIsNotModifiedBetween(Earlier, Later, AA)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
// b) the later store is fully contained in the earlier one and
// c) they both have a constant value
+ // d) none of the two stores need padding
// Merge the two stores, replacing the earlier store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s
+; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s
+
+; This test used to hit an assertion (see PR41949).
+;
+; Better safe than sorry, do not assume anything about the padding for the
+; i28 store that has 32 bits as store size.
+define void @test1(i32* %p) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[A:%.*]] = alloca i32
+; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[A]] to i28*
+; CHECK-NEXT: [[C:%.*]] = bitcast i32* [[A]] to { i16, i16 }*
+; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* [[C]], i32 0, i32 1
+; CHECK-NEXT: store i28 10, i28* [[B]]
+; CHECK-NEXT: store i16 20, i16* [[C1]]
+; CHECK-NEXT: call void @test1(i32* [[A]])
+; CHECK-NEXT: ret void
+;
+ %a = alloca i32
+ %b = bitcast i32* %a to i28*
+ %c = bitcast i32* %a to { i16, i16 }*
+ %c1 = getelementptr inbounds { i16, i16 }, { i16, i16 }* %c, i32 0, i32 1
+ store i28 10, i28* %b
+ store i16 20, i16* %c1
+
+ call void @test1(i32* %a)
+ ret void
+}
+
+
+; This test used to mis-compile (see PR41949).
+;
+; Better safe than sorry, do not assume anything about the padding for the
+; i12 store that has 16 bits as store size.
+define void @test2(i32* %p) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[U:%.*]] = alloca i32
+; CHECK-NEXT: [[A:%.*]] = bitcast i32* [[U]] to i32*
+; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[U]] to i12*
+; CHECK-NEXT: store i32 -1, i32* [[A]]
+; CHECK-NEXT: store i12 20, i12* [[B]]
+; CHECK-NEXT: call void @test2(i32* [[U]])
+; CHECK-NEXT: ret void
+;
+ %u = alloca i32
+ %a = bitcast i32* %u to i32*
+ %b = bitcast i32* %u to i12*
+ store i32 -1, i32* %a
+ store i12 20, i12* %b
+
+ call void @test2(i32* %u)
+ ret void
+}
+