--- /dev/null
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep movq | wc -l | grep 3
+
+; FIXME: This code outputs:
+;
+; subl $28, %esp
+; movl 32(%esp), %eax
+; movd %eax, %mm0
+; movq %mm0, (%esp)
+; movl (%esp), %eax
+; movl %eax, 20(%esp)
+; movq %mm0, 8(%esp)
+; movl 12(%esp), %eax
+; movl %eax, 16(%esp)
+; movq 16(%esp), %mm0
+; addl $28, %esp
+;
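+; This is ugly: the value is stored to and reloaded from the stack repeatedly
+; instead of being assembled directly in %mm0. We need to fix this.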
+
+; @qux: return a <2 x i32> whose element 0 is the constant 0 and whose
+; element 1 is the scalar argument %A. The RUN line at the top of this file
+; counts the movq instructions llc emits for this function.
+define <2 x i32> @qux(i32 %A) {
+entry:
+; Start from the constant vector < 0, undef > and overwrite index 1 with %A.
+ %tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %tmp3
+}
--- /dev/null
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep punpckhdq | wc -l | grep 1
+
+; @bork: load a <1 x i64> from %x, reinterpret it as <2 x i32>, replace both
+; elements with element 1 of that vector, and store the result back to %x.
+; The RUN line at the top of this file checks that llc emits exactly one
+; punpckhdq for this function.
+define void @bork(<1 x i64>* %x) {
+entry:
+ %tmp2 = load <1 x i64>* %x ; <<1 x i64>> [#uses=1]
+; View the 64-bit value as two 32-bit elements so it can be shuffled.
+ %tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32> ; <<2 x i32>> [#uses=1]
+; Mask < 1, 1 > selects element 1 of %tmp6 for both result elements; the
+; second shuffle operand is undef and is never referenced by the mask.
+ %tmp9 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> < i32 1, i32 1 > ; <<2 x i32>> [#uses=1]
+; Convert back to the pointee type of %x before storing.
+ %tmp10 = bitcast <2 x i32> %tmp9 to <1 x i64> ; <<1 x i64>> [#uses=1]
+ store <1 x i64> %tmp10, <1 x i64>* %x
+; Call the MMX emms intrinsic after the vector work, just before returning.
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
+
+; Declaration of the llvm.x86.mmx.emms intrinsic called at the end of @bork;
+; it takes no arguments and returns void.
+declare void @llvm.x86.mmx.emms()