; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck -check-prefix=SSE41 %s
; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck -check-prefix=AVX1 %s
; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s

; PR14887
; These tests inject a store into the chain to test the inreg versions of pmovsx
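; (The extra zeroinitializer store is only there to perturb the chain; the
; intent, presumably, is that the sign extension is then selected from a
; register value rather than being folded into a sign-extending load.)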

define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
  %wide.load35 = load <2 x i8>* %in, align 1
  %sext = sext <2 x i8> %wide.load35 to <2 x i64>
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %sext, <2 x i64>* %out, align 8
  ret void

; SSE41: test1:
; SSE41: pmovsxbq

; AVX1: test1:
; AVX1: vpmovsxbq

; AVX2: test1:
; AVX2: vpmovsxbq
}

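; The tests with a 256-bit result (test2, test4, test6, test8, test10, test12)
; only check the AVX2 run, presumably because the ymm-destination forms of
; pmovsx were not introduced until AVX2.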
define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
  %wide.load35 = load <4 x i8>* %in, align 1
  %sext = sext <4 x i8> %wide.load35 to <4 x i64>
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %sext, <4 x i64>* %out, align 8
  ret void

; AVX2: test2:
; AVX2: vpmovsxbq
}

define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
  %wide.load35 = load <4 x i8>* %in, align 1
  %sext = sext <4 x i8> %wide.load35 to <4 x i32>
  store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
  store <4 x i32> %sext, <4 x i32>* %out, align 8
  ret void

; SSE41: test3:
; SSE41: pmovsxbd

; AVX1: test3:
; AVX1: vpmovsxbd

; AVX2: test3:
; AVX2: vpmovsxbd
}

define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
  %wide.load35 = load <8 x i8>* %in, align 1
  %sext = sext <8 x i8> %wide.load35 to <8 x i32>
  store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
  store <8 x i32> %sext, <8 x i32>* %out, align 8
  ret void

; AVX2: test4:
; AVX2: vpmovsxbd
}

define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
  %wide.load35 = load <8 x i8>* %in, align 1
  %sext = sext <8 x i8> %wide.load35 to <8 x i16>
  store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8
  store <8 x i16> %sext, <8 x i16>* %out, align 8
  ret void

; SSE41: test5:
; SSE41: pmovsxbw

; AVX1: test5:
; AVX1: vpmovsxbw

; AVX2: test5:
; AVX2: vpmovsxbw
}

define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
  %wide.load35 = load <16 x i8>* %in, align 1
  %sext = sext <16 x i8> %wide.load35 to <16 x i16>
  store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8
  store <16 x i16> %sext, <16 x i16>* %out, align 8
  ret void

; AVX2: test6:
; FIXME: v16i8 -> v16i16 is scalarized.
; AVX2-NOT: pmovsx
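; (AVX2 does provide a ymm-destination vpmovsxbw, so this could be a single
; instruction once the scalarization is fixed.)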
}

define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
  %wide.load35 = load <2 x i16>* %in, align 1
  %sext = sext <2 x i16> %wide.load35 to <2 x i64>
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %sext, <2 x i64>* %out, align 8
  ret void

; SSE41: test7:
; SSE41: pmovsxwq

; AVX1: test7:
; AVX1: vpmovsxwq

; AVX2: test7:
; AVX2: vpmovsxwq
}

define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
  %wide.load35 = load <4 x i16>* %in, align 1
  %sext = sext <4 x i16> %wide.load35 to <4 x i64>
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %sext, <4 x i64>* %out, align 8
  ret void

; AVX2: test8:
; AVX2: vpmovsxwq
}

define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
  %wide.load35 = load <4 x i16>* %in, align 1
  %sext = sext <4 x i16> %wide.load35 to <4 x i32>
  store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
  store <4 x i32> %sext, <4 x i32>* %out, align 8
  ret void

; SSE41: test9:
; SSE41: pmovsxwd

; AVX1: test9:
; AVX1: vpmovsxwd

; AVX2: test9:
; AVX2: vpmovsxwd
}

define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
  %wide.load35 = load <8 x i16>* %in, align 1
  %sext = sext <8 x i16> %wide.load35 to <8 x i32>
  store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
  store <8 x i32> %sext, <8 x i32>* %out, align 8
  ret void

; AVX2: test10:
; AVX2: vpmovsxwd
}

define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
  %wide.load35 = load <2 x i32>* %in, align 1
  %sext = sext <2 x i32> %wide.load35 to <2 x i64>
  store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
  store <2 x i64> %sext, <2 x i64>* %out, align 8
  ret void

; SSE41: test11:
; SSE41: pmovsxdq

; AVX1: test11:
; AVX1: vpmovsxdq

; AVX2: test11:
; AVX2: vpmovsxdq
}

define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
  %wide.load35 = load <4 x i32>* %in, align 1
  %sext = sext <4 x i32> %wide.load35 to <4 x i64>
  store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
  store <4 x i64> %sext, <4 x i64>* %out, align 8
  ret void

; AVX2: test12:
; AVX2: vpmovsxdq
}