ARM cost model: Make some vector integer to float casts cheaper

The default logic marks them as too expensive.

For example, before this patch we estimated:
  cost of 16 for instruction:   %r = uitofp <4 x i16> %v0 to <4 x float>

While this translates to:
  vmovl.u16 q8, d16
  vcvt.f32.u32  q8, q8

All other costs are left to the values assigned by the fallback logic. These
costs are mostly reasonable in the sense that they get progressively more
expensive as the instruction sequences emitted get longer.

radar://13445992

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177334 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3883403..140a8db 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -222,6 +222,28 @@
     // Vector float <-> i32 conversions.
     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
+
+    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
+    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
+    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
+    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
+    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
+    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
+    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
+    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
+    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
+    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
+    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
+    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
+    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
+    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
+
     { ISD::FP_TO_SINT,  MVT::v4i32, MVT::v4f32, 1 },
     { ISD::FP_TO_UINT,  MVT::v4i32, MVT::v4f32, 1 },
     { ISD::FP_TO_SINT,  MVT::v4i8, MVT::v4f32, 3 },
@@ -232,6 +254,14 @@
     // Vector double <-> i32 conversions.
     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
+
+    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
+    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
+    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
+    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
+    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
+    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
+
     { ISD::FP_TO_SINT,  MVT::v2i32, MVT::v2f64, 2 },
     { ISD::FP_TO_UINT,  MVT::v2i32, MVT::v2f64, 2 },
     { ISD::FP_TO_SINT,  MVT::v8i16, MVT::v8f32, 4 },
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index 88b1844..96eb335 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -359,6 +359,174 @@
   ; CHECK: cost of 192 {{.*}} fptosi
   %r169 = fptosi <16 x double> undef to <16 x i64>
 
+  ; CHECK: cost of 8 {{.*}} uitofp
+  %r170 = uitofp <2 x i1> undef to <2 x float>
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %r171 = sitofp <2 x i1> undef to <2 x float>
+  ; CHECK: cost of 3 {{.*}} uitofp
+  %r172 = uitofp <2 x i8> undef to <2 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %r173 = sitofp <2 x i8> undef to <2 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r174 = uitofp <2 x i16> undef to <2 x float>
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r175 = sitofp <2 x i16> undef to <2 x float>
+  ; CHECK: cost of 1 {{.*}} uitofp
+  %r176 = uitofp <2 x i32> undef to <2 x float>
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %r177 = sitofp <2 x i32> undef to <2 x float>
+  ; CHECK: cost of 24 {{.*}} uitofp
+  %r178 = uitofp <2 x i64> undef to <2 x float>
+  ; CHECK: cost of 24 {{.*}} sitofp
+  %r179 = sitofp <2 x i64> undef to <2 x float>
+
+  ; CHECK: cost of 8 {{.*}} uitofp
+  %r180 = uitofp <2 x i1> undef to <2 x double>
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %r181 = sitofp <2 x i1> undef to <2 x double>
+  ; CHECK: cost of 4 {{.*}} uitofp
+  %r182 = uitofp <2 x i8> undef to <2 x double>
+  ; CHECK: cost of 4 {{.*}} sitofp
+  %r183 = sitofp <2 x i8> undef to <2 x double>
+  ; CHECK: cost of 3 {{.*}} uitofp
+  %r184 = uitofp <2 x i16> undef to <2 x double>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %r185 = sitofp <2 x i16> undef to <2 x double>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r186 = uitofp <2 x i32> undef to <2 x double>
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r187 = sitofp <2 x i32> undef to <2 x double>
+  ; CHECK: cost of 24 {{.*}} uitofp
+  %r188 = uitofp <2 x i64> undef to <2 x double>
+  ; CHECK: cost of 24 {{.*}} sitofp
+  %r189 = sitofp <2 x i64> undef to <2 x double>
+
+  ; CHECK: cost of 3 {{.*}} uitofp
+  %r190 = uitofp <4 x i1> undef to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %r191 = sitofp <4 x i1> undef to <4 x float>
+  ; CHECK: cost of 3 {{.*}} uitofp
+  %r192 = uitofp <4 x i8> undef to <4 x float>
+  ; CHECK: cost of 3 {{.*}} sitofp
+  %r193 = sitofp <4 x i8> undef to <4 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r194 = uitofp <4 x i16> undef to <4 x float>
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r195 = sitofp <4 x i16> undef to <4 x float>
+  ; CHECK: cost of 1 {{.*}} uitofp
+  %r196 = uitofp <4 x i32> undef to <4 x float>
+  ; CHECK: cost of 1 {{.*}} sitofp
+  %r197 = sitofp <4 x i32> undef to <4 x float>
+  ; CHECK: cost of 48 {{.*}} uitofp
+  %r198 = uitofp <4 x i64> undef to <4 x float>
+  ; CHECK: cost of 48 {{.*}} sitofp
+  %r199 = sitofp <4 x i64> undef to <4 x float>
+
+  ; CHECK: cost of 16 {{.*}} uitofp
+  %r200 = uitofp <4 x i1> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} sitofp
+  %r201 = sitofp <4 x i1> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} uitofp
+  %r202 = uitofp <4 x i8> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} sitofp
+  %r203 = sitofp <4 x i8> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} uitofp
+  %r204 = uitofp <4 x i16> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} sitofp
+  %r205 = sitofp <4 x i16> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} uitofp
+  %r206 = uitofp <4 x i32> undef to <4 x double>
+  ; CHECK: cost of 16 {{.*}} sitofp
+  %r207 = sitofp <4 x i32> undef to <4 x double>
+  ; CHECK: cost of 48 {{.*}} uitofp
+  %r208 = uitofp <4 x i64> undef to <4 x double>
+  ; CHECK: cost of 48 {{.*}} sitofp
+  %r209 = sitofp <4 x i64> undef to <4 x double>
+
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r210 = uitofp <8 x i1> undef to <8 x float>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r211 = sitofp <8 x i1> undef to <8 x float>
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r212 = uitofp <8 x i8> undef to <8 x float>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r213 = sitofp <8 x i8> undef to <8 x float>
+  ; CHECK: cost of 4 {{.*}} uitofp
+  %r214 = uitofp <8 x i16> undef to <8 x float>
+  ; CHECK: cost of 4 {{.*}} sitofp
+  %r215 = sitofp <8 x i16> undef to <8 x float>
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r216 = uitofp <8 x i32> undef to <8 x float>
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r217 = sitofp <8 x i32> undef to <8 x float>
+  ; CHECK: cost of 96 {{.*}} uitofp
+  %r218 = uitofp <8 x i64> undef to <8 x float>
+  ; CHECK: cost of 96 {{.*}} sitofp
+  %r219 = sitofp <8 x i64> undef to <8 x float>
+
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r220 = uitofp <8 x i1> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r221 = sitofp <8 x i1> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r222 = uitofp <8 x i8> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r223 = sitofp <8 x i8> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r224 = uitofp <8 x i16> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r225 = sitofp <8 x i16> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} uitofp
+  %r226 = uitofp <8 x i16> undef to <8 x double>
+  ; CHECK: cost of 32 {{.*}} sitofp
+  %r227 = sitofp <8 x i16> undef to <8 x double>
+  ; CHECK: cost of 96 {{.*}} uitofp
+  %r228 = uitofp <8 x i64> undef to <8 x double>
+  ; CHECK: cost of 96 {{.*}} sitofp
+  %r229 = sitofp <8 x i64> undef to <8 x double>
+
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r230 = uitofp <16 x i1> undef to <16 x float>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r231 = sitofp <16 x i1> undef to <16 x float>
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r232 = uitofp <16 x i8> undef to <16 x float>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r233 = sitofp <16 x i8> undef to <16 x float>
+  ; CHECK: cost of 8 {{.*}} uitofp
+  %r234 = uitofp <16 x i16> undef to <16 x float>
+  ; CHECK: cost of 8 {{.*}} sitofp
+  %r235 = sitofp <16 x i16> undef to <16 x float>
+  ; CHECK: cost of 4 {{.*}} uitofp
+  %r236 = uitofp <16 x i32> undef to <16 x float>
+  ; CHECK: cost of 4 {{.*}} sitofp
+  %r237 = sitofp <16 x i32> undef to <16 x float>
+  ; CHECK: cost of 192 {{.*}} uitofp
+  %r238 = uitofp <16 x i64> undef to <16 x float>
+  ; CHECK: cost of 192 {{.*}} sitofp
+  %r239 = sitofp <16 x i64> undef to <16 x float>
+
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r240 = uitofp <16 x i1> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r241 = sitofp <16 x i1> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r242 = uitofp <16 x i8> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r243 = sitofp <16 x i8> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r244 = uitofp <16 x i16> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r245 = sitofp <16 x i16> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} uitofp
+  %r246 = uitofp <16 x i16> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} sitofp
+  %r247 = sitofp <16 x i16> undef to <16 x double>
+  ; CHECK: cost of 192 {{.*}} uitofp
+  %r248 = uitofp <16 x i64> undef to <16 x double>
+  ; CHECK: cost of 192 {{.*}} sitofp
+  %r249 = sitofp <16 x i64> undef to <16 x double>
+
   ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }