| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck %s |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.rcp |
| ; -------------------------------------------------------------------- |
| |
| declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone |
| declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone |
| |
| ; rcp.f32 on an undef operand; the expected output is the quiet-NaN constant 0x7FF8000000000000. |
| define float @test_constant_fold_rcp_f32_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_rcp_f32_undef( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone |
| ret float %val |
| } |
| |
| ; rcp.f32 of the constant 1.0; the expected output is the folded constant 1.0. |
| define float @test_constant_fold_rcp_f32_1() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_rcp_f32_1( |
| ; CHECK-NEXT: ret float 1.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone |
| ret float %val |
| } |
| |
| ; rcp.f64 of the constant 1.0; the expected output is the folded constant 1.0. |
| define double @test_constant_fold_rcp_f64_1() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_rcp_f64_1( |
| ; CHECK-NEXT: ret double 1.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone |
| ret double %val |
| } |
| |
| ; rcp.f32 of the constant 0.5; the expected output is the folded constant 2.0. |
| define float @test_constant_fold_rcp_f32_half() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_rcp_f32_half( |
| ; CHECK-NEXT: ret float 2.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone |
| ret float %val |
| } |
| |
| ; rcp.f64 of the constant 0.5; the expected output is the folded constant 2.0. |
| define double @test_constant_fold_rcp_f64_half() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_rcp_f64_half( |
| ; CHECK-NEXT: ret double 2.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone |
| ret double %val |
| } |
| |
| ; rcp.f32 of the constant 43.0 (a reciprocal that is not exactly representable); |
| ; the expected output is the folded constant 0x3F97D05F40000000. |
| define float @test_constant_fold_rcp_f32_43() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_rcp_f32_43( |
| ; CHECK-NEXT: ret float 0x3F97D05F40000000 |
| ; |
| %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone |
| ret float %val |
| } |
| |
| ; rcp.f64 of the constant 43.0 (a reciprocal that is not exactly representable); |
| ; the expected output is the folded constant 0x3F97D05F417D05F4. |
| define double @test_constant_fold_rcp_f64_43() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_rcp_f64_43( |
| ; CHECK-NEXT: ret double 0x3F97D05F417D05F4 |
| ; |
| %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone |
| ret double %val |
| } |
| |
| ; rcp.f32 of the constant 43.0 inside a strictfp function with a strictfp call site; |
| ; the expected output keeps the call instead of folding it to a constant. |
| define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp { |
| ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone |
| ret float %val |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.sqrt |
| ; -------------------------------------------------------------------- |
| |
| declare half @llvm.amdgcn.sqrt.f16(half) nounwind readnone |
| declare float @llvm.amdgcn.sqrt.f32(float) nounwind readnone |
| declare double @llvm.amdgcn.sqrt.f64(double) nounwind readnone |
| |
| ; amdgcn.sqrt.f16 on an undef operand; the expected output is the constant 0xH7E00. |
| define half @test_constant_fold_sqrt_f16_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f16_undef( |
| ; CHECK-NEXT: ret half 0xH7E00 |
| ; |
| %val = call half @llvm.amdgcn.sqrt.f16(half undef) nounwind readnone |
| ret half %val |
| } |
| |
| ; amdgcn.sqrt.f32 on an undef operand; the expected output is the quiet-NaN constant 0x7FF8000000000000. |
| define float @test_constant_fold_sqrt_f32_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f32_undef( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.sqrt.f32(float undef) nounwind readnone |
| ret float %val |
| } |
| |
| ; amdgcn.sqrt.f64 on an undef operand; the expected output is the quiet-NaN constant 0x7FF8000000000000. |
| define double @test_constant_fold_sqrt_f64_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f64_undef( |
| ; CHECK-NEXT: ret double 0x7FF8000000000000 |
| ; |
| %val = call double @llvm.amdgcn.sqrt.f64(double undef) nounwind readnone |
| ret double %val |
| } |
| |
| ; amdgcn.sqrt.f16 of the constant 0.0; the expected output is the folded constant 0xH0000. |
| define half @test_constant_fold_sqrt_f16_0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f16_0( |
| ; CHECK-NEXT: ret half 0xH0000 |
| ; |
| %val = call half @llvm.amdgcn.sqrt.f16(half 0.0) nounwind readnone |
| ret half %val |
| } |
| |
| ; amdgcn.sqrt.f32 of the constant 0.0; the expected output keeps the call (no constant fold). |
| define float @test_constant_fold_sqrt_f32_0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f32_0( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15:[0-9]+]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone |
| ret float %val |
| } |
| |
| ; amdgcn.sqrt.f64 of the constant 0.0; the expected output keeps the call (no constant fold). |
| define double @test_constant_fold_sqrt_f64_0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f64_0( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR15]] |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone |
| ret double %val |
| } |
| |
| ; amdgcn.sqrt.f16 of the constant -0.0; the expected output is the folded constant 0xH8000. |
| define half @test_constant_fold_sqrt_f16_neg0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f16_neg0( |
| ; CHECK-NEXT: ret half 0xH8000 |
| ; |
| %val = call half @llvm.amdgcn.sqrt.f16(half -0.0) nounwind readnone |
| ret half %val |
| } |
| |
| ; amdgcn.sqrt.f32 of the constant -0.0; the expected output keeps the call (no constant fold). |
| define float @test_constant_fold_sqrt_f32_neg0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR15]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone |
| ret float %val |
| } |
| |
| ; amdgcn.sqrt.f64 of the constant -0.0; the expected output keeps the call (no constant fold). |
| define double @test_constant_fold_sqrt_f64_neg0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR15]] |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone |
| ret double %val |
| } |
| |
| ; amdgcn.sqrt.f64 of the signaling-NaN bit pattern 0x7FF0000000000001; |
| ; the expected output keeps the call unchanged. |
| define double @test_constant_fold_sqrt_snan_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_snan_f64( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0x7FF0000000000001) |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.sqrt.f64(double 0x7FF0000000000001) |
| ret double %val |
| } |
| |
| ; amdgcn.sqrt.f64 of the quiet-NaN bit pattern 0x7FF8000000000000; |
| ; the expected output keeps the call unchanged. |
| define double @test_constant_fold_sqrt_qnan_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_qnan_f64( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0x7FF8000000000000) |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.sqrt.f64(double 0x7FF8000000000000) |
| ret double %val |
| } |
| |
| ; amdgcn.sqrt.f64 of the constant -1.0; the expected output keeps the call unchanged. |
| define double @test_constant_fold_sqrt_neg1() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_sqrt_neg1( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -1.000000e+00) |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.sqrt.f64(double -1.0) |
| ret double %val |
| } |
| |
| ; amdgcn.sqrt.f16 on a non-constant argument; the expected output replaces it |
| ; with a call to the generic llvm.sqrt.f16 intrinsic. |
| define half @test_amdgcn_sqrt_f16(half %arg) { |
| ; CHECK-LABEL: @test_amdgcn_sqrt_f16( |
| ; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.sqrt.f16(half [[ARG:%.*]]) |
| ; CHECK-NEXT: ret half [[VAL]] |
| ; |
| %val = call half @llvm.amdgcn.sqrt.f16(half %arg) |
| ret half %val |
| } |
| |
| ; Same as the plain f16 case but with an nnan flag on the call; the expected |
| ; output is a call to llvm.sqrt.f16 that still carries the nnan flag. |
| define half @test_amdgcn_sqrt_f16_flags(half %arg) { |
| ; CHECK-LABEL: @test_amdgcn_sqrt_f16_flags( |
| ; CHECK-NEXT: [[VAL:%.*]] = call nnan half @llvm.sqrt.f16(half [[ARG:%.*]]) |
| ; CHECK-NEXT: ret half [[VAL]] |
| ; |
| %val = call nnan half @llvm.amdgcn.sqrt.f16(half %arg) |
| ret half %val |
| } |
| |
| ; amdgcn.sqrt.f32 on a non-constant argument; the expected output keeps the |
| ; target intrinsic call as written. |
| define float @test_amdgcn_sqrt_f32(float %arg) { |
| ; CHECK-LABEL: @test_amdgcn_sqrt_f32( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[ARG:%.*]]) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.sqrt.f32(float %arg) |
| ret float %val |
| } |
| |
| ; amdgcn.sqrt.f64 on a non-constant argument; the expected output keeps the |
| ; target intrinsic call as written. |
| define double @test_amdgcn_sqrt_f64(double %arg) { |
| ; CHECK-LABEL: @test_amdgcn_sqrt_f64( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double [[ARG:%.*]]) |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.sqrt.f64(double %arg) |
| ret double %val |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.rsq |
| ; -------------------------------------------------------------------- |
| |
| declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone |
| |
| ; rsq.f32 on an undef operand; the expected output is the quiet-NaN constant 0x7FF8000000000000. |
| define float @test_constant_fold_rsq_f32_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_rsq_f32_undef( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone |
| ret float %val |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.frexp.mant |
| ; -------------------------------------------------------------------- |
| |
| declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone |
| declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone |
| |
| |
| ; frexp.mant.f32 on an undef operand; the expected output returns undef. |
| define float @test_constant_fold_frexp_mant_f32_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef( |
| ; CHECK-NEXT: ret float undef |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float undef) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 on an undef operand; the expected output returns undef. |
| define double @test_constant_fold_frexp_mant_f64_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef( |
| ; CHECK-NEXT: ret double undef |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double undef) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of +0.0; the expected output is the folded constant +0.0. |
| define float @test_constant_fold_frexp_mant_f32_0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0( |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of +0.0; the expected output is the folded constant +0.0. |
| define double @test_constant_fold_frexp_mant_f64_0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of -0.0; the expected output is the folded constant -0.0 (sign kept). |
| define float @test_constant_fold_frexp_mant_f32_n0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0( |
| ; CHECK-NEXT: ret float -0.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of -0.0; the expected output is the folded constant -0.0 (sign kept). |
| define double @test_constant_fold_frexp_mant_f64_n0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0( |
| ; CHECK-NEXT: ret double -0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of 1.0; the expected output is the folded mantissa 0.5. |
| define float @test_constant_fold_frexp_mant_f32_1() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1( |
| ; CHECK-NEXT: ret float 5.000000e-01 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of 1.0; the expected output is the folded mantissa 0.5. |
| define double @test_constant_fold_frexp_mant_f64_1() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1( |
| ; CHECK-NEXT: ret double 5.000000e-01 |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of -1.0; the expected output is the folded mantissa -0.5. |
| define float @test_constant_fold_frexp_mant_f32_n1() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1( |
| ; CHECK-NEXT: ret float -5.000000e-01 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of -1.0; the expected output is the folded mantissa -0.5. |
| define double @test_constant_fold_frexp_mant_f64_n1() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1( |
| ; CHECK-NEXT: ret double -5.000000e-01 |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of a quiet NaN (0x7FF8000000000000); the expected output |
| ; returns the same NaN constant. |
| define float @test_constant_fold_frexp_mant_f32_nan() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of a quiet NaN (0x7FF8000000000000); the expected output |
| ; returns the same NaN constant. |
| define double @test_constant_fold_frexp_mant_f64_nan() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan( |
| ; CHECK-NEXT: ret double 0x7FF8000000000000 |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of +infinity (0x7FF0000000000000); the expected output |
| ; returns the same infinity constant. |
| define float @test_constant_fold_frexp_mant_f32_inf() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf( |
| ; CHECK-NEXT: ret float 0x7FF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of +infinity (0x7FF0000000000000); the expected output |
| ; returns the same infinity constant. |
| define double @test_constant_fold_frexp_mant_f64_inf() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf( |
| ; CHECK-NEXT: ret double 0x7FF0000000000000 |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of -infinity (0xFFF0000000000000); the expected output |
| ; returns the same negative-infinity constant. |
| define float @test_constant_fold_frexp_mant_f32_ninf() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf( |
| ; CHECK-NEXT: ret float 0xFFF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of -infinity (0xFFF0000000000000); the expected output |
| ; returns the same negative-infinity constant. |
| define double @test_constant_fold_frexp_mant_f64_ninf() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf( |
| ; CHECK-NEXT: ret double 0xFFF0000000000000 |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of 0x47EFFFFFE0000000 (the "max_num" input per the test name); |
| ; the expected output is the folded mantissa 0x3FEFFFFFE0000000. |
| define float @test_constant_fold_frexp_mant_f32_max_num() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num( |
| ; CHECK-NEXT: ret float 0x3FEFFFFFE0000000 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of 0x7FEFFFFFFFFFFFFF (the "max_num" input per the test name); |
| ; the expected output is the folded mantissa 0x3FEFFFFFFFFFFFFF. |
| define double @test_constant_fold_frexp_mant_f64_max_num() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num( |
| ; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF) |
| ret double %val |
| } |
| |
| ; frexp.mant.f32 of 0x36A0000000000000 (the "min_num" input per the test name); |
| ; the expected output is the folded mantissa 0.5. |
| define float @test_constant_fold_frexp_mant_f32_min_num() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num( |
| ; CHECK-NEXT: ret float 5.000000e-01 |
| ; |
| %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000) |
| ret float %val |
| } |
| |
| ; frexp.mant.f64 of 4.940656e-324 (the "min_num" input per the test name); |
| ; the expected output is the folded mantissa 0.5. |
| define double @test_constant_fold_frexp_mant_f64_min_num() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num( |
| ; CHECK-NEXT: ret double 5.000000e-01 |
| ; |
| %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324) |
| ret double %val |
| } |
| |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.frexp.exp |
| ; -------------------------------------------------------------------- |
| |
| declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone |
| declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone |
| |
| ; frexp.exp.f32 on an undef operand; the expected output returns undef. |
| define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef( |
| ; CHECK-NEXT: ret i32 undef |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 on an undef operand; the expected output returns undef. |
| define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef( |
| ; CHECK-NEXT: ret i32 undef |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of +0.0; the expected output is the folded exponent 0. |
| define i32 @test_constant_fold_frexp_exp_f32_0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of +0.0; the expected output is the folded exponent 0. |
| define i32 @test_constant_fold_frexp_exp_f64_0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of -0.0; the expected output is the folded exponent 0. |
| define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of -0.0; the expected output is the folded exponent 0. |
| define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of 1024.0; the expected output is the folded exponent 11. |
| define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024( |
| ; CHECK-NEXT: ret i32 11 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of 1024.0; the expected output is the folded exponent 11. |
| define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024( |
| ; CHECK-NEXT: ret i32 11 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of -1024.0; the expected output is the folded exponent 11 |
| ; (same as for +1024.0). |
| define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024( |
| ; CHECK-NEXT: ret i32 11 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of -1024.0; the expected output is the folded exponent 11 |
| ; (same as for +1024.0). |
| define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024( |
| ; CHECK-NEXT: ret i32 11 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of 0.0009765625 (1/1024); the expected output is the folded exponent -9. |
| define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024( |
| ; CHECK-NEXT: ret i32 -9 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of 0.0009765625 (1/1024); the expected output is the folded exponent -9. |
| define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024( |
| ; CHECK-NEXT: ret i32 -9 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of a quiet NaN (0x7FF8000000000000); the expected output is 0. |
| define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of a quiet NaN (0x7FF8000000000000); the expected output is 0. |
| define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of +infinity (0x7FF0000000000000); the expected output is 0. |
| define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of +infinity (0x7FF0000000000000); the expected output is 0. |
| define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of -infinity (0xFFF0000000000000); the expected output is 0. |
| define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of -infinity (0xFFF0000000000000); the expected output is 0. |
| define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of 0x47EFFFFFE0000000 (the "max_num" input per the test name); |
| ; the expected output is the folded exponent 128. |
| define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num( |
| ; CHECK-NEXT: ret i32 128 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of 0x7FEFFFFFFFFFFFFF (the "max_num" input per the test name); |
| ; the expected output is the folded exponent 1024. |
| define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num( |
| ; CHECK-NEXT: ret i32 1024 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f32 of 0x36A0000000000000 (the "min_num" input per the test name); |
| ; the expected output is the folded exponent -148. |
| define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num( |
| ; CHECK-NEXT: ret i32 -148 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000) |
| ret i32 %val |
| } |
| |
| ; frexp.exp.f64 of 4.940656e-324 (the "min_num" input per the test name); |
| ; the expected output is the folded exponent -1073. |
| define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind { |
| ; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num( |
| ; CHECK-NEXT: ret i32 -1073 |
| ; |
| %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324) |
| ret i32 %val |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.class |
| ; -------------------------------------------------------------------- |
| |
| declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone |
| declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone |
| |
| ; class.f32 with a variable value and an undef mask; the expected output is false. |
| ; The call has the same shape as the one in test_class_val_undef_f32. |
| define i1 @test_class_undef_mask_f32(float %x) nounwind { |
| ; CHECK-LABEL: @test_class_undef_mask_f32( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef) |
| ret i1 %val |
| } |
| |
| ; class.f32 with both the value and the mask poison; the expected output is poison. |
| ; The %x parameter is not used by the body. |
| define i1 @test_class_poison_poison_f32(float %x) nounwind { |
| ; CHECK-LABEL: @test_class_poison_poison_f32( |
| ; CHECK-NEXT: ret i1 poison |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float poison, i32 poison) |
| ret i1 %val |
| } |
| ; class.f32 with a variable value and a poison mask; the expected output is poison. |
| define i1 @test_class_val_poison_f32(float %arg) nounwind { |
| ; CHECK-LABEL: @test_class_val_poison_f32( |
| ; CHECK-NEXT: ret i1 poison |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %arg, i32 poison) |
| ret i1 %val |
| } |
| |
| ; class.f32 with a poison value and a variable mask; the expected output is poison. |
| define i1 @test_class_poison_val_f32(i32 %arg) nounwind { |
| ; CHECK-LABEL: @test_class_poison_val_f32( |
| ; CHECK-NEXT: ret i1 poison |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float poison, i32 %arg) |
| ret i1 %val |
| } |
| |
| ; class.f32 with mask 1025, which sets a bit above the full mask 1023 used |
| ; elsewhere in this file; the expected output is llvm.is.fpclass with only |
| ; the low bit (mask 1) remaining. |
| define i1 @test_class_over_max_mask_f32(float %x) nounwind { |
| ; CHECK-LABEL: @test_class_over_max_mask_f32( |
| ; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 1) |
| ; CHECK-NEXT: ret i1 [[VAL]] |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025) |
| ret i1 %val |
| } |
| |
| ; class.f32 with an empty mask (0); the expected output is false for any value. |
| define i1 @test_class_no_mask_f32(float %x) nounwind { |
| ; CHECK-LABEL: @test_class_no_mask_f32( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0) |
| ret i1 %val |
| } |
| |
| ; class.f32 with mask 1023 (all ten low bits set); the expected output is true |
| ; for any value. |
| define i1 @test_class_full_mask_f32(float %x) nounwind { |
| ; CHECK-LABEL: @test_class_full_mask_f32( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023) |
| ret i1 %val |
| } |
| |
| ; class.f32 with an undef value and an empty mask (0); the expected output is false. |
| define i1 @test_class_undef_no_mask_f32() nounwind { |
| ; CHECK-LABEL: @test_class_undef_no_mask_f32( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0) |
| ret i1 %val |
| } |
| |
| ; class.f32 with an undef value and the full mask 1023; the expected output is true. |
| define i1 @test_class_undef_full_mask_f32() nounwind { |
| ; CHECK-LABEL: @test_class_undef_full_mask_f32( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023) |
| ret i1 %val |
| } |
| |
| ; class.f32 with an undef value and the partial constant mask 4; the expected |
| ; output is undef. |
| define i1 @test_class_undef_val_f32() nounwind { |
| ; CHECK-LABEL: @test_class_undef_val_f32( |
| ; CHECK-NEXT: ret i1 undef |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4) |
| ret i1 %val |
| } |
| |
| ; class.f32 with an undef value and a variable mask; the expected output |
| ; reduces the call to a test of whether the mask is nonzero. |
| define i1 @test_class_undef_val_f32_var(i32 %arg) nounwind { |
| ; CHECK-LABEL: @test_class_undef_val_f32_var( |
| ; CHECK-NEXT: [[VAL:%.*]] = icmp ne i32 [[ARG:%.*]], 0 |
| ; CHECK-NEXT: ret i1 [[VAL]] |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 %arg) |
| ret i1 %val |
| } |
| |
| ; class.f32 with a variable value and an undef mask; the expected output is false. |
| define i1 @test_class_val_undef_f32(float %arg) nounwind { |
| ; CHECK-LABEL: @test_class_val_undef_f32( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %arg, i32 undef) |
| ret i1 %val |
| } |
| |
| ; class.f32 with both the value and the mask undef; the expected output is false. |
| define i1 @test_class_undef_undef_f32() nounwind { |
| ; CHECK-LABEL: @test_class_undef_undef_f32( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef) |
| ret i1 %val |
| } |
| |
| ; class.f32 with both the value and the mask variable; the expected output |
| ; keeps the target intrinsic call as written. |
| define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind { |
| ; CHECK-LABEL: @test_class_var_mask_f32( |
| ; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 [[MASK:%.*]]) |
| ; CHECK-NEXT: ret i1 [[VAL]] |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask) |
| ret i1 %val |
| } |
| |
| ; class.f32 with mask 3 (the two NaN bits, 1 and 2); the expected output is an |
| ; unordered floating-point compare of %x against 0.0. |
| define i1 @test_class_isnan_f32(float %x) nounwind { |
| ; CHECK-LABEL: @test_class_isnan_f32( |
| ; CHECK-NEXT: [[VAL:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00 |
| ; CHECK-NEXT: ret i1 [[VAL]] |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) |
| ret i1 %val |
| } |
| |
| ; strictfp variant of the mask-3 NaN test; the expected output is a call to |
| ; llvm.is.fpclass with mask 3 rather than an fcmp. |
| define i1 @test_class_isnan_f32_strict(float %x) nounwind strictfp { |
| ; CHECK-LABEL: @test_class_isnan_f32_strict( |
| ; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR16:[0-9]+]] |
| ; CHECK-NEXT: ret i1 [[VAL]] |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp |
| ret i1 %val |
| } |
| |
| ; class.f32 with mask 96 (the two zero bits, 32 and 64); the expected output is |
| ; an ordered-equal floating-point compare of %x against 0.0. |
| define i1 @test_class_is_p0_n0_f32(float %x) nounwind { |
| ; CHECK-LABEL: @test_class_is_p0_n0_f32( |
| ; CHECK-NEXT: [[VAL:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00 |
| ; CHECK-NEXT: ret i1 [[VAL]] |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) |
| ret i1 %val |
| } |
| |
| ; strictfp variant of the mask-96 zero test; the expected output is a call to |
| ; llvm.is.fpclass with mask 96 rather than an fcmp. |
| define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind strictfp { |
| ; CHECK-LABEL: @test_class_is_p0_n0_f32_strict( |
| ; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR16]] |
| ; CHECK-NEXT: ret i1 [[VAL]] |
| ; |
| %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp |
| ret i1 %val |
| } |
| |
| ; Signaling-NaN constant tested against mask 1 (the snan bit); expected to fold to true. |
| define i1 @test_constant_class_snan_test_snan_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_snan_test_snan_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1) |
| ret i1 %val |
| } |
| |
| ; Quiet-NaN constant tested against mask 2 (the qnan bit); expected to fold to true. |
| define i1 @test_constant_class_qnan_test_qnan_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2) |
| ret i1 %val |
| } |
| |
| ; Quiet-NaN constant tested against mask 1 (the snan bit); expected to fold to false. |
| define i1 @test_constant_class_qnan_test_snan_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1) |
| ret i1 %val |
| } |
| |
| ; Negative-infinity constant tested against mask 4 (the ninf bit); expected to fold to true. |
| define i1 @test_constant_class_ninf_test_ninf_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4) |
| ret i1 %val |
| } |
| |
| ; Positive-infinity constant tested against mask 4 (the ninf bit); expected to fold to false. |
| define i1 @test_constant_class_pinf_test_ninf_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4) |
| ret i1 %val |
| } |
| |
| ; Quiet-NaN constant tested against mask 4 (the ninf bit); expected to fold to false. |
| define i1 @test_constant_class_qnan_test_ninf_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4) |
| ret i1 %val |
| } |
| |
| ; Signaling-NaN constant tested against mask 4 (the ninf bit); expected to fold to false. |
| define i1 @test_constant_class_snan_test_ninf_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4) |
| ret i1 %val |
| } |
| |
| ; Negative normal constant (-1.0) tested against mask 8 (the nnormal bit); expected to fold to true. |
| define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8) |
| ret i1 %val |
| } |
| |
| ; Positive normal constant (1.0) tested against mask 8 (the nnormal bit); expected to fold to false. |
| define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8) |
| ret i1 %val |
| } |
| |
| ; Negative subnormal constant (0x800fffffffffffff) tested against mask 16 |
| ; (the nsubnormal bit); expected to fold to true. |
| define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16) |
| ret i1 %val |
| } |
| |
| ; Positive subnormal constant (0x000fffffffffffff) tested against mask 16 |
| ; (the nsubnormal bit); expected to fold to false. |
| define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16) |
| ret i1 %val |
| } |
| |
| ; Negative zero tested against mask 32 (the nzero bit); expected to fold to true. |
| define i1 @test_constant_class_nzero_test_nzero_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32) |
| ret i1 %val |
| } |
| |
| ; Positive zero tested against mask 32 (the nzero bit); expected to fold to false. |
| define i1 @test_constant_class_pzero_test_nzero_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32) |
| ret i1 %val |
| } |
| |
| ; Positive zero tested against mask 64 (the pzero bit); expected to fold to true. |
| define i1 @test_constant_class_pzero_test_pzero_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64) |
| ret i1 %val |
| } |
| |
| ; Negative zero tested against mask 64 (the pzero bit); expected to fold to false. |
| define i1 @test_constant_class_nzero_test_pzero_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64) |
| ret i1 %val |
| } |
| |
| ; Positive subnormal constant (0x000fffffffffffff) tested against mask 128 |
| ; (the psubnormal bit); expected to fold to true. |
| define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128) |
| ret i1 %val |
| } |
| |
| ; Negative subnormal constant (0x800fffffffffffff) tested against mask 128 |
| ; (the psubnormal bit); expected to fold to false. |
| define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128) |
| ret i1 %val |
| } |
| |
| ; Positive normal constant (1.0) tested against mask 256 (the pnormal bit); expected to fold to true. |
| define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256) |
| ret i1 %val |
| } |
| |
| ; Negative normal constant (-1.0) tested against mask 256 (the pnormal bit); expected to fold to false. |
| define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256) |
| ret i1 %val |
| } |
| |
| ; Positive-infinity constant tested against mask 512 (the pinf bit); expected to fold to true. |
| define i1 @test_constant_class_pinf_test_pinf_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64( |
| ; CHECK-NEXT: ret i1 true |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512) |
| ret i1 %val |
| } |
| |
| ; Negative-infinity constant tested against mask 512 (the pinf bit); expected to fold to false. |
| define i1 @test_constant_class_ninf_test_pinf_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512) |
| ret i1 %val |
| } |
| |
| ; Quiet-NaN constant tested against mask 512 (the pinf bit); expected to fold to false. |
| define i1 @test_constant_class_qnan_test_pinf_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512) |
| ret i1 %val |
| } |
| |
| ; Signaling-NaN constant tested against mask 512 (the pinf bit); expected to fold to false. |
| define i1 @test_constant_class_snan_test_pinf_f64() nounwind { |
| ; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512) |
| ret i1 %val |
| } |
| |
| ; The classified value comes from an fadd carrying the nnan flag and the mask |
| ; is 1 (snan only); the expected output is false with the fadd removed. |
| define i1 @test_class_is_snan_nnan_src(float %x) { |
| ; CHECK-LABEL: @test_class_is_snan_nnan_src( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %nnan = fadd nnan float %x, 1.0 |
| %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 1) |
| ret i1 %class |
| } |
| |
| ; The classified operand comes from an `fadd nnan`. With class mask 2 the
| ; whole body is expected to collapse to `ret i1 false`.
| define i1 @test_class_is_qnan_nnan_src(float %x) {
| ; CHECK-LABEL: @test_class_is_qnan_nnan_src(
| ; CHECK-NEXT: ret i1 false
| ;
| %nnan = fadd nnan float %x, 1.0
| %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 2)
| ret i1 %class
| }
| |
| ; The classified operand comes from an `fadd nnan`. With class mask 3
| ; (bits 1 and 2 together) the whole body is expected to collapse to
| ; `ret i1 false`.
| define i1 @test_class_is_nan_nnan_src(float %x) {
| ; CHECK-LABEL: @test_class_is_nan_nnan_src(
| ; CHECK-NEXT: ret i1 false
| ;
| %nnan = fadd nnan float %x, 1.0
| %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 3)
| ret i1 %class
| }
| |
| ; The classified operand comes from an `fadd nnan` and the mask is 267
| ; (256 + 8 + 2 + 1). The call cannot fold to a constant here; the expected
| ; output keeps the fadd and replaces the call with llvm.is.fpclass.f32 using
| ; mask 264, i.e. 267 with the two low bits (1 and 2) cleared.
| define i1 @test_class_is_nan_other_nnan_src(float %x) {
| ; CHECK-LABEL: @test_class_is_nan_other_nnan_src(
| ; CHECK-NEXT: [[NNAN:%.*]] = fadd nnan float [[X:%.*]], 1.000000e+00
| ; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[NNAN]], i32 264)
| ; CHECK-NEXT: ret i1 [[CLASS]]
| ;
| %nnan = fadd nnan float %x, 1.0
| %class = call i1 @llvm.amdgcn.class.f32(float %nnan, i32 267)
| ret i1 %class
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.cos |
| ; -------------------------------------------------------------------- |
| declare float @llvm.amdgcn.cos.f32(float) nounwind readnone |
| declare float @llvm.fabs.f32(float) nounwind readnone |
| |
| ; The cos operand is negated using the `fsub -0.0, %x` spelling. The
| ; negation is expected to be dropped so that cos is applied directly to %x.
| define float @cos_fneg_f32(float %x) {
| ; CHECK-LABEL: @cos_fneg_f32(
| ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
| ; CHECK-NEXT: ret float [[COS]]
| ;
| %x.fneg = fsub float -0.0, %x
| %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
| ret float %cos
| }
| |
| ; Same as the fsub-based negate case but using the unary `fneg`
| ; instruction; the negation is expected to be dropped from the cos operand.
| define float @cos_unary_fneg_f32(float %x) {
| ; CHECK-LABEL: @cos_unary_fneg_f32(
| ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
| ; CHECK-NEXT: ret float [[COS]]
| ;
| %x.fneg = fneg float %x
| %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
| ret float %cos
| }
| |
| ; The cos operand is wrapped in llvm.fabs; the fabs call is expected to be
| ; dropped so that cos is applied directly to %x.
| define float @cos_fabs_f32(float %x) {
| ; CHECK-LABEL: @cos_fabs_f32(
| ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
| ; CHECK-NEXT: ret float [[COS]]
| ;
| %x.fabs = call float @llvm.fabs.f32(float %x)
| %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
| ret float %cos
| }
| |
| ; The cos operand is fabs followed by an `fsub -0.0, ...` negate. Both
| ; wrappers are expected to be dropped, leaving cos applied directly to %x.
| define float @cos_fabs_fneg_f32(float %x) {
| ; CHECK-LABEL: @cos_fabs_fneg_f32(
| ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
| ; CHECK-NEXT: ret float [[COS]]
| ;
| %x.fabs = call float @llvm.fabs.f32(float %x)
| %x.fabs.fneg = fsub float -0.0, %x.fabs
| %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
| ret float %cos
| }
| |
| ; The cos operand is fabs followed by a unary `fneg`. Both wrappers are
| ; expected to be dropped, leaving cos applied directly to %x.
| define float @cos_fabs_unary_fneg_f32(float %x) {
| ; CHECK-LABEL: @cos_fabs_unary_fneg_f32(
| ; CHECK-NEXT: [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
| ; CHECK-NEXT: ret float [[COS]]
| ;
| %x.fabs = call float @llvm.fabs.f32(float %x)
| %x.fabs.fneg = fneg float %x.fabs
| %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
| ret float %cos
| }
| |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.sin |
| ; -------------------------------------------------------------------- |
| declare float @llvm.amdgcn.sin.f32(float) nounwind readnone |
| |
| ; The sin operand is negated. Unlike the cos tests the negation is not
| ; dropped: the expected output calls sin on %x and applies `fneg` to the
| ; result instead.
| define float @sin_fneg_f32(float %x) {
| ; CHECK-LABEL: @sin_fneg_f32(
| ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sin.f32(float [[X:%.*]])
| ; CHECK-NEXT: [[SIN:%.*]] = fneg float [[TMP1]]
| ; CHECK-NEXT: ret float [[SIN]]
| ;
| %x.fneg = fneg float %x
| %sin = call float @llvm.amdgcn.sin.f32(float %x.fneg)
| ret float %sin
| }
| |
| ; Negative test: a fabs on the sin operand is expected to be left in place;
| ; the output is the same fabs + sin sequence as the input.
| define float @sin_fabs_f32(float %x) {
| ; CHECK-LABEL: @sin_fabs_f32(
| ; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
| ; CHECK-NEXT: [[SIN:%.*]] = call float @llvm.amdgcn.sin.f32(float [[X_FABS]])
| ; CHECK-NEXT: ret float [[SIN]]
| ;
| %x.fabs = call float @llvm.fabs.f32(float %x)
| %sin = call float @llvm.amdgcn.sin.f32(float %x.fabs)
| ret float %sin
| }
| |
| ; The sin operand is fabs followed by fneg. The fabs is expected to stay on
| ; the operand while the fneg is moved to the result of the sin call.
| define float @sin_fabs_fneg_f32(float %x) {
| ; CHECK-LABEL: @sin_fabs_fneg_f32(
| ; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
| ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sin.f32(float [[X_FABS]])
| ; CHECK-NEXT: [[SIN:%.*]] = fneg float [[TMP1]]
| ; CHECK-NEXT: ret float [[SIN]]
| ;
| %x.fabs = call float @llvm.fabs.f32(float %x)
| %x.fabs.fneg = fneg float %x.fabs
| %sin = call float @llvm.amdgcn.sin.f32(float %x.fabs.fneg)
| ret float %sin
| }
| |
| ; Fast-math variant of the fabs + fneg case. In the input the fabs and sin
| ; calls carry `fast` while the operand fneg has no flags. The expected output
| ; keeps `fast` on the fabs and sin calls, and the fneg that is moved onto
| ; the sin result is also marked `fast`.
| define float @sin_fabs_fneg_fast_f32(float %x) {
| ; CHECK-LABEL: @sin_fabs_fneg_fast_f32(
| ; CHECK-NEXT: [[X_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
| ; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.amdgcn.sin.f32(float [[X_FABS]])
| ; CHECK-NEXT: [[SIN:%.*]] = fneg fast float [[TMP1]]
| ; CHECK-NEXT: ret float [[SIN]]
| ;
| %x.fabs = call fast float @llvm.fabs.f32(float %x)
| %x.fabs.fneg = fneg float %x.fabs
| %sin = call fast float @llvm.amdgcn.sin.f32(float %x.fabs.fneg)
| ret float %sin
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.cvt.pkrtz |
| ; -------------------------------------------------------------------- |
| |
| declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone |
| |
| ; Baseline: both cvt.pkrtz operands are function arguments, so the call is
| ; expected to be left unchanged.
| define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
| ; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float [[Y:%.*]])
| ; CHECK-NEXT: ret <2 x half> [[CVT]]
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
| ret <2 x half> %cvt
| }
| |
| ; Only the first operand is a constant (0.0); with a variable second operand
| ; the call is expected to be left unchanged.
| define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
| ; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float [[Y:%.*]])
| ; CHECK-NEXT: ret <2 x half> [[CVT]]
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
| ret <2 x half> %cvt
| }
| |
| ; Only the second operand is a constant (0.0); with a variable first operand
| ; the call is expected to be left unchanged.
| define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
| ; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float 0.000000e+00)
| ; CHECK-NEXT: ret <2 x half> [[CVT]]
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
| ret <2 x half> %cvt
| }
| |
| ; Only the first operand is undef; the call is expected to be left unchanged
| ; (a single undef operand does not fold the result).
| define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
| ; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float [[Y:%.*]])
| ; CHECK-NEXT: ret <2 x half> [[CVT]]
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
| ret <2 x half> %cvt
| }
| |
| ; Only the second operand is undef; the call is expected to be left
| ; unchanged.
| define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
| ; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[X:%.*]], float undef)
| ; CHECK-NEXT: ret <2 x half> [[CVT]]
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
| ret <2 x half> %cvt
| }
| |
| ; Both operands are undef; the call is expected to fold to an undef
| ; <2 x half>.
| define <2 x half> @undef_cvt_pkrtz() {
| ; CHECK-LABEL: @undef_cvt_pkrtz(
| ; CHECK-NEXT: ret <2 x half> undef
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
| ret <2 x half> %cvt
| }
| |
| ; Both operands are the constant 0.0; the call is expected to fold to
| ; zeroinitializer.
| define <2 x half> @constant_splat0_cvt_pkrtz() {
| ; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
| ; CHECK-NEXT: ret <2 x half> zeroinitializer
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
| ret <2 x half> %cvt
| }
| |
| ; Both operands are constants that are exactly representable in half
| ; (2.0 and 4.0); the call is expected to fold to the vector
| ; <0xH4000, 0xH4400>, the half encodings of 2.0 and 4.0.
| define <2 x half> @constant_cvt_pkrtz() {
| ; CHECK-LABEL: @constant_cvt_pkrtz(
| ; CHECK-NEXT: ret <2 x half> <half 0xH4000, half 0xH4400>
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
| ret <2 x half> %cvt
| }
| |
| ; Test constant values where rtz changes result
| ; 65535.0 is not representable in half. The expected fold yields 0xH7BFF in
| ; both lanes, which is the largest finite half value (65504), rather than
| ; infinity.
| define <2 x half> @constant_rtz_pkrtz() {
| ; CHECK-LABEL: @constant_rtz_pkrtz(
| ; CHECK-NEXT: ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
| ;
| %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
| ret <2 x half> %cvt
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.cvt.pknorm.i16 |
| ; -------------------------------------------------------------------- |
| |
| declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) nounwind readnone |
| |
| ; Only the first cvt.pknorm.i16 operand is undef; the call is expected to be
| ; left unchanged.
| define <2 x i16> @undef_lhs_cvt_pknorm_i16(float %y) {
| ; CHECK-LABEL: @undef_lhs_cvt_pknorm_i16(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float [[Y:%.*]])
| ; CHECK-NEXT: ret <2 x i16> [[CVT]]
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
| ret <2 x i16> %cvt
| }
| |
| ; Only the second cvt.pknorm.i16 operand is undef; the call is expected to
| ; be left unchanged.
| define <2 x i16> @undef_rhs_cvt_pknorm_i16(float %x) {
| ; CHECK-LABEL: @undef_rhs_cvt_pknorm_i16(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float [[X:%.*]], float undef)
| ; CHECK-NEXT: ret <2 x i16> [[CVT]]
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
| ret <2 x i16> %cvt
| }
| |
| ; Both cvt.pknorm.i16 operands are undef; the call is expected to fold to an
| ; undef <2 x i16>.
| define <2 x i16> @undef_cvt_pknorm_i16() {
| ; CHECK-LABEL: @undef_cvt_pknorm_i16(
| ; CHECK-NEXT: ret <2 x i16> undef
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float undef)
| ret <2 x i16> %cvt
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.cvt.pknorm.u16 |
| ; -------------------------------------------------------------------- |
| |
| declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) nounwind readnone |
| |
| ; Only the first cvt.pknorm.u16 operand is undef; the call is expected to be
| ; left unchanged.
| define <2 x i16> @undef_lhs_cvt_pknorm_u16(float %y) {
| ; CHECK-LABEL: @undef_lhs_cvt_pknorm_u16(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float [[Y:%.*]])
| ; CHECK-NEXT: ret <2 x i16> [[CVT]]
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
| ret <2 x i16> %cvt
| }
| |
| ; Only the second cvt.pknorm.u16 operand is undef; the call is expected to
| ; be left unchanged.
| define <2 x i16> @undef_rhs_cvt_pknorm_u16(float %x) {
| ; CHECK-LABEL: @undef_rhs_cvt_pknorm_u16(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float [[X:%.*]], float undef)
| ; CHECK-NEXT: ret <2 x i16> [[CVT]]
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
| ret <2 x i16> %cvt
| }
| |
| ; Both cvt.pknorm.u16 operands are undef; the call is expected to fold to an
| ; undef <2 x i16>.
| define <2 x i16> @undef_cvt_pknorm_u16() {
| ; CHECK-LABEL: @undef_cvt_pknorm_u16(
| ; CHECK-NEXT: ret <2 x i16> undef
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float undef)
| ret <2 x i16> %cvt
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.cvt.pk.i16 |
| ; -------------------------------------------------------------------- |
| |
| declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) nounwind readnone |
| |
| ; Only the first cvt.pk.i16 operand is undef; the call is expected to be
| ; left unchanged.
| define <2 x i16> @undef_lhs_cvt_pk_i16(i32 %y) {
| ; CHECK-LABEL: @undef_lhs_cvt_pk_i16(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 [[Y:%.*]])
| ; CHECK-NEXT: ret <2 x i16> [[CVT]]
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
| ret <2 x i16> %cvt
| }
| |
| ; Only the second cvt.pk.i16 operand is undef; the call is expected to be
| ; left unchanged.
| define <2 x i16> @undef_rhs_cvt_pk_i16(i32 %x) {
| ; CHECK-LABEL: @undef_rhs_cvt_pk_i16(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 [[X:%.*]], i32 undef)
| ; CHECK-NEXT: ret <2 x i16> [[CVT]]
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
| ret <2 x i16> %cvt
| }
| |
| ; Both cvt.pk.i16 operands are undef; the call is expected to fold to an
| ; undef <2 x i16>.
| define <2 x i16> @undef_cvt_pk_i16() {
| ; CHECK-LABEL: @undef_cvt_pk_i16(
| ; CHECK-NEXT: ret <2 x i16> undef
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 undef)
| ret <2 x i16> %cvt
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.cvt.pk.u16 |
| ; -------------------------------------------------------------------- |
| |
| declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) nounwind readnone |
| |
| ; Only the first cvt.pk.u16 operand is undef; the call is expected to be
| ; left unchanged.
| define <2 x i16> @undef_lhs_cvt_pk_u16(i32 %y) {
| ; CHECK-LABEL: @undef_lhs_cvt_pk_u16(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 [[Y:%.*]])
| ; CHECK-NEXT: ret <2 x i16> [[CVT]]
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
| ret <2 x i16> %cvt
| }
| |
| ; Only the second cvt.pk.u16 operand is undef; the call is expected to be
| ; left unchanged.
| define <2 x i16> @undef_rhs_cvt_pk_u16(i32 %x) {
| ; CHECK-LABEL: @undef_rhs_cvt_pk_u16(
| ; CHECK-NEXT: [[CVT:%.*]] = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 [[X:%.*]], i32 undef)
| ; CHECK-NEXT: ret <2 x i16> [[CVT]]
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
| ret <2 x i16> %cvt
| }
| |
| ; Both cvt.pk.u16 operands are undef; the call is expected to fold to an
| ; undef <2 x i16>.
| define <2 x i16> @undef_cvt_pk_u16() {
| ; CHECK-LABEL: @undef_cvt_pk_u16(
| ; CHECK-NEXT: ret <2 x i16> undef
| ;
| %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 undef)
| ret <2 x i16> %cvt
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.ubfe |
| ; -------------------------------------------------------------------- |
| |
| declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone |
| declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone |
| |
| ; Baseline: source, offset and width are all function arguments, so the
| ; ubfe call is expected to be left unchanged.
| define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
| ; CHECK-LABEL: @ubfe_var_i32(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
| ret i32 %bfe
| }
| |
| ; Constant offset 133 with a variable width. The expected output keeps the
| ; call but rewrites the offset to 5, i.e. only the low five bits of the
| ; constant are retained (133 & 31 == 5).
| define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
| ; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 5, i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
| ret i32 %bfe
| }
| |
| ; Constant width 133 with a variable offset. The expected output keeps the
| ; call but rewrites the width to 5, i.e. only the low five bits of the
| ; constant are retained (133 & 31 == 5).
| define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
| ; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 5)
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
| ret i32 %bfe
| }
| |
| ; A constant width of 0 is expected to fold the extract to 0 regardless of
| ; the (variable) source and offset.
| define i32 @ubfe_width_0(i32 %src, i32 %offset) {
| ; CHECK-LABEL: @ubfe_width_0(
| ; CHECK-NEXT: ret i32 0
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
| ret i32 %bfe
| }
| |
| ; Constant width 31 with a variable offset: the call is expected to be left
| ; unchanged (31 already fits in five bits and the offset is unknown).
| define i32 @ubfe_width_31(i32 %src, i32 %offset) {
| ; CHECK-LABEL: @ubfe_width_31(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 31)
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
| ret i32 %bfe
| }
| |
| ; Constant width 32 on the i32 variant is expected to fold to 0; 32 has no
| ; bits set in its low five bits (32 & 31 == 0).
| define i32 @ubfe_width_32(i32 %src, i32 %offset) {
| ; CHECK-LABEL: @ubfe_width_32(
| ; CHECK-NEXT: ret i32 0
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
| ret i32 %bfe
| }
| |
| ; Constant width 33 with a variable offset: the expected output keeps the
| ; call with the width rewritten to 1 (33 & 31 == 1).
| define i32 @ubfe_width_33(i32 %src, i32 %offset) {
| ; CHECK-LABEL: @ubfe_width_33(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 1)
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
| ret i32 %bfe
| }
| |
| ; Constant offset 33 with a variable width: the expected output keeps the
| ; call with the offset rewritten to 1 (33 & 31 == 1).
| define i32 @ubfe_offset_33(i32 %src, i32 %width) {
| ; CHECK-LABEL: @ubfe_offset_33(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 1, i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
| ret i32 %bfe
| }
| |
| ; Constant offset 0 with a variable width: the call is expected to be left
| ; unchanged.
| define i32 @ubfe_offset_0(i32 %src, i32 %width) {
| ; CHECK-LABEL: @ubfe_offset_0(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
| ret i32 %bfe
| }
| |
| ; Constant offset 32 with a variable width: the expected output keeps the
| ; call with the offset rewritten to 0 (32 & 31 == 0).
| define i32 @ubfe_offset_32(i32 %src, i32 %width) {
| ; CHECK-LABEL: @ubfe_offset_32(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
| ret i32 %bfe
| }
| |
| ; Constant offset 31 with a variable width: the call is expected to be left
| ; unchanged.
| define i32 @ubfe_offset_31(i32 %src, i32 %width) {
| ; CHECK-LABEL: @ubfe_offset_31(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
| ret i32 %bfe
| }
| |
| ; Offset 0 and width 0 with a variable source: expected to fold to 0.
| define i32 @ubfe_offset_0_width_0(i32 %src) {
| ; CHECK-LABEL: @ubfe_offset_0_width_0(
| ; CHECK-NEXT: ret i32 0
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
| ret i32 %bfe
| }
| |
| ; Offset 0, width 3: the intrinsic is expected to be replaced by a plain
| ; `and` with 7 (three low bits); no shift is needed for a zero offset.
| define i32 @ubfe_offset_0_width_3(i32 %src) {
| ; CHECK-LABEL: @ubfe_offset_0_width_3(
| ; CHECK-NEXT: [[BFE:%.*]] = and i32 [[SRC:%.*]], 7
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
| ret i32 %bfe
| }
| |
| ; Offset 3, width 1: the intrinsic is expected to be replaced by a logical
| ; shift right by 3 followed by an `and` with 1.
| define i32 @ubfe_offset_3_width_1(i32 %src) {
| ; CHECK-LABEL: @ubfe_offset_3_width_1(
| ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
| ; CHECK-NEXT: [[BFE:%.*]] = and i32 [[TMP1]], 1
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
| ret i32 %bfe
| }
| |
| ; Offset 3, width 4: the intrinsic is expected to be replaced by a logical
| ; shift right by 3 followed by an `and` with 15 (four low bits).
| define i32 @ubfe_offset_3_width_4(i32 %src) {
| ; CHECK-LABEL: @ubfe_offset_3_width_4(
| ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[SRC:%.*]], 3
| ; CHECK-NEXT: [[BFE:%.*]] = and i32 [[TMP1]], 15
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
| ret i32 %bfe
| }
| |
| ; All three operands are the constant 0: expected to fold to 0.
| define i32 @ubfe_0_0_0() {
| ; CHECK-LABEL: @ubfe_0_0_0(
| ; CHECK-NEXT: ret i32 0
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
| ret i32 %bfe
| }
| |
| ; Fully constant unsigned extract: source -1 (all ones), offset 5, width 7.
| ; Expected to fold to 127, i.e. seven one bits zero-extended.
| define i32 @ubfe_neg1_5_7() {
| ; CHECK-LABEL: @ubfe_neg1_5_7(
| ; CHECK-NEXT: ret i32 127
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
| ret i32 %bfe
| }
| |
| ; An undef source with variable offset and width is expected to fold the
| ; whole call to undef.
| define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
| ; CHECK-LABEL: @ubfe_undef_src_i32(
| ; CHECK-NEXT: ret i32 undef
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
| ret i32 %bfe
| }
| |
| ; An undef offset (with variable source and width) is not folded: the call
| ; is expected to be left unchanged.
| define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
| ; CHECK-LABEL: @ubfe_undef_offset_i32(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 undef, i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
| ret i32 %bfe
| }
| |
| ; An undef width (with variable source and offset) is not folded: the call
| ; is expected to be left unchanged.
| define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
| ; CHECK-LABEL: @ubfe_undef_width_i32(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.ubfe.i32(i32 [[SRC:%.*]], i32 [[OFFSET:%.*]], i32 undef)
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
| ret i32 %bfe
| }
| |
| ; i64 variant with offset 33 and width 4. Unlike the i32 tests, the offset
| ; 33 is used as-is here: the expected output is a logical shift right by 33
| ; followed by an `and` with 15.
| define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
| ; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
| ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[SRC:%.*]], 33
| ; CHECK-NEXT: [[BFE:%.*]] = and i64 [[TMP1]], 15
| ; CHECK-NEXT: ret i64 [[BFE]]
| ;
| %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
| ret i64 %bfe
| }
| |
| ; i64 variant with constant offset 0 and a variable width: the call is
| ; expected to be left unchanged.
| define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
| ; CHECK-LABEL: @ubfe_offset_0_i64(
| ; CHECK-NEXT: [[BFE:%.*]] = call i64 @llvm.amdgcn.ubfe.i64(i64 [[SRC:%.*]], i32 0, i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i64 [[BFE]]
| ;
| %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
| ret i64 %bfe
| }
| |
| ; i64 variant extracting the upper 32 bits (offset 32, width 32). The
| ; expected output is a single logical shift right by 32; no `and` is
| ; emitted because the field reaches the top bit of the source.
| define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
| ; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
| ; CHECK-NEXT: [[BFE:%.*]] = lshr i64 [[SRC:%.*]], 32
| ; CHECK-NEXT: ret i64 [[BFE]]
| ;
| %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
| ret i64 %bfe
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.sbfe |
| ; -------------------------------------------------------------------- |
| |
| declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone |
| declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone |
| |
| ; Signed extract with constant offset 31 and a variable width: the call is
| ; expected to be left unchanged.
| define i32 @sbfe_offset_31(i32 %src, i32 %width) {
| ; CHECK-LABEL: @sbfe_offset_31(
| ; CHECK-NEXT: [[BFE:%.*]] = call i32 @llvm.amdgcn.sbfe.i32(i32 [[SRC:%.*]], i32 31, i32 [[WIDTH:%.*]])
| ; CHECK-NEXT: ret i32 [[BFE]]
| ;
| %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
| ret i32 %bfe
| }
| |
| ; Fully constant signed extract: source -1 (all ones), offset 5, width 7.
| ; Expected to fold to -1, i.e. the seven extracted one bits sign-extended
| ; (the unsigned counterpart with the same operands folds to 127).
| define i32 @sbfe_neg1_5_7() {
| ; CHECK-LABEL: @sbfe_neg1_5_7(
| ; CHECK-NEXT: ret i32 -1
| ;
| %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
| ret i32 %bfe
| }
| |
| ; i64 signed extract of the upper 32 bits (offset 32, width 32). The
| ; expected output is a single arithmetic shift right by 32.
| define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
| ; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
| ; CHECK-NEXT: [[BFE:%.*]] = ashr i64 [[SRC:%.*]], 32
| ; CHECK-NEXT: ret i64 [[BFE]]
| ;
| %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
| ret i64 %bfe
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.exp |
| ; -------------------------------------------------------------------- |
| |
| declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly |
| |
| |
| |
| |
| ; Issues a series of llvm.amdgcn.exp.f32 calls that differ in the second
| ; immediate operand, which the comments in the body treat as a per-source
| ; enable mask (bit 0 -> first data operand ... bit 3 -> fourth data operand).
| ; In the expected output every data operand whose mask bit is clear is
| ; replaced by undef, for both constant and variable data; with mask 0 all
| ; four become undef and with mask 15 all four are kept. The remaining
| ; operands (first i32 and the two trailing i1 flags) are passed through
| ; unchanged.
| define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
| ; enable src0..src3 constants
| ; CHECK-LABEL: @exp_disabled_inputs_to_undef(
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float [[X:%.*]], float undef, float undef, float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float [[Y:%.*]], float undef, float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float [[Z:%.*]], float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float [[W:%.*]], i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
| ; CHECK-NEXT: ret void
| ;
| call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
| |
| ; enable src0..src3 variables
| call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)
| |
| ; enable none
| call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)
| |
| ; enable different source combinations
| call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
| call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
| |
| ret void
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.exp.compr |
| ; -------------------------------------------------------------------- |
| |
| declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly |
| |
| |
| |
| ; Issues llvm.amdgcn.exp.compr.v2f16 calls with different values of the
| ; second immediate operand, first with constant vectors and then with the
| ; function arguments. Per the expected output:
| ;   - value 0 turns both vector operands into undef;
| ;   - values 1, 2 and 3 keep the first vector and turn the second into undef;
| ;   - value 12 turns the first vector into undef and keeps the second;
| ;   - value 15 keeps both vectors.
| ; The constant vector <1.0, 2.0> is printed as <0xH3C00, 0xH4000>.
| define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
| ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> [[XY:%.*]], <2 x half> undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> [[XY]], <2 x half> undef, i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> [[ZW:%.*]], i1 true, i1 false)
| ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> [[XY]], <2 x half> [[ZW]], i1 true, i1 false)
| ; CHECK-NEXT: ret void
| ;
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
| |
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
| |
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
| call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
| ret void
| }
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.fmed3 |
| ; -------------------------------------------------------------------- |
| |
| declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone |
| |
| ; Baseline: all three fmed3 operands are function arguments, so the call is
| ; expected to be left unchanged.
| define float @fmed3_f32(float %x, float %y, float %z) {
| ; CHECK-LABEL: @fmed3_f32(
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
| ; CHECK-NEXT: ret float [[MED3]]
| ;
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
| ret float %med3
| }
| |
| ; Operand order (x, 0.0, 1.0): the variable is already first and the
| ; constants last, so the call is expected to be left unchanged.
| define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
| ; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
| ; CHECK-NEXT: ret float [[MED3]]
| ;
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
| ret float %med3
| }
| |
| ; Operand order (0.0, x, 1.0): the operands are expected to be reordered to
| ; (x, 0.0, 1.0), moving the variable ahead of the constants.
| define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
| ; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
| ; CHECK-NEXT: ret float [[MED3]]
| ;
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
| ret float %med3
| }
| |
| ; Operand order (0.0, 1.0, x): the operands are expected to be reordered to
| ; (x, 0.0, 1.0), moving the variable ahead of the constants.
| define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
| ; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float 0.000000e+00, float 1.000000e+00)
| ; CHECK-NEXT: ret float [[MED3]]
| ;
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
| ret float %med3
| }
| |
| ; Operand order (x, y, 1.0): the single constant is already last, so the
| ; call is expected to be left unchanged.
| define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
| ; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
| ; CHECK-NEXT: ret float [[MED3]]
| ;
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
| ret float %med3
| }
| |
| ; Operand order (x, 1.0, y): the operands are expected to be reordered to
| ; (x, y, 1.0), moving the constant to the last position.
| define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
| ; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
| ; CHECK-NEXT: ret float [[MED3]]
| ;
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
| ret float %med3
| }
| |
| ; Operand order (1.0, x, y): the operands are expected to be reordered to
| ; (x, y, 1.0), moving the constant to the last position.
| define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
| ; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.amdgcn.fmed3.f32(float [[X:%.*]], float [[Y:%.*]], float 1.000000e+00)
| ; CHECK-NEXT: ret float [[MED3]]
| ;
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
| ret float %med3
| }
| |
| ; The first fmed3 operand is undef: the call is expected to be replaced by
| ; llvm.minnum of the two remaining operands.
| define float @fmed3_undef_x_y_f32(float %x, float %y) {
| ; CHECK-LABEL: @fmed3_undef_x_y_f32(
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]])
| ; CHECK-NEXT: ret float [[MED3]]
| ;
| %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
| ret float %med3
| }
| |
| ; Same as the undef-in-operand-0 case, but the call carries the nnan flag; |
| ; the expected llvm.minnum call keeps that flag. |
| define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32( |
| ; CHECK-NEXT: [[MED3:%.*]] = call nnan float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) |
| ; CHECK-NEXT: ret float [[MED3]] |
| ; |
| %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y) |
| ret float %med3 |
| } |
| |
| ; fmed3 with undef in operand 1 is expected to become llvm.minnum of the two |
| ; remaining operands. |
| define float @fmed3_x_undef_y_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_x_undef_y_f32( |
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) |
| ; CHECK-NEXT: ret float [[MED3]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y) |
| ret float %med3 |
| } |
| |
| ; fmed3 with undef in operand 2 is expected to become llvm.maxnum (not |
| ; minnum) of the two remaining operands. |
| define float @fmed3_x_y_undef_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_x_y_undef_f32( |
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) |
| ; CHECK-NEXT: ret float [[MED3]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef) |
| ret float %med3 |
| } |
| |
| ; fmed3 with the NaN constant 0x7FF8000000000000 in operand 0 is expected to |
| ; become llvm.minnum of the two remaining operands. |
| define float @fmed3_qnan0_x_y_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_qnan0_x_y_f32( |
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) |
| ; CHECK-NEXT: ret float [[MED3]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y) |
| ret float %med3 |
| } |
| |
| ; fmed3 with the NaN constant 0x7FF8000000000000 in operand 1 is expected to |
| ; become llvm.minnum of the two remaining operands. |
| define float @fmed3_x_qnan0_y_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_x_qnan0_y_f32( |
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) |
| ; CHECK-NEXT: ret float [[MED3]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y) |
| ret float %med3 |
| } |
| |
| ; fmed3 with the NaN constant 0x7FF8000000000000 in operand 2 is expected to |
| ; become llvm.maxnum (not minnum) of the two remaining operands. |
| define float @fmed3_x_y_qnan0_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_x_y_qnan0_f32( |
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) |
| ; CHECK-NEXT: ret float [[MED3]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000) |
| ret float %med3 |
| } |
| |
| ; Variant of the NaN-in-operand-0 case using a NaN constant with a different |
| ; bit pattern (0x7FF8000100000000); the expected result is still llvm.minnum. |
| define float @fmed3_qnan1_x_y_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_qnan1_x_y_f32( |
| ; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) |
| ; CHECK-NEXT: ret float [[MED3]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y) |
| ret float %med3 |
| } |
| |
| ; This can return any of the qnans. |
| ; All three operands are distinct NaN constants; the expected output currently |
| ; returns the operand-2 constant (0x7FF8030000000000). %x and %y are unused. |
| define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32( |
| ; CHECK-NEXT: ret float 0x7FF8030000000000 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000) |
| ret float %med3 |
| } |
| |
| ; All-constant fmed3 of (0.5, -1.0, 4.0): expected to fold to 0.5, the value |
| ; supplied in operand 0. %x and %y are unused. |
| define float @fmed3_constant_src0_0_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_constant_src0_0_f32( |
| ; CHECK-NEXT: ret float 5.000000e-01 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0) |
| ret float %med3 |
| } |
| |
| ; All-constant fmed3 of (0.5, 4.0, -1.0): expected to fold to 0.5, the value |
| ; supplied in operand 0. %x and %y are unused. |
| define float @fmed3_constant_src0_1_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_constant_src0_1_f32( |
| ; CHECK-NEXT: ret float 5.000000e-01 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0) |
| ret float %med3 |
| } |
| |
| ; All-constant fmed3 of (-1.0, 0.5, 4.0): expected to fold to 0.5, the value |
| ; supplied in operand 1. %x and %y are unused. |
| define float @fmed3_constant_src1_0_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_constant_src1_0_f32( |
| ; CHECK-NEXT: ret float 5.000000e-01 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0) |
| ret float %med3 |
| } |
| |
| ; All-constant fmed3 of (4.0, 0.5, -1.0): expected to fold to 0.5, the value |
| ; supplied in operand 1. %x and %y are unused. |
| define float @fmed3_constant_src1_1_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_constant_src1_1_f32( |
| ; CHECK-NEXT: ret float 5.000000e-01 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0) |
| ret float %med3 |
| } |
| |
| ; All-constant fmed3 of (-1.0, 4.0, 0.5): expected to fold to 0.5, the value |
| ; supplied in operand 2. %x and %y are unused. |
| define float @fmed3_constant_src2_0_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_constant_src2_0_f32( |
| ; CHECK-NEXT: ret float 5.000000e-01 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5) |
| ret float %med3 |
| } |
| |
| ; All-constant fmed3 of (4.0, -1.0, 0.5): expected to fold to 0.5, the value |
| ; supplied in operand 2. %x and %y are unused. |
| define float @fmed3_constant_src2_1_f32(float %x, float %y) { |
| ; CHECK-LABEL: @fmed3_constant_src2_1_f32( |
| ; CHECK-NEXT: ret float 5.000000e-01 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5) |
| ret float %med3 |
| } |
| |
| ; %x in operand 0 with NaN constants in operands 1 and 2: the call is expected |
| ; to disappear and %x to be returned directly. |
| define float @fmed3_x_qnan0_qnan1_f32(float %x) { |
| ; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32( |
| ; CHECK-NEXT: ret float [[X:%.*]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000) |
| ret float %med3 |
| } |
| |
| ; %x in operand 1 with NaN constants in operands 0 and 2: the call is expected |
| ; to disappear and %x to be returned directly. |
| define float @fmed3_qnan0_x_qnan1_f32(float %x) { |
| ; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32( |
| ; CHECK-NEXT: ret float [[X:%.*]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000) |
| ret float %med3 |
| } |
| |
| ; %x in operand 2 with NaN constants in operands 0 and 1: the call is expected |
| ; to disappear and %x to be returned directly. |
| define float @fmed3_qnan0_qnan1_x_f32(float %x) { |
| ; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32( |
| ; CHECK-NEXT: ret float [[X:%.*]] |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x) |
| ret float %med3 |
| } |
| |
| ; NaN constant in operand 0 with 0.0 and 1.0: expected to fold to 0.0, the |
| ; smaller of the two non-NaN constants. |
| define float @fmed3_nan_0_1_f32() { |
| ; CHECK-LABEL: @fmed3_nan_0_1_f32( |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0.0, float 1.0) |
| ret float %med3 |
| } |
| |
| ; NaN constant in operand 1 with 0.0 and 1.0: expected to fold to 0.0, the |
| ; smaller of the two non-NaN constants. |
| define float @fmed3_0_nan_1_f32() { |
| ; CHECK-LABEL: @fmed3_0_nan_1_f32( |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; |
| %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 0x7FF8001000000000, float 1.0) |
| ret float %med |
| } |
| |
| ; NaN constant in operand 2 with 0.0 and 1.0: expected to fold to 1.0, the |
| ; larger of the two non-NaN constants. |
| define float @fmed3_0_1_nan_f32() { |
| ; CHECK-LABEL: @fmed3_0_1_nan_f32( |
| ; CHECK-NEXT: ret float 1.000000e+00 |
| ; |
| %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000) |
| ret float %med |
| } |
| |
| ; undef in operand 0 with 0.0 and 1.0: expected to fold to 0.0, the smaller |
| ; of the two constants. |
| define float @fmed3_undef_0_1_f32() { |
| ; CHECK-LABEL: @fmed3_undef_0_1_f32( |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; |
| %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float 0.0, float 1.0) |
| ret float %med3 |
| } |
| |
| ; undef in operand 1 with 0.0 and 1.0: expected to fold to 0.0, the smaller |
| ; of the two constants. |
| define float @fmed3_0_undef_1_f32() { |
| ; CHECK-LABEL: @fmed3_0_undef_1_f32( |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; |
| %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float undef, float 1.0) |
| ret float %med |
| } |
| |
| ; undef in operand 2 with 0.0 and 1.0: expected to fold to 1.0, the larger |
| ; of the two constants. |
| define float @fmed3_0_1_undef_f32() { |
| ; CHECK-LABEL: @fmed3_0_1_undef_f32( |
| ; CHECK-NEXT: ret float 1.000000e+00 |
| ; |
| %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef) |
| ret float %med |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.icmp |
| ; -------------------------------------------------------------------- |
| |
| ; Overloads of the icmp intrinsic called by the test inputs below: each returns |
| ; i64 and takes two operands (i32, i64 or i1) plus an immediate i32 predicate code. |
| declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) nounwind readnone convergent |
| declare i64 @llvm.amdgcn.icmp.i64.i64(i64, i64, i32 immarg) nounwind readnone convergent |
| declare i64 @llvm.amdgcn.icmp.i64.i1(i1, i1, i32 immarg) nounwind readnone convergent |
| |
| ; Predicate immediates 31 and 42 lie just outside the 32-41 range of integer |
| ; predicate codes used elsewhere in this file; both calls are expected to be left untouched. |
| define i64 @invalid_icmp_code(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @invalid_icmp_code( |
| ; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31) |
| ; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A]], i32 [[B]], i32 42) |
| ; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]] |
| ; CHECK-NEXT: ret i64 [[OR]] |
| ; |
| %under = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 31) |
| %over = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 42) |
| %or = or i64 %under, %over |
| ret i64 %or |
| } |
| |
| ; Constant operands 9 and 8 with predicate code 32 (eq): the comparison is |
| ; false, and the call is expected to fold to the constant 0. |
| define i64 @icmp_constant_inputs_false() { |
| ; CHECK-LABEL: @icmp_constant_inputs_false( |
| ; CHECK-NEXT: ret i64 0 |
| ; |
| %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32) |
| ret i64 %result |
| } |
| |
| ; Constant operands 9 and 8 with predicate code 34 (ugt): the comparison is |
| ; true, and the call is expected to be replaced by an llvm.read_register call. |
| ; NOTE(review): the register is named only via metadata not shown here; presumably the exec mask - confirm. |
| define i64 @icmp_constant_inputs_true() { |
| ; CHECK-LABEL: @icmp_constant_inputs_true( |
| ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR17:[0-9]+]] |
| ; CHECK-NEXT: ret i64 [[RESULT]] |
| ; |
| %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34) |
| ret i64 %result |
| } |
| |
| ; Constant in operand 0 with predicate code 40 (slt): the expected output moves |
| ; the constant to operand 1 and swaps the predicate to code 38 (sgt). |
| define i64 @icmp_constant_to_rhs_slt(i32 %x) { |
| ; CHECK-LABEL: @icmp_constant_to_rhs_slt( |
| ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[X:%.*]], i32 9, i32 38) |
| ; CHECK-NEXT: ret i64 [[RESULT]] |
| ; |
| %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 %x, i32 40) |
| ret i64 %result |
| } |
| |
| ; zext(icmp eq %a, %b) compared ne (code 33) against 0: expected to fold to the |
| ; intrinsic applied directly to %a and %b with code 32 (eq). |
| define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i32 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; zext(icmp ne %a, %b) compared ne (code 33) against 0: expected to fold to the |
| ; intrinsic applied directly to %a and %b with code 33 (ne). |
| define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ne i32 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; zext(icmp sle %a, %b) compared ne (code 33) against 0: expected to fold to the |
| ; intrinsic applied directly to %a and %b with code 41 (sle). |
| define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp sle i32 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; zext(icmp ugt i64 %a, %b) compared ne (code 33) against 0: expected to fold to |
| ; the i64-operand overload of the intrinsic on %a and %b with code 34 (ugt). |
| define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ugt i64 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Same fold as the ugt i64 case, but the intrinsic's operands are swapped |
| ; (0 in operand 0, the zext in operand 1); the expected output is unchanged: code 34 on %a, %b. |
| ; NOTE(review): the test name says "ult" but the input compare is ugt. |
| define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ugt i64 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 0, i32 %zext.cmp, i32 33) |
| ret i64 %mask |
| } |
| |
| ; zext(fcmp oeq %a, %b) compared ne (code 33) against 0: expected to fold to the |
| ; fcmp intrinsic on %a and %b with predicate code 1 (oeq). |
| define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 1) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp oeq float %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; zext(fcmp une %a, %b) compared ne (code 33) against 0: expected to fold to the |
| ; fcmp intrinsic on %a and %b with predicate code 14 (une). |
| define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp une float %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; zext(fcmp olt double %a, %b) compared ne (code 33) against 0: expected to fold |
| ; to the f64 overload of the fcmp intrinsic with predicate code 4 (olt). |
| define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f64(double [[A:%.*]], double [[B:%.*]], i32 4) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp olt double %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; sext (rather than zext) of icmp eq, compared ne (code 33) against 0: expected |
| ; to fold to the intrinsic on %a and %b with code 32 (eq). |
| define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i32 %a, %b |
| %sext.cmp = sext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; zext(icmp eq %a, %b) compared eq (code 32) against 0: expected to fold to the |
| ; intrinsic on %a and %b with the inverted predicate, code 33 (ne). |
| define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i32 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) |
| ret i64 %mask |
| } |
| |
| ; zext(icmp slt %a, %b) compared eq (code 32) against 0: expected to fold to the |
| ; intrinsic on %a and %b with the inverted predicate, code 39 (sge). |
| define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp slt i32 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) |
| ret i64 %mask |
| } |
| |
| ; zext(fcmp oeq %a, %b) compared eq (code 32) against 0: expected to fold to the |
| ; fcmp intrinsic with the inverted predicate, code 14 (une). |
| define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) { |
| ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp oeq float %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) |
| ret i64 %mask |
| } |
| |
| ; zext(fcmp ule %a, %b) compared eq (code 32) against 0: expected to fold to the |
| ; fcmp intrinsic with the inverted predicate, code 2 (ogt). |
| define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) { |
| ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 2) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp ule float %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) |
| ret i64 %mask |
| } |
| |
| ; zext(fcmp ogt %a, %b) compared eq (code 32) against 0: expected to fold to the |
| ; fcmp intrinsic with the inverted predicate, code 13 (ule). |
| define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) { |
| ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 13) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp ogt float %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) |
| ret i64 %mask |
| } |
| |
| ; zext(icmp eq %a, %b) compared eq (code 32) against 1: expected to fold to the |
| ; intrinsic on %a and %b with code 32 (eq), i.e. the original compare. |
| define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i32 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; zext of an i1 argument (not a compare) tested eq (code 32) against 1: the |
| ; expected output keeps the zext and rewrites the test as ne (code 33) against 0. |
| define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) { |
| ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32( |
| ; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 0, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %zext.cond = zext i1 %cond to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; zext of an i1 argument tested eq (code 32) against -1: the expected output |
| ; is identical to the input (no rewrite). |
| define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) { |
| ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32( |
| ; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 -1, i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %zext.cond = zext i1 %cond to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 -1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; sext of an i1 argument tested eq (code 32) against 1: the expected output |
| ; is identical to the input (no rewrite). |
| define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) { |
| ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32( |
| ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 1, i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %sext.cond = sext i1 %cond to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; sext of an i1 argument tested eq (code 32) against -1: the expected output |
| ; keeps the sext and rewrites the test as ne (code 33) against 0. |
| define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) { |
| ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32( |
| ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 0, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %sext.cond = sext i1 %cond to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 -1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; i64 variant of the sext-i1 eq -1 case: the expected output keeps the sext |
| ; and rewrites the test as ne (code 33) against 0 on the i64 overload. |
| define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) { |
| ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64( |
| ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i64 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[SEXT_COND]], i64 0, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %sext.cond = sext i1 %cond to i64 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %sext.cond, i64 -1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; TODO: Should be able to fold to false |
| ; sext(icmp eq %a, %b) tested eq (code 32) against 1. A sign-extended i1 is |
| ; either 0 or -1, so it never equals 1; the expected output currently leaves the input unchanged. |
| define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[SEXT_CMP:%.*]] = sext i1 [[CMP]] to i32 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_CMP]], i32 1, i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i32 %a, %b |
| %sext.cmp = sext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; sext(icmp eq %a, %b) tested eq (code 32) against -1: expected to fold to the |
| ; intrinsic on %a and %b with code 32 (eq), i.e. the original compare. |
| define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i32 %a, %b |
| %sext.cmp = sext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; sext(icmp sge %a, %b) tested eq (code 32) against -1: expected to fold to the |
| ; intrinsic on %a and %b with code 39 (sge), i.e. the original compare. |
| define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp sge i32 %a, %b |
| %sext.cmp = sext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32) |
| ret i64 %mask |
| } |
| |
| ; The icmp sle result is negated (xor with true) before the zext and the ne-0 |
| ; test; the expected output folds everything into code 38 (sgt) on %a and %b. |
| define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp sle i32 %a, %b |
| %not = xor i1 %cmp, true |
| %zext.cmp = zext i1 %not to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Source compare is on i4 operands: the expected output zero-extends both to |
| ; i16 and uses the i16 overload of the intrinsic with code 32 (eq). |
| define i64 @fold_icmp_ne_0_zext_icmp_eq_i4(i4 %a, i4 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i4( |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i4 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Source compare is on i8 operands: the expected output zero-extends both to |
| ; i16 and uses the i16 overload of the intrinsic with code 32 (eq). |
| define i64 @fold_icmp_ne_0_zext_icmp_eq_i8(i8 %a, i8 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i8 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Source compare is on i16 operands: the expected output uses the i16 overload |
| ; of the intrinsic directly, with code 32 (eq) and no extension. |
| define i64 @fold_icmp_ne_0_zext_icmp_eq_i16(i16 %a, i16 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i16( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i16 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Source compare is on i36 operands: the expected output zero-extends both to |
| ; i64 and uses the i64 overload of the intrinsic with code 32 (eq). |
| define i64 @fold_icmp_ne_0_zext_icmp_eq_i36(i36 %a, i36 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i36( |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i36 [[A:%.*]] to i64 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i36 [[B:%.*]] to i64 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i36 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Source compare is on i128 operands: no fold is expected, and the output is |
| ; identical to the input. |
| define i64 @fold_icmp_ne_0_zext_icmp_eq_i128(i128 %a, i128 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i128( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i128 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Source compare is fcmp oeq on half operands: expected to fold to the f16 |
| ; overload of the fcmp intrinsic with predicate code 1 (oeq). |
| define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f16(half %a, half %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f16( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f16(half [[A:%.*]], half [[B:%.*]], i32 1) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp oeq half %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Source compare is fcmp oeq on fp128 operands: no fold is expected, and the |
| ; output is identical to the input. |
| define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f128(fp128 %a, fp128 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f128( |
| ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp oeq fp128 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Signed compare (slt) on i4 operands: the expected output sign-extends both |
| ; to i16 and uses the i16 overload of the intrinsic with code 40 (slt). |
| define i64 @fold_icmp_ne_0_zext_icmp_slt_i4(i4 %a, i4 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i4( |
| ; CHECK-NEXT: [[TMP1:%.*]] = sext i4 [[A:%.*]] to i16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = sext i4 [[B:%.*]] to i16 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp slt i4 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Signed compare (slt) on i8 operands: the expected output sign-extends both |
| ; to i16 and uses the i16 overload of the intrinsic with code 40 (slt). |
| define i64 @fold_icmp_ne_0_zext_icmp_slt_i8(i8 %a, i8 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[A:%.*]] to i16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[B:%.*]] to i16 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp slt i8 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Signed compare (slt) on i16 operands: the expected output uses the i16 |
| ; overload of the intrinsic directly, with code 40 (slt) and no extension. |
| define i64 @fold_icmp_ne_0_zext_icmp_slt_i16(i16 %a, i16 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i16( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp slt i16 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Unsigned compare (ult) on i4 operands: the expected output zero-extends both |
| ; to i16 and uses the i16 overload of the intrinsic with code 36 (ult). |
| define i64 @fold_icmp_ne_0_zext_icmp_ult_i4(i4 %a, i4 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i4( |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ult i4 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Unsigned compare (ult) on i8 operands: the expected output zero-extends both |
| ; to i16 and uses the i16 overload of the intrinsic with code 36 (ult). |
| define i64 @fold_icmp_ne_0_zext_icmp_ult_i8(i8 %a, i8 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16 |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ult i8 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Unsigned compare (ult) on i16 operands: the expected output uses the i16 |
| ; overload of the intrinsic directly, with code 36 (ult) and no extension. |
| define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) { |
| ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i16( |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 36) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ult i16 %a, %b |
| %zext.cmp = zext i1 %cmp to i32 |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) |
| ret i64 %mask |
| } |
| |
| ; 1-bit NE comparisons |
| |
| ; The icmp eq result feeds the i1-operand overload directly (no zext), tested |
| ; ne (code 33) against false; the expected output is identical to the input. |
| define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i32 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The icmp ne result feeds the i1-operand overload directly, tested ne |
| ; (code 33) against false; the expected output is identical to the input. |
| define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ne i32 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The icmp sle result feeds the i1-operand overload directly, tested ne |
| ; (code 33) against false; the expected output is identical to the input. |
| define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp sle i32 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The icmp ugt i64 result feeds the i1-operand overload directly, tested ne |
| ; (code 33) against false; the expected output is identical to the input. |
| define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ugt i64 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; Input passes false in operand 0 and the compare result in operand 1; the |
| ; expected output only swaps them so that the constant ends up in operand 1. |
| ; NOTE(review): the test name says "ult" but the input compare is ugt. |
| define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ugt i64 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 false, i1 %cmp, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The fcmp oeq result feeds the i1-operand overload directly, tested ne |
| ; (code 33) against false; the expected output is identical to the input. |
| define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32( |
| ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp oeq float %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The fcmp une result feeds the i1-operand overload directly, tested ne |
| ; (code 33) against false; the expected output is identical to the input. |
| define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32( |
| ; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp une float %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The fcmp olt (double) result feeds the i1-operand overload directly, tested |
| ; ne (code 33) against false; the expected output is identical to the input. |
| define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64( |
| ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp olt double %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; i1-operand form with an i4 source compare: the expected output is identical |
| ; to the input (no widening of the i4 operands). |
| define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, i4 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i4 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; i1-operand form with an i8 source compare: the expected output is identical |
| ; to the input (no widening of the i8 operands). |
| define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i8 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; i1-operand form with an i16 source compare: the expected output is identical |
| ; to the input. |
| define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i16 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; i1-operand form with an i36 source compare: the expected output is identical |
| ; to the input (no widening of the i36 operands). |
| define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i36 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i36 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'icmp eq i128' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp eq i128 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'fcmp oeq half' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16( |
| ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq half [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp oeq half %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'fcmp oeq fp128' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128( |
| ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = fcmp oeq fp128 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'icmp slt i4' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i4 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp slt i4 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'icmp slt i8' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp slt i8 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'icmp slt i16' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp slt i16 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'icmp ult i4' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i4 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ult i4 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'icmp ult i8' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ult i8 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; The i1 result of 'icmp ult i16' is passed to llvm.amdgcn.icmp.i64.i1 together with 'i1 false' and code 33. |
| ; The CHECK lines expect the output to match the input: neither instruction is folded. |
| define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) { |
| ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16( |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]] |
| ; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33) |
| ; CHECK-NEXT: ret i64 [[MASK]] |
| ; |
| %cmp = icmp ult i16 %a, %b |
| %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33) |
| ret i64 %mask |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.fcmp |
| ; -------------------------------------------------------------------- |
| |
| declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32 immarg) nounwind readnone convergent |
| |
| ; Calls llvm.amdgcn.fcmp with the codes -1 and 16 (the "invalid" codes of the test name) and ORs the results. |
| ; The CHECK lines expect both calls and the 'or' to be left exactly as written. |
| define i64 @invalid_fcmp_code(float %a, float %b) { |
| ; CHECK-LABEL: @invalid_fcmp_code( |
| ; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 -1) |
| ; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A]], float [[B]], i32 16) |
| ; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]] |
| ; CHECK-NEXT: ret i64 [[OR]] |
| ; |
| %under = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 -1) |
| %over = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 16) |
| %or = or i64 %under, %over |
| ret i64 %or |
| } |
| |
| ; llvm.amdgcn.fcmp on the constants 2.0 and 4.0 with code 1. |
| ; The CHECK lines expect the call to fold to the constant 0. |
| define i64 @fcmp_constant_inputs_false() { |
| ; CHECK-LABEL: @fcmp_constant_inputs_false( |
| ; CHECK-NEXT: ret i64 0 |
| ; |
| %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1) |
| ret i64 %result |
| } |
| |
| ; llvm.amdgcn.fcmp on the constants 2.0 and 4.0 with code 4. |
| ; The CHECK lines expect the call to be replaced by a call to llvm.read_register.i64. |
| ; META0 and ATTR17 are used here without a pattern, so they must be bound by an earlier CHECK line in the file. |
| define i64 @fcmp_constant_inputs_true() { |
| ; CHECK-LABEL: @fcmp_constant_inputs_true( |
| ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]] |
| ; CHECK-NEXT: ret i64 [[RESULT]] |
| ; |
| %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4) |
| ret i64 %result |
| } |
| |
| ; Input has the constant 4.0 as the first operand and code 4. |
| ; The CHECK lines expect the operands to be swapped (constant second) and the code changed to 2. |
| define i64 @fcmp_constant_to_rhs_olt(float %x) { |
| ; CHECK-LABEL: @fcmp_constant_to_rhs_olt( |
| ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[X:%.*]], float 4.000000e+00, i32 2) |
| ; CHECK-NEXT: ret i64 [[RESULT]] |
| ; |
| %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 4.0, float %x, i32 4) |
| ret i64 %result |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.ballot |
| ; -------------------------------------------------------------------- |
| |
| declare i64 @llvm.amdgcn.ballot.i64(i1) nounwind readnone convergent |
| declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent |
| |
| ; llvm.amdgcn.ballot.i64 on a non-constant i1 argument. |
| ; The CHECK lines expect a call to llvm.amdgcn.ballot.i32 whose result is zero-extended to i64. |
| define i64 @ballot_nocombine_64(i1 %i) { |
| ; CHECK-LABEL: @ballot_nocombine_64( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]]) |
| ; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64 |
| ; CHECK-NEXT: ret i64 [[B]] |
| ; |
| %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i) |
| ret i64 %b |
| } |
| |
| ; llvm.amdgcn.ballot.i64 on the constant 'i1 0'. |
| ; The CHECK lines expect the call to fold to the constant 0. |
| define i64 @ballot_zero_64() { |
| ; CHECK-LABEL: @ballot_zero_64( |
| ; CHECK-NEXT: ret i64 0 |
| ; |
| %b = call i64 @llvm.amdgcn.ballot.i64(i1 0) |
| ret i64 %b |
| } |
| |
| ; llvm.amdgcn.ballot.i64 on the constant 'i1 1'. |
| ; The CHECK lines expect no constant result: a call to llvm.amdgcn.ballot.i32(i1 true) followed by a zext to i64. |
| define i64 @ballot_one_64() { |
| ; CHECK-LABEL: @ballot_one_64( |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true) |
| ; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64 |
| ; CHECK-NEXT: ret i64 [[B]] |
| ; |
| %b = call i64 @llvm.amdgcn.ballot.i64(i1 1) |
| ret i64 %b |
| } |
| |
| ; llvm.amdgcn.ballot.i32 on a non-constant i1 argument. |
| ; The CHECK lines expect the call to be left unchanged. |
| define i32 @ballot_nocombine_32(i1 %i) { |
| ; CHECK-LABEL: @ballot_nocombine_32( |
| ; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]]) |
| ; CHECK-NEXT: ret i32 [[B]] |
| ; |
| %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i) |
| ret i32 %b |
| } |
| |
| ; llvm.amdgcn.ballot.i32 on the constant 'i1 0'. |
| ; The CHECK lines expect the call to fold to the constant 0. |
| define i32 @ballot_zero_32() { |
| ; CHECK-LABEL: @ballot_zero_32( |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| %b = call i32 @llvm.amdgcn.ballot.i32(i1 0) |
| ret i32 %b |
| } |
| |
| ; llvm.amdgcn.ballot.i32 on the constant 'i1 1'. |
| ; The CHECK lines expect the call to remain (with the operand printed as 'i1 true'); it is not folded to a constant. |
| define i32 @ballot_one_32() { |
| ; CHECK-LABEL: @ballot_one_32( |
| ; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true) |
| ; CHECK-NEXT: ret i32 [[B]] |
| ; |
| %b = call i32 @llvm.amdgcn.ballot.i32(i1 1) |
| ret i32 %b |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.wqm.vote |
| ; -------------------------------------------------------------------- |
| |
| declare i1 @llvm.amdgcn.wqm.vote(i1) |
| |
| ; llvm.amdgcn.wqm.vote(i1 true) selects between 1.0 and 0.0. |
| ; The CHECK lines expect the whole body to fold to 'ret float 1.0'. |
| define float @wqm_vote_true() { |
| ; CHECK-LABEL: @wqm_vote_true( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: ret float 1.000000e+00 |
| ; |
| main_body: |
| %w = call i1 @llvm.amdgcn.wqm.vote(i1 true) |
| %r = select i1 %w, float 1.0, float 0.0 |
| ret float %r |
| } |
| |
| ; llvm.amdgcn.wqm.vote(i1 false) selects between 1.0 and 0.0. |
| ; The CHECK lines expect the whole body to fold to 'ret float 0.0'. |
| define float @wqm_vote_false() { |
| ; CHECK-LABEL: @wqm_vote_false( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; |
| main_body: |
| %w = call i1 @llvm.amdgcn.wqm.vote(i1 false) |
| %r = select i1 %w, float 1.0, float 0.0 |
| ret float %r |
| } |
| |
| ; llvm.amdgcn.wqm.vote(i1 undef) selects between 1.0 and 0.0. |
| ; The CHECK lines expect the whole body to fold to 'ret float 0.0', the same result as the 'false' case. |
| define float @wqm_vote_undef() { |
| ; CHECK-LABEL: @wqm_vote_undef( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; |
| main_body: |
| %w = call i1 @llvm.amdgcn.wqm.vote(i1 undef) |
| %r = select i1 %w, float 1.0, float 0.0 |
| ret float %r |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.kill |
| ; -------------------------------------------------------------------- |
| |
| declare void @llvm.amdgcn.kill(i1) |
| |
| ; llvm.amdgcn.kill with the constant 'i1 true'. |
| ; The CHECK lines expect the call to be removed, leaving only 'ret void'. |
| define void @kill_true() { |
| ; CHECK-LABEL: @kill_true( |
| ; CHECK-NEXT: ret void |
| ; |
| call void @llvm.amdgcn.kill(i1 true) |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.readfirstlane |
| ; -------------------------------------------------------------------- |
| |
| declare i32 @llvm.amdgcn.readfirstlane(i32) |
| |
| @gv = constant i32 0 |
| |
| ; Applies llvm.amdgcn.readfirstlane to a function argument, 0, 123, a ptrtoint constant expression and undef, |
| ; then keeps every result alive with a volatile store. |
| ; The CHECK lines expect only the call on %arg to survive; the other four stores receive the operand directly. |
| define amdgpu_kernel void @readfirstlane_constant(i32 %arg) { |
| ; CHECK-LABEL: @readfirstlane_constant( |
| ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]]) |
| ; CHECK-NEXT: store volatile i32 [[VAR]], ptr undef, align 4 |
| ; CHECK-NEXT: store volatile i32 0, ptr undef, align 4 |
| ; CHECK-NEXT: store volatile i32 123, ptr undef, align 4 |
| ; CHECK-NEXT: store volatile i32 ptrtoint (ptr @gv to i32), ptr undef, align 4 |
| ; CHECK-NEXT: store volatile i32 undef, ptr undef, align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg) |
| %zero = call i32 @llvm.amdgcn.readfirstlane(i32 0) |
| %imm = call i32 @llvm.amdgcn.readfirstlane(i32 123) |
| %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (ptr @gv to i32)) |
| %undef = call i32 @llvm.amdgcn.readfirstlane(i32 undef) |
| store volatile i32 %var, ptr undef |
| store volatile i32 %zero, ptr undef |
| store volatile i32 %imm, ptr undef |
| store volatile i32 %constexpr, ptr undef |
| store volatile i32 %undef, ptr undef |
| ret void |
| } |
| |
| ; A chain of three llvm.amdgcn.readfirstlane calls, each consuming the previous result, in one basic block. |
| ; The CHECK lines expect the chain to collapse to the first call. |
| define i32 @readfirstlane_idempotent(i32 %arg) { |
| ; CHECK-LABEL: @readfirstlane_idempotent( |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]]) |
| ; CHECK-NEXT: ret i32 [[READ0]] |
| ; |
| %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg) |
| %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0) |
| %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1) |
| ret i32 %read2 |
| } |
| |
| ; llvm.amdgcn.readfirstlane applied to the result of llvm.amdgcn.readlane (lane 0) in the same basic block. |
| ; The CHECK lines expect the outer readfirstlane to be dropped and the readlane result returned directly. |
| ; This is the same-block counterpart of @readfirstlane_readlane_different_block; the mirrored operand order |
| ; (readlane of a readfirstlane result) is what @readlane_readfirstlane checks. |
| define i32 @readfirstlane_readlane(i32 %arg) { |
| ; CHECK-LABEL: @readfirstlane_readlane( |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 0) |
| ; CHECK-NEXT: ret i32 [[READ0]] |
| ; |
| %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0) |
| %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0) |
| ret i32 %read1 |
| } |
| |
| ; llvm.amdgcn.readfirstlane in bb0 feeds a second llvm.amdgcn.readfirstlane in bb1. |
| ; The CHECK lines expect both calls to remain when they sit in different basic blocks. |
| define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) { |
| ; CHECK-LABEL: @readfirstlane_readfirstlane_different_block( |
| ; CHECK-NEXT: bb0: |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]]) |
| ; CHECK-NEXT: br label [[BB1:%.*]] |
| ; CHECK: bb1: |
| ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[READ0]]) |
| ; CHECK-NEXT: ret i32 [[READ1]] |
| ; |
| bb0: |
| %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg) |
| br label %bb1 |
| |
| bb1: |
| %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0) |
| ret i32 %read1 |
| } |
| |
| ; llvm.amdgcn.readlane (lane 0) in bb0 feeds llvm.amdgcn.readfirstlane in bb1. |
| ; The CHECK lines expect both calls to remain when they sit in different basic blocks. |
| define i32 @readfirstlane_readlane_different_block(i32 %arg) { |
| ; CHECK-LABEL: @readfirstlane_readlane_different_block( |
| ; CHECK-NEXT: bb0: |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 0) |
| ; CHECK-NEXT: br label [[BB1:%.*]] |
| ; CHECK: bb1: |
| ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[READ0]]) |
| ; CHECK-NEXT: ret i32 [[READ1]] |
| ; |
| bb0: |
| %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0) |
| br label %bb1 |
| |
| bb1: |
| %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0) |
| ret i32 %read1 |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.readlane |
| ; -------------------------------------------------------------------- |
| |
| declare i32 @llvm.amdgcn.readlane(i32, i32) |
| |
| ; Applies llvm.amdgcn.readlane to a function argument with the constant lane 7, and to 0, 123, a ptrtoint |
| ; constant expression and undef with the variable lane %lane; every result is kept alive by a volatile store. |
| ; The CHECK lines expect only the call on %arg to survive; the other four stores receive the value operand directly. |
| define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) { |
| ; CHECK-LABEL: @readlane_constant( |
| ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 7) |
| ; CHECK-NEXT: store volatile i32 [[VAR]], ptr undef, align 4 |
| ; CHECK-NEXT: store volatile i32 0, ptr undef, align 4 |
| ; CHECK-NEXT: store volatile i32 123, ptr undef, align 4 |
| ; CHECK-NEXT: store volatile i32 ptrtoint (ptr @gv to i32), ptr undef, align 4 |
| ; CHECK-NEXT: store volatile i32 undef, ptr undef, align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7) |
| %zero = call i32 @llvm.amdgcn.readlane(i32 0, i32 %lane) |
| %imm = call i32 @llvm.amdgcn.readlane(i32 123, i32 %lane) |
| %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (ptr @gv to i32), i32 %lane) |
| %undef = call i32 @llvm.amdgcn.readlane(i32 undef, i32 %lane) |
| store volatile i32 %var, ptr undef |
| store volatile i32 %zero, ptr undef |
| store volatile i32 %imm, ptr undef |
| store volatile i32 %constexpr, ptr undef |
| store volatile i32 %undef, ptr undef |
| ret void |
| } |
| |
| ; llvm.amdgcn.readlane applied to the result of another readlane with the same lane operand, in one basic block. |
| ; The CHECK lines expect the second call to be dropped and the first result returned. |
| define i32 @readlane_idempotent(i32 %arg, i32 %lane) { |
| ; CHECK-LABEL: @readlane_idempotent( |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 [[LANE:%.*]]) |
| ; CHECK-NEXT: ret i32 [[READ0]] |
| ; |
| %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane) |
| %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane) |
| ret i32 %read1 |
| } |
| |
| ; llvm.amdgcn.readlane applied to the result of another readlane, where the two calls use different lane operands. |
| ; The CHECK lines expect the second call to be dropped here as well, leaving only the read with %lane0. |
| define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) { |
| ; CHECK-LABEL: @readlane_idempotent_different_lanes( |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 [[LANE0:%.*]]) |
| ; CHECK-NEXT: ret i32 [[READ0]] |
| ; |
| %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0) |
| %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1) |
| ret i32 %read1 |
| } |
| |
| ; llvm.amdgcn.readlane (lane 0) applied to the result of llvm.amdgcn.readfirstlane in the same basic block. |
| ; The CHECK lines expect the readlane to be dropped and the readfirstlane result returned. |
| define i32 @readlane_readfirstlane(i32 %arg) { |
| ; CHECK-LABEL: @readlane_readfirstlane( |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]]) |
| ; CHECK-NEXT: ret i32 [[READ0]] |
| ; |
| %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg) |
| %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0) |
| ret i32 %read1 |
| } |
| |
| ; llvm.amdgcn.readlane in bb0 feeds a second readlane with the same lane operand in bb1. |
| ; The CHECK lines expect both calls to remain when they sit in different basic blocks. |
| define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) { |
| ; CHECK-LABEL: @readlane_idempotent_different_block( |
| ; CHECK-NEXT: bb0: |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 [[LANE:%.*]]) |
| ; CHECK-NEXT: br label [[BB1:%.*]] |
| ; CHECK: bb1: |
| ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[READ0]], i32 [[LANE]]) |
| ; CHECK-NEXT: ret i32 [[READ1]] |
| ; |
| bb0: |
| %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane) |
| br label %bb1 |
| |
| bb1: |
| %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane) |
| ret i32 %read1 |
| } |
| |
| |
| ; llvm.amdgcn.readfirstlane in bb0 feeds llvm.amdgcn.readlane (lane 0) in bb1. |
| ; The CHECK lines expect both calls to remain when they sit in different basic blocks. |
| define i32 @readlane_readfirstlane_different_block(i32 %arg) { |
| ; CHECK-LABEL: @readlane_readfirstlane_different_block( |
| ; CHECK-NEXT: bb0: |
| ; CHECK-NEXT: [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]]) |
| ; CHECK-NEXT: br label [[BB1:%.*]] |
| ; CHECK: bb1: |
| ; CHECK-NEXT: [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[READ0]], i32 0) |
| ; CHECK-NEXT: ret i32 [[READ1]] |
| ; |
| bb0: |
| %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg) |
| br label %bb1 |
| |
| bb1: |
| %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0) |
| ret i32 %read1 |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.update.dpp.i32 |
| ; -------------------------------------------------------------------- |
| |
| declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) |
| |
| ; llvm.amdgcn.update.dpp.i32 with the trailing immediates 1, 1, 1 and 'i1 0'. |
| ; The CHECK lines expect the call to be left unchanged (both %in1 and %in2 are still passed). |
| define amdgpu_kernel void @update_dpp_no_combine(ptr addrspace(1) %out, i32 %in1, i32 %in2) { |
| ; CHECK-LABEL: @update_dpp_no_combine( |
| ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[IN1:%.*]], i32 [[IN2:%.*]], i32 1, i32 1, i32 1, i1 false) |
| ; CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) |
| store i32 %tmp0, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; llvm.amdgcn.update.dpp.i32 with the trailing immediates 3, 15, 15 and 'i1 1'. |
| ; The CHECK lines expect the first operand (%in1, the "old" value of the test name) to be replaced by undef. |
| define amdgpu_kernel void @update_dpp_drop_old(ptr addrspace(1) %out, i32 %in1, i32 %in2) { |
| ; CHECK-LABEL: @update_dpp_drop_old( |
| ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN2:%.*]], i32 3, i32 15, i32 15, i1 true) |
| ; CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1) |
| store i32 %tmp0, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; llvm.amdgcn.update.dpp.i32 whose first operand is already undef, with the trailing immediates 4, 15, 15 and 'i1 1'. |
| ; The CHECK lines expect the call to be left unchanged. |
| define amdgpu_kernel void @update_dpp_undef_old(ptr addrspace(1) %out, i32 %in1) { |
| ; CHECK-LABEL: @update_dpp_undef_old( |
| ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN1:%.*]], i32 4, i32 15, i32 15, i1 true) |
| ; CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1) |
| store i32 %tmp0, ptr addrspace(1) %out |
| ret void |
| } |
| |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.permlane16 |
| ; -------------------------------------------------------------------- |
| |
| declare i32 @llvm.amdgcn.permlane16.i32(i32, i32, i32, i32, i1 immarg, i1 immarg) |
| |
| ; llvm.amdgcn.permlane16.i32 with first operand 12345 and both trailing i1 immediates false. |
| ; The CHECK lines expect the call to be left unchanged, including the 12345 operand. |
| define amdgpu_kernel void @permlane16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { |
| ; CHECK-LABEL: @permlane16( |
| ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16.i32(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false) |
| ; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %res = call i32 @llvm.amdgcn.permlane16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; llvm.amdgcn.permlane16.i32 with first operand 12345 and the last i1 immediate (bound_ctrl, per the test name) true. |
| ; The CHECK lines expect the first operand to be replaced by undef; all other operands are unchanged. |
| define amdgpu_kernel void @permlane16_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { |
| ; CHECK-LABEL: @permlane16_bound_ctrl( |
| ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16.i32(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true) |
| ; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %res = call i32 @llvm.amdgcn.permlane16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true) |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; llvm.amdgcn.permlane16.i32 with first operand 12345 and both trailing i1 immediates true. |
| ; The CHECK lines expect the first operand to be replaced by undef; all other operands are unchanged. |
| define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { |
| ; CHECK-LABEL: @permlane16_fetch_invalid_bound_ctrl( |
| ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16.i32(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true) |
| ; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %res = call i32 @llvm.amdgcn.permlane16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true) |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.permlanex16 |
| ; -------------------------------------------------------------------- |
| |
| declare i32 @llvm.amdgcn.permlanex16.i32(i32, i32, i32, i32, i1 immarg, i1 immarg) |
| |
| ; llvm.amdgcn.permlanex16.i32 with first operand 12345 and both trailing i1 immediates false. |
| ; The CHECK lines expect the call to be left unchanged, including the 12345 operand. |
| define amdgpu_kernel void @permlanex16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { |
| ; CHECK-LABEL: @permlanex16( |
| ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16.i32(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false) |
| ; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %res = call i32 @llvm.amdgcn.permlanex16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; llvm.amdgcn.permlanex16.i32 with first operand 12345 and the last i1 immediate (bound_ctrl, per the test name) true. |
| ; The CHECK lines expect the first operand to be replaced by undef; all other operands are unchanged. |
| define amdgpu_kernel void @permlanex16_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { |
| ; CHECK-LABEL: @permlanex16_bound_ctrl( |
| ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16.i32(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true) |
| ; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %res = call i32 @llvm.amdgcn.permlanex16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true) |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; llvm.amdgcn.permlanex16.i32 with first operand 12345 and both trailing i1 immediates true. |
| ; The CHECK lines expect the first operand to be replaced by undef; all other operands are unchanged. |
| define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { |
| ; CHECK-LABEL: @permlanex16_fetch_invalid_bound_ctrl( |
| ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16.i32(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true) |
| ; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %res = call i32 @llvm.amdgcn.permlanex16.i32(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true) |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.permlane64 |
| ; -------------------------------------------------------------------- |
| |
| ; llvm.amdgcn.permlane64 applied to the result of llvm.amdgcn.readfirstlane in the same basic block. |
| ; The CHECK lines expect the permlane64 call to be dropped and the readfirstlane result stored directly. |
| define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src0) { |
| ; CHECK-LABEL: @permlane64_uniform( |
| ; CHECK-NEXT: [[SRC1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[SRC0:%.*]]) |
| ; CHECK-NEXT: store i32 [[SRC1]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %src1 = call i32 @llvm.amdgcn.readfirstlane(i32 %src0) |
| %res = call i32 @llvm.amdgcn.permlane64(i32 %src1) |
| store i32 %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample a16 |
| ; -------------------------------------------------------------------- |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; The half coordinate %s is widened with fpext and passed to the f32 overload of llvm.amdgcn.image.sample.1d. |
| ; The CHECK lines expect the fpext to disappear and the .f16 overload to be called with the half value directly. |
| define amdgpu_kernel void @image_sample_a16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; The half coordinates %s and %t are widened with fpext and passed to the f32 overload of llvm.amdgcn.image.sample.2d. |
| ; The CHECK lines expect the fpext instructions to disappear and the .f16 overload to be called with the half values directly. |
| define amdgpu_kernel void @image_sample_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; The half coordinates %s, %t and %r are widened with fpext and passed to the f32 overload of llvm.amdgcn.image.sample.3d. |
| ; The CHECK lines expect the fpext instructions to disappear and the .f16 overload to be called with the half values directly. |
| define amdgpu_kernel void @image_sample_a16_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { |
| ; CHECK-LABEL: @image_sample_a16_3d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %r32 = fpext half %r to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample cube: %s, %t and %face are halfs extended to float with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { |
| ; |
| ; CHECK-LABEL: @image_sample_a16_cube( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %face32 = fpext half %face to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample 1darray: %s and %slice are halfs extended to float with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { |
| ; CHECK-LABEL: @image_sample_a16_1darray( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %slice32 = fpext half %slice to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample 2darray: %s, %t and %slice are halfs extended to float with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { |
| ; CHECK-LABEL: @image_sample_a16_2darray( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %slice32 = fpext half %slice to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c 1d: %zcompare is passed as a plain float; the coordinate %s is a half extended with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic with %zcompare still float and %s passed as half. |
| define amdgpu_kernel void @image_sample_a16_c_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_c_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c 2d: %zcompare is passed as a plain float; %s and %t are halfs extended with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic with %zcompare still float and the coordinates passed as halfs. |
| define amdgpu_kernel void @image_sample_a16_c_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_c_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.cl 1d: the coordinate %s and %clamp are halfs extended to float with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.cl 2d: %s, %t and %clamp are halfs extended to float with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.cl 1d: %zcompare is a plain float; %s and %clamp are halfs extended with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic with %zcompare still float and %s/%clamp passed as halfs. |
| define amdgpu_kernel void @image_sample_a16_c_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.cl 2d: %zcompare is a plain float; %s, %t and %clamp are halfs extended with fpext. |
| ; The assertions expect the .f16-suffixed intrinsic with %zcompare still float and the other operands passed as halfs. |
| define amdgpu_kernel void @image_sample_a16_c_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b 1d: both %bias and the coordinate %s are halfs extended to float with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_b16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_b16_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %bias32 = fpext half %bias to float |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b 1d with a float %bias that does not come from a half; only %s is extended with fpext. |
| ; The assertions expect no rewrite: the fpext and the .f32.f32 call are both kept. |
| ; NOTE(review): presumably the coordinate is not narrowed because the bias cannot be narrowed too - confirm against the transform. |
| define amdgpu_kernel void @image_sample_a16_b32_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_b32_1d( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b 2d: %bias, %s and %t are all halfs extended to float with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_b16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_b16_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %bias32 = fpext half %bias to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b 2d with a float %bias that does not come from a half; %s and %t are extended with fpext. |
| ; The assertions expect no rewrite: both fpext instructions and the .f32.f32 call are kept. |
| ; NOTE(review): presumably the coordinates are not narrowed because the bias cannot be narrowed too - confirm against the transform. |
| define amdgpu_kernel void @image_sample_a16_b32_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_b32_2d( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b 1d: %bias and %s are halfs extended with fpext; %zcompare is a plain float. |
| ; The assertions expect the .f16.f16-suffixed intrinsic with half %bias and %s, and %zcompare still float. |
| define amdgpu_kernel void @image_sample_a16_c_b16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_c_b16_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %bias32 = fpext half %bias to float |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b 1d with float %bias and float %zcompare; only the coordinate %s is extended with fpext. |
| ; The assertions expect no rewrite: the fpext and the .f32.f32 call are both kept. |
| ; NOTE(review): presumably the coordinate is not narrowed because the bias cannot be narrowed too - confirm against the transform. |
| define amdgpu_kernel void @image_sample_a16_c_b32_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_c_b32_1d( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b 2d: %bias, %s and %t are halfs extended with fpext; %zcompare is a plain float. |
| ; The assertions expect the .f16.f16-suffixed intrinsic with half %bias/%s/%t, and %zcompare still float. |
| define amdgpu_kernel void @image_sample_a16_c_b16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_c_b16_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %bias32 = fpext half %bias to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b 2d with float %bias and float %zcompare; %s and %t are extended with fpext. |
| ; The assertions expect no rewrite: both fpext instructions and the .f32.f32 call are kept. |
| ; NOTE(review): presumably the coordinates are not narrowed because the bias cannot be narrowed too - confirm against the transform. |
| define amdgpu_kernel void @image_sample_a16_c_b32_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_c_b32_2d( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b.cl 1d: %bias, %s and %clamp are all halfs extended to float with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_b16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_b16_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %bias32 = fpext half %bias to float |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b.cl 1d with a float %bias that does not come from a half; %s and %clamp are extended with fpext. |
| ; The assertions expect no rewrite: both fpext instructions and the .f32.f32 call are kept. |
| ; NOTE(review): presumably %s/%clamp are not narrowed because the bias cannot be narrowed too - confirm against the transform. |
| define amdgpu_kernel void @image_sample_a16_b32_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_b32_cl_1d( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b.cl 2d: %bias, %s, %t and %clamp are all halfs extended to float with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_b16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_b16_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %bias32 = fpext half %bias to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b.cl 2d with a float %bias that does not come from a half; %s, %t and %clamp are extended with fpext. |
| ; The assertions expect no rewrite: all three fpext instructions and the .f32.f32 call are kept. |
| ; NOTE(review): presumably the half operands are not narrowed because the bias cannot be narrowed too - confirm against the transform. |
| define amdgpu_kernel void @image_sample_a16_b32_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_b32_cl_2d( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float |
| ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b.cl 1d: %bias, %s and %clamp are halfs extended with fpext; %zcompare is a plain float. |
| ; The assertions expect the .f16.f16-suffixed intrinsic with the half operands passed directly and %zcompare still float. |
| define amdgpu_kernel void @image_sample_a16_c_b16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_b16_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %bias32 = fpext half %bias to float |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b.cl 1d with float %bias and float %zcompare; %s and %clamp are extended with fpext. |
| ; The assertions expect no rewrite: both fpext instructions and the .f32.f32 call are kept. |
| ; NOTE(review): presumably %s/%clamp are not narrowed because the bias cannot be narrowed too - confirm against the transform. |
| define amdgpu_kernel void @image_sample_a16_c_b32_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_b32_cl_1d( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b.cl 2d: %bias, %s, %t and %clamp are halfs extended with fpext; %zcompare is a plain float. |
| ; The assertions expect the .f16.f16-suffixed intrinsic with the half operands passed directly and %zcompare still float. |
| define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_b16_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %bias32 = fpext half %bias to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b.cl 2d with float %bias and float %zcompare; %s, %t and %clamp are extended with fpext. |
| ; The assertions expect no rewrite: all three fpext instructions and the .f32.f32 call are kept. |
| ; NOTE(review): presumably the half operands are not narrowed because the bias cannot be narrowed too - confirm against the transform. |
| define amdgpu_kernel void @image_sample_a16_c_b32_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_b32_cl_2d( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float |
| ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d 1d: the derivatives (%dsdh, %dsdv) and the coordinate %s are all halfs extended with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_d_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d 2d: four derivatives and the two coordinates (%s, %t) are all halfs extended with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_d_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d 3d: six derivatives and the three coordinates (%s, %t, %r) are all halfs extended with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_d_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { |
| ; CHECK-LABEL: @image_sample_a16_d_3d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %drdh32 = fpext half %drdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %drdv32 = fpext half %drdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %r32 = fpext half %r to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.d 1d: %zcompare is a plain float; the derivatives and the coordinate %s are halfs extended with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic with the half operands passed directly and %zcompare still float. |
| define amdgpu_kernel void @image_sample_a16_c_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_c_d_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.d 2d: %zcompare is a plain float; four derivatives and %s/%t are halfs extended with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic with the half operands passed directly and %zcompare still float. |
| define amdgpu_kernel void @image_sample_a16_c_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_c_d_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d.cl 1d: the derivatives, the coordinate %s and %clamp are all halfs extended with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_d_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d.cl 2d: four derivatives, %s/%t and %clamp are all halfs extended with fpext. |
| ; The assertions expect the .f16.f16-suffixed intrinsic called on the original halfs, with no fpext left. |
| define amdgpu_kernel void @image_sample_a16_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_d_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_d_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the half derivatives, s and clamp to float while %zcompare is already a float argument; the assertions above expect the f16.f16 overload with the half values passed directly and %zcompare still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_d_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the four half derivatives plus s, t and clamp to float while %zcompare is already a float argument; the assertions above expect the f16.f16 overload with the half values passed directly and %zcompare still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_cd_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the two half derivatives and s to float and calls the sample.cd f32.f32 overload; the assertions above expect the fpexts to be gone and the f16.f16 overload to be called on the original half values. |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_cd_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the four half derivatives plus s and t to float and calls the sample.cd f32.f32 overload; the assertions above expect the fpexts to be gone and the f16.f16 overload to be called on the original half values. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_c_cd_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the two half derivatives and s to float while %zcompare is already a float argument; the assertions above expect the f16.f16 overload with the half values passed directly and %zcompare still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_c_cd_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the four half derivatives plus s and t to float while %zcompare is already a float argument; the assertions above expect the f16.f16 overload with the half values passed directly and %zcompare still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_cd_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the two half derivatives, s and clamp to float and calls the sample.cd.cl f32.f32 overload; the assertions above expect the fpexts to be gone and the f16.f16 overload to be called on the original half values. |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_cd_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the four half derivatives plus s, t and clamp to float and calls the sample.cd.cl f32.f32 overload; the assertions above expect the fpexts to be gone and the f16.f16 overload to be called on the original half values. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the two half derivatives, s and clamp to float while %zcompare is already a float argument; the assertions above expect the f16.f16 overload with the half values passed directly and %zcompare still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %s32 = fpext half %s to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { |
| ; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the four half derivatives plus s, t and clamp to float while %zcompare is already a float argument; the assertions above expect the f16.f16 overload with the half values passed directly and %zcompare still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %clamp32 = fpext half %clamp to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { |
| ; CHECK-LABEL: @image_sample_a16_l_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the half s and lod operands to float and calls the sample.l f32 overload; the assertions above expect the fpexts to be gone and the f16 overload to be called on the original half values. |
| %s32 = fpext half %s to float |
| %lod32 = fpext half %lod to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { |
| ; CHECK-LABEL: @image_sample_a16_l_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the half s, t and lod operands to float and calls the sample.l f32 overload; the assertions above expect the fpexts to be gone and the f16 overload to be called on the original half values. |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %lod32 = fpext half %lod to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { |
| ; CHECK-LABEL: @image_sample_a16_c_l_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the half s and lod operands to float while %zcompare is already a float argument; the assertions above expect the f16 overload with the half values passed directly and %zcompare still passed as float. |
| %s32 = fpext half %s to float |
| %lod32 = fpext half %lod to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { |
| ; CHECK-LABEL: @image_sample_a16_c_l_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the half s, t and lod operands to float while %zcompare is already a float argument; the assertions above expect the f16 overload with the half values passed directly and %zcompare still passed as float. |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %lod32 = fpext half %lod to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_lz_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_lz_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the single half coordinate s to float and calls the sample.lz f32 overload; the assertions above expect the fpext to be gone and the f16 overload to be called on the original half value. |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_lz_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_lz_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the half coordinates s and t to float and calls the sample.lz f32 overload; the assertions above expect the fpexts to be gone and the f16 overload to be called on the original half values. |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_lz_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_c_lz_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the single half coordinate s to float while %zcompare is already a float argument; the assertions above expect the f16 overload with s passed directly as half and %zcompare still passed as float. |
| %s32 = fpext half %s to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_lz_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_c_lz_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below fpext's the half coordinates s and t to float while %zcompare is already a float argument; the assertions above expect the f16 overload with s and t passed directly as half and %zcompare still passed as float. |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { |
| ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1( |
| ; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store float [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; Scalar-result variant (returns float, first operand i32 4). The input below fpext's the four half derivatives plus s, t and slice to float; the assertions above expect the f16.f16 overload with the half values passed directly, while the i32 %offset and float %zcompare operands are unchanged. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %slice32 = fpext half %slice to float |
| %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store float %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { |
| ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2( |
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; Two-element-result variant (returns <2 x float>, first operand i32 6). The input below fpext's the four half derivatives plus s, t and slice to float; the assertions above expect the f16.f16 overload with the half values passed directly, while the i32 %offset and float %zcompare operands are unchanged. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %slice32 = fpext half %slice to float |
| %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <2 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) { |
| ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const( |
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half 0xH3400, half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; The input below passes the constant float 0.25 in the operand slot between s and slice, with every other derivative/coordinate operand an fpext'd half; the assertions above expect the f16.f16 overload with that constant rewritten as half 0xH3400 and the fpexts gone. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %slice32 = fpext half %slice to float |
| %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 0.25, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <2 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) { |
| ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const_noopt( |
| ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float |
| ; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float |
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S32]], float 1.000000e+10, float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; Negative case for the coordinate operands. The input below passes the constant float 1.0e+10 between s and slice; the assertions above expect the fpexts of %s and %slice and the float constant to remain (f16.f32 overload), with only the four derivatives narrowed to half. NOTE(review) presumably because 1.0e+10 cannot be represented as a half; confirm against the combine. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %s32 = fpext half %s to float |
| %slice32 = fpext half %slice to float |
| %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 1.0e+10, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <2 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_load_a16_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { |
| ; CHECK-LABEL: @image_load_a16_mip_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below zero-extends the i16 coordinate to i32 and calls image.load.mip.1d with a constant i32 0 after it (presumably the mip level); the assertions above expect a plain image.load.1d without that operand, taking the coordinate directly as i16 with the zext gone. |
| %s32 = zext i16 %s to i32 |
| %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_load_a16_mip_1d_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { |
| ; CHECK-LABEL: @image_load_a16_mip_1d_noopt( |
| ; CHECK-NEXT: [[S32:%.*]] = sext i16 [[S:%.*]] to i32 |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S32]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Negative case for coordinate narrowing. The input below sign-extends (sext, not zext) the i16 coordinate before image.load.mip.1d with a constant i32 0 after it; the assertions above expect the call to become image.load.1d without that operand, but with the sext kept and the coordinate still i32. |
| %s32 = sext i16 %s to i32 |
| %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_load_a16_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s, i16 %t) { |
| ; CHECK-LABEL: @image_load_a16_mip_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below zero-extends both i16 coordinates to i32 and calls image.load.mip.2d with a constant i32 0 after them (presumably the mip level); the assertions above expect a plain image.load.2d without that operand, taking both coordinates directly as i16 with the zexts gone. |
| %s32 = zext i16 %s to i32 |
| %t32 = zext i16 %t to i32 |
| %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 %t32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_load_a16_mip_2d_const(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { |
| ; CHECK-LABEL: @image_load_a16_mip_2d_const( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 -1, <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; The input below passes a zext'd i16 first coordinate and the constant i32 65535 as the second coordinate to image.load.mip.2d; the assertions above expect image.load.2d with i16 coordinates, the constant rewritten as i16 -1 (the same 16-bit pattern as 65535) and the zext gone. |
| %s32 = zext i16 %s to i32 |
| %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65535, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { |
| ; CHECK-LABEL: @image_load_a16_mip_2d_const_noopt( |
| ; CHECK-NEXT: [[S32:%.*]] = zext i16 [[S:%.*]] to i32 |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S32]], i32 65536, <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Negative case for coordinate narrowing. The input below passes the constant i32 65536 as the second coordinate; the assertions above expect image.load.2d with the coordinates still i32 and the zext of %s kept. NOTE(review) presumably because 65536 does not fit in 16 bits; confirm against the combine. |
| %s32 = zext i16 %s to i32 |
| %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65536, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample g16 |
| ; -------------------------------------------------------------------- |
| |
| define amdgpu_kernel void @image_sample_g16_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { |
| ; CHECK-LABEL: @image_sample_g16_d_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; In the input below only the two derivatives are fpext'd from half, and the coordinate %s is a float argument; the assertions above expect the f16.f32 overload, with the derivatives passed as half and %s still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_g16_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { |
| ; CHECK-LABEL: @image_sample_g16_d_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; In the input below only the four derivatives are fpext'd from half, and the coordinates %s and %t are float arguments; the assertions above expect the f16.f32 overload, with the derivatives passed as half and the coordinates still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_g16_d_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { |
| ; CHECK-LABEL: @image_sample_g16_d_3d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; In the input below only the six derivatives are fpext'd from half, and the coordinates %s, %t and %r are float arguments; the assertions above expect the f16.f32 overload, with the derivatives passed as half and the coordinates still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %drdh32 = fpext half %drdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %drdv32 = fpext half %drdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_g16_c_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { |
| ; CHECK-LABEL: @image_sample_g16_c_d_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; In the input below only the two derivatives are fpext'd from half, while %zcompare and the coordinate %s are float arguments; the assertions above expect the f16.f32 overload, with the derivatives passed as half and both %zcompare and %s still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @image_sample_g16_c_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { |
| ; CHECK-LABEL: @image_sample_g16_c_d_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; In the input below only the four derivatives are fpext'd from half, while %zcompare and the coordinates %s and %t are float arguments; the assertions above expect the f16.f32 overload, with the derivatives passed as half and %zcompare, %s and %t still passed as float. |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.d.cl.1d: the two fpext'ed half derivatives are expected |
| ; to be passed as half (.f16.f32 overload); s and clamp stay float. |
| define amdgpu_kernel void @image_sample_g16_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { |
| ; CHECK-LABEL: @image_sample_g16_d_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.d.cl.2d: the four fpext'ed half derivatives are expected |
| ; to be passed as half (.f16.f32 overload); s, t and clamp stay float. |
| define amdgpu_kernel void @image_sample_g16_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @image_sample_g16_d_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.c.d.cl.1d: the two fpext'ed half derivatives are expected |
| ; to be passed as half (.f16.f32 overload); zcompare, s and clamp stay float. |
| define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { |
| ; CHECK-LABEL: @image_sample_g16_c_d_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.c.d.cl.2d: the four fpext'ed half derivatives are expected |
| ; to be passed as half (.f16.f32 overload); zcompare, s, t and clamp stay float. |
| define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @image_sample_g16_c_d_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.cd.1d: the two fpext'ed half derivatives are expected to |
| ; be passed as half (.f16.f32 overload); the coordinate s stays float. |
| define amdgpu_kernel void @image_sample_g16_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { |
| ; CHECK-LABEL: @image_sample_g16_cd_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.cd.2d: the four fpext'ed half derivatives are expected to |
| ; be passed as half (.f16.f32 overload); s and t stay float. |
| define amdgpu_kernel void @image_sample_g16_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { |
| ; CHECK-LABEL: @image_sample_g16_cd_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.c.cd.1d: the two fpext'ed half derivatives are expected to |
| ; be passed as half (.f16.f32 overload); zcompare and s stay float. |
| define amdgpu_kernel void @image_sample_g16_c_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { |
| ; CHECK-LABEL: @image_sample_g16_c_cd_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.c.cd.2d: the four fpext'ed half derivatives are expected |
| ; to be passed as half (.f16.f32 overload); zcompare, s and t stay float. |
| define amdgpu_kernel void @image_sample_g16_c_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { |
| ; CHECK-LABEL: @image_sample_g16_c_cd_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.cd.cl.1d: the two fpext'ed half derivatives are expected |
| ; to be passed as half (.f16.f32 overload); s and clamp stay float. |
| define amdgpu_kernel void @image_sample_g16_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { |
| ; CHECK-LABEL: @image_sample_g16_cd_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.cd.cl.2d: the four fpext'ed half derivatives are expected |
| ; to be passed as half (.f16.f32 overload); s, t and clamp stay float. |
| define amdgpu_kernel void @image_sample_g16_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @image_sample_g16_cd_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.c.cd.cl.1d: the two fpext'ed half derivatives are expected |
| ; to be passed as half (.f16.f32 overload); zcompare, s and clamp stay float. |
| define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { |
| ; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.c.cd.cl.2d: the four fpext'ed half derivatives are |
| ; expected to be passed as half (.f16.f32 overload); zcompare, s, t and clamp |
| ; stay float. |
| define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d( |
| ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.c.d.o.2darray with a scalar float result (dmask 4): the |
| ; four fpext'ed half derivatives are expected to be passed as half (.f32.f16.f32 |
| ; overload); offset, zcompare, s, t and slice are unchanged. |
| define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { |
| ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1( |
| ; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store float [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store float %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; G16 fold for sample.c.d.o.2darray with a <2 x float> result (dmask 6): the |
| ; four fpext'ed half derivatives are expected to be passed as half |
| ; (.v2f32.f16.f32 overload); offset, zcompare, s, t and slice are unchanged. |
| define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { |
| ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2( |
| ; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %dsdh32 = fpext half %dsdh to float |
| %dtdh32 = fpext half %dtdh to float |
| %dsdv32 = fpext half %dsdv to float |
| %dtdv32 = fpext half %dtdv to float |
| %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <2 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample a16: the fold must preserve fast-math flags |
| ; -------------------------------------------------------------------- |
| |
| ; The fpext'ed half coordinate is expected to be passed as half (.v4f32.f16 |
| ; overload), and the nnan flag on the original call must be kept. |
| define amdgpu_kernel void @image_sample_a16_1d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_1d_nnan( |
| ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Same fold as the single-flag 1d case, but with several flags at once: the |
| ; rewritten .v4f32.f16 call must keep nnan, ninf and nsz together. |
| define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz( |
| ; CHECK-NEXT: [[RES:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %res = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Same 1d fold with the call marked 'fast': the rewritten .v4f32.f16 call is |
| ; expected to still be 'fast'. |
| define amdgpu_kernel void @image_sample_a16_1d_fast(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { |
| ; CHECK-LABEL: @image_sample_a16_1d_fast( |
| ; CHECK-NEXT: [[RES:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %res = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; 2d variant: both fpext'ed half coordinates are expected to be passed as half |
| ; (.v4f32.f16 overload), with nnan kept on the call. |
| define amdgpu_kernel void @image_sample_a16_2d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { |
| ; CHECK-LABEL: @image_sample_a16_2d_nnan( |
| ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; 3d variant: all three fpext'ed half coordinates are expected to be passed as |
| ; half (.v4f32.f16 overload), with nnan kept on the call. |
| define amdgpu_kernel void @image_sample_a16_3d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { |
| ; CHECK-LABEL: @image_sample_a16_3d_nnan( |
| ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %r32 = fpext half %r to float |
| %res = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; cube variant: s, t and face are all fpext'ed from half and are expected to be |
| ; passed as half (.v4f32.f16 overload), with nnan kept on the call. |
| define amdgpu_kernel void @image_sample_a16_cube_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { |
| ; CHECK-LABEL: @image_sample_a16_cube_nnan( |
| ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %face32 = fpext half %face to float |
| %res = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; 1darray variant: s and slice are both fpext'ed from half and are expected to |
| ; be passed as half (.v4f32.f16 overload), with nnan kept on the call. |
| define amdgpu_kernel void @image_sample_a16_1darray_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { |
| ; CHECK-LABEL: @image_sample_a16_1darray_nnan( |
| ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %slice32 = fpext half %slice to float |
| %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; 2darray variant: s, t and slice are all fpext'ed from half and are expected to |
| ; be passed as half (.v4f32.f16 overload), with nnan kept on the call. |
| define amdgpu_kernel void @image_sample_a16_2darray_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { |
| ; CHECK-LABEL: @image_sample_a16_2darray_nnan( |
| ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| %s32 = fpext half %s to float |
| %t32 = fpext half %t to float |
| %slice32 = fpext half %slice to float |
| %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %res, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample / image.gather4: fold l with a constant lod <= 0 to lz |
| ; -------------------------------------------------------------------- |
| |
| ; Offset (.o) overloads of image.sample.l / image.sample.c.l called by the tests |
| ; in this section. |
| declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; image.gather4 .l overloads (plain, .c, .o and .c.o) called by the tests in |
| ; this section. |
| declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; sample.l.1d with a constant lod of 0.0 is expected to become sample.lz.1d with |
| ; the lod operand dropped. The %lod argument is unused by the body. |
| define amdgpu_kernel void @sample_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { |
| ; CHECK-LABEL: @sample_l_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; sample.l.2d with a constant lod of -0.0 (negative zero) is expected to become |
| ; sample.lz.2d with the lod operand dropped. The %lod argument is unused. |
| define amdgpu_kernel void @sample_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @sample_l_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; sample.c.l.1d with a negative constant lod (-2.0) is expected to become |
| ; sample.c.lz.1d with the lod operand dropped. The %lod argument is unused. |
| define amdgpu_kernel void @sample_c_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { |
| ; CHECK-LABEL: @sample_c_l_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; sample.c.l.2d with a constant lod of 0.0 is expected to become sample.c.lz.2d |
| ; with the lod operand dropped. The %lod argument is unused by the body. |
| define amdgpu_kernel void @sample_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @sample_c_l_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; sample.l.o.1d with a constant lod of 0.0 is expected to become sample.lz.o.1d; |
| ; the offset operand is kept and the lod operand dropped. %lod is unused. |
| define amdgpu_kernel void @sample_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { |
| ; CHECK-LABEL: @sample_l_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; sample.l.o.2d with a constant lod of 0.0 is expected to become sample.lz.o.2d; |
| ; the offset operand is kept and the lod operand dropped. %lod is unused. |
| define amdgpu_kernel void @sample_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @sample_l_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; sample.c.l.o.1d with a constant lod of 0.0 is expected to become |
| ; sample.c.lz.o.1d; offset and zcompare are kept and the lod operand dropped. |
| ; The %lod argument is unused by the body. |
| define amdgpu_kernel void @sample_c_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { |
| ; CHECK-LABEL: @sample_c_l_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; sample.c.l.o.2d with a constant lod of 0.0 is expected to become |
| ; sample.c.lz.o.2d; offset and zcompare are kept and the lod operand dropped. |
| ; The %lod argument is unused by the body. |
| define amdgpu_kernel void @sample_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @sample_c_l_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; gather4.l.2d with a constant lod of 0.0 is expected to become gather4.lz.2d |
| ; with the lod operand dropped. The %lod argument is unused by the body. |
| define amdgpu_kernel void @gather4_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @gather4_l_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; gather4.c.l.2d with a constant lod of 0.0 is expected to become |
| ; gather4.c.lz.2d; zcompare is kept and the lod operand dropped. %lod is unused. |
| define amdgpu_kernel void @gather4_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @gather4_c_l_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @gather4_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @gather4_l_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: gather4.l.o.2d with a constant 0.0 in the LOD slot (the %lod parameter is unused); the expected output above is gather4.lz.o.2d, with the variable %offset kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @gather4_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @gather4_c_l_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: gather4.c.l.o.2d with a constant 0.0 in the LOD slot (the %lod parameter is unused); the expected output above is gather4.c.lz.o.2d, with %offset and %zcompare kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @gather4_c_l_o_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %slice, float %lod) { |
| ; CHECK-LABEL: @gather4_c_l_o_2darray( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2darray.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: gather4.c.l.o.2darray with a constant 0.0 after %slice (the %lod parameter is unused); the expected output above is gather4.c.lz.o.2darray, with %slice kept as the last coordinate. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %slice, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.load.mip / llvm.amdgcn.image.store.mip mipmap zero |
| ; -------------------------------------------------------------------- |
| |
| define amdgpu_kernel void @load_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s) { |
| ; CHECK-LABEL: @load_mip_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: image.load.mip.1d with a constant i32 0 in the mip operand after %s; the expected output above is image.load.1d without that operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @load_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; CHECK-LABEL: @load_mip_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: image.load.mip.2d with a constant i32 0 in the mip operand after %s/%t; the expected output above is image.load.2d without that operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @load_mip_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { |
| ; CHECK-LABEL: @load_mip_3d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: image.load.mip.3d with a constant i32 0 in the mip operand after %s/%t/%u; the expected output above is image.load.3d without that operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @load_mip_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; CHECK-LABEL: @load_mip_1darray( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: image.load.mip.1darray with a constant i32 0 in the mip operand after %s/%t; the expected output above is image.load.1darray without that operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @load_mip_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { |
| ; CHECK-LABEL: @load_mip_2darray( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: image.load.mip.2darray with a constant i32 0 in the mip operand after %s/%t/%u; the expected output above is image.load.2darray without that operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @load_mip_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { |
| ; CHECK-LABEL: @load_mip_cube( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: image.load.mip.cube with a constant i32 0 in the mip operand after %s/%t/%u; the expected output above is image.load.cube without that operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| |
| define amdgpu_kernel void @store_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { |
| ; CHECK-LABEL: @store_mip_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret void |
| ; Input: image.store.mip.1d with a constant i32 0 in the mip operand after %s; the expected output above is image.store.1d without that operand. %out is unused here. |
| main_body: |
| call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { |
| ; CHECK-LABEL: @store_mip_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret void |
| ; Input: image.store.mip.2d with a constant i32 0 in the mip operand after %s/%t; the expected output above is image.store.2d without that operand. %out is unused here. |
| main_body: |
| call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_mip_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { |
| ; CHECK-LABEL: @store_mip_3d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret void |
| ; Input: image.store.mip.3d with a constant i32 0 in the mip operand after %s/%t/%u; the expected output above is image.store.3d without that operand. %out is unused here. |
| main_body: |
| call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_mip_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { |
| ; CHECK-LABEL: @store_mip_1darray( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret void |
| ; Input: image.store.mip.1darray with a constant i32 0 in the mip operand after %s/%t; the expected output above is image.store.1darray without that operand. %out is unused here. |
| main_body: |
| call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_mip_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { |
| ; CHECK-LABEL: @store_mip_2darray( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret void |
| ; Input: image.store.mip.2darray with a constant i32 0 in the mip operand after %s/%t/%u; the expected output above is image.store.2darray without that operand. %out is unused here. |
| main_body: |
| call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| ret void |
| } |
| |
| define amdgpu_kernel void @store_mip_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { |
| ; CHECK-LABEL: @store_mip_cube( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret void |
| ; Input: image.store.mip.cube with a constant i32 0 in the mip operand after %s/%t/%u; the expected output above is image.store.cube without that operand. %out is unused here. |
| main_body: |
| call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| ret void |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 |
| |
| |
| declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample bias zero |
| ; -------------------------------------------------------------------- |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32, i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| define amdgpu_kernel void @sample_b_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; CHECK-LABEL: @sample_b_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.b.1d with a constant 0.0 bias; the expected output above is plain sample.1d with the bias operand dropped. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @sample_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { |
| ; CHECK-LABEL: @sample_b_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.b.2d with a constant -0.0 bias (negative zero); the expected output above is plain sample.2d with the bias operand dropped. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @sample_c_b_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { |
| ; CHECK-LABEL: @sample_c_b_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.c.b.1d with a constant -0.0 bias (negative zero); the expected output above is sample.c.1d with the bias operand dropped and %zcompare kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @sample_c_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { |
| ; CHECK-LABEL: @sample_c_b_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.c.b.2d with a constant 0.0 bias; the expected output above is sample.c.2d with the bias operand dropped and %zcompare kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @sample_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) { |
| ; CHECK-LABEL: @sample_b_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.b.o.1d with a constant 0.0 bias and a variable %offset; the expected output above is sample.o.1d, dropping only the bias operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @sample_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { |
| ; CHECK-LABEL: @sample_b_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.b.o.2d with a constant 0.0 bias and a variable %offset; the expected output above is sample.o.2d, dropping only the bias operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @sample_c_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { |
| ; CHECK-LABEL: @sample_c_b_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.c.b.o.1d with a constant 0.0 bias; the expected output above is sample.c.o.1d, dropping only the bias operand and keeping %offset and %zcompare. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @sample_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { |
| ; CHECK-LABEL: @sample_c_b_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.c.b.o.2d with a constant 0.0 bias; the expected output above is sample.c.o.2d, dropping only the bias operand and keeping %offset and %zcompare. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @gather4_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { |
| ; CHECK-LABEL: @gather4_b_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: gather4.b.2d with a constant 0.0 bias; the expected output above is plain gather4.2d with the bias operand dropped. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32 15, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @gather4_c_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { |
| ; CHECK-LABEL: @gather4_c_b_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: gather4.c.b.2d with a constant 0.0 bias; the expected output above is gather4.c.2d with the bias operand dropped and %zcompare kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @gather4_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { |
| ; CHECK-LABEL: @gather4_b_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: gather4.b.o.2d with a constant 0.0 bias and a variable %offset; the expected output above is gather4.o.2d, dropping only the bias operand. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @gather4_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { |
| ; CHECK-LABEL: @gather4_c_b_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: gather4.c.b.o.2d with a constant 0.0 bias; the expected output above is gather4.c.o.2d, dropping only the bias operand and keeping %offset and %zcompare. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @sample_c_b_o_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) { |
| ; CHECK-LABEL: @sample_c_b_o_a16_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f16(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: the f16 variant of sample.c.b.o.2d with a constant half 0.0 bias, half coordinates and a float %zcompare; the expected output above is the f16 sample.c.o.2d with only the bias operand dropped. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Check that bias is not optimized away if > 0: the constant 1.0 bias must survive and the call must stay sample.b. |
| define amdgpu_kernel void @sample_b_1d_pos(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; CHECK-LABEL: @sample_b_1d_pos( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float 1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Check that bias is not optimized away if < 0: the constant -1.0 bias must survive and the call must stay sample.b. |
| define amdgpu_kernel void @sample_b_1d_neg(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; CHECK-LABEL: @sample_b_1d_neg( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float -1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Zero bias + A16: the bias is -0.0 and the coordinate is an fpext'ed half, so the expected output is the bias-free sample.1d in its f16 form taking the half %s directly. |
| define amdgpu_kernel void @sample_b_1d_a16(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { |
| ; CHECK-LABEL: @sample_b_1d_a16( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %s32 = fpext half %s to float |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -0.0, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample offset zero |
| ; -------------------------------------------------------------------- |
| |
| define amdgpu_kernel void @offset_sample_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; CHECK-LABEL: @offset_sample_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.o.1d with a constant i32 0 offset; the expected output above is plain sample.1d with the offset operand dropped. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @offset_sample_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.o.2d with a constant i32 0 offset; the expected output above is plain sample.2d with the offset operand dropped. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @offset_sample_c_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { |
| ; CHECK-LABEL: @offset_sample_c_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.c.o.1d with a constant i32 0 offset; the expected output above is sample.c.1d with the offset operand dropped and %zcompare kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @offset_sample_c_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_c_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.c.o.2d with a constant i32 0 offset; the expected output above is sample.c.2d with the offset operand dropped and %zcompare kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @offset_sample_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_cl_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.cl.o.1d with a constant i32 0 offset; the expected output above is sample.cl.1d with the offset operand dropped and %clamp kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @offset_sample_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_cl_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.cl.o.2d with a constant i32 0 offset; the expected output above is sample.cl.2d with the offset operand dropped and %clamp kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define amdgpu_kernel void @offset_sample_c_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_c_cl_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; Input: sample.c.cl.o.1d with a constant i32 0 offset; the expected output above is sample.c.cl.1d with the offset operand dropped and %zcompare/%clamp kept. |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.cl.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.cl.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_c_cl_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.b.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { |
| ; CHECK-LABEL: @offset_sample_b_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.b.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_b_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.b.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { |
| ; CHECK-LABEL: @offset_sample_c_b_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.b.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_c_b_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b.cl.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.b.cl.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_b_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_b_cl_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.b.cl.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.b.cl.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_b_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_b_cl_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b.cl.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.b.cl.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_b_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_c_b_cl_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.b.cl.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.b.cl.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_b_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_c_b_cl_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.d.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_d_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { |
| ; CHECK-LABEL: @offset_sample_d_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.d.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_d_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_d_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.d.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.d.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_d_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { |
| ; CHECK-LABEL: @offset_sample_c_d_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.d.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.d.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_d_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_c_d_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d.cl.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.d.cl.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_d_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_d_cl_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.d.cl.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.d.cl.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_d_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_d_cl_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.d.cl.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.d.cl.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_d_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_c_d_cl_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.d.cl.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.d.cl.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_d_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_c_d_cl_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.cd.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.cd.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_cd_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { |
| ; CHECK-LABEL: @offset_sample_cd_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.cd.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.cd.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_cd_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_cd_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.cd.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.cd.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_cd_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { |
| ; CHECK-LABEL: @offset_sample_c_cd_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.cd.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.cd.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_cd_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_c_cd_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.cd.cl.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.cd.cl.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_cd_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_cd_cl_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.cd.cl.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.cd.cl.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_cd_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_cd_cl_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.cd.cl.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.cd.cl.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_cd_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_c_cd_cl_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.cd.cl.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.cd.cl.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_cd_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { |
| ; CHECK-LABEL: @offset_sample_c_cd_cl_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.l.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.l.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { |
| ; CHECK-LABEL: @offset_sample_l_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.l.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.l.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @offset_sample_l_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.l.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.l.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { |
| ; CHECK-LABEL: @offset_sample_c_l_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.l.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.l.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { |
| ; CHECK-LABEL: @offset_sample_c_l_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.lz.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.lz.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_lz_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { |
| ; CHECK-LABEL: @offset_sample_lz_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.lz.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.lz.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_lz_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_lz_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.lz.o.1d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.lz.1d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_lz_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { |
| ; CHECK-LABEL: @offset_sample_c_lz_o_1d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; image.sample.c.lz.o.2d called with a constant zero offset (i32 0): the expected output calls |
| ; image.sample.c.lz.2d instead, with the offset operand removed and all other operands unchanged. |
| define amdgpu_kernel void @offset_sample_c_lz_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { |
| ; CHECK-LABEL: @offset_sample_c_lz_o_2d( |
| ; CHECK-NEXT: main_body: |
| ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| main_body: |
| %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
| store <4 x float> %v, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Declarations of the offset-taking (.o) sample, sample.c, sample.cl and sample.c.cl intrinsics (1d and 2d). |
| ; In every signature the second operand (i32) is the offset that the offset_sample_* tests pass as zero. |
| declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; Declarations of the offset-taking (.o) sample.b, sample.c.b, sample.b.cl and sample.c.b.cl intrinsics (1d and 2d). |
| ; In every signature the second operand (i32) is the offset that the offset_sample_* tests pass as zero. |
| declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.is.shared |
| ; -------------------------------------------------------------------- |
| |
| declare i1 @llvm.amdgcn.is.shared(ptr) nounwind readnone |
| |
| ; A null pointer operand is expected to fold to false. |
| define i1 @test_is_shared_null() nounwind { |
| ; CHECK-LABEL: @test_is_shared_null( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.is.shared(ptr null) |
| ret i1 %val |
| } |
| |
| ; An undef pointer operand is expected to fold to undef. |
| define i1 @test_is_shared_undef() nounwind { |
| ; CHECK-LABEL: @test_is_shared_undef( |
| ; CHECK-NEXT: ret i1 undef |
| ; |
| %val = call i1 @llvm.amdgcn.is.shared(ptr undef) |
| ret i1 %val |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.is.private |
| ; -------------------------------------------------------------------- |
| |
| declare i1 @llvm.amdgcn.is.private(ptr) nounwind readnone |
| |
| ; A null pointer operand is expected to fold to false. |
| define i1 @test_is_private_null() nounwind { |
| ; CHECK-LABEL: @test_is_private_null( |
| ; CHECK-NEXT: ret i1 false |
| ; |
| %val = call i1 @llvm.amdgcn.is.private(ptr null) |
| ret i1 %val |
| } |
| |
| ; An undef pointer operand is expected to fold to undef. |
| define i1 @test_is_private_undef() nounwind { |
| ; CHECK-LABEL: @test_is_private_undef( |
| ; CHECK-NEXT: ret i1 undef |
| ; |
| %val = call i1 @llvm.amdgcn.is.private(ptr undef) |
| ret i1 %val |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.trig.preop |
| ; -------------------------------------------------------------------- |
| |
| declare double @llvm.amdgcn.trig.preop.f64(double, i32) |
| declare float @llvm.amdgcn.trig.preop.f32(float, i32) |
| |
| ; undef src0 with a variable segment operand is expected to fold to a quiet NaN. |
| define double @trig_preop_constfold_variable_undef_arg(i32 %arg) { |
| ; CHECK-LABEL: @trig_preop_constfold_variable_undef_arg( |
| ; CHECK-NEXT: ret double 0x7FF8000000000000 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double undef, i32 %arg) |
| ret double %val |
| } |
| |
| ; poison src0 with a variable segment operand is expected to fold to poison. |
| define double @trig_preop_constfold_variable_poison_arg(i32 %arg) { |
| ; CHECK-LABEL: @trig_preop_constfold_variable_poison_arg( |
| ; CHECK-NEXT: ret double poison |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double poison, i32 %arg) |
| ret double %val |
| } |
| |
| ; Variable src0 with an undef segment operand: the call is expected to remain |
| ; unchanged (no fold). |
| define double @trig_preop_constfold_variable_arg_undef(double %arg) { |
| ; CHECK-LABEL: @trig_preop_constfold_variable_arg_undef( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[ARG:%.*]], i32 undef) |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 undef) |
| ret double %val |
| } |
| |
| ; Variable src0 with a poison segment operand is expected to fold to poison. |
| define double @trig_preop_constfold_variable_arg_poison(double %arg) { |
| ; CHECK-LABEL: @trig_preop_constfold_variable_arg_poison( |
| ; CHECK-NEXT: ret double poison |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 poison) |
| ret double %val |
| } |
| |
| ; Constant src0 with a variable segment operand: the call is expected to remain |
| ; (no constant fold). |
| define double @trig_preop_constfold_variable_int(i32 %arg) { |
| ; CHECK-LABEL: @trig_preop_constfold_variable_int( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 [[ARG:%.*]]) |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 %arg) |
| ret double %val |
| } |
| |
| ; A quiet NaN src0 is expected to fold to the same quiet NaN even though the |
| ; segment operand is variable. |
| define double @trig_preop_qnan(i32 %arg) { |
| ; CHECK-LABEL: @trig_preop_qnan( |
| ; CHECK-NEXT: ret double 0x7FF8000000000000 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 %arg) |
| ret double %val |
| } |
| |
| ; A signaling NaN src0 is expected to fold to its quieted form (quiet bit set, |
| ; payload kept) even though the segment operand is variable. |
| define double @trig_preop_snan(i32 %arg) { |
| ; CHECK-LABEL: @trig_preop_snan( |
| ; CHECK-NEXT: ret double 0x7FF8000000000001 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 %arg) |
| ret double %val |
| } |
| |
| ; +inf src0 with segment 0 is expected to fold to a finite constant. |
| define double @trig_preop_inf_0() { |
| ; CHECK-LABEL: @trig_preop_inf_0( |
| ; CHECK-NEXT: ret double 0xB43DD63F5F2F8BD |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000000, i32 0) |
| ret double %val |
| } |
| |
| ; -inf src0 with segment 0 is expected to fold to the same constant that |
| ; trig_preop_inf_0 expects for +inf. |
| define double @trig_preop_ninf_0() { |
| ; CHECK-LABEL: @trig_preop_ninf_0( |
| ; CHECK-NEXT: ret double 0xB43DD63F5F2F8BD |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0xFFF0000000000000, i32 0) |
| ret double %val |
| } |
| |
| ; Variable src0 with a constant segment operand: the call is expected to remain |
| ; (no fold). |
| define double @trig_preop_variable_fp(double %arg) { |
| ; CHECK-LABEL: @trig_preop_variable_fp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[ARG:%.*]], i32 5) |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double %arg, i32 5) |
| ret double %val |
| } |
| |
| ; Both operands variable: the call is expected to remain unchanged. |
| define double @trig_preop_variable_args(double %arg0, i32 %arg1) { |
| ; CHECK-LABEL: @trig_preop_variable_args( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[ARG0:%.*]], i32 [[ARG1:%.*]]) |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double %arg0, i32 %arg1) |
| ret double %val |
| } |
| |
| ; Both operands constant (src0 = 345.435, segment 2): expected to fold to a |
| ; constant. |
| define double @trig_preop_constfold() { |
| ; CHECK-LABEL: @trig_preop_constfold( |
| ; CHECK-NEXT: ret double 0x394A6EE06DB14ACC |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 2) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 21 for the segment to be in bounds with this exponent of |
| ; src0. Segment 22 is out of bounds and the result is expected to fold to 0.0. |
| define double @trig_preop_constfold_outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 22) |
| ret double %val |
| } |
| |
| ; Only src1[4:0] is used, so a segment operand of -1 selects segment 31. |
| ; The result is expected to fold to 0.0. |
| define double @trig_preop_constfold_neg1_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_neg1_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 -1) |
| ret double %val |
| } |
| |
| ; Only src1[4:0] is used, so a segment operand of -32 selects segment 0. |
| ; The result is expected to fold to a nonzero constant. |
| define double @trig_preop_constfold_neg32_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_neg32_segment( |
| ; CHECK-NEXT: ret double 0x3FE45F306DC9C882 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 -32) |
| ret double %val |
| } |
| |
| ; strictfp function and strictfp call site: the call with constant operands is |
| ; expected to remain (no constant fold). |
| define double @trig_preop_constfold_strictfp() strictfp { |
| ; CHECK-LABEL: @trig_preop_constfold_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR16]] |
| ; CHECK-NEXT: ret double [[VAL]] |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) strictfp |
| ret double %val |
| } |
| |
| ; src0 = 0.0 (exponent and mantissa fields both zero), segment 0: expected to |
| ; fold to a constant. |
| define double @trig_preop_constfold_exponent0_mantissa0__segment0() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa0__segment0( |
| ; CHECK-NEXT: ret double 0x3FE45F306DC9C882 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 0) |
| ret double %val |
| } |
| |
| ; src0 has a zero exponent field and an all-ones mantissa field, segment 0. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent0_mantissa1__segment0() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa1__segment0( |
| ; CHECK-NEXT: ret double 0x3FE45F306DC9C882 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x000FFFFFFFFFFFFF, i32 0) |
| ret double %val |
| } |
| |
| ; src0 has a zero exponent field and an arbitrary nonzero mantissa, segment 0. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent0_mantissaX__segment0() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissaX__segment0( |
| ; CHECK-NEXT: ret double 0x3FE45F306DC9C882 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x0004A7F09D5F47D4, i32 0) |
| ret double %val |
| } |
| |
| ; src0 = 0.0 (exponent and mantissa fields both zero), segment 2: expected to |
| ; fold to a constant. |
| define double @trig_preop_constfold_exponent0_mantissa0__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa0__segment2( |
| ; CHECK-NEXT: ret double 0x394A6EE06DB14ACC |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 2) |
| ret double %val |
| } |
| |
| ; src0 has a zero exponent field and an all-ones mantissa field, segment 2. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent0_mantissa1__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa1__segment2( |
| ; CHECK-NEXT: ret double 0x394A6EE06DB14ACC |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x000FFFFFFFFFFFFF, i32 2) |
| ret double %val |
| } |
| |
| ; src0 has a zero exponent field and an arbitrary nonzero mantissa, segment 2. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent0_mantissaX__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissaX__segment2( |
| ; CHECK-NEXT: ret double 0x394A6EE06DB14ACC |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x00094A6EE06DB14A, i32 2) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 21 for the segment to be in bounds with this exponent of |
| ; src0. Segment 22 with src0 = 0.0 is expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent0_mantissa0__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa0__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 22) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 21 for the segment to be in bounds with this exponent of |
| ; src0. Segment 22 with a zero exponent field and an all-ones mantissa is |
| ; expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent0_mantissa1__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissa1__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x000FFFFFFFFFFFFF, i32 22) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 21 for the segment to be in bounds with this exponent of |
| ; src0. Segment 22 with a zero exponent field and an arbitrary mantissa is |
| ; expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent0_mantissaX__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent0_mantissaX__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x000A6EE06DB14ACC, i32 22) |
| ret double %val |
| } |
| |
| ; 1607 = 1077 + 10 * 53 |
| ; src0 has exponent field 0x647 (1607) and a zero mantissa, segment 0: expected |
| ; to fold to a constant. |
| define double @trig_preop_constfold_exponent1607_mantissa0__segment0() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa0__segment0( |
| ; CHECK-NEXT: ret double 0x1EC8135A2FBF209C |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 0) |
| ret double %val |
| } |
| |
| ; 1607 = 1077 + 10 * 53 |
| ; src0 has exponent field 0x647 (1607) and an all-ones mantissa field. |
| ; NOTE(review): the function name says "segment1", but the call passes segment 0 |
| ; and the expected value matches the mantissa0 segment-0 test; the name looks |
| ; like a typo for "segment0" -- confirm before renaming. |
| define double @trig_preop_constfold_exponent1607_mantissa1__segment1() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa1__segment1( |
| ; CHECK-NEXT: ret double 0x1EC8135A2FBF209C |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x647FFFFFFFFFFFFF, i32 0) |
| ret double %val |
| } |
| |
| ; 1607 = 1077 + 10 * 53 |
| ; src0 has exponent field 0x647 (1607) and an arbitrary mantissa. |
| ; NOTE(review): the function name says "segment1", but the call passes segment 0 |
| ; and the expected value matches the mantissa0 segment-0 test; the name looks |
| ; like a typo for "segment0" -- confirm before renaming. |
| define double @trig_preop_constfold_exponent1607_mantissaX__segment1() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissaX__segment1( |
| ; CHECK-NEXT: ret double 0x1EC8135A2FBF209C |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x6471B791D6398353, i32 0) |
| ret double %val |
| } |
| |
| ; 1607 = 1077 + 10 * 53 |
| ; src0 has exponent field 0x647 (1607) and a zero mantissa, segment 2: expected |
| ; to fold to a constant. |
| define double @trig_preop_constfold_exponent1607_mantissa0__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa0__segment2( |
| ; CHECK-NEXT: ret double 0x181272117E2EF7E4 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 2) |
| ret double %val |
| } |
| |
| ; 1607 = 1077 + 10 * 53 |
| ; src0 has exponent field 0x647 (1607) and an all-ones mantissa field, segment 2. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent1607_mantissa1__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa1__segment2( |
| ; CHECK-NEXT: ret double 0x181272117E2EF7E4 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x647FFFFFFFFFFFFF, i32 2) |
| ret double %val |
| } |
| |
| ; 1607 = 1077 + 10 * 53 |
| ; src0 has exponent field 0x647 (1607) and an arbitrary mantissa, segment 2. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent1607_mantissaX__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissaX__segment2( |
| ; CHECK-NEXT: ret double 0x181272117E2EF7E4 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x647272117E2EF7E4, i32 2) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 11 for the segment to be in bounds with this exponent of |
| ; src0. Segment 12 with a zero mantissa is expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent1607_mantissa0__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa0__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 12) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 11 for the segment to be in bounds with this exponent of |
| ; src0. Segment 12 with an all-ones mantissa field is expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent1607_mantissa1__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissa1__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x647FFFFFFFFFFFFF, i32 12) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 11 for the segment to be in bounds with this exponent of |
| ; src0. Segment 12 with an arbitrary mantissa is expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent1607_mantissaX__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1607_mantissaX__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x647181272117E2EF, i32 12) |
| ret double %val |
| } |
| |
| ; src0 has exponent field 0x7B0 (1968) and a zero mantissa, segment 0: expected |
| ; to fold to a constant. |
| define double @trig_preop_constfold_exponent1968_mantissa0__segment0() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa0__segment0( |
| ; CHECK-NEXT: ret double 0x10374F463F669E5F |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 0) |
| ret double %val |
| } |
| |
| ; src0 has exponent field 0x7B0 (1968) and an all-ones mantissa field, segment 0. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent1968_mantissa1__segment0() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa1__segment0( |
| ; CHECK-NEXT: ret double 0x10374F463F669E5F |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0FFFFFFFFFFFFF, i32 0) |
| ret double %val |
| } |
| |
| ; src0 has exponent field 0x7B0 (1968) and an arbitrary mantissa, segment 0. |
| ; The expected result equals the one for a zero mantissa. |
| ; NOTE(review): this name spells "mantissax" in lowercase, unlike the sibling |
| ; "mantissaX" tests. |
| define double @trig_preop_constfold_exponent1968_mantissax__segment0() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissax__segment0( |
| ; CHECK-NEXT: ret double 0x10374F463F669E5F |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B074F463F669E5F, i32 0) |
| ret double %val |
| } |
| |
| ; src0 has exponent field 0x7B0 (1968) and a zero mantissa, segment 2: expected |
| ; to fold to a constant. |
| define double @trig_preop_constfold_exponent1968_mantissa0__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa0__segment2( |
| ; CHECK-NEXT: ret double 0x98F2F8BD9E839CE |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 2) |
| ret double %val |
| } |
| |
| ; src0 has exponent field 0x7B0 (1968) and an all-ones mantissa field, segment 2. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent1968_mantissa1__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa1__segment2( |
| ; CHECK-NEXT: ret double 0x98F2F8BD9E839CE |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0FFFFFFFFFFFFF, i32 2) |
| ret double %val |
| } |
| |
| ; src0 has exponent field 0x7B0 (1968) and an arbitrary mantissa, segment 2. |
| ; The expected result equals the one for a zero mantissa. |
| define double @trig_preop_constfold_exponent1968_mantissaX__segment2() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissaX__segment2( |
| ; CHECK-NEXT: ret double 0x98F2F8BD9E839CE |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0A2F8BD9E839CE, i32 2) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 4 for the segment to be in bounds with this exponent of |
| ; src0. Segment 5 with a zero mantissa is expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent1968_mantissa0__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa0__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 5) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 4 for the segment to be in bounds with this exponent of |
| ; src0. Segment 5 with an all-ones mantissa field is expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent1968_mantissa1__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissa1__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0FFFFFFFFFFFFF, i32 5) |
| ret double %val |
| } |
| |
| ; src1[4:0] must be <= 4 for the segment to be in bounds with this exponent of |
| ; src0. Segment 5 with an arbitrary mantissa is expected to fold to 0.0. |
| define double @trig_preop_constfold_exponent1968_mantissaX__outbound_segment() { |
| ; CHECK-LABEL: @trig_preop_constfold_exponent1968_mantissaX__outbound_segment( |
| ; CHECK-NEXT: ret double 0.000000e+00 |
| ; |
| %val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B0A98F2F8BD9E83, i32 5) |
| ret double %val |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.log |
| ; -------------------------------------------------------------------- |
| |
| declare float @llvm.amdgcn.log.f32(float) nounwind readnone |
| declare half @llvm.amdgcn.log.f16(half) nounwind readnone |
| |
| ; An undef operand is expected to fold to a quiet NaN. |
| define float @test_constant_fold_log_f32_undef() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_undef( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float undef) |
| ret float %val |
| } |
| |
| ; A poison operand is expected to fold to poison. |
| define float @test_constant_fold_log_f32_poison() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_poison( |
| ; CHECK-NEXT: ret float poison |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float poison) |
| ret float %val |
| } |
| |
| ; +0.0 is expected to fold to -inf. |
| define float @test_constant_fold_log_f32_p0() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_p0( |
| ; CHECK-NEXT: ret float 0xFFF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0.0) |
| ret float %val |
| } |
| |
| ; -0.0 is expected to fold to -inf. |
| define float @test_constant_fold_log_f32_n0() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_n0( |
| ; CHECK-NEXT: ret float 0xFFF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float -0.0) |
| ret float %val |
| } |
| |
| ; A positive f32 subnormal input is expected to fold to -inf, the same result |
| ; as for a zero input. |
| define float @test_constant_fold_log_f32_subnormal() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_subnormal( |
| ; CHECK-NEXT: ret float 0xFFF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0x380FFFFFC0000000) |
| ret float %val |
| } |
| |
| ; A negative f32 subnormal input is expected to fold to -inf, the same result |
| ; as for a zero input. |
| define float @test_constant_fold_log_f32_negsubnormal() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_negsubnormal( |
| ; CHECK-NEXT: ret float 0xFFF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0xB80FFFFFC0000000) |
| ret float %val |
| } |
| |
| ; +inf is expected to fold to +inf. |
| define float @test_constant_fold_log_f32_pinf() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_pinf( |
| ; CHECK-NEXT: ret float 0x7FF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0x7FF0000000000000) |
| ret float %val |
| } |
| |
| ; -inf is expected to fold to a quiet NaN. |
| define float @test_constant_fold_log_f32_ninf() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_ninf( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) |
| ret float %val |
| } |
| |
| ; Constant 1.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_log_f32_p1() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_p1( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 1.000000e+00) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 1.0) |
| ret float %val |
| } |
| |
| ; Constant 10.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_log_f32_p10() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_p10( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 1.000000e+01) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 10.0) |
| ret float %val |
| } |
| |
| ; A negative normal input (-10.0) is expected to fold to a quiet NaN. |
| define float @test_constant_fold_log_f32_neg10() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_neg10( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float -10.0) |
| ret float %val |
| } |
| |
| ; A quiet NaN input is expected to fold to the same quiet NaN. |
| define float @test_constant_fold_log_f32_qnan() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_qnan( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) |
| ret float %val |
| } |
| |
| ; A signaling NaN input is expected to fold to its quieted form (quiet bit set, |
| ; payload kept). |
| define float @test_constant_fold_log_f32_snan() { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_snan( |
| ; CHECK-NEXT: ret float 0x7FF8000020000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0x7FF0000020000000) |
| ret float %val |
| } |
| |
| ; f16 variant: +0.0 is expected to fold to -inf (0xHFC00). |
| define half @test_constant_fold_log_f16_p0() { |
| ; CHECK-LABEL: @test_constant_fold_log_f16_p0( |
| ; CHECK-NEXT: ret half 0xHFC00 |
| ; |
| %val = call half @llvm.amdgcn.log.f16(half 0.0) |
| ret half %val |
| } |
| |
| ; f16 variant: a negative normal input (-10.0) is expected to fold to a quiet |
| ; NaN (0xH7E00). |
| define half @test_constant_fold_log_f16_neg10() { |
| ; CHECK-LABEL: @test_constant_fold_log_f16_neg10( |
| ; CHECK-NEXT: ret half 0xH7E00 |
| ; |
| %val = call half @llvm.amdgcn.log.f16(half -10.0) |
| ret half %val |
| } |
| |
| ; strictfp: a quiet NaN input is not folded; the call is expected to remain. |
| define float @test_constant_fold_log_f32_qnan_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_qnan_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) strictfp |
| ret float %val |
| } |
| |
| ; strictfp: a +0.0 input is not folded; the call is expected to remain. |
| define float @test_constant_fold_log_f32_0_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_0_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0.0) strictfp |
| ret float %val |
| } |
| |
| ; strictfp: a -0.0 input is not folded; the call is expected to remain. |
| define float @test_constant_fold_log_f32_neg0_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_neg0_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float -0.0) strictfp |
| ret float %val |
| } |
| |
| ; strictfp: a negative normal input (-10.0) is not folded; the call is expected |
| ; to remain. |
| define float @test_constant_fold_log_f32_neg_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_neg_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float -10.0) strictfp |
| ret float %val |
| } |
| |
| ; strictfp: a +inf input is still expected to fold to +inf. |
| define float @test_constant_fold_log_f32_pinf_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_pinf_strictfp( |
| ; CHECK-NEXT: ret float 0x7FF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0x7FF0000000000000) strictfp |
| ret float %val |
| } |
| |
| ; strictfp: a -inf input is not folded; the call is expected to remain. |
| define float @test_constant_fold_log_f32_ninf_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_log_f32_ninf_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) strictfp |
| ret float %val |
| } |
| |
| ; f16 variant: a positive subnormal input (0xH03FF) is not folded; the call is |
| ; expected to remain. |
| define half @test_constant_fold_log_f16_denorm() { |
| ; CHECK-LABEL: @test_constant_fold_log_f16_denorm( |
| ; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.log.f16(half 0xH03FF) |
| ; CHECK-NEXT: ret half [[VAL]] |
| ; |
| %val = call half @llvm.amdgcn.log.f16(half 0xH03ff) |
| ret half %val |
| } |
| |
| ; f16 variant: a negative subnormal input (0xH83ff) is expected to fold to a |
| ; quiet NaN (0xH7E00). |
| define half @test_constant_fold_log_f16_neg_denorm() { |
| ; CHECK-LABEL: @test_constant_fold_log_f16_neg_denorm( |
| ; CHECK-NEXT: ret half 0xH7E00 |
| ; |
| %val = call half @llvm.amdgcn.log.f16(half 0xH83ff) |
| ret half %val |
| } |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.exp2 |
| ; -------------------------------------------------------------------- |
| |
| declare float @llvm.amdgcn.exp2.f32(float) nounwind readnone |
| declare half @llvm.amdgcn.exp2.f16(half) nounwind readnone |
| |
| ; An undef operand is expected to fold to a quiet NaN. |
| define float @test_constant_fold_exp2_f32_undef() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_undef( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float undef) |
| ret float %val |
| } |
| |
| ; A poison operand is expected to fold to poison. |
| define float @test_constant_fold_exp2_f32_poison() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_poison( |
| ; CHECK-NEXT: ret float poison |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float poison) |
| ret float %val |
| } |
| |
| ; +0.0 is expected to fold to 1.0. |
| define float @test_constant_fold_exp2_f32_p0() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_p0( |
| ; CHECK-NEXT: ret float 1.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 0.0) |
| ret float %val |
| } |
| |
| ; -0.0 is expected to fold to 1.0. |
| define float @test_constant_fold_exp2_f32_n0() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_n0( |
| ; CHECK-NEXT: ret float 1.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float -0.0) |
| ret float %val |
| } |
| |
| ; Constant 1.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_exp2_f32_p1() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_p1( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 1.0) |
| ret float %val |
| } |
| |
| ; Constant -1.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_exp2_f32_n1() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_n1( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float -1.0) |
| ret float %val |
| } |
| |
| ; Constant 2.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_exp2_f32_p2() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_p2( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 2.0) |
| ret float %val |
| } |
| |
| ; Constant -2.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_exp2_f32_n2() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_n2( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float -2.0) |
| ret float %val |
| } |
| |
| ; Constant 4.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_exp2_f32_p4() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_p4( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 4.000000e+00) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 4.0) |
| ret float %val |
| } |
| |
| ; Constant -4.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_exp2_f32_n4() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_n4( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -4.000000e+00) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float -4.0) |
| ret float %val |
| } |
| |
| ; A positive f32 subnormal input is expected to fold to 1.0, the same result |
| ; as for a zero input. |
| define float @test_constant_fold_exp2_f32_subnormal() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_subnormal( |
| ; CHECK-NEXT: ret float 1.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 0x380FFFFFC0000000) |
| ret float %val |
| } |
| |
| ; A negative f32 subnormal input is expected to fold to 1.0, the same result |
| ; as for a zero input. |
| define float @test_constant_fold_exp2_f32_negsubnormal() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_negsubnormal( |
| ; CHECK-NEXT: ret float 1.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 0xB80FFFFFC0000000) |
| ret float %val |
| } |
| |
| ; +inf is expected to fold to +inf. |
| define float @test_constant_fold_exp2_f32_pinf() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_pinf( |
| ; CHECK-NEXT: ret float 0x7FF0000000000000 |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF0000000000000) |
| ret float %val |
| } |
| |
| ; -inf is expected to fold to +0.0. |
| define float @test_constant_fold_exp2_f32_ninf() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_ninf( |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 0xFFF0000000000000) |
| ret float %val |
| } |
| |
| ; Constant 10.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_exp2_f32_p10() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_p10( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+01) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 10.0) |
| ret float %val |
| } |
| |
| ; Constant -10.0 input: the call is expected to remain (no constant fold). |
| define float @test_constant_fold_exp2_f32_neg10() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg10( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float -10.0) |
| ret float %val |
| } |
| |
| ; A quiet NaN input is expected to fold to the same quiet NaN. |
| define float @test_constant_fold_exp2_f32_qnan() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_qnan( |
| ; CHECK-NEXT: ret float 0x7FF8000000000000 |
| ; |
| %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_snan() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_snan( |
| ; CHECK-NEXT: ret float 0x7FF8000020000000 |
| ; Input is a signaling NaN with a payload bit; the expected result is that NaN with the quiet bit set and the payload kept. |
| %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF0000020000000) |
| ret float %val |
| } |
| |
| define half @test_constant_fold_exp2_f16_p0() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f16_p0( |
| ; CHECK-NEXT: ret half 0xH3C00 |
| ; Half-precision variant with input 0.0; the call is expected to fold to 0xH3C00 (half 1.0). |
| %val = call half @llvm.amdgcn.exp2.f16(half 0.0) |
| ret half %val |
| } |
| |
| define half @test_constant_fold_exp2_f16_neg10() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f16_neg10( |
| ; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.exp2.f16(half 0xHC900) |
| ; CHECK-NEXT: ret half [[VAL]] |
| ; Half-precision variant with input -10.0 (printed as 0xHC900); the call is expected to remain unfolded. |
| %val = call half @llvm.amdgcn.exp2.f16(half -10.0) |
| ret half %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_qnan_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; strictfp function and call site with a quiet NaN input; the call is expected to remain unfolded. |
| %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_0_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_0_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; strictfp function and call site with input 0.0; the call is expected to remain unfolded. |
| %val = call float @llvm.amdgcn.exp2.f32(float 0.0) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg0_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; strictfp function and call site with input -0.0; the call is expected to remain unfolded. |
| %val = call float @llvm.amdgcn.exp2.f32(float -0.0) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_1_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_1_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; strictfp function and call site with input 1.0; the call is expected to remain unfolded. |
| %val = call float @llvm.amdgcn.exp2.f32(float 1.0) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg1_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; strictfp function and call site with input -1.0; the call is expected to remain unfolded. |
| %val = call float @llvm.amdgcn.exp2.f32(float -1.0) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_2_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_2_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; strictfp function and call site with input 2.0; the call is expected to remain unfolded. |
| %val = call float @llvm.amdgcn.exp2.f32(float 2.0) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg2_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; strictfp function and call site with input -2.0; the call is expected to remain unfolded. |
| %val = call float @llvm.amdgcn.exp2.f32(float -2.0) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_neg_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_neg_strictfp( |
| ; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR16]] |
| ; CHECK-NEXT: ret float [[VAL]] |
| ; strictfp function and call site with input -10.0 (the name says only "neg"); the call is expected to remain unfolded. |
| %val = call float @llvm.amdgcn.exp2.f32(float -10.0) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_pinf_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_pinf_strictfp( |
| ; CHECK-NEXT: ret float 0x7FF0000000000000 |
| ; strictfp function and call site with +infinity input; the call is still expected to fold to +infinity. |
| %val = call float @llvm.amdgcn.exp2.f32(float 0x7FF0000000000000) strictfp |
| ret float %val |
| } |
| |
| define float @test_constant_fold_exp2_f32_ninf_strictfp() strictfp { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f32_ninf_strictfp( |
| ; CHECK-NEXT: ret float 0.000000e+00 |
| ; strictfp function and call site with -infinity input; the call is still expected to fold to 0.0. |
| %val = call float @llvm.amdgcn.exp2.f32(float 0xFFF0000000000000) strictfp |
| ret float %val |
| } |
| |
| define half @test_constant_fold_exp2_f16_denorm() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f16_denorm( |
| ; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.exp2.f16(half 0xH03FF) |
| ; CHECK-NEXT: ret half [[VAL]] |
| ; Half-precision positive subnormal input (0xH03FF); the call is expected to remain unfolded. |
| %val = call half @llvm.amdgcn.exp2.f16(half 0xH03ff) |
| ret half %val |
| } |
| |
| define half @test_constant_fold_exp2_f16_neg_denorm() { |
| ; CHECK-LABEL: @test_constant_fold_exp2_f16_neg_denorm( |
| ; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.exp2.f16(half 0xH83FF) |
| ; CHECK-NEXT: ret half [[VAL]] |
| ; Half-precision negative subnormal input (0xH83FF); the call is expected to remain unfolded. |
| %val = call half @llvm.amdgcn.exp2.f16(half 0xH83ff) |
| ret half %val |
| } |