| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86 |
| ; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64 |
| |
; fold (shl (zext (lshr A, X)), X) -> (zext (shl (lshr A, X), X))
| |
; Canonicalize the sequence shl/zext/lshr, performing the zero extend
| ; as the last instruction of the sequence. |
| ; This will help DAGCombiner to identify and then fold the sequence |
| ; of shifts into a single AND. |
| ; This transformation is profitable if the shift amounts are the same |
| ; and if there is only one use of the zext. |
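; For example, in fun2 below
;   (shl (zext (lshr %v, 4)), 4)
; keeps only the bits of %v above the low four, so once the zero extend is the
; last operation the two shifts fold into a single mask of the extended value,
; visible as the 'andl $-16' in the checks.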
| |
| define i16 @fun1(i8 zeroext %v) { |
| ; X86-LABEL: fun1: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun1: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: andl $-16, %eax |
| ; X64-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i8 %v, 4 |
| %ext = zext i8 %shr to i16 |
| %shl = shl i16 %ext, 4 |
| ret i16 %shl |
| } |
| |
| define i32 @fun2(i8 zeroext %v) { |
| ; X86-LABEL: fun2: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun2: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: andl $-16, %eax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i8 %v, 4 |
| %ext = zext i8 %shr to i32 |
| %shl = shl i32 %ext, 4 |
| ret i32 %shl |
| } |
| |
| define i32 @fun3(i16 zeroext %v) { |
| ; X86-LABEL: fun3: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun3: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: andl $-16, %eax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i16 %v, 4 |
| %ext = zext i16 %shr to i32 |
| %shl = shl i32 %ext, 4 |
| ret i32 %shl |
| } |
| |
| define i64 @fun4(i8 zeroext %v) { |
| ; X86-LABEL: fun4: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun4: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: andl $-16, %eax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i8 %v, 4 |
| %ext = zext i8 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| ret i64 %shl |
| } |
| |
| define i64 @fun5(i16 zeroext %v) { |
| ; X86-LABEL: fun5: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun5: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: andl $-16, %eax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i16 %v, 4 |
| %ext = zext i16 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| ret i64 %shl |
| } |
| |
| define i64 @fun6(i32 zeroext %v) { |
| ; X86-LABEL: fun6: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun6: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: andl $-16, %eax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i32 %v, 4 |
| %ext = zext i32 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| ret i64 %shl |
| } |
| |
| ; Don't fold the pattern if we use arithmetic shifts. |
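; With an arithmetic shift the bits shifted in may be ones, so the zext is not
; redundant and the sequence cannot be reduced to a single AND of the
; zero-extended input: fun7 still emits the sarb/shll pair and fun8 has to
; sign extend (movswl) before masking.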
| |
| define i64 @fun7(i8 zeroext %v) { |
| ; X86-LABEL: fun7: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: sarb $4, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: shll $4, %eax |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun7: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: sarb $4, %dil |
| ; X64-NEXT: movzbl %dil, %eax |
| ; X64-NEXT: shll $4, %eax |
| ; X64-NEXT: retq |
| entry: |
| %shr = ashr i8 %v, 4 |
| %ext = zext i8 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| ret i64 %shl |
| } |
| |
| define i64 @fun8(i16 zeroext %v) { |
| ; X86-LABEL: fun8: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $1048560, %eax # imm = 0xFFFF0 |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun8: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: movswl %di, %eax |
| ; X64-NEXT: andl $1048560, %eax # imm = 0xFFFF0 |
| ; X64-NEXT: retq |
| entry: |
| %shr = ashr i16 %v, 4 |
| %ext = zext i16 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| ret i64 %shl |
| } |
| |
| define i64 @fun9(i32 zeroext %v) { |
| ; X86-LABEL: fun9: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %edx |
| ; X86-NEXT: sarl $4, %edx |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: shrl $28, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun9: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: sarl $4, %eax |
| ; X64-NEXT: shlq $4, %rax |
| ; X64-NEXT: retq |
| entry: |
| %shr = ashr i32 %v, 4 |
| %ext = zext i32 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| ret i64 %shl |
| } |
| |
; Don't fold the pattern if the operand feeding the shift left (the zext) has
; more than one use.
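; In fun10-fun12 the zext result also feeds the final add, so moving the zero
; extend past the shifts would duplicate work instead of removing it.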
| |
| define i64 @fun10(i8 zeroext %v) { |
| ; X86-LABEL: fun10: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: shrb $4, %al |
| ; X86-NEXT: movzbl %al, %ecx |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shll $4, %eax |
| ; X86-NEXT: orl %ecx, %eax |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun10: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: # kill: def $edi killed $edi def $rdi |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: shrb $4, %al |
| ; X64-NEXT: movzbl %al, %eax |
| ; X64-NEXT: andl $-16, %edi |
| ; X64-NEXT: orq %rdi, %rax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i8 %v, 4 |
| %ext = zext i8 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| %add = add i64 %shl, %ext |
| ret i64 %add |
| } |
| |
| define i64 @fun11(i16 zeroext %v) { |
| ; X86-LABEL: fun11: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl $4, %ecx |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun11: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: # kill: def $edi killed $edi def $rdi |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: shrl $4, %eax |
| ; X64-NEXT: andl $-16, %edi |
| ; X64-NEXT: addq %rdi, %rax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i16 %v, 4 |
| %ext = zext i16 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| %add = add i64 %shl, %ext |
| ret i64 %add |
| } |
| |
| define i64 @fun12(i32 zeroext %v) { |
| ; X86-LABEL: fun12: |
| ; X86: # %bb.0: # %entry |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl $4, %ecx |
| ; X86-NEXT: andl $-16, %eax |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: setb %dl |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: fun12: |
| ; X64: # %bb.0: # %entry |
| ; X64-NEXT: # kill: def $edi killed $edi def $rdi |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: shrl $4, %eax |
| ; X64-NEXT: andl $-16, %edi |
| ; X64-NEXT: addq %rdi, %rax |
| ; X64-NEXT: retq |
| entry: |
| %shr = lshr i32 %v, 4 |
| %ext = zext i32 %shr to i64 |
| %shl = shl i64 %ext, 4 |
| %add = add i64 %shl, %ext |
| ret i64 %add |
| } |
| |
| ; PR17380 |
; Make sure that the combined DAG nodes are legal when the DAGCombiner runs
; after legalization has taken place. The add instruction is redundant and
; increases the number of uses of the zext by one; this prevents the
; transformation from firing before the DAGs are legalized and optimized.
; Once the dead add is removed, the zext has a single use and the DAGs are
; canonicalized. After legalization, we need to make sure that the value type
; for the shift count is legal.
; Also verify that we correctly fold the lshr/shl sequence into an
; AND with a bitmask.
| |
| define void @g(i32 %a) nounwind { |
| ; X86-LABEL: g: |
| ; X86: # %bb.0: |
| ; X86-NEXT: subl $12, %esp |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $-4, %eax |
| ; X86-NEXT: subl $8, %esp |
| ; X86-NEXT: pushl $0 |
| ; X86-NEXT: pushl %eax |
| ; X86-NEXT: calll f |
| ; X86-NEXT: addl $28, %esp |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: g: |
| ; X64: # %bb.0: |
| ; X64-NEXT: # kill: def $edi killed $edi def $rdi |
| ; X64-NEXT: andl $-4, %edi |
| ; X64-NEXT: jmp f # TAILCALL |
| %b = lshr i32 %a, 2 |
| %c = zext i32 %b to i64 |
| %d = add i64 %c, 1 |
| %e = shl i64 %c, 2 |
| tail call void @f(i64 %e) |
| ret void |
| } |
| |
| define i32 @shift_zext_shl(i8 zeroext %x) { |
| ; X86-LABEL: shift_zext_shl: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $64, %eax |
| ; X86-NEXT: shll $9, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: shift_zext_shl: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: andl $64, %eax |
| ; X64-NEXT: shll $9, %eax |
| ; X64-NEXT: retq |
| %a = and i8 %x, 64 |
| %b = zext i8 %a to i16 |
| %c = shl i16 %b, 9 |
| %d = zext i16 %c to i32 |
| ret i32 %d |
| } |
| |
| define i32 @shift_zext_shl2(i8 zeroext %x) { |
| ; X86-LABEL: shift_zext_shl2: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: andl $64, %eax |
| ; X86-NEXT: shll $9, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: shift_zext_shl2: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: andl $64, %eax |
| ; X64-NEXT: shll $9, %eax |
| ; X64-NEXT: retq |
| %a = and i8 %x, 64 |
| %b = zext i8 %a to i32 |
| %c = shl i32 %b, 9 |
| ret i32 %c |
| } |
| |
| define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) nounwind { |
| ; X86-LABEL: shift_zext_shl_vec: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: andl $64, %ecx |
| ; X86-NEXT: shll $9, %ecx |
| ; X86-NEXT: andl $63, %edx |
| ; X86-NEXT: shll $8, %edx |
| ; X86-NEXT: andl $31, %esi |
| ; X86-NEXT: shll $7, %esi |
| ; X86-NEXT: andl $23, %edi |
| ; X86-NEXT: shll $6, %edi |
| ; X86-NEXT: movl %edi, 12(%eax) |
| ; X86-NEXT: movl %esi, 8(%eax) |
| ; X86-NEXT: movl %edx, 4(%eax) |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl $4 |
| ; |
| ; X64-LABEL: shift_zext_shl_vec: |
| ; X64: # %bb.0: |
| ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; X64-NEXT: pxor %xmm1, %xmm1 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
| ; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [512,256,128,64,u,u,u,u] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] |
| ; X64-NEXT: retq |
| %a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23> |
| %b = zext <4 x i8> %a to <4 x i16> |
| %c = shl <4 x i16> %b, <i16 9, i16 8, i16 7, i16 6> |
| %d = zext <4 x i16> %c to <4 x i32> |
| ret <4 x i32> %d |
| } |
| |
| define <4 x i32> @shift_zext_shl2_vec(<4 x i8> %x) nounwind { |
| ; X86-LABEL: shift_zext_shl2_vec: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi |
| ; X86-NEXT: andl $23, %edi |
| ; X86-NEXT: andl $31, %esi |
| ; X86-NEXT: andl $63, %edx |
| ; X86-NEXT: andl $64, %ecx |
| ; X86-NEXT: shll $9, %ecx |
| ; X86-NEXT: shll $8, %edx |
| ; X86-NEXT: shll $7, %esi |
| ; X86-NEXT: shll $6, %edi |
| ; X86-NEXT: movl %edi, 12(%eax) |
| ; X86-NEXT: movl %esi, 8(%eax) |
| ; X86-NEXT: movl %edx, 4(%eax) |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: retl $4 |
| ; |
| ; X64-LABEL: shift_zext_shl2_vec: |
| ; X64: # %bb.0: |
| ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; X64-NEXT: pxor %xmm1, %xmm1 |
| ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] |
| ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] |
| ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] |
| ; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] |
| ; X64-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] |
| ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; X64-NEXT: retq |
| %a = and <4 x i8> %x, <i8 64, i8 63, i8 31, i8 23> |
| %b = zext <4 x i8> %a to <4 x i32> |
| %c = shl <4 x i32> %b, <i32 9, i32 8, i32 7, i32 6> |
| ret <4 x i32> %c |
| } |
| |
| declare dso_local void @f(i64) |
| |