| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE |
| ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-BASE |
| ; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefixes=X86-POPCNT |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefixes=X64-POPCNT |
| ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd | FileCheck %s --check-prefixes=X64,X64-NDD |
| ; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 |
| ; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3 |
| |
| ; cnt8 - population count of an i8 argument via @llvm.ctpop.i8. |
| ; Runs without +popcnt (the shared X86 and X64 prefixes) expect a |
| ; multiply / shift / mask / multiply / shift sequence on the zero-extended |
| ; byte. Runs with +popcnt expect the byte to be zero-extended with movzbl |
| ; and counted with a single 32-bit popcntl. |
| define i8 @cnt8(i8 %x) nounwind readnone { |
| ; X86-LABEL: cnt8: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201 |
| ; X86-NEXT: shrl $3, %eax |
| ; X86-NEXT: andl $286331153, %eax # imm = 0x11111111 |
| ; X86-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 |
| ; X86-NEXT: shrl $28, %eax |
| ; X86-NEXT: # kill: def $al killed $al killed $eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: cnt8: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movzbl %dil, %eax |
| ; X64-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201 |
| ; X64-NEXT: shrl $3, %eax |
| ; X64-NEXT: andl $286331153, %eax # imm = 0x11111111 |
| ; X64-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 |
| ; X64-NEXT: shrl $28, %eax |
| ; X64-NEXT: # kill: def $al killed $al killed $eax |
| ; X64-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt8: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: popcntl %eax, %eax |
| ; X86-POPCNT-NEXT: # kill: def $al killed $al killed $eax |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt8: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: movzbl %dil, %eax |
| ; X64-POPCNT-NEXT: popcntl %eax, %eax |
| ; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax |
| ; X64-POPCNT-NEXT: retq |
| %cnt = tail call i8 @llvm.ctpop.i8(i8 %x) |
| ret i8 %cnt |
| } |
| |
| ; cnt16 - population count of an i16 argument via @llvm.ctpop.i16. |
| ; Runs without +popcnt expect a shift / mask / add sequence using the |
| ; 16-bit masks 0x5555, 0x3333 and 0x0F0F, followed by folding the high |
| ; byte into the low byte. The +ndd run has its own checks (X64-NDD) |
| ; because it is expected to use 16-bit and three-operand instruction forms. |
| ; Runs with +popcnt expect movzwl followed by a 32-bit popcntl. |
| define i16 @cnt16(i16 %x) nounwind readnone { |
| ; X86-LABEL: cnt16: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl %ecx |
| ; X86-NEXT: andl $21845, %ecx # imm = 0x5555 |
| ; X86-NEXT: subl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: andl $13107, %ecx # imm = 0x3333 |
| ; X86-NEXT: shrl $2, %eax |
| ; X86-NEXT: andl $13107, %eax # imm = 0x3333 |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl $4, %ecx |
| ; X86-NEXT: addl %eax, %ecx |
| ; X86-NEXT: andl $3855, %ecx # imm = 0xF0F |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrl $8, %eax |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: cnt16: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl %eax |
| ; X64-BASE-NEXT: andl $21845, %eax # imm = 0x5555 |
| ; X64-BASE-NEXT: subl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: andl $13107, %eax # imm = 0x3333 |
| ; X64-BASE-NEXT: shrl $2, %edi |
| ; X64-BASE-NEXT: andl $13107, %edi # imm = 0x3333 |
| ; X64-BASE-NEXT: addl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl $4, %eax |
| ; X64-BASE-NEXT: addl %edi, %eax |
| ; X64-BASE-NEXT: andl $3855, %eax # imm = 0xF0F |
| ; X64-BASE-NEXT: movl %eax, %ecx |
| ; X64-BASE-NEXT: shrl $8, %ecx |
| ; X64-BASE-NEXT: addl %eax, %ecx |
| ; X64-BASE-NEXT: movzbl %cl, %eax |
| ; X64-BASE-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt16: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: popcntl %eax, %eax |
| ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt16: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: movzwl %di, %eax |
| ; X64-POPCNT-NEXT: popcntl %eax, %eax |
| ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt16: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrw %di, %ax |
| ; X64-NDD-NEXT: andw $21845, %ax # imm = 0x5555 |
| ; X64-NDD-NEXT: subw %ax, %di, %ax |
| ; X64-NDD-NEXT: andw $13107, %ax, %cx # imm = 0x3333 |
| ; X64-NDD-NEXT: shrw $2, %ax |
| ; X64-NDD-NEXT: andw $13107, %ax # imm = 0x3333 |
| ; X64-NDD-NEXT: addw %cx, %ax |
| ; X64-NDD-NEXT: shrw $4, %ax, %cx |
| ; X64-NDD-NEXT: addw %cx, %ax |
| ; X64-NDD-NEXT: andw $3855, %ax # imm = 0xF0F |
| ; X64-NDD-NEXT: movzbl %ah, %ecx |
| ; X64-NDD-NEXT: addw %cx, %ax |
| ; X64-NDD-NEXT: movzbl %al, %eax |
| ; X64-NDD-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-NDD-NEXT: retq |
| %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) |
| ret i16 %cnt |
| } |
| |
| ; cnt32 - population count of an i32 argument via @llvm.ctpop.i32. |
| ; Runs without +popcnt expect the shift / mask / add sequence with the |
| ; masks 0x55555555, 0x33333333 and 0x0F0F0F0F, finished by a multiply |
| ; with 0x01010101 and a right shift by 24. The +ndd run is checked |
| ; separately (X64-NDD) since it is expected to use three-operand forms. |
| ; Runs with +popcnt expect a single popcntl. |
| define i32 @cnt32(i32 %x) nounwind readnone { |
| ; X86-LABEL: cnt32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl %ecx |
| ; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 |
| ; X86-NEXT: subl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NEXT: shrl $2, %eax |
| ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl $4, %ecx |
| ; X86-NEXT: addl %eax, %ecx |
| ; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F |
| ; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 |
| ; X86-NEXT: shrl $24, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: cnt32: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl %eax |
| ; X64-BASE-NEXT: andl $1431655765, %eax # imm = 0x55555555 |
| ; X64-BASE-NEXT: subl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X64-BASE-NEXT: shrl $2, %edi |
| ; X64-BASE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X64-BASE-NEXT: addl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl $4, %eax |
| ; X64-BASE-NEXT: addl %edi, %eax |
| ; X64-BASE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F |
| ; X64-BASE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 |
| ; X64-BASE-NEXT: shrl $24, %eax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt32: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt32: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntl %edi, %eax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt32: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrl %edi, %eax |
| ; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555 |
| ; X64-NDD-NEXT: subl %eax, %edi |
| ; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333 |
| ; X64-NDD-NEXT: shrl $2, %edi |
| ; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X64-NDD-NEXT: addl %edi, %eax |
| ; X64-NDD-NEXT: shrl $4, %eax, %ecx |
| ; X64-NDD-NEXT: addl %ecx, %eax |
| ; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F |
| ; X64-NDD-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 |
| ; X64-NDD-NEXT: shrl $24, %eax |
| ; X64-NDD-NEXT: retq |
| %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) |
| ret i32 %cnt |
| } |
| |
| ; cnt64 - population count of an i64 argument via @llvm.ctpop.i64. |
| ; Expected lowerings, by run configuration: |
| ;  * i686 without SSE (X86-NOSSE): each 32-bit half is counted with the |
| ;    scalar mask / multiply sequence, the two counts are added and edx |
| ;    (the high half of the result) is zeroed. |
| ;  * x86_64 base and +ndd: one 64-bit mask / multiply sequence with the |
| ;    constants materialized by movabsq. |
| ;  * i686 with sse2: a byte-wise vector sequence summed with psadbw. |
| ;  * i686 with ssse3: a pshufb lookup into the nibble-count table |
| ;    [0,1,1,2,...,4] for both nibbles, summed with psadbw. |
| ;  * +popcnt: two popcntl plus an add on i686, a single popcntq on x86_64. |
| define i64 @cnt64(i64 %x) nounwind readnone { |
| ; X86-NOSSE-LABEL: cnt64: |
| ; X86-NOSSE: # %bb.0: |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edx |
| ; X86-NOSSE-NEXT: shrl %edx |
| ; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %edx, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edx |
| ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %ecx |
| ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %edx, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edx |
| ; X86-NOSSE-NEXT: shrl $4, %edx |
| ; X86-NOSSE-NEXT: addl %ecx, %edx |
| ; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %ecx |
| ; X86-NOSSE-NEXT: movl %eax, %edx |
| ; X86-NOSSE-NEXT: shrl %edx |
| ; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %edx, %eax |
| ; X86-NOSSE-NEXT: movl %eax, %edx |
| ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %eax |
| ; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %edx, %eax |
| ; X86-NOSSE-NEXT: movl %eax, %edx |
| ; X86-NOSSE-NEXT: shrl $4, %edx |
| ; X86-NOSSE-NEXT: addl %eax, %edx |
| ; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %eax |
| ; X86-NOSSE-NEXT: addl %ecx, %eax |
| ; X86-NOSSE-NEXT: xorl %edx, %edx |
| ; X86-NOSSE-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: cnt64: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-BASE-NEXT: andq %rax, %rcx |
| ; X64-BASE-NEXT: subq %rcx, %rdi |
| ; X64-BASE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-BASE-NEXT: movq %rdi, %rcx |
| ; X64-BASE-NEXT: andq %rax, %rcx |
| ; X64-BASE-NEXT: shrq $2, %rdi |
| ; X64-BASE-NEXT: andq %rdi, %rax |
| ; X64-BASE-NEXT: addq %rcx, %rax |
| ; X64-BASE-NEXT: movq %rax, %rcx |
| ; X64-BASE-NEXT: shrq $4, %rcx |
| ; X64-BASE-NEXT: addq %rax, %rcx |
| ; X64-BASE-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-BASE-NEXT: andq %rcx, %rdx |
| ; X64-BASE-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 |
| ; X64-BASE-NEXT: imulq %rdx, %rax |
| ; X64-BASE-NEXT: shrq $56, %rax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt64: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: addl %ecx, %eax |
| ; X86-POPCNT-NEXT: xorl %edx, %edx |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt64: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntq %rdi, %rax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt64: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrq %rdi, %rax |
| ; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: subq %rax, %rdi |
| ; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-NDD-NEXT: andq %rax, %rdi, %rcx |
| ; X64-NDD-NEXT: shrq $2, %rdi |
| ; X64-NDD-NEXT: andq %rdi, %rax |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $4, %rax, %rcx |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101 |
| ; X64-NDD-NEXT: imulq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $56, %rax |
| ; X64-NDD-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: cnt64: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm1 |
| ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 |
| ; X86-SSE2-NEXT: psubb %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm2 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm0 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: paddb %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm1 |
| ; X86-SSE2-NEXT: paddb %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 |
| ; X86-SSE2-NEXT: pxor %xmm0, %xmm0 |
| ; X86-SSE2-NEXT: psadbw %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: movd %xmm0, %eax |
| ; X86-SSE2-NEXT: xorl %edx, %edx |
| ; X86-SSE2-NEXT: retl |
| ; |
| ; X86-SSSE3-LABEL: cnt64: |
| ; X86-SSSE3: # %bb.0: |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 |
| ; X86-SSSE3-NEXT: pand %xmm0, %xmm2 |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] |
| ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm1 |
| ; X86-SSSE3-NEXT: pand %xmm0, %xmm1 |
| ; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 |
| ; X86-SSSE3-NEXT: pxor %xmm0, %xmm0 |
| ; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0 |
| ; X86-SSSE3-NEXT: movd %xmm0, %eax |
| ; X86-SSSE3-NEXT: xorl %edx, %edx |
| ; X86-SSSE3-NEXT: retl |
| %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) |
| ret i64 %cnt |
| } |
| |
| ; cnt128 - population count of an i128 argument via @llvm.ctpop.i128. |
| ; Expected lowerings, by run configuration: |
| ;  * i686 (all variants): the count is stored to (%eax), the dwords at |
| ;    offsets 4, 8 and 12 are zeroed, and the function returns with retl $4. |
| ;    Without SSE the four 32-bit pieces are each counted with the scalar |
| ;    sequence; with sse2 / ssse3 two 64-bit loads are counted with the |
| ;    vector sequences and summed; with +popcnt four popcntl are summed. |
| ;  * x86_64: the two 64-bit halves (rdi, rsi) are counted separately and |
| ;    added into rax, and edx is zeroed for the upper half of the result. |
| ;    The base and +ndd runs reuse the materialized mask constants for the |
| ;    second half; +popcnt uses two popcntq. |
| define i128 @cnt128(i128 %x) nounwind readnone { |
| ; X86-NOSSE-LABEL: cnt128: |
| ; X86-NOSSE: # %bb.0: |
| ; X86-NOSSE-NEXT: pushl %ebx |
| ; X86-NOSSE-NEXT: pushl %edi |
| ; X86-NOSSE-NEXT: pushl %esi |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X86-NOSSE-NEXT: movl %edi, %ebx |
| ; X86-NOSSE-NEXT: shrl %ebx |
| ; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %ebx, %edi |
| ; X86-NOSSE-NEXT: movl %edi, %ebx |
| ; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %edi |
| ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %ebx, %edi |
| ; X86-NOSSE-NEXT: movl %edi, %ebx |
| ; X86-NOSSE-NEXT: shrl $4, %ebx |
| ; X86-NOSSE-NEXT: addl %edi, %ebx |
| ; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %edi |
| ; X86-NOSSE-NEXT: movl %esi, %ebx |
| ; X86-NOSSE-NEXT: shrl %ebx |
| ; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %ebx, %esi |
| ; X86-NOSSE-NEXT: movl %esi, %ebx |
| ; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %esi |
| ; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %ebx, %esi |
| ; X86-NOSSE-NEXT: movl %esi, %ebx |
| ; X86-NOSSE-NEXT: shrl $4, %ebx |
| ; X86-NOSSE-NEXT: addl %esi, %ebx |
| ; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %esi |
| ; X86-NOSSE-NEXT: addl %edi, %esi |
| ; X86-NOSSE-NEXT: movl %edx, %edi |
| ; X86-NOSSE-NEXT: shrl %edi |
| ; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %edi, %edx |
| ; X86-NOSSE-NEXT: movl %edx, %edi |
| ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %edx |
| ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %edi, %edx |
| ; X86-NOSSE-NEXT: movl %edx, %edi |
| ; X86-NOSSE-NEXT: shrl $4, %edi |
| ; X86-NOSSE-NEXT: addl %edx, %edi |
| ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %edx |
| ; X86-NOSSE-NEXT: movl %ecx, %edi |
| ; X86-NOSSE-NEXT: shrl %edi |
| ; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %edi, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edi |
| ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %ecx |
| ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %edi, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edi |
| ; X86-NOSSE-NEXT: shrl $4, %edi |
| ; X86-NOSSE-NEXT: addl %ecx, %edi |
| ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %ecx |
| ; X86-NOSSE-NEXT: addl %edx, %ecx |
| ; X86-NOSSE-NEXT: addl %esi, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, (%eax) |
| ; X86-NOSSE-NEXT: movl $0, 12(%eax) |
| ; X86-NOSSE-NEXT: movl $0, 8(%eax) |
| ; X86-NOSSE-NEXT: movl $0, 4(%eax) |
| ; X86-NOSSE-NEXT: popl %esi |
| ; X86-NOSSE-NEXT: popl %edi |
| ; X86-NOSSE-NEXT: popl %ebx |
| ; X86-NOSSE-NEXT: retl $4 |
| ; |
| ; X64-BASE-LABEL: cnt128: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movq %rsi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555 |
| ; X64-BASE-NEXT: andq %r8, %rax |
| ; X64-BASE-NEXT: subq %rax, %rsi |
| ; X64-BASE-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333 |
| ; X64-BASE-NEXT: movq %rsi, %rax |
| ; X64-BASE-NEXT: andq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $2, %rsi |
| ; X64-BASE-NEXT: andq %rcx, %rsi |
| ; X64-BASE-NEXT: addq %rsi, %rax |
| ; X64-BASE-NEXT: movq %rax, %rdx |
| ; X64-BASE-NEXT: shrq $4, %rdx |
| ; X64-BASE-NEXT: addq %rax, %rdx |
| ; X64-BASE-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-BASE-NEXT: andq %rsi, %rdx |
| ; X64-BASE-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101 |
| ; X64-BASE-NEXT: imulq %r9, %rdx |
| ; X64-BASE-NEXT: shrq $56, %rdx |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: andq %r8, %rax |
| ; X64-BASE-NEXT: subq %rax, %rdi |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: andq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $2, %rdi |
| ; X64-BASE-NEXT: andq %rdi, %rcx |
| ; X64-BASE-NEXT: addq %rax, %rcx |
| ; X64-BASE-NEXT: movq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $4, %rax |
| ; X64-BASE-NEXT: addq %rcx, %rax |
| ; X64-BASE-NEXT: andq %rsi, %rax |
| ; X64-BASE-NEXT: imulq %r9, %rax |
| ; X64-BASE-NEXT: shrq $56, %rax |
| ; X64-BASE-NEXT: addq %rdx, %rax |
| ; X64-BASE-NEXT: xorl %edx, %edx |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt128: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: pushl %esi |
| ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx |
| ; X86-POPCNT-NEXT: addl %ecx, %edx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi |
| ; X86-POPCNT-NEXT: addl %ecx, %esi |
| ; X86-POPCNT-NEXT: addl %edx, %esi |
| ; X86-POPCNT-NEXT: movl %esi, (%eax) |
| ; X86-POPCNT-NEXT: movl $0, 12(%eax) |
| ; X86-POPCNT-NEXT: movl $0, 8(%eax) |
| ; X86-POPCNT-NEXT: movl $0, 4(%eax) |
| ; X86-POPCNT-NEXT: popl %esi |
| ; X86-POPCNT-NEXT: retl $4 |
| ; |
| ; X64-POPCNT-LABEL: cnt128: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntq %rsi, %rcx |
| ; X64-POPCNT-NEXT: popcntq %rdi, %rax |
| ; X64-POPCNT-NEXT: addq %rcx, %rax |
| ; X64-POPCNT-NEXT: xorl %edx, %edx |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt128: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrq %rsi, %rax |
| ; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: subq %rax, %rsi |
| ; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-NDD-NEXT: andq %rax, %rsi, %rdx |
| ; X64-NDD-NEXT: shrq $2, %rsi |
| ; X64-NDD-NEXT: andq %rax, %rsi |
| ; X64-NDD-NEXT: addq %rsi, %rdx |
| ; X64-NDD-NEXT: shrq $4, %rdx, %rsi |
| ; X64-NDD-NEXT: addq %rsi, %rdx |
| ; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-NDD-NEXT: andq %rsi, %rdx |
| ; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101 |
| ; X64-NDD-NEXT: imulq %r8, %rdx |
| ; X64-NDD-NEXT: shrq $56, %rdx |
| ; X64-NDD-NEXT: shrq %rdi, %r9 |
| ; X64-NDD-NEXT: andq %r9, %rcx |
| ; X64-NDD-NEXT: subq %rcx, %rdi |
| ; X64-NDD-NEXT: andq %rax, %rdi, %rcx |
| ; X64-NDD-NEXT: shrq $2, %rdi |
| ; X64-NDD-NEXT: andq %rdi, %rax |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $4, %rax, %rcx |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: andq %rsi, %rax |
| ; X64-NDD-NEXT: imulq %r8, %rax |
| ; X64-NDD-NEXT: shrq $56, %rax |
| ; X64-NDD-NEXT: addq %rdx, %rax |
| ; X64-NDD-NEXT: xorl %edx, %edx |
| ; X64-NDD-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: cnt128: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm0 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: psubb %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm3 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm2 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: paddb %xmm3, %xmm2 |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm4 |
| ; X86-SSE2-NEXT: paddb %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSE2-NEXT: pand %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: pxor %xmm3, %xmm3 |
| ; X86-SSE2-NEXT: psadbw %xmm3, %xmm4 |
| ; X86-SSE2-NEXT: movd %xmm4, %ecx |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm4 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm5 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm5 |
| ; X86-SSE2-NEXT: psubb %xmm5, %xmm4 |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm1 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm4 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm4 |
| ; X86-SSE2-NEXT: paddb %xmm1, %xmm4 |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm0 |
| ; X86-SSE2-NEXT: paddb %xmm4, %xmm0 |
| ; X86-SSE2-NEXT: pand %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: psadbw %xmm3, %xmm0 |
| ; X86-SSE2-NEXT: movd %xmm0, %edx |
| ; X86-SSE2-NEXT: addl %ecx, %edx |
| ; X86-SSE2-NEXT: movl %edx, (%eax) |
| ; X86-SSE2-NEXT: movl $0, 12(%eax) |
| ; X86-SSE2-NEXT: movl $0, 8(%eax) |
| ; X86-SSE2-NEXT: movl $0, 4(%eax) |
| ; X86-SSE2-NEXT: retl $4 |
| ; |
| ; X86-SSSE3-LABEL: cnt128: |
| ; X86-SSSE3: # %bb.0: |
| ; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm4 |
| ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm2 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm2 |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm3 |
| ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 |
| ; X86-SSSE3-NEXT: pxor %xmm2, %xmm2 |
| ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: movd %xmm3, %ecx |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm4 |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm5 |
| ; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm3 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm0 |
| ; X86-SSSE3-NEXT: paddb %xmm5, %xmm0 |
| ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm0 |
| ; X86-SSSE3-NEXT: movd %xmm0, %edx |
| ; X86-SSSE3-NEXT: addl %ecx, %edx |
| ; X86-SSSE3-NEXT: movl %edx, (%eax) |
| ; X86-SSSE3-NEXT: movl $0, 12(%eax) |
| ; X86-SSSE3-NEXT: movl $0, 8(%eax) |
| ; X86-SSSE3-NEXT: movl $0, 4(%eax) |
| ; X86-SSSE3-NEXT: retl $4 |
| %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) |
| ret i128 %cnt |
| } |
| |
| ; cnt64_noimplicitfloat - same @llvm.ctpop.i64 call as cnt64, but the |
| ; function carries the noimplicitfloat attribute. |
| ; All i686 runs without +popcnt (no SSE, sse2 and ssse3) share the common |
| ; X86 checks here: each 32-bit half is counted with the scalar mask / |
| ; multiply sequence and no xmm registers appear in the expected output. |
| ; The x86_64 and +popcnt expectations match those of cnt64. |
| define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat { |
| ; X86-LABEL: cnt64_noimplicitfloat: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: shrl %edx |
| ; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 |
| ; X86-NEXT: subl %edx, %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 |
| ; X86-NEXT: shrl $2, %ecx |
| ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NEXT: addl %edx, %ecx |
| ; X86-NEXT: movl %ecx, %edx |
| ; X86-NEXT: shrl $4, %edx |
| ; X86-NEXT: addl %ecx, %edx |
| ; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F |
| ; X86-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 |
| ; X86-NEXT: shrl $24, %ecx |
| ; X86-NEXT: movl %eax, %edx |
| ; X86-NEXT: shrl %edx |
| ; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 |
| ; X86-NEXT: subl %edx, %eax |
| ; X86-NEXT: movl %eax, %edx |
| ; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 |
| ; X86-NEXT: shrl $2, %eax |
| ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X86-NEXT: addl %edx, %eax |
| ; X86-NEXT: movl %eax, %edx |
| ; X86-NEXT: shrl $4, %edx |
| ; X86-NEXT: addl %eax, %edx |
| ; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F |
| ; X86-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 |
| ; X86-NEXT: shrl $24, %eax |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: xorl %edx, %edx |
| ; X86-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: cnt64_noimplicitfloat: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-BASE-NEXT: andq %rax, %rcx |
| ; X64-BASE-NEXT: subq %rcx, %rdi |
| ; X64-BASE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-BASE-NEXT: movq %rdi, %rcx |
| ; X64-BASE-NEXT: andq %rax, %rcx |
| ; X64-BASE-NEXT: shrq $2, %rdi |
| ; X64-BASE-NEXT: andq %rdi, %rax |
| ; X64-BASE-NEXT: addq %rcx, %rax |
| ; X64-BASE-NEXT: movq %rax, %rcx |
| ; X64-BASE-NEXT: shrq $4, %rcx |
| ; X64-BASE-NEXT: addq %rax, %rcx |
| ; X64-BASE-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-BASE-NEXT: andq %rcx, %rdx |
| ; X64-BASE-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 |
| ; X64-BASE-NEXT: imulq %rdx, %rax |
| ; X64-BASE-NEXT: shrq $56, %rax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt64_noimplicitfloat: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: addl %ecx, %eax |
| ; X86-POPCNT-NEXT: xorl %edx, %edx |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt64_noimplicitfloat: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntq %rdi, %rax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt64_noimplicitfloat: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrq %rdi, %rax |
| ; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: subq %rax, %rdi |
| ; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-NDD-NEXT: andq %rax, %rdi, %rcx |
| ; X64-NDD-NEXT: shrq $2, %rdi |
| ; X64-NDD-NEXT: andq %rdi, %rax |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $4, %rax, %rcx |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101 |
| ; X64-NDD-NEXT: imulq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $56, %rax |
| ; X64-NDD-NEXT: retq |
| %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) |
| ret i64 %cnt |
| } |
| |
| ; cnt32_optsize - same @llvm.ctpop.i32 call as cnt32, but the function |
| ; carries the optsize attribute. |
| ; In the runs without +popcnt the expected sequence differs from cnt32 in |
| ; one respect: the 0x33333333 mask is loaded into a register once with |
| ; movl and both masking steps use that register, instead of repeating the |
| ; immediate on each andl. The +popcnt runs still expect a single popcntl. |
| define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize { |
| ; X86-LABEL: cnt32_optsize: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl %ecx |
| ; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 |
| ; X86-NEXT: subl %ecx, %eax |
| ; X86-NEXT: movl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NEXT: movl %eax, %edx |
| ; X86-NEXT: andl %ecx, %edx |
| ; X86-NEXT: shrl $2, %eax |
| ; X86-NEXT: andl %ecx, %eax |
| ; X86-NEXT: addl %edx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl $4, %ecx |
| ; X86-NEXT: addl %eax, %ecx |
| ; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F |
| ; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 |
| ; X86-NEXT: shrl $24, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: cnt32_optsize: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl %eax |
| ; X64-BASE-NEXT: andl $1431655765, %eax # imm = 0x55555555 |
| ; X64-BASE-NEXT: subl %eax, %edi |
| ; X64-BASE-NEXT: movl $858993459, %eax # imm = 0x33333333 |
| ; X64-BASE-NEXT: movl %edi, %ecx |
| ; X64-BASE-NEXT: andl %eax, %ecx |
| ; X64-BASE-NEXT: shrl $2, %edi |
| ; X64-BASE-NEXT: andl %eax, %edi |
| ; X64-BASE-NEXT: addl %ecx, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl $4, %eax |
| ; X64-BASE-NEXT: addl %edi, %eax |
| ; X64-BASE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F |
| ; X64-BASE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 |
| ; X64-BASE-NEXT: shrl $24, %eax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt32_optsize: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt32_optsize: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntl %edi, %eax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt32_optsize: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrl %edi, %eax |
| ; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555 |
| ; X64-NDD-NEXT: subl %eax, %edi |
| ; X64-NDD-NEXT: movl $858993459, %eax # imm = 0x33333333 |
| ; X64-NDD-NEXT: andl %eax, %edi, %ecx |
| ; X64-NDD-NEXT: shrl $2, %edi |
| ; X64-NDD-NEXT: andl %edi, %eax |
| ; X64-NDD-NEXT: addl %ecx, %eax |
| ; X64-NDD-NEXT: shrl $4, %eax, %ecx |
| ; X64-NDD-NEXT: addl %ecx, %eax |
| ; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F |
| ; X64-NDD-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 |
| ; X64-NDD-NEXT: shrl $24, %eax |
| ; X64-NDD-NEXT: retq |
| %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) |
| ret i32 %cnt |
| } |
| |
| ; ctpop of an i64 in a function carrying the optsize attribute. |
| ; What the checks below pin down: |
| ; - with +popcnt, x86-64 selects a single popcntq and i686 adds two popcntl results; |
| ; - without popcnt, scalar code uses the shift/mask/multiply expansion |
| ; (masks 0x55.., 0x33.., 0x0F.., multiplier 0x01.., final shift by 24 or 56); |
| ; - i686 with sse2 does the same reduction on bytes and sums them with psadbw; |
| ; - i686 with ssse3 uses a pshufb nibble lookup table and then psadbw. |
| ; In the i686 no-SSE sequence each 32-bit mask is loaded into a register once and |
| ; reused for both halves of the input. |
| ; The assertions are autogenerated (see the note on the first line of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of editing by hand. |
| define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize { |
| ; X86-NOSSE-LABEL: cnt64_optsize: |
| ; X86-NOSSE: # %bb.0: |
| ; X86-NOSSE-NEXT: pushl %ebx |
| ; X86-NOSSE-NEXT: pushl %edi |
| ; X86-NOSSE-NEXT: pushl %esi |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NOSSE-NEXT: movl %esi, %ecx |
| ; X86-NOSSE-NEXT: shrl %ecx |
| ; X86-NOSSE-NEXT: movl $1431655765, %edx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: andl %edx, %ecx |
| ; X86-NOSSE-NEXT: subl %ecx, %esi |
| ; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: movl %esi, %edi |
| ; X86-NOSSE-NEXT: andl %ecx, %edi |
| ; X86-NOSSE-NEXT: shrl $2, %esi |
| ; X86-NOSSE-NEXT: andl %ecx, %esi |
| ; X86-NOSSE-NEXT: addl %edi, %esi |
| ; X86-NOSSE-NEXT: movl %esi, %ebx |
| ; X86-NOSSE-NEXT: shrl $4, %ebx |
| ; X86-NOSSE-NEXT: addl %esi, %ebx |
| ; X86-NOSSE-NEXT: movl $252645135, %edi # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: andl %edi, %ebx |
| ; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %esi |
| ; X86-NOSSE-NEXT: movl %eax, %ebx |
| ; X86-NOSSE-NEXT: shrl %ebx |
| ; X86-NOSSE-NEXT: andl %edx, %ebx |
| ; X86-NOSSE-NEXT: subl %ebx, %eax |
| ; X86-NOSSE-NEXT: movl %eax, %edx |
| ; X86-NOSSE-NEXT: andl %ecx, %edx |
| ; X86-NOSSE-NEXT: shrl $2, %eax |
| ; X86-NOSSE-NEXT: andl %ecx, %eax |
| ; X86-NOSSE-NEXT: addl %edx, %eax |
| ; X86-NOSSE-NEXT: movl %eax, %ecx |
| ; X86-NOSSE-NEXT: shrl $4, %ecx |
| ; X86-NOSSE-NEXT: addl %eax, %ecx |
| ; X86-NOSSE-NEXT: andl %edi, %ecx |
| ; X86-NOSSE-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %eax |
| ; X86-NOSSE-NEXT: addl %esi, %eax |
| ; X86-NOSSE-NEXT: xorl %edx, %edx |
| ; X86-NOSSE-NEXT: popl %esi |
| ; X86-NOSSE-NEXT: popl %edi |
| ; X86-NOSSE-NEXT: popl %ebx |
| ; X86-NOSSE-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: cnt64_optsize: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-BASE-NEXT: andq %rax, %rcx |
| ; X64-BASE-NEXT: subq %rcx, %rdi |
| ; X64-BASE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-BASE-NEXT: movq %rdi, %rcx |
| ; X64-BASE-NEXT: andq %rax, %rcx |
| ; X64-BASE-NEXT: shrq $2, %rdi |
| ; X64-BASE-NEXT: andq %rdi, %rax |
| ; X64-BASE-NEXT: addq %rcx, %rax |
| ; X64-BASE-NEXT: movq %rax, %rcx |
| ; X64-BASE-NEXT: shrq $4, %rcx |
| ; X64-BASE-NEXT: addq %rax, %rcx |
| ; X64-BASE-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-BASE-NEXT: andq %rcx, %rdx |
| ; X64-BASE-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 |
| ; X64-BASE-NEXT: imulq %rdx, %rax |
| ; X64-BASE-NEXT: shrq $56, %rax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt64_optsize: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: addl %ecx, %eax |
| ; X86-POPCNT-NEXT: xorl %edx, %edx |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt64_optsize: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntq %rdi, %rax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt64_optsize: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrq %rdi, %rax |
| ; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: subq %rax, %rdi |
| ; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-NDD-NEXT: andq %rax, %rdi, %rcx |
| ; X64-NDD-NEXT: shrq $2, %rdi |
| ; X64-NDD-NEXT: andq %rdi, %rax |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $4, %rax, %rcx |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101 |
| ; X64-NDD-NEXT: imulq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $56, %rax |
| ; X64-NDD-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: cnt64_optsize: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm1 |
| ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 |
| ; X86-SSE2-NEXT: psubb %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm2 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm0 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: paddb %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm1 |
| ; X86-SSE2-NEXT: paddb %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 |
| ; X86-SSE2-NEXT: pxor %xmm0, %xmm0 |
| ; X86-SSE2-NEXT: psadbw %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: movd %xmm0, %eax |
| ; X86-SSE2-NEXT: xorl %edx, %edx |
| ; X86-SSE2-NEXT: retl |
| ; |
| ; X86-SSSE3-LABEL: cnt64_optsize: |
| ; X86-SSSE3: # %bb.0: |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 |
| ; X86-SSSE3-NEXT: pand %xmm0, %xmm2 |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] |
| ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm1 |
| ; X86-SSSE3-NEXT: pand %xmm0, %xmm1 |
| ; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 |
| ; X86-SSSE3-NEXT: pxor %xmm0, %xmm0 |
| ; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0 |
| ; X86-SSSE3-NEXT: movd %xmm0, %eax |
| ; X86-SSSE3-NEXT: xorl %edx, %edx |
| ; X86-SSSE3-NEXT: retl |
| ; IR under test - a single ctpop intrinsic call whose result is returned unchanged. |
| %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) |
| ret i64 %cnt |
| } |
| |
| ; ctpop of an i128 in a function carrying the optsize attribute. |
| ; The checks show the input counted in pieces whose partial counts are added - |
| ; four 32-bit pieces for the scalar i686 configurations (with or without popcnt), |
| ; two 64-bit pieces for x86-64 and for i686 with sse2 / ssse3. |
| ; Result placement, as pinned by the checks: |
| ; - i686 loads a pointer from the stack, stores the count to its first dword, |
| ; stores zero to the dwords at offsets 4, 8 and 12, and returns with retl $4; |
| ; - x86-64 leaves the count in %rax and zeroes %edx. |
| ; The assertions are autogenerated (see the note on the first line of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of editing by hand. |
| define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { |
| ; X86-NOSSE-LABEL: cnt128_optsize: |
| ; X86-NOSSE: # %bb.0: |
| ; X86-NOSSE-NEXT: pushl %ebp |
| ; X86-NOSSE-NEXT: pushl %ebx |
| ; X86-NOSSE-NEXT: pushl %edi |
| ; X86-NOSSE-NEXT: pushl %esi |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx |
| ; X86-NOSSE-NEXT: movl %ebx, %ecx |
| ; X86-NOSSE-NEXT: shrl %ecx |
| ; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: andl %edi, %ecx |
| ; X86-NOSSE-NEXT: subl %ecx, %ebx |
| ; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: movl %ebx, %ebp |
| ; X86-NOSSE-NEXT: andl %ecx, %ebp |
| ; X86-NOSSE-NEXT: shrl $2, %ebx |
| ; X86-NOSSE-NEXT: andl %ecx, %ebx |
| ; X86-NOSSE-NEXT: addl %ebp, %ebx |
| ; X86-NOSSE-NEXT: movl %ebx, %ebp |
| ; X86-NOSSE-NEXT: shrl $4, %ebp |
| ; X86-NOSSE-NEXT: addl %ebx, %ebp |
| ; X86-NOSSE-NEXT: movl %eax, %ebx |
| ; X86-NOSSE-NEXT: shrl %ebx |
| ; X86-NOSSE-NEXT: andl %edi, %ebx |
| ; X86-NOSSE-NEXT: subl %ebx, %eax |
| ; X86-NOSSE-NEXT: movl %eax, %ebx |
| ; X86-NOSSE-NEXT: andl %ecx, %ebx |
| ; X86-NOSSE-NEXT: shrl $2, %eax |
| ; X86-NOSSE-NEXT: andl %ecx, %eax |
| ; X86-NOSSE-NEXT: addl %ebx, %eax |
| ; X86-NOSSE-NEXT: movl %eax, %edi |
| ; X86-NOSSE-NEXT: shrl $4, %edi |
| ; X86-NOSSE-NEXT: addl %eax, %edi |
| ; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: andl %ebx, %ebp |
| ; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %eax |
| ; X86-NOSSE-NEXT: andl %ebx, %edi |
| ; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %edi |
| ; X86-NOSSE-NEXT: addl %eax, %edi |
| ; X86-NOSSE-NEXT: movl %esi, %eax |
| ; X86-NOSSE-NEXT: shrl %eax |
| ; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: andl %ebp, %eax |
| ; X86-NOSSE-NEXT: subl %eax, %esi |
| ; X86-NOSSE-NEXT: movl %esi, %eax |
| ; X86-NOSSE-NEXT: andl %ecx, %eax |
| ; X86-NOSSE-NEXT: shrl $2, %esi |
| ; X86-NOSSE-NEXT: andl %ecx, %esi |
| ; X86-NOSSE-NEXT: addl %eax, %esi |
| ; X86-NOSSE-NEXT: movl %esi, %ebp |
| ; X86-NOSSE-NEXT: shrl $4, %ebp |
| ; X86-NOSSE-NEXT: addl %esi, %ebp |
| ; X86-NOSSE-NEXT: movl %edx, %eax |
| ; X86-NOSSE-NEXT: shrl %eax |
| ; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: andl %esi, %eax |
| ; X86-NOSSE-NEXT: subl %eax, %edx |
| ; X86-NOSSE-NEXT: movl %edx, %eax |
| ; X86-NOSSE-NEXT: andl %ecx, %eax |
| ; X86-NOSSE-NEXT: shrl $2, %edx |
| ; X86-NOSSE-NEXT: andl %ecx, %edx |
| ; X86-NOSSE-NEXT: addl %eax, %edx |
| ; X86-NOSSE-NEXT: movl %edx, %eax |
| ; X86-NOSSE-NEXT: shrl $4, %eax |
| ; X86-NOSSE-NEXT: addl %edx, %eax |
| ; X86-NOSSE-NEXT: andl %ebx, %ebp |
| ; X86-NOSSE-NEXT: andl %ebx, %eax |
| ; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %ecx |
| ; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %edx |
| ; X86-NOSSE-NEXT: addl %ecx, %edx |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NOSSE-NEXT: addl %edi, %edx |
| ; X86-NOSSE-NEXT: xorl %ecx, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, 12(%eax) |
| ; X86-NOSSE-NEXT: movl %ecx, 8(%eax) |
| ; X86-NOSSE-NEXT: movl %ecx, 4(%eax) |
| ; X86-NOSSE-NEXT: movl %edx, (%eax) |
| ; X86-NOSSE-NEXT: popl %esi |
| ; X86-NOSSE-NEXT: popl %edi |
| ; X86-NOSSE-NEXT: popl %ebx |
| ; X86-NOSSE-NEXT: popl %ebp |
| ; X86-NOSSE-NEXT: retl $4 |
| ; |
| ; X64-BASE-LABEL: cnt128_optsize: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movq %rsi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555 |
| ; X64-BASE-NEXT: andq %r8, %rax |
| ; X64-BASE-NEXT: subq %rax, %rsi |
| ; X64-BASE-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333 |
| ; X64-BASE-NEXT: movq %rsi, %rax |
| ; X64-BASE-NEXT: andq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $2, %rsi |
| ; X64-BASE-NEXT: andq %rcx, %rsi |
| ; X64-BASE-NEXT: addq %rsi, %rax |
| ; X64-BASE-NEXT: movq %rax, %rdx |
| ; X64-BASE-NEXT: shrq $4, %rdx |
| ; X64-BASE-NEXT: addq %rax, %rdx |
| ; X64-BASE-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-BASE-NEXT: andq %rsi, %rdx |
| ; X64-BASE-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101 |
| ; X64-BASE-NEXT: imulq %r9, %rdx |
| ; X64-BASE-NEXT: shrq $56, %rdx |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: andq %r8, %rax |
| ; X64-BASE-NEXT: subq %rax, %rdi |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: andq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $2, %rdi |
| ; X64-BASE-NEXT: andq %rdi, %rcx |
| ; X64-BASE-NEXT: addq %rax, %rcx |
| ; X64-BASE-NEXT: movq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $4, %rax |
| ; X64-BASE-NEXT: addq %rcx, %rax |
| ; X64-BASE-NEXT: andq %rsi, %rax |
| ; X64-BASE-NEXT: imulq %r9, %rax |
| ; X64-BASE-NEXT: shrq $56, %rax |
| ; X64-BASE-NEXT: addq %rdx, %rax |
| ; X64-BASE-NEXT: xorl %edx, %edx |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt128_optsize: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: pushl %esi |
| ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx |
| ; X86-POPCNT-NEXT: addl %ecx, %edx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi |
| ; X86-POPCNT-NEXT: addl %ecx, %esi |
| ; X86-POPCNT-NEXT: addl %edx, %esi |
| ; X86-POPCNT-NEXT: xorl %ecx, %ecx |
| ; X86-POPCNT-NEXT: movl %ecx, 12(%eax) |
| ; X86-POPCNT-NEXT: movl %ecx, 8(%eax) |
| ; X86-POPCNT-NEXT: movl %ecx, 4(%eax) |
| ; X86-POPCNT-NEXT: movl %esi, (%eax) |
| ; X86-POPCNT-NEXT: popl %esi |
| ; X86-POPCNT-NEXT: retl $4 |
| ; |
| ; X64-POPCNT-LABEL: cnt128_optsize: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntq %rsi, %rcx |
| ; X64-POPCNT-NEXT: popcntq %rdi, %rax |
| ; X64-POPCNT-NEXT: addq %rcx, %rax |
| ; X64-POPCNT-NEXT: xorl %edx, %edx |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt128_optsize: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrq %rsi, %rax |
| ; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: subq %rax, %rsi |
| ; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-NDD-NEXT: andq %rax, %rsi, %rdx |
| ; X64-NDD-NEXT: shrq $2, %rsi |
| ; X64-NDD-NEXT: andq %rax, %rsi |
| ; X64-NDD-NEXT: addq %rsi, %rdx |
| ; X64-NDD-NEXT: shrq $4, %rdx, %rsi |
| ; X64-NDD-NEXT: addq %rsi, %rdx |
| ; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-NDD-NEXT: andq %rsi, %rdx |
| ; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101 |
| ; X64-NDD-NEXT: imulq %r8, %rdx |
| ; X64-NDD-NEXT: shrq $56, %rdx |
| ; X64-NDD-NEXT: shrq %rdi, %r9 |
| ; X64-NDD-NEXT: andq %r9, %rcx |
| ; X64-NDD-NEXT: subq %rcx, %rdi |
| ; X64-NDD-NEXT: andq %rax, %rdi, %rcx |
| ; X64-NDD-NEXT: shrq $2, %rdi |
| ; X64-NDD-NEXT: andq %rdi, %rax |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $4, %rax, %rcx |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: andq %rsi, %rax |
| ; X64-NDD-NEXT: imulq %r8, %rax |
| ; X64-NDD-NEXT: shrq $56, %rax |
| ; X64-NDD-NEXT: addq %rdx, %rax |
| ; X64-NDD-NEXT: xorl %edx, %edx |
| ; X64-NDD-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: cnt128_optsize: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm0 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: psubb %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm3 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm2 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: paddb %xmm3, %xmm2 |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm4 |
| ; X86-SSE2-NEXT: paddb %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSE2-NEXT: pand %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: pxor %xmm3, %xmm3 |
| ; X86-SSE2-NEXT: psadbw %xmm3, %xmm4 |
| ; X86-SSE2-NEXT: movd %xmm4, %ecx |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm4 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm5 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm5 |
| ; X86-SSE2-NEXT: psubb %xmm5, %xmm4 |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm1 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm4 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm4 |
| ; X86-SSE2-NEXT: paddb %xmm1, %xmm4 |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm0 |
| ; X86-SSE2-NEXT: paddb %xmm4, %xmm0 |
| ; X86-SSE2-NEXT: pand %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: psadbw %xmm3, %xmm0 |
| ; X86-SSE2-NEXT: movd %xmm0, %edx |
| ; X86-SSE2-NEXT: addl %ecx, %edx |
| ; X86-SSE2-NEXT: xorl %ecx, %ecx |
| ; X86-SSE2-NEXT: movl %ecx, 12(%eax) |
| ; X86-SSE2-NEXT: movl %ecx, 8(%eax) |
| ; X86-SSE2-NEXT: movl %ecx, 4(%eax) |
| ; X86-SSE2-NEXT: movl %edx, (%eax) |
| ; X86-SSE2-NEXT: retl $4 |
| ; |
| ; X86-SSSE3-LABEL: cnt128_optsize: |
| ; X86-SSSE3: # %bb.0: |
| ; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm4 |
| ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm2 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm2 |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm3 |
| ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 |
| ; X86-SSSE3-NEXT: pxor %xmm2, %xmm2 |
| ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: movd %xmm3, %ecx |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm4 |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm5 |
| ; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm3 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm0 |
| ; X86-SSSE3-NEXT: paddb %xmm5, %xmm0 |
| ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm0 |
| ; X86-SSSE3-NEXT: movd %xmm0, %edx |
| ; X86-SSSE3-NEXT: addl %ecx, %edx |
| ; X86-SSSE3-NEXT: xorl %ecx, %ecx |
| ; X86-SSSE3-NEXT: movl %ecx, 12(%eax) |
| ; X86-SSSE3-NEXT: movl %ecx, 8(%eax) |
| ; X86-SSSE3-NEXT: movl %ecx, 4(%eax) |
| ; X86-SSSE3-NEXT: movl %edx, (%eax) |
| ; X86-SSSE3-NEXT: retl $4 |
| ; IR under test - a single ctpop intrinsic call whose result is returned unchanged. |
| %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) |
| ret i128 %cnt |
| } |
| |
| ; ctpop of an i32 in a function annotated with !prof !14 (no optsize attribute). |
| ; NOTE(review) - !14 is defined outside this part of the file; the "pgso" suffix |
| ; suggests profile data that makes the function a candidate for profile-guided |
| ; size optimization. Confirm against the metadata at the end of the file. |
| ; What the checks below pin down: |
| ; - with +popcnt a single popcntl is selected on both i686 and x86-64; |
| ; - otherwise the shift/mask/multiply expansion is used, with every mask written |
| ; as an immediate operand; |
| ; - the i686 configurations without popcnt all share one set of assertions here. |
| ; The assertions are autogenerated (see the note on the first line of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of editing by hand. |
| define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 { |
| ; X86-LABEL: cnt32_pgso: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl %ecx |
| ; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 |
| ; X86-NEXT: subl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NEXT: shrl $2, %eax |
| ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl $4, %ecx |
| ; X86-NEXT: addl %eax, %ecx |
| ; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F |
| ; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 |
| ; X86-NEXT: shrl $24, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: cnt32_pgso: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl %eax |
| ; X64-BASE-NEXT: andl $1431655765, %eax # imm = 0x55555555 |
| ; X64-BASE-NEXT: subl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X64-BASE-NEXT: shrl $2, %edi |
| ; X64-BASE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X64-BASE-NEXT: addl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl $4, %eax |
| ; X64-BASE-NEXT: addl %edi, %eax |
| ; X64-BASE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F |
| ; X64-BASE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 |
| ; X64-BASE-NEXT: shrl $24, %eax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt32_pgso: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt32_pgso: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntl %edi, %eax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt32_pgso: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrl %edi, %eax |
| ; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555 |
| ; X64-NDD-NEXT: subl %eax, %edi |
| ; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333 |
| ; X64-NDD-NEXT: shrl $2, %edi |
| ; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X64-NDD-NEXT: addl %edi, %eax |
| ; X64-NDD-NEXT: shrl $4, %eax, %ecx |
| ; X64-NDD-NEXT: addl %ecx, %eax |
| ; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F |
| ; X64-NDD-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 |
| ; X64-NDD-NEXT: shrl $24, %eax |
| ; X64-NDD-NEXT: retq |
| ; IR under test - a single ctpop intrinsic call whose result is returned unchanged. |
| %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) |
| ret i32 %cnt |
| } |
| |
| ; ctpop of an i64 in a function annotated with !prof !14 (no optsize attribute). |
| ; NOTE(review) - !14 is defined outside this part of the file; the "pgso" suffix |
| ; suggests profile data that makes the function a candidate for profile-guided |
| ; size optimization. Confirm against the metadata at the end of the file. |
| ; What the checks below pin down: |
| ; - with +popcnt, x86-64 selects a single popcntq and i686 adds two popcntl results; |
| ; - i686 without SSE runs the shift/mask/multiply expansion once per 32-bit half, |
| ; with every mask written as an immediate operand; |
| ; - x86-64 without popcnt runs the 64-bit expansion with movabsq constants; |
| ; - i686 with sse2 reduces per byte and sums with psadbw; with ssse3 it uses a |
| ; pshufb nibble lookup table and then psadbw. |
| ; The assertions are autogenerated (see the note on the first line of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of editing by hand. |
| define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 { |
| ; X86-NOSSE-LABEL: cnt64_pgso: |
| ; X86-NOSSE: # %bb.0: |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edx |
| ; X86-NOSSE-NEXT: shrl %edx |
| ; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %edx, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edx |
| ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %ecx |
| ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %edx, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edx |
| ; X86-NOSSE-NEXT: shrl $4, %edx |
| ; X86-NOSSE-NEXT: addl %ecx, %edx |
| ; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %ecx |
| ; X86-NOSSE-NEXT: movl %eax, %edx |
| ; X86-NOSSE-NEXT: shrl %edx |
| ; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %edx, %eax |
| ; X86-NOSSE-NEXT: movl %eax, %edx |
| ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %eax |
| ; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %edx, %eax |
| ; X86-NOSSE-NEXT: movl %eax, %edx |
| ; X86-NOSSE-NEXT: shrl $4, %edx |
| ; X86-NOSSE-NEXT: addl %eax, %edx |
| ; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %eax |
| ; X86-NOSSE-NEXT: addl %ecx, %eax |
| ; X86-NOSSE-NEXT: xorl %edx, %edx |
| ; X86-NOSSE-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: cnt64_pgso: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-BASE-NEXT: andq %rax, %rcx |
| ; X64-BASE-NEXT: subq %rcx, %rdi |
| ; X64-BASE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-BASE-NEXT: movq %rdi, %rcx |
| ; X64-BASE-NEXT: andq %rax, %rcx |
| ; X64-BASE-NEXT: shrq $2, %rdi |
| ; X64-BASE-NEXT: andq %rdi, %rax |
| ; X64-BASE-NEXT: addq %rcx, %rax |
| ; X64-BASE-NEXT: movq %rax, %rcx |
| ; X64-BASE-NEXT: shrq $4, %rcx |
| ; X64-BASE-NEXT: addq %rax, %rcx |
| ; X64-BASE-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-BASE-NEXT: andq %rcx, %rdx |
| ; X64-BASE-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 |
| ; X64-BASE-NEXT: imulq %rdx, %rax |
| ; X64-BASE-NEXT: shrq $56, %rax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt64_pgso: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: addl %ecx, %eax |
| ; X86-POPCNT-NEXT: xorl %edx, %edx |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: cnt64_pgso: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntq %rdi, %rax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt64_pgso: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrq %rdi, %rax |
| ; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: subq %rax, %rdi |
| ; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-NDD-NEXT: andq %rax, %rdi, %rcx |
| ; X64-NDD-NEXT: shrq $2, %rdi |
| ; X64-NDD-NEXT: andq %rdi, %rax |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $4, %rax, %rcx |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101 |
| ; X64-NDD-NEXT: imulq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $56, %rax |
| ; X64-NDD-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: cnt64_pgso: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm1 |
| ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 |
| ; X86-SSE2-NEXT: psubb %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm2 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm0 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: paddb %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm1 |
| ; X86-SSE2-NEXT: paddb %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 |
| ; X86-SSE2-NEXT: pxor %xmm0, %xmm0 |
| ; X86-SSE2-NEXT: psadbw %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: movd %xmm0, %eax |
| ; X86-SSE2-NEXT: xorl %edx, %edx |
| ; X86-SSE2-NEXT: retl |
| ; |
| ; X86-SSSE3-LABEL: cnt64_pgso: |
| ; X86-SSSE3: # %bb.0: |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2 |
| ; X86-SSSE3-NEXT: pand %xmm0, %xmm2 |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] |
| ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm1 |
| ; X86-SSSE3-NEXT: pand %xmm0, %xmm1 |
| ; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 |
| ; X86-SSSE3-NEXT: pxor %xmm0, %xmm0 |
| ; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0 |
| ; X86-SSSE3-NEXT: movd %xmm0, %eax |
| ; X86-SSSE3-NEXT: xorl %edx, %edx |
| ; X86-SSSE3-NEXT: retl |
| ; IR under test - a single ctpop intrinsic call whose result is returned unchanged. |
| %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) |
| ret i64 %cnt |
| } |
| |
| define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { |
| ; X86-NOSSE-LABEL: cnt128_pgso: |
| ; X86-NOSSE: # %bb.0: |
| ; X86-NOSSE-NEXT: pushl %ebx |
| ; X86-NOSSE-NEXT: pushl %edi |
| ; X86-NOSSE-NEXT: pushl %esi |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X86-NOSSE-NEXT: movl %edi, %ebx |
| ; X86-NOSSE-NEXT: shrl %ebx |
| ; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %ebx, %edi |
| ; X86-NOSSE-NEXT: movl %edi, %ebx |
| ; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %edi |
| ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %ebx, %edi |
| ; X86-NOSSE-NEXT: movl %edi, %ebx |
| ; X86-NOSSE-NEXT: shrl $4, %ebx |
| ; X86-NOSSE-NEXT: addl %edi, %ebx |
| ; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %edi |
| ; X86-NOSSE-NEXT: movl %esi, %ebx |
| ; X86-NOSSE-NEXT: shrl %ebx |
| ; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %ebx, %esi |
| ; X86-NOSSE-NEXT: movl %esi, %ebx |
| ; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %esi |
| ; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %ebx, %esi |
| ; X86-NOSSE-NEXT: movl %esi, %ebx |
| ; X86-NOSSE-NEXT: shrl $4, %ebx |
| ; X86-NOSSE-NEXT: addl %esi, %ebx |
| ; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %esi |
| ; X86-NOSSE-NEXT: addl %edi, %esi |
| ; X86-NOSSE-NEXT: movl %edx, %edi |
| ; X86-NOSSE-NEXT: shrl %edi |
| ; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %edi, %edx |
| ; X86-NOSSE-NEXT: movl %edx, %edi |
| ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %edx |
| ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %edi, %edx |
| ; X86-NOSSE-NEXT: movl %edx, %edi |
| ; X86-NOSSE-NEXT: shrl $4, %edi |
| ; X86-NOSSE-NEXT: addl %edx, %edi |
| ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %edx |
| ; X86-NOSSE-NEXT: movl %ecx, %edi |
| ; X86-NOSSE-NEXT: shrl %edi |
| ; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555 |
| ; X86-NOSSE-NEXT: subl %edi, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edi |
| ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: shrl $2, %ecx |
| ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NOSSE-NEXT: addl %edi, %ecx |
| ; X86-NOSSE-NEXT: movl %ecx, %edi |
| ; X86-NOSSE-NEXT: shrl $4, %edi |
| ; X86-NOSSE-NEXT: addl %ecx, %edi |
| ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F |
| ; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101 |
| ; X86-NOSSE-NEXT: shrl $24, %ecx |
| ; X86-NOSSE-NEXT: addl %edx, %ecx |
| ; X86-NOSSE-NEXT: addl %esi, %ecx |
| ; X86-NOSSE-NEXT: xorl %edx, %edx |
| ; X86-NOSSE-NEXT: movl %edx, 12(%eax) |
| ; X86-NOSSE-NEXT: movl %edx, 8(%eax) |
| ; X86-NOSSE-NEXT: movl %edx, 4(%eax) |
| ; X86-NOSSE-NEXT: movl %ecx, (%eax) |
| ; X86-NOSSE-NEXT: popl %esi |
| ; X86-NOSSE-NEXT: popl %edi |
| ; X86-NOSSE-NEXT: popl %ebx |
| ; X86-NOSSE-NEXT: retl $4 |
| ; |
| ; X64-BASE-LABEL: cnt128_pgso: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movq %rsi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555 |
| ; X64-BASE-NEXT: andq %r8, %rax |
| ; X64-BASE-NEXT: subq %rax, %rsi |
| ; X64-BASE-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333 |
| ; X64-BASE-NEXT: movq %rsi, %rax |
| ; X64-BASE-NEXT: andq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $2, %rsi |
| ; X64-BASE-NEXT: andq %rcx, %rsi |
| ; X64-BASE-NEXT: addq %rsi, %rax |
| ; X64-BASE-NEXT: movq %rax, %rdx |
| ; X64-BASE-NEXT: shrq $4, %rdx |
| ; X64-BASE-NEXT: addq %rax, %rdx |
| ; X64-BASE-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-BASE-NEXT: andq %rsi, %rdx |
| ; X64-BASE-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101 |
| ; X64-BASE-NEXT: imulq %r9, %rdx |
| ; X64-BASE-NEXT: shrq $56, %rdx |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: shrq %rax |
| ; X64-BASE-NEXT: andq %r8, %rax |
| ; X64-BASE-NEXT: subq %rax, %rdi |
| ; X64-BASE-NEXT: movq %rdi, %rax |
| ; X64-BASE-NEXT: andq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $2, %rdi |
| ; X64-BASE-NEXT: andq %rdi, %rcx |
| ; X64-BASE-NEXT: addq %rax, %rcx |
| ; X64-BASE-NEXT: movq %rcx, %rax |
| ; X64-BASE-NEXT: shrq $4, %rax |
| ; X64-BASE-NEXT: addq %rcx, %rax |
| ; X64-BASE-NEXT: andq %rsi, %rax |
| ; X64-BASE-NEXT: imulq %r9, %rax |
| ; X64-BASE-NEXT: shrq $56, %rax |
| ; X64-BASE-NEXT: addq %rdx, %rax |
| ; X64-BASE-NEXT: xorl %edx, %edx |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: cnt128_pgso: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: pushl %esi |
| ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx |
| ; X86-POPCNT-NEXT: addl %ecx, %edx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx |
| ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi |
| ; X86-POPCNT-NEXT: addl %ecx, %esi |
| ; X86-POPCNT-NEXT: addl %edx, %esi |
| ; X86-POPCNT-NEXT: xorl %ecx, %ecx |
| ; X86-POPCNT-NEXT: movl %ecx, 12(%eax) |
| ; X86-POPCNT-NEXT: movl %ecx, 8(%eax) |
| ; X86-POPCNT-NEXT: movl %ecx, 4(%eax) |
| ; X86-POPCNT-NEXT: movl %esi, (%eax) |
| ; X86-POPCNT-NEXT: popl %esi |
| ; X86-POPCNT-NEXT: retl $4 |
| ; |
| ; X64-POPCNT-LABEL: cnt128_pgso: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntq %rsi, %rcx |
| ; X64-POPCNT-NEXT: popcntq %rdi, %rax |
| ; X64-POPCNT-NEXT: addq %rcx, %rax |
| ; X64-POPCNT-NEXT: xorl %edx, %edx |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: cnt128_pgso: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrq %rsi, %rax |
| ; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 |
| ; X64-NDD-NEXT: andq %rcx, %rax |
| ; X64-NDD-NEXT: subq %rax, %rsi |
| ; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 |
| ; X64-NDD-NEXT: andq %rax, %rsi, %rdx |
| ; X64-NDD-NEXT: shrq $2, %rsi |
| ; X64-NDD-NEXT: andq %rax, %rsi |
| ; X64-NDD-NEXT: addq %rsi, %rdx |
| ; X64-NDD-NEXT: shrq $4, %rdx, %rsi |
| ; X64-NDD-NEXT: addq %rsi, %rdx |
| ; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F |
| ; X64-NDD-NEXT: andq %rsi, %rdx |
| ; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101 |
| ; X64-NDD-NEXT: imulq %r8, %rdx |
| ; X64-NDD-NEXT: shrq $56, %rdx |
| ; X64-NDD-NEXT: shrq %rdi, %r9 |
| ; X64-NDD-NEXT: andq %r9, %rcx |
| ; X64-NDD-NEXT: subq %rcx, %rdi |
| ; X64-NDD-NEXT: andq %rax, %rdi, %rcx |
| ; X64-NDD-NEXT: shrq $2, %rdi |
| ; X64-NDD-NEXT: andq %rdi, %rax |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: shrq $4, %rax, %rcx |
| ; X64-NDD-NEXT: addq %rcx, %rax |
| ; X64-NDD-NEXT: andq %rsi, %rax |
| ; X64-NDD-NEXT: imulq %r8, %rax |
| ; X64-NDD-NEXT: shrq $56, %rax |
| ; X64-NDD-NEXT: addq %rdx, %rax |
| ; X64-NDD-NEXT: xorl %edx, %edx |
| ; X64-NDD-NEXT: retq |
| ; |
| ; X86-SSE2-LABEL: cnt128_pgso: |
| ; X86-SSE2: # %bb.0: |
| ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm0 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm0 |
| ; X86-SSE2-NEXT: psubb %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm3 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm2 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm2 |
| ; X86-SSE2-NEXT: paddb %xmm3, %xmm2 |
| ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm4 |
| ; X86-SSE2-NEXT: paddb %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSE2-NEXT: pand %xmm2, %xmm4 |
| ; X86-SSE2-NEXT: pxor %xmm3, %xmm3 |
| ; X86-SSE2-NEXT: psadbw %xmm3, %xmm4 |
| ; X86-SSE2-NEXT: movd %xmm4, %ecx |
| ; X86-SSE2-NEXT: movq {{.*#+}} xmm4 = mem[0],zero |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 |
| ; X86-SSE2-NEXT: psrlw $1, %xmm5 |
| ; X86-SSE2-NEXT: pand %xmm1, %xmm5 |
| ; X86-SSE2-NEXT: psubb %xmm5, %xmm4 |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm1 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm1 |
| ; X86-SSE2-NEXT: psrlw $2, %xmm4 |
| ; X86-SSE2-NEXT: pand %xmm0, %xmm4 |
| ; X86-SSE2-NEXT: paddb %xmm1, %xmm4 |
| ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 |
| ; X86-SSE2-NEXT: psrlw $4, %xmm0 |
| ; X86-SSE2-NEXT: paddb %xmm4, %xmm0 |
| ; X86-SSE2-NEXT: pand %xmm2, %xmm0 |
| ; X86-SSE2-NEXT: psadbw %xmm3, %xmm0 |
| ; X86-SSE2-NEXT: movd %xmm0, %edx |
| ; X86-SSE2-NEXT: addl %ecx, %edx |
| ; X86-SSE2-NEXT: xorl %ecx, %ecx |
| ; X86-SSE2-NEXT: movl %ecx, 12(%eax) |
| ; X86-SSE2-NEXT: movl %ecx, 8(%eax) |
| ; X86-SSE2-NEXT: movl %ecx, 4(%eax) |
| ; X86-SSE2-NEXT: movl %edx, (%eax) |
| ; X86-SSE2-NEXT: retl $4 |
| ; |
| ; X86-SSSE3-LABEL: cnt128_pgso: |
| ; X86-SSSE3: # %bb.0: |
| ; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm4 |
| ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm2 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm2 |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm3 |
| ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3 |
| ; X86-SSSE3-NEXT: pxor %xmm2, %xmm2 |
| ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm3 |
| ; X86-SSSE3-NEXT: movd %xmm3, %ecx |
| ; X86-SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero |
| ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm4 |
| ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm5 |
| ; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5 |
| ; X86-SSSE3-NEXT: psrlw $4, %xmm3 |
| ; X86-SSSE3-NEXT: pand %xmm1, %xmm3 |
| ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm0 |
| ; X86-SSSE3-NEXT: paddb %xmm5, %xmm0 |
| ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm0 |
| ; X86-SSSE3-NEXT: movd %xmm0, %edx |
| ; X86-SSSE3-NEXT: addl %ecx, %edx |
| ; X86-SSSE3-NEXT: xorl %ecx, %ecx |
| ; X86-SSSE3-NEXT: movl %ecx, 12(%eax) |
| ; X86-SSSE3-NEXT: movl %ecx, 8(%eax) |
| ; X86-SSSE3-NEXT: movl %ecx, 4(%eax) |
| ; X86-SSSE3-NEXT: movl %edx, (%eax) |
| ; X86-SSSE3-NEXT: retl $4 |
| %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) |
| ret i128 %cnt |
| } |
| |
| ; popcount_zext_i32: zero-extends the i16 argument to i32 first and then takes |
| ; llvm.ctpop.i32 of the widened value. |
| ; In the expectations below, the popcnt-enabled runs use popcntl, while the |
| ; other runs expand to a mask/shift/multiply sequence whose first mask is only |
| ; 16 bits wide (0x5555) although the later masks are 32 bits wide; presumably |
| ; this is because the upper half is known to be zero after the zext. |
| ; The assertion lines are autogenerated (see the NOTE on the first line of this |
| ; file); regenerate them with the script instead of editing them by hand. |
| define i32 @popcount_zext_i32(i16 zeroext %x) { |
| ; X86-LABEL: popcount_zext_i32: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl %ecx |
| ; X86-NEXT: andl $21845, %ecx # imm = 0x5555 |
| ; X86-NEXT: subl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 |
| ; X86-NEXT: shrl $2, %eax |
| ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl $4, %ecx |
| ; X86-NEXT: addl %eax, %ecx |
| ; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F |
| ; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 |
| ; X86-NEXT: shrl $24, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: popcount_zext_i32: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl %eax |
| ; X64-BASE-NEXT: andl $21845, %eax # imm = 0x5555 |
| ; X64-BASE-NEXT: subl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: andl $858993459, %eax # imm = 0x33333333 |
| ; X64-BASE-NEXT: shrl $2, %edi |
| ; X64-BASE-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X64-BASE-NEXT: addl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl $4, %eax |
| ; X64-BASE-NEXT: addl %edi, %eax |
| ; X64-BASE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F |
| ; X64-BASE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 |
| ; X64-BASE-NEXT: shrl $24, %eax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: popcount_zext_i32: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: popcntl %eax, %eax |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: popcount_zext_i32: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntl %edi, %eax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: popcount_zext_i32: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrl %edi, %eax |
| ; X64-NDD-NEXT: andl $21845, %eax # imm = 0x5555 |
| ; X64-NDD-NEXT: subl %eax, %edi |
| ; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333 |
| ; X64-NDD-NEXT: shrl $2, %edi |
| ; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333 |
| ; X64-NDD-NEXT: addl %edi, %eax |
| ; X64-NDD-NEXT: shrl $4, %eax, %ecx |
| ; X64-NDD-NEXT: addl %ecx, %eax |
| ; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F |
| ; X64-NDD-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 |
| ; X64-NDD-NEXT: shrl $24, %eax |
| ; X64-NDD-NEXT: retq |
| ; Widen to i32 before counting, so the population count itself is a 32-bit op. |
| %z = zext i16 %x to i32 |
| %cnt = tail call i32 @llvm.ctpop.i32(i32 %z) |
| ret i32 %cnt |
| } |
| |
| ; popcount_i16_zext: takes llvm.ctpop.i16 of the i16 argument and only then |
| ; zero-extends the resulting count to i32. |
| ; In the expectations below, the runs without popcnt use 16-bit-wide masks |
| ; (0x5555, 0x3333, 0xF0F) and combine the two byte sums with a shift-by-8 and |
| ; add (or an %ah extract in the +ndd run) rather than a multiply; the |
| ; popcnt-enabled runs use popcntl on the 32-bit register. |
| ; The assertion lines are autogenerated (see the NOTE on the first line of this |
| ; file); regenerate them with the script instead of editing them by hand. |
| define i32 @popcount_i16_zext(i16 zeroext %x) { |
| ; X86-LABEL: popcount_i16_zext: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl %ecx |
| ; X86-NEXT: andl $21845, %ecx # imm = 0x5555 |
| ; X86-NEXT: subl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: andl $13107, %ecx # imm = 0x3333 |
| ; X86-NEXT: shrl $2, %eax |
| ; X86-NEXT: andl $13107, %eax # imm = 0x3333 |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: movl %eax, %ecx |
| ; X86-NEXT: shrl $4, %ecx |
| ; X86-NEXT: addl %eax, %ecx |
| ; X86-NEXT: andl $3855, %ecx # imm = 0xF0F |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrl $8, %eax |
| ; X86-NEXT: addl %ecx, %eax |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: retl |
| ; |
| ; X64-BASE-LABEL: popcount_i16_zext: |
| ; X64-BASE: # %bb.0: |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl %eax |
| ; X64-BASE-NEXT: andl $21845, %eax # imm = 0x5555 |
| ; X64-BASE-NEXT: subl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: andl $13107, %eax # imm = 0x3333 |
| ; X64-BASE-NEXT: shrl $2, %edi |
| ; X64-BASE-NEXT: andl $13107, %edi # imm = 0x3333 |
| ; X64-BASE-NEXT: addl %eax, %edi |
| ; X64-BASE-NEXT: movl %edi, %eax |
| ; X64-BASE-NEXT: shrl $4, %eax |
| ; X64-BASE-NEXT: addl %edi, %eax |
| ; X64-BASE-NEXT: andl $3855, %eax # imm = 0xF0F |
| ; X64-BASE-NEXT: movl %eax, %ecx |
| ; X64-BASE-NEXT: shrl $8, %ecx |
| ; X64-BASE-NEXT: addl %eax, %ecx |
| ; X64-BASE-NEXT: movzbl %cl, %eax |
| ; X64-BASE-NEXT: retq |
| ; |
| ; X86-POPCNT-LABEL: popcount_i16_zext: |
| ; X86-POPCNT: # %bb.0: |
| ; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax |
| ; X86-POPCNT-NEXT: popcntl %eax, %eax |
| ; X86-POPCNT-NEXT: retl |
| ; |
| ; X64-POPCNT-LABEL: popcount_i16_zext: |
| ; X64-POPCNT: # %bb.0: |
| ; X64-POPCNT-NEXT: popcntl %edi, %eax |
| ; X64-POPCNT-NEXT: retq |
| ; |
| ; X64-NDD-LABEL: popcount_i16_zext: |
| ; X64-NDD: # %bb.0: |
| ; X64-NDD-NEXT: shrw %di, %ax |
| ; X64-NDD-NEXT: andw $21845, %ax # imm = 0x5555 |
| ; X64-NDD-NEXT: subw %ax, %di, %ax |
| ; X64-NDD-NEXT: andw $13107, %ax, %cx # imm = 0x3333 |
| ; X64-NDD-NEXT: shrw $2, %ax |
| ; X64-NDD-NEXT: andw $13107, %ax # imm = 0x3333 |
| ; X64-NDD-NEXT: addw %cx, %ax |
| ; X64-NDD-NEXT: shrw $4, %ax, %cx |
| ; X64-NDD-NEXT: addw %cx, %ax |
| ; X64-NDD-NEXT: andw $3855, %ax # imm = 0xF0F |
| ; X64-NDD-NEXT: movzbl %ah, %ecx |
| ; X64-NDD-NEXT: addw %cx, %ax |
| ; X64-NDD-NEXT: movzbl %al, %eax |
| ; X64-NDD-NEXT: movzwl %ax, %eax |
| ; X64-NDD-NEXT: retq |
| ; Count at i16 width first; the widening to i32 is applied to the count. |
| %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) |
| %z = zext i16 %cnt to i32 |
| ret i32 %z |
| } |
| |
| ; Declarations of the llvm.ctpop (population count) intrinsic overloads for |
| ; i8, i16, i32, i64 and i128, each marked nounwind readnone. |
| declare i8 @llvm.ctpop.i8(i8) nounwind readnone |
| declare i16 @llvm.ctpop.i16(i16) nounwind readnone |
| declare i32 @llvm.ctpop.i32(i32) nounwind readnone |
| declare i64 @llvm.ctpop.i64(i64) nounwind readnone |
| declare i128 @llvm.ctpop.i128(i128) nounwind readnone |
| |
| ; Module flag carrying a synthetic "ProfileSummary" (!0 -> !1), made up of the |
| ; summary fields !2..!9 and the DetailedSummary entries !11..!13. |
| !llvm.module.flags = !{!0} |
| !0 = !{i32 1, !"ProfileSummary", !1} |
| !1 = !{!2, !3, !4, !5, !6, !7, !8, !9} |
| !2 = !{!"ProfileFormat", !"InstrProf"} |
| !3 = !{!"TotalCount", i64 10000} |
| !4 = !{!"MaxCount", i64 10} |
| !5 = !{!"MaxInternalCount", i64 1} |
| !6 = !{!"MaxFunctionCount", i64 1000} |
| !7 = !{!"NumCounts", i64 3} |
| !8 = !{!"NumFunctions", i64 3} |
| !9 = !{!"DetailedSummary", !10} |
| !10 = !{!11, !12, !13} |
| !11 = !{i32 10000, i64 100, i32 1} |
| !12 = !{i32 999000, i64 100, i32 1} |
| !13 = !{i32 999999, i64 1, i32 2} |
| ; Entry count of 0. NOTE(review): presumably attached via !prof to the *_pgso |
| ; functions earlier in the file; their define lines are outside this excerpt, |
| ; so confirm there before renumbering or removing this node. |
| !14 = !{!"function_entry_count", i64 0} |