| from peachpy import * |
| from peachpy.x86_64 import * |
| |
| import fp16.avx, fp16.avx2 |
| |
| |
# Argument descriptors used by the stub functions in this file:
#   fp16 - pointer to const 16-bit unsigned words (the source buffer)
#   fp32 - pointer to 32-bit unsigned words (the destination buffer)
arg_fp16 = Argument(
    ptr(const_uint16_t),
    name="fp16",
)
arg_fp32 = Argument(
    ptr(uint32_t),
    name="fp32",
)
| |
# AVX2 stub: loads one XMM register from the `fp16` pointer, passes it through
# fp16.avx2.fp16_alt_xmm_to_fp32_ymm, and stores the resulting YMM register
# to the `fp32` pointer.
with Function(
    "fp16_alt_xmm_to_fp32_ymm_peachpy__avx2",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx2,
):
    # Source pointer (first argument).
    src_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(src_ptr, arg_fp16)

    # Destination pointer (second argument).
    dst_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(dst_ptr, arg_fp32)

    # Unaligned load of a full XMM register from the source pointer.
    # NOTE(review): presumably 8 packed half-precision values - confirm
    # against the fp16.avx2 helper.
    xmm_half = XMMRegister()
    VMOVUPS(xmm_half, [src_ptr])

    # The helper emits the conversion code and hands back the result register.
    ymm_single = fp16.avx2.fp16_alt_xmm_to_fp32_ymm(xmm_half)

    # Unaligned store of the converted register to the destination pointer.
    VMOVUPS([dst_ptr], ymm_single)

    RETURN()
| |
# AVX stub: loads one XMM register from the `fp16` pointer, passes it through
# fp16.avx.fp16_alt_xmm_to_fp32_xmm, and stores the resulting XMM register
# to the `fp32` pointer.
with Function(
    "fp16_alt_xmm_to_fp32_xmm_peachpy__avx",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx,
):
    # Source pointer (first argument).
    src_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(src_ptr, arg_fp16)

    # Destination pointer (second argument).
    dst_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(dst_ptr, arg_fp32)

    # Unaligned load of a full XMM register from the source pointer.
    # NOTE(review): the result is a single XMM register, so the helper may
    # consume only part of this load - confirm how many input bytes callers
    # must make readable.
    xmm_half = XMMRegister()
    VMOVUPS(xmm_half, [src_ptr])

    # The helper emits the conversion code and hands back the result register.
    xmm_single = fp16.avx.fp16_alt_xmm_to_fp32_xmm(xmm_half)

    # Unaligned store of the converted register to the destination pointer.
    VMOVUPS([dst_ptr], xmm_single)

    RETURN()