| // Copyright 2020 Google LLC |
| // This source code is licensed under the BSD-style license found in the |
| // LICENSE file in the root directory of this source tree. |
| #include <xnnpack/unpool.h> |
| void xnn_x32_unpool_ukernel__neon( |
| // Pre-initialize outputs with the fill constant: broadcast fill into a 4-lane vector, then store it using 4-, 2- and 1-element writes. |
| const uint32x4_t vfill = vdupq_n_u32(fill); |
| vst1q_u32(o, vfill); o += 4; |
| vst1_u32(o, vget_low_u32(vfill)); o += 2; |
| vst1q_lane_u32(o, vfill, 0); |
| } while (--kernel_elements != 0); |
| // Copy indexed elements to output: read the next index entry i, write the next input element to output[i] at the current byte offset, then advance the offset by one element. |
| const uint32_t i = *index++; |
| *((uint32_t*) ((uintptr_t) output[i] + offset)) = *input++; |
| offset += sizeof(uint32_t); |
| } while (--channels != 0); |