Implement stp (x registers) for aarch64 assembler

PiperOrigin-RevId: 426163361
diff --git a/src/jit/aarch64-assembler.cc b/src/jit/aarch64-assembler.cc
index b2bf11e..bc336df 100644
--- a/src/jit/aarch64-assembler.cc
+++ b/src/jit/aarch64-assembler.cc
@@ -291,6 +291,16 @@
   emit32(0xD65F0000 | rn(x30));
 }
 
+// STP of two X registers: sets error_ to kInvalidOperand on a bad offset, else emits one word.
+void Assembler::stp(XRegister xt1, XRegister xt2, MemOperand xn) {
+  if (imm7_offset_valid(xn.offset, xt1)) {
+    const uint32_t imm7 = (xn.offset >> 3) & kImm7Mask;  // offset >> 3, masked to the imm7 width
+    emit32(0xA9000000 | wb(xn) | (imm7 << 15) | rt2(xt2) | rn(xn.base) | rt(xt1));
+    return;
+  }
+  error_ = Error::kInvalidOperand;
+}
+
 void Assembler::sub(XRegister xd, XRegister xn, XRegister xm) {
   emit32(0xCB000000 | rm(xm) | rn(xn) | rd(xd));
 }
diff --git a/src/xnnpack/aarch64-assembler.h b/src/xnnpack/aarch64-assembler.h
index 084746d..e966fe4 100644
--- a/src/xnnpack/aarch64-assembler.h
+++ b/src/xnnpack/aarch64-assembler.h
@@ -339,6 +339,7 @@
   void ldr(XRegister xt, MemOperand xn);
   void prfm(PrefetchOp prfop, MemOperand xn);
   void ret();
+  void stp(XRegister xt1, XRegister xt2, MemOperand xn);
   void sub(XRegister xd, XRegister xn, XRegister xm);
   void subs(XRegister xd, XRegister xn, uint16_t imm12);
   void tbnz(XRegister xd, uint8_t bit, Label& l);
diff --git a/test/aarch64-assembler.cc b/test/aarch64-assembler.cc
index cfd7444..2add642 100644
--- a/test/aarch64-assembler.cc
+++ b/test/aarch64-assembler.cc
@@ -64,6 +64,14 @@
 
   CHECK_ENCODING(0xCB020083, a.sub(x3, x4, x2));
 
+  CHECK_ENCODING(0xA90457F4, a.stp(x20, x21, mem[sp, 64]));
+  CHECK_ENCODING(0xA98457F4, a.stp(x20, x21, mem[sp, 64]++));
+  CHECK_ENCODING(0xA91FD7F4, a.stp(x20, x21, mem[sp, 504]));
+  CHECK_ENCODING(0xA92057F4, a.stp(x20, x21, mem[sp, -512]));
+  EXPECT_ERROR(Error::kInvalidOperand, a.stp(x20, x21, mem[sp, 3]));
+  EXPECT_ERROR(Error::kInvalidOperand, a.stp(x20, x21, mem[sp, 512]));
+  EXPECT_ERROR(Error::kInvalidOperand, a.stp(x20, x21, mem[sp, -520]));
+
   CHECK_ENCODING(0xF1008040, a.subs(x0, x2, 32));
   CHECK_ENCODING(0xF13FFC40, a.subs(x0, x2, 4095));
   EXPECT_ERROR(Error::kInvalidOperand, a.subs(x0, x2, -32));