security: implement RSA private key ops (for PC only)

Definitely not fast, but we're OK with that - it will only be used
rarely, to sign things.
Tested with:

gcc -o test main.c -O2 -DRSA_SUPPORT_PRIV_OP_LOWRAM -I.&& ./test

main.c:

//hex digits, most significant first. no leading zeroes please. length must be a whole number of 32-bit words (readkey wants exactly RSA_BYTES * 2 digits)
static const char *k_mod = "dee5437c422df7afd324a5df1337b0059f88e10bc92b83746d1de004e41f494516c233ed1f9996d96ee517814888e6d85e4d3cb65b091ac59afcd8d395b33cc8af98e50cb7a46c8af93edbb9d319a0d82ab0782c2145797924b3e93ba80a9a8849661f928a384268c5f2e8362ba17bf9c8219846cd0e92147fa4f799bc8b1bc201c265eef74424a8b35fcab91250a9a7fbdf81ad0e1253d183a315b184ec0f6c3f6230ecfe3cf04bed12f7dc6b94a9f0844f952c3e5da4a861de3242b8ea3ff8fbbaaf9e117c7283eb31b03a77384fec0b700e0811015b836e5ae93be527173d940f43337f77dd1382a871b710721b550d72f2a92ab3a975350a50d0542bed67";
static const char *k_priv = "a4ee646757d907c434aa938a637ffb61cdd0cd25996ff74f794677f36691b7f7c5856242fdf4fa685de46ae621341c0cd28ab877ff18d7948cc0b7f8a876fe6d49720b5ac3b08918b4a1cc98ce19d2a8e1bdbece78b81dd0845614292e88da1c2aa3c48cfc75f0bdf26b71645418a3d20a42c198f236ebe4fb26a8883bb97b894b94ed8c75102e64f2848f0f7df70aee19f46aec5b7df18342827d83245b9d3681c4dd0452f9e74329b38078e345b73fae03c0ebd56e8f5e990c2e86ebdc10f59df926cd137ec1b9941937134e9190902ea00faab8f2f8be0913dce418240bdca32f1b861f6562982a13196c5512b84ec52a5d5b8139f5e6c5018cf10d344cf1";

//Parse a hex string of exactly RSA_BYTES * 2 digits into 32-bit words.
//The string is consumed eight digits at a time starting from its end, so
//to[0] receives the value of the last eight digits.
//Returns false on a wrong length or on any non-hex character; words stored
//before the offending group was reached are left in place.
static bool readkey(uint32_t *to, const char *k)
{
    const char *grp = k + strlen(k);

    if (grp - k != RSA_BYTES * 2)
        return false;

    while (grp != k) {
        uint32_t word = 0;
        const char *p;

        grp -= 8;
        for (p = grp; p != grp + 8; p++) {
            uint32_t nibble;
            char ch = *p;

            if (ch >= '0' && ch <= '9')
                nibble = ch - '0';
            else if (ch >= 'a' && ch <= 'f')
                nibble = ch - 'a' + 10;
            else if (ch >= 'A' && ch <= 'F')
                nibble = ch - 'A' + 10;
            else
                return false;

            //low four bits are empty after the shift, so OR-ing the digit in is the same as adding it
            word = (word << 4) | nibble;
        }
        *to++ = word;
    }
    return true;
}

//Print "<nm> = 0x" followed by RSA_LIMBS words, highest index first, each as
//"_" plus eight hex digits. Indexes at or beyond len are shown as zero.
static void printnum(const char *nm, uint32_t *num, uint32_t len)
{
    uint32_t idx = RSA_LIMBS;

    printf("%s = 0x", nm);
    while (idx-- > 0) {
        unsigned long word = 0;

        if (idx < len)
            word = num[idx];
        printf("_%08lx", word);
    }
    printf("\n");
}

//Round-trip self test: runs a message through rsaPubOp and then rsaPrivOp,
//prints every intermediate value, and checks that the original message comes back.
//Returns 0 on success, -1 if a key string fails to parse or if the round trip
//does not reproduce the message.
int main(int argc, char** argv)
{
    struct RsaState s;

    (void)argc;
    (void)argv;

    //real test
    {
        uint32_t priv[RSA_LIMBS], mod[RSA_LIMBS], msg[RSA_LIMBS] = {0}, encr[RSA_LIMBS], decr[RSA_LIMBS];

        //the code of this func is as good a test message as any. the last byte of msg
        //stays zero to make sure we're smaller than the modulus (on a little-endian
        //host that byte is the most significant one)
        memcpy(msg, &main, RSA_BYTES - 1);

        if (!readkey(priv, k_priv))
            return -1;
        if (!readkey(mod, k_mod))
            return -1;

        printnum("priv", priv, RSA_LIMBS);
        printnum("mod ", mod, RSA_LIMBS);
        printnum("msg ", msg, RSA_LIMBS);

        //both ops return a pointer into s, so each result is copied out before the next call reuses the state
        memcpy(encr, rsaPubOp(&s, msg, mod), RSA_BYTES);
        memcpy(decr, rsaPrivOp(&s, encr, priv, mod), RSA_BYTES);
        printnum("encr", encr, RSA_LIMBS);
        printnum("decr", decr, RSA_LIMBS);

        //without this check the test could never fail: the values were only printed
        if (memcmp(msg, decr, RSA_BYTES) != 0) {
            printf("FAIL: decr does not match msg\n");
            return -1;
        }
    }

    return 0;
}

Change-Id: I473a80253f1d6b8047ff1c25c632820f425cfc8f
diff --git a/firmware/inc/rsa.h b/firmware/inc/rsa.h
index c776eb4..62c3cfc 100644
--- a/firmware/inc/rsa.h
+++ b/firmware/inc/rsa.h
@@ -10,10 +10,26 @@
 struct RsaState {
     uint32_t tmpA[RSA_LIMBS * 2];
     uint32_t tmpB[RSA_LIMBS + 1];
+    //tmpC exists only in builds with private-key ops; rsaPrivOp keeps the current power of its base there
+#if defined(RSA_SUPPORT_PRIV_OP_LOWRAM)
+    uint32_t tmpC[RSA_LIMBS + 1];
+#elif defined (RSA_SUPPORT_PRIV_OP_BIGRAM)
+    uint32_t tmpC[RSA_LIMBS * 2];
+#endif
 };
 
 //calculate a ^ 65537 mod c, where a and c are each exactly RSA_LEN bits long, result is only valid as long as state is. state needs no init
 const uint32_t* rsaPubOp(struct RsaState* state, const uint32_t *a, const uint32_t *c);
 
+#if defined(RSA_SUPPORT_PRIV_OP_LOWRAM) || defined (RSA_SUPPORT_PRIV_OP_BIGRAM)
+//calculate a ^ b mod c, where a and c are each exactly RSA_LEN bits long and b (the private exponent) is read as RSA_LEN bits. result points into state and is only valid as long as state is. state needs no init
+const uint32_t* rsaPrivOp(struct RsaState* state, const uint32_t *a, const uint32_t *b, const uint32_t *c);
+//hard stop when ARM is defined (presumably that marks a firmware build - see the message below)
+#ifdef ARM
+#error "RSA private ops should never be compiled into firmware. You *ARE* doing something wrong! Stop!"
+#endif
+
+#endif
+
 #endif
 
diff --git a/firmware/src/rsa.c b/firmware/src/rsa.c
index f01f7a8..af79807 100644
--- a/firmware/src/rsa.c
+++ b/firmware/src/rsa.c
@@ -121,6 +121,44 @@
     return state->tmpA;
 }
 
+#if defined(RSA_SUPPORT_PRIV_OP_LOWRAM) || defined (RSA_SUPPORT_PRIV_OP_BIGRAM)
+const uint32_t* rsaPrivOp(struct RsaState* state, const uint32_t *a, const uint32_t *b, const uint32_t *c)
+{
+    uint32_t i;
+    //square-and-multiply: walks the RSA_LEN bits of b starting at bit 0 of b[0]. returns state->tmpA
+    memcpy(state->tmpC, a, RSA_BYTES);  //tC will hold our powers of a
+
+    memset(state->tmpA, 0, RSA_BYTES * 2); //tA will hold result
+    state->tmpA[0] = 1;
+
+    for (i = 0; i < RSA_LEN; i++) {
+        //if the bit is set, multiply the current power of A into result. mask is built unsigned: "1 << 31" on an int is undefined
+        if (b[i / 32] & ((uint32_t)1 << (i % 32))) {
+            memcpy(state->tmpB, state->tmpA, RSA_BYTES);
+            biMul(state->tmpA, state->tmpB, state->tmpC);
+            biMod(state->tmpA, c, state->tmpB); //NOTE(review): third biMod argument looks like scratch space - confirm
+        }
+
+        //calculate the next power of a and modulus it
+#if defined(RSA_SUPPORT_PRIV_OP_LOWRAM)
+        memcpy(state->tmpB, state->tmpA, RSA_BYTES); //save tA
+        biMul(state->tmpA, state->tmpC, state->tmpC);
+        biMod(state->tmpA, c, state->tmpC);
+        memcpy(state->tmpC, state->tmpA, RSA_BYTES);
+        memcpy(state->tmpA, state->tmpB, RSA_BYTES); //restore tA
+#elif defined (RSA_SUPPORT_PRIV_OP_BIGRAM)
+        memcpy(state->tmpB, state->tmpC, RSA_BYTES);
+        biMul(state->tmpC, state->tmpB, state->tmpB);
+        biMod(state->tmpC, c, state->tmpB);
+#endif
+    }
+
+    return state->tmpA;
+}
+#endif
+
+
+