| /* |
| * Copyright (C) 2016 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <stdint.h> |
| #include <stdbool.h> |
| #include <string.h> |
| #include <nanohub/rsa.h> |
| |
| |
| static bool biModIterative(uint32_t *num, const uint32_t *denum, uint32_t *tmp, uint32_t *state1, uint32_t *state2, uint32_t step) |
| //num %= denum where num is RSA_LEN * 2 and denum is RSA_LEN and tmp is RSA_LEN + limb_sz |
| //will need to be called till it returns true (up to RSA_LEN * 2 + 2 times) |
| { |
| uint32_t bitsh = *state1, limbsh = *state2; |
| bool ret = false; |
| int64_t t; |
| int32_t i; |
| |
| //first step is init |
| if (!step) { |
| //initially set it up left shifted as far as possible |
| memcpy(tmp + 1, denum, RSA_BYTES); |
| tmp[0] = 0; |
| bitsh = 32; |
| limbsh = RSA_LIMBS - 1; |
| goto out; |
| } |
| |
| //second is shifting denum |
| if (step == 1) { |
| while (!(tmp[RSA_LIMBS] & 0x80000000)) { |
| for (i = RSA_LIMBS; i > 0; i--) { |
| tmp[i] <<= 1; |
| if (tmp[i - 1] & 0x80000000) |
| tmp[i]++; |
| } |
| //no need to adjust tmp[0] as it is still zero |
| bitsh++; |
| } |
| goto out; |
| } |
| |
| //all future steps do the division |
| |
| //check if we should subtract (uses less space than subtracting and unroling it later) |
| for (i = RSA_LIMBS; i >= 0; i--) { |
| if (num[limbsh + i] < tmp[i]) |
| goto dont_subtract; |
| if (num[limbsh + i] > tmp[i]) |
| break; |
| } |
| |
| //subtract |
| t = 0; |
| for (i = 0; i <= RSA_LIMBS; i++) { |
| t += (uint64_t)num[limbsh + i]; |
| t -= (uint64_t)tmp[i]; |
| num[limbsh + i] = t; |
| t >>= 32; |
| } |
| |
| //carry the subtraction's carry to the end |
| for (i = RSA_LIMBS + limbsh + 1; i < RSA_LIMBS * 2; i++) { |
| t += (uint64_t)num[i]; |
| num[i] = t; |
| t >>= 32; |
| } |
| |
| dont_subtract: |
| //handle bitshifts/refills |
| if (!bitsh) { // tmp = denum << 32 |
| if (!limbsh) { |
| ret = true; |
| goto out; |
| } |
| |
| memcpy(tmp + 1, denum, RSA_BYTES); |
| tmp[0] = 0; |
| bitsh = 32; |
| limbsh--; |
| } |
| else { // tmp >>= 1 |
| for (i = 0; i < RSA_LIMBS; i++) { |
| tmp[i] >>= 1; |
| if (tmp[i + 1] & 1) |
| tmp[i] += 0x80000000; |
| } |
| tmp[i] >>= 1; |
| bitsh--; |
| } |
| |
| |
| out: |
| *state1 = bitsh; |
| *state2 = limbsh; |
| return ret; |
| } |
| |
| static void biMulIterative(uint32_t *ret, const uint32_t *a, const uint32_t *b, uint32_t step) //ret = a * b, call with step = [0..RSA_LIMBS) |
| { |
| uint32_t j, c; |
| uint64_t r; |
| |
| //zero the result on first call |
| if (!step) |
| memset(ret, 0, RSA_BYTES * 2); |
| |
| //produce a partial sum & add it in |
| c = 0; |
| for (j = 0; j < RSA_LIMBS; j++) { |
| r = (uint64_t)a[step] * b[j] + c + ret[step + j]; |
| ret[step + j] = r; |
| c = r >> 32; |
| } |
| |
| //carry the carry to the end |
| for (j = step + RSA_LIMBS; j < RSA_LIMBS * 2; j++) { |
| r = (uint64_t)ret[j] + c; |
| ret[j] = r; |
| c = r >> 32; |
| } |
| } |
| |
| /* |
| * Piecewise RSA: |
| * normal RSA public op with 65537 exponent does 34 operations. 17 muls and 17 mods, as follows: |
| * 16x {mul, mod} to calculate a ^ 65536 mod c |
| * 1x {mul, mod} to calculate a ^ 65537 mod c |
| * we break up each mul and mod itself into more steps. mul needs RSA_LIMBS steps, and mod needs up to RSA_LEN * 2 + 2 steps |
| * so if we allocate RSA_LEN * 3 step values to mod, each mul-mod pair will use <= RSA_LEN * 4 step values |
| * and the whole opetaion will need <= RSA_LEN * 4 * 34 step values, which fits into a uint32. cool. In fact |
| * some values will be skipped, but this makes life easier, really. Call this func with *stepP = 0, and keep calling till |
| * output stepP is zero. We'll call each of the RSA_LEN * 4 pieces a gigastep, and have 17 of them as seen above. Each |
| * will be logically separated into 4 megasteps. First will contain the MUL, last 3 the MOD and maybe the memcpy. |
| * In the first 16 gigasteps, the very last step of the gigastep will be used for the memcpy call. |
| * |
| * The initial non-iterative RSA logic looks as follows, shown here for clarity: |
| * |
| * memcpy(state->tmpB, a, RSA_BYTES); |
| * for (i = 0; i < 16; i++) { |
| * biMul(state->tmpA, state->tmpB, state->tmpB); |
| * biMod(state->tmpA, c, state->tmpB); |
| * memcpy(state->tmpB, state->tmpA, RSA_BYTES); |
| * } |
| * |
| * //calculate a ^ 65537 mod c into state->tmpA [ at this point this means do state->tmpA = (state->tmpB * a) % c ] |
| * biMul(state->tmpA, state->tmpB, a); |
| * biMod(state->tmpA, c, state->tmpB); |
| * |
| * //return result |
| * return state->tmpA; |
| * |
| */ |
| |
| const uint32_t* rsaPubOpIterative(struct RsaState* state, const uint32_t *a, const uint32_t *c, uint32_t *state1, uint32_t *state2, uint32_t *stepP) |
| { |
| uint32_t step = *stepP, gigastep, gigastepBase, gigastepSubstep, megaSubstep; |
| |
| //step 0: copy a -> tmpB |
| if (!step) { |
| memcpy(state->tmpB, a, RSA_BYTES); |
| step = 1; |
| } |
| else { //subsequent steps: do real work |
| |
| |
| gigastep = (step - 1) / (RSA_LEN * 4); |
| gigastepSubstep = (step - 1) % (RSA_LEN * 4); |
| gigastepBase = gigastep * (RSA_LEN * 4); |
| megaSubstep = gigastepSubstep / RSA_LEN; |
| |
| if (!megaSubstep) { // first megastep of the gigastep - MUL |
| biMulIterative(state->tmpA, state->tmpB, gigastep == 16 ? a : state->tmpB, gigastepSubstep); |
| if (gigastepSubstep == RSA_LIMBS - 1) //MUL is done - do mod next |
| step = gigastepBase + RSA_LEN + 1; |
| else //More of MUL is left to do |
| step++; |
| } |
| else if (gigastepSubstep != RSA_LEN * 4 - 1){ // second part of gigastep - MOD |
| if (biModIterative(state->tmpA, c, state->tmpB, state1, state2, gigastepSubstep - RSA_LEN)) { //MOD is done |
| if (gigastep == 16) // we're done |
| step = 0; |
| else // last part of the gigastep is a copy |
| step = gigastepBase + RSA_LEN * 4 - 1 + 1; |
| } |
| else |
| step++; |
| } |
| else { //last part - memcpy |
| memcpy(state->tmpB, state->tmpA, RSA_BYTES); |
| step++; |
| } |
| } |
| |
| *stepP = step; |
| return state->tmpA; |
| } |
| |
| #if defined(RSA_SUPPORT_PRIV_OP_LOWRAM) || defined (RSA_SUPPORT_PRIV_OP_BIGRAM) |
| #include <stdio.h> |
| const uint32_t* rsaPubOp(struct RsaState* state, const uint32_t *a, const uint32_t *c) |
| { |
| const uint32_t *ret; |
| uint32_t state1 = 0, state2 = 0, step = 0, ns = 0; |
| |
| do { |
| ret = rsaPubOpIterative(state, a, c, &state1, &state2, &step); |
| ns++; |
| } while(step); |
| |
| fprintf(stderr, "steps: %u\n", ns); |
| |
| return ret; |
| } |
| |
| static void biMod(uint32_t *num, const uint32_t *denum, uint32_t *tmp) |
| { |
| uint32_t state1 = 0, state2 = 0, step; |
| |
| for (step = 0; !biModIterative(num, denum, tmp, &state1, &state2, step); step++); |
| } |
| |
| static void biMul(uint32_t *ret, const uint32_t *a, const uint32_t *b) |
| { |
| uint32_t step; |
| |
| for (step = 0; step < RSA_LIMBS; step++) |
| biMulIterative(ret, a, b, step); |
| } |
| |
| const uint32_t* rsaPrivOp(struct RsaState* state, const uint32_t *a, const uint32_t *b, const uint32_t *c) |
| { |
| uint32_t i; |
| |
| memcpy(state->tmpC, a, RSA_BYTES); //tC will hold our powers of a |
| |
| memset(state->tmpA, 0, RSA_BYTES * 2); //tA will hold result |
| state->tmpA[0] = 1; |
| |
| for (i = 0; i < RSA_LEN; i++) { |
| //if the bit is set, multiply the current power of A into result |
| if (b[i / 32] & (1 << (i % 32))) { |
| memcpy(state->tmpB, state->tmpA, RSA_BYTES); |
| biMul(state->tmpA, state->tmpB, state->tmpC); |
| biMod(state->tmpA, c, state->tmpB); |
| } |
| |
| //calculate the next power of a and modulus it |
| #if defined(RSA_SUPPORT_PRIV_OP_LOWRAM) |
| memcpy(state->tmpB, state->tmpA, RSA_BYTES); //save tA |
| biMul(state->tmpA, state->tmpC, state->tmpC); |
| biMod(state->tmpA, c, state->tmpC); |
| memcpy(state->tmpC, state->tmpA, RSA_BYTES); |
| memcpy(state->tmpA, state->tmpB, RSA_BYTES); //restore tA |
| #elif defined (RSA_SUPPORT_PRIV_OP_BIGRAM) |
| memcpy(state->tmpB, state->tmpC, RSA_BYTES); |
| biMul(state->tmpC, state->tmpB, state->tmpB); |
| biMod(state->tmpC, c, state->tmpB); |
| #endif |
| } |
| |
| return state->tmpA; |
| } |
| #endif |
| |
| |
| |
| |
| |
| |
| |
| |