diff options
Diffstat (limited to 'Cryptlib/OpenSSL/crypto/bn/bn_mont.c')
| -rw-r--r-- | Cryptlib/OpenSSL/crypto/bn/bn_mont.c | 404 |
1 files changed, 79 insertions, 325 deletions
diff --git a/Cryptlib/OpenSSL/crypto/bn/bn_mont.c b/Cryptlib/OpenSSL/crypto/bn/bn_mont.c index bf40e823..aadd5db1 100644 --- a/Cryptlib/OpenSSL/crypto/bn/bn_mont.c +++ b/Cryptlib/OpenSSL/crypto/bn/bn_mont.c @@ -122,20 +122,7 @@ #define MONT_WORD /* use the faster word-based algorithm */ -#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) -/* - * This condition means we have a specific non-default build: In the 0.9.8 - * branch, OPENSSL_BN_ASM_MONT is normally not set for any BN_BITS2<=32 - * platform; an explicit "enable-montasm" is required. I.e., if we are here, - * the user intentionally deviates from the normal stable build to get better - * Montgomery performance from the 0.9.9-dev backport. In this case only, we - * also enable BN_from_montgomery_word() (another non-stable feature from - * 0.9.9-dev). - */ -# define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD -#endif - -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); #endif @@ -150,12 +137,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, if (num > 1 && a->top == num && b->top == num) { if (bn_wexpand(r, num) == NULL) return (0); -# if 0 /* for OpenSSL 0.9.9 mont->n0 */ - if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) -# else - if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, &mont->n0, num)) -# endif - { + if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { r->neg = a->neg ^ b->neg; r->top = num; bn_correct_top(r); @@ -178,7 +160,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, goto err; } /* reduce from aRR to aR */ -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD if (!BN_from_montgomery_word(r, tmp, mont)) goto err; #else @@ -192,49 +174,43 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, return (ret); } -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) { BIGNUM *n; - BN_ULONG *ap, *np, *rp, n0, v, *nrp; - int al, nl, max, i, x, ri; + BN_ULONG *ap, *np, *rp, n0, v, carry; + int nl, max, i; n = &(mont->N); - /* - * mont->ri is the size of mont->N in bits (rounded up to the word size) - */ - al = ri = mont->ri / BN_BITS2; - nl = n->top; - if ((al == 0) || (nl == 0)) { + if (nl == 0) { ret->top = 0; return (1); } - max = (nl + al + 1); /* allow for overflow (no?) XXX */ + max = (2 * nl); /* carry is stored separately */ if (bn_wexpand(r, max) == NULL) return (0); r->neg ^= n->neg; np = n->d; rp = r->d; - nrp = &(r->d[nl]); /* clear the top words of T */ +# if 1 for (i = r->top; i < max; i++) /* memset? XXX */ - r->d[i] = 0; + rp[i] = 0; +# else + memset(&(rp[r->top]), 0, (max - r->top) * sizeof(BN_ULONG)); +# endif r->top = max; -# if 0 /* for OpenSSL 0.9.9 mont->n0 */ n0 = mont->n0[0]; -# else - n0 = mont->n0; -# endif # ifdef BN_COUNT fprintf(stderr, "word BN_from_montgomery_word %d * %d\n", nl, nl); # endif - for (i = 0; i < nl; i++) { + for (carry = 0, i = 0; i < nl; i++, rp++) { # ifdef __TANDEM { long long t1; @@ -251,285 +227,78 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) # else v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2); # endif - nrp++; - rp++; - if (((nrp[-1] += v) & BN_MASK2) >= v) - continue; - else { - if (((++nrp[0]) & BN_MASK2) != 0) - continue; - if (((++nrp[1]) & BN_MASK2) != 0) - continue; - for (x = 2; (((++nrp[x]) & BN_MASK2) == 0); x++) ; - } - } - bn_correct_top(r); - - /* - * mont->ri will be a multiple of the word size and below code is kind of - * BN_rshift(ret,r,mont->ri) equivalent - */ - if (r->top <= ri) { - ret->top = 0; - return (1); + v = (v + carry + rp[nl]) & BN_MASK2; + carry |= (v != rp[nl]); + carry &= (v <= rp[nl]); + rp[nl] = v; } - al = r->top - ri; - if (bn_wexpand(ret, ri) == NULL) + if (bn_wexpand(ret, nl) == NULL) return (0); - x = 0 - (((al - ri) >> (sizeof(al) * 8 - 1)) & 1); - ret->top = x = (ri & ~x) | (al & x); /* min(ri,al) */ + ret->top = nl; ret->neg = r->neg; rp = ret->d; - ap = &(r->d[ri]); + ap = &(r->d[nl]); +# define BRANCH_FREE 1 +# if BRANCH_FREE { - size_t m1, m2; + BN_ULONG *nrp; + size_t m; - v = bn_sub_words(rp, ap, np, ri); + v = bn_sub_words(rp, ap, np, nl) - carry; /* - * this ----------------^^ works even in al<ri case thanks to zealous - * zeroing of top of the vector in the beginning. + * if subtraction result is real, then trick unconditional memcpy + * below to perform in-place "refresh" instead of actual copy. */ - - /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */ - /* - * in other words if subtraction result is real, then trick - * unconditional memcpy below to perform in-place "refresh" instead - * of actual copy. - */ - m1 = 0 - (size_t)(((al - ri) >> (sizeof(al) * 8 - 1)) & 1); /* al<ri */ - m2 = 0 - (size_t)(((ri - al) >> (sizeof(al) * 8 - 1)) & 1); /* al>ri */ - m1 |= m2; /* (al!=ri) */ - m1 |= (0 - (size_t)v); /* (al!=ri || v) */ - m1 &= ~m2; /* (al!=ri || v) && !al>ri */ - nrp = (BN_ULONG *)(((size_t)rp & ~m1) | ((size_t)ap & m1)); - } - - /* - * 'i<ri' is chosen to eliminate dependency on input data, even though it - * results in redundant copy in al<ri case. - */ - for (i = 0, ri -= 4; i < ri; i += 4) { - BN_ULONG t1, t2, t3, t4; - - t1 = nrp[i + 0]; - t2 = nrp[i + 1]; - t3 = nrp[i + 2]; - ap[i + 0] = 0; - t4 = nrp[i + 3]; - ap[i + 1] = 0; - rp[i + 0] = t1; - ap[i + 2] = 0; - rp[i + 1] = t2; - ap[i + 3] = 0; - rp[i + 2] = t3; - rp[i + 3] = t4; + m = (0 - (size_t)v); + nrp = + (BN_ULONG *)(((PTR_SIZE_INT) rp & ~m) | ((PTR_SIZE_INT) ap & m)); + + for (i = 0, nl -= 4; i < nl; i += 4) { + BN_ULONG t1, t2, t3, t4; + + t1 = nrp[i + 0]; + t2 = nrp[i + 1]; + t3 = nrp[i + 2]; + ap[i + 0] = 0; + t4 = nrp[i + 3]; + ap[i + 1] = 0; + rp[i + 0] = t1; + ap[i + 2] = 0; + rp[i + 1] = t2; + ap[i + 3] = 0; + rp[i + 2] = t3; + rp[i + 3] = t4; + } + for (nl += 4; i < nl; i++) + rp[i] = nrp[i], ap[i] = 0; } - for (ri += 4; i < ri; i++) - rp[i] = nrp[i], ap[i] = 0; +# else + if (bn_sub_words(rp, ap, np, nl) - carry) + memcpy(rp, ap, nl * sizeof(BN_ULONG)); +# endif bn_correct_top(r); bn_correct_top(ret); bn_check_top(ret); return (1); } +#endif /* MONT_WORD */ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, BN_CTX *ctx) { int retn = 0; +#ifdef MONT_WORD BIGNUM *t; BN_CTX_start(ctx); if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) retn = BN_from_montgomery_word(ret, t, mont); BN_CTX_end(ctx); - return retn; -} - -#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ - -int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, - BN_CTX *ctx) -{ - int retn = 0; - -# ifdef MONT_WORD - BIGNUM *n, *r; - BN_ULONG *ap, *np, *rp, n0, v, *nrp; - int al, nl, max, i, x, ri; - - BN_CTX_start(ctx); - if ((r = BN_CTX_get(ctx)) == NULL) - goto err; - - if (!BN_copy(r, a)) - goto err; - n = &(mont->N); - - ap = a->d; - /* - * mont->ri is the size of mont->N in bits (rounded up to the word size) - */ - al = ri = mont->ri / BN_BITS2; - - nl = n->top; - if ((al == 0) || (nl == 0)) { - r->top = 0; - return (1); - } - - max = (nl + al + 1); /* allow for overflow (no?) XXX */ - if (bn_wexpand(r, max) == NULL) - goto err; - - r->neg = a->neg ^ n->neg; - np = n->d; - rp = r->d; - nrp = &(r->d[nl]); - - /* clear the top words of T */ -# if 1 - for (i = r->top; i < max; i++) /* memset? XXX */ - r->d[i] = 0; -# else - memset(&(r->d[r->top]), 0, (max - r->top) * sizeof(BN_ULONG)); -# endif - - r->top = max; - n0 = mont->n0; - -# ifdef BN_COUNT - fprintf(stderr, "word BN_from_montgomery %d * %d\n", nl, nl); -# endif - for (i = 0; i < nl; i++) { -# ifdef __TANDEM - { - long long t1; - long long t2; - long long t3; - t1 = rp[0] * (n0 & 0177777); - t2 = 037777600000l; - t2 = n0 & t2; - t3 = rp[0] & 0177777; - t2 = (t3 * t2) & BN_MASK2; - t1 = t1 + t2; - v = bn_mul_add_words(rp, np, nl, (BN_ULONG)t1); - } -# else - v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2); -# endif - nrp++; - rp++; - if (((nrp[-1] += v) & BN_MASK2) >= v) - continue; - else { - if (((++nrp[0]) & BN_MASK2) != 0) - continue; - if (((++nrp[1]) & BN_MASK2) != 0) - continue; - for (x = 2; (((++nrp[x]) & BN_MASK2) == 0); x++) ; - } - } - bn_correct_top(r); - - /* - * mont->ri will be a multiple of the word size and below code is kind of - * BN_rshift(ret,r,mont->ri) equivalent - */ - if (r->top <= ri) { - ret->top = 0; - retn = 1; - goto err; - } - al = r->top - ri; - -# define BRANCH_FREE 1 -# if BRANCH_FREE - if (bn_wexpand(ret, ri) == NULL) - goto err; - x = 0 - (((al - ri) >> (sizeof(al) * 8 - 1)) & 1); - ret->top = x = (ri & ~x) | (al & x); /* min(ri,al) */ - ret->neg = r->neg; - - rp = ret->d; - ap = &(r->d[ri]); - - { - size_t m1, m2; - - v = bn_sub_words(rp, ap, np, ri); - /* - * this ----------------^^ works even in al<ri case thanks to zealous - * zeroing of top of the vector in the beginning. - */ - - /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */ - /* - * in other words if subtraction result is real, then trick - * unconditional memcpy below to perform in-place "refresh" instead - * of actual copy. - */ - m1 = 0 - (size_t)(((al - ri) >> (sizeof(al) * 8 - 1)) & 1); /* al<ri */ - m2 = 0 - (size_t)(((ri - al) >> (sizeof(al) * 8 - 1)) & 1); /* al>ri */ - m1 |= m2; /* (al!=ri) */ - m1 |= (0 - (size_t)v); /* (al!=ri || v) */ - m1 &= ~m2; /* (al!=ri || v) && !al>ri */ - nrp = (BN_ULONG *)(((size_t)rp & ~m1) | ((size_t)ap & m1)); - } - - /* - * 'i<ri' is chosen to eliminate dependency on input data, even though it - * results in redundant copy in al<ri case. - */ - for (i = 0, ri -= 4; i < ri; i += 4) { - BN_ULONG t1, t2, t3, t4; - - t1 = nrp[i + 0]; - t2 = nrp[i + 1]; - t3 = nrp[i + 2]; - ap[i + 0] = 0; - t4 = nrp[i + 3]; - ap[i + 1] = 0; - rp[i + 0] = t1; - ap[i + 2] = 0; - rp[i + 1] = t2; - ap[i + 3] = 0; - rp[i + 2] = t3; - rp[i + 3] = t4; - } - for (ri += 4; i < ri; i++) - rp[i] = nrp[i], ap[i] = 0; - bn_correct_top(r); - bn_correct_top(ret); -# else - if (bn_wexpand(ret, al) == NULL) - goto err; - ret->top = al; - ret->neg = r->neg; - - rp = ret->d; - ap = &(r->d[ri]); - al -= 4; - for (i = 0; i < al; i += 4) { - BN_ULONG t1, t2, t3, t4; - - t1 = ap[i + 0]; - t2 = ap[i + 1]; - t3 = ap[i + 2]; - t4 = ap[i + 3]; - rp[i + 0] = t1; - rp[i + 1] = t2; - rp[i + 2] = t3; - rp[i + 3] = t4; - } - al += 4; - for (; i < al; i++) - rp[i] = ap[i]; -# endif -# else /* !MONT_WORD */ +#else /* !MONT_WORD */ BIGNUM *t1, *t2; BN_CTX_start(ctx); @@ -552,21 +321,18 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, goto err; if (!BN_rshift(ret, t2, mont->ri)) goto err; -# endif /* MONT_WORD */ -# if !defined(BRANCH_FREE) || BRANCH_FREE==0 if (BN_ucmp(ret, &(mont->N)) >= 0) { if (!BN_usub(ret, ret, &(mont->N))) goto err; } -# endif retn = 1; bn_check_top(ret); err: BN_CTX_end(ctx); +#endif /* MONT_WORD */ return (retn); } -#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ BN_MONT_CTX *BN_MONT_CTX_new(void) { @@ -586,11 +352,7 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx) BN_init(&(ctx->RR)); BN_init(&(ctx->N)); BN_init(&(ctx->Ni)); -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ ctx->n0[0] = ctx->n0[1] = 0; -#else - ctx->n0 = 0; -#endif ctx->flags = 0; } @@ -624,32 +386,25 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) BIGNUM tmod; BN_ULONG buf[2]; - mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2; - BN_zero(R); -# if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if - * defined(OPENSSL_BN_ASM_MONT) && - * (BN_BITS2<=32)", only certain BN_BITS2<=32 - * platforms actually need this */ - if (!(BN_set_bit(R, 2 * BN_BITS2))) - goto err; /* R */ -# else - if (!(BN_set_bit(R, BN_BITS2))) - goto err; /* R */ -# endif - - buf[0] = mod->d[0]; /* tmod = N mod word size */ - buf[1] = 0; - BN_init(&tmod); tmod.d = buf; - tmod.top = buf[0] != 0 ? 1 : 0; tmod.dmax = 2; tmod.neg = 0; -# if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if - * defined(OPENSSL_BN_ASM_MONT) && - * (BN_BITS2<=32)"; only certain BN_BITS2<=32 - * platforms actually need this */ + mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2; + +# if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) + /* + * Only certain BN_BITS2<=32 platforms actually make use of n0[1], + * and we could use the #else case (with a shorter R value) for the + * others. However, currently only the assembler files do know which + * is which. + */ + + BN_zero(R); + if (!(BN_set_bit(R, 2 * BN_BITS2))) + goto err; + tmod.top = 0; if ((buf[0] = mod->d[0])) tmod.top = 1; @@ -681,6 +436,13 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0; # else + BN_zero(R); + if (!(BN_set_bit(R, BN_BITS2))) + goto err; /* R */ + + buf[0] = mod->d[0]; /* tmod = N mod word size */ + buf[1] = 0; + tmod.top = buf[0] != 0 ? 1 : 0; /* Ri = R^-1 mod N */ if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) goto err; @@ -699,12 +461,8 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) /* * Ni = (R*Ri-1)/N, keep only least significant word: */ -# if 0 /* for OpenSSL 0.9.9 mont->n0 */ mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; mont->n0[1] = 0; -# else - mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; -# endif # endif } #else /* !MONT_WORD */ @@ -753,12 +511,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) if (!BN_copy(&(to->Ni), &(from->Ni))) return NULL; to->ri = from->ri; -#if 0 /* for OpenSSL 0.9.9 mont->n0 */ to->n0[0] = from->n0[0]; to->n0[1] = from->n0[1]; -#else - to->n0 = from->n0; -#endif return (to); } |
