3 * Copyright (C) 2006-2014 wolfSSL Inc.
5 * This file is part of CyaSSL.
7 * CyaSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * CyaSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
25 * http://math.libtomcrypt.com
33 /* in case user set USE_FAST_MATH there */
34 #include <cyassl/ctaocrypt/settings.h>
40 #include <cyassl/ctaocrypt/integer.h>
42 #ifndef NO_CYASSL_SMALL_STACK
43 #ifndef CYASSL_SMALL_STACK
44 #define CYASSL_SMALL_STACK
48 static void bn_reverse (unsigned char *s, int len);
50 /* math settings check */
51 word32 CheckRunTimeSettings(void)
57 /* handle up to 6 inits */
58 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
63 if (a && ((res = mp_init(a)) != MP_OKAY))
66 if (b && ((res = mp_init(b)) != MP_OKAY)) {
71 if (c && ((res = mp_init(c)) != MP_OKAY)) {
72 mp_clear(a); mp_clear(b);
76 if (d && ((res = mp_init(d)) != MP_OKAY)) {
77 mp_clear(a); mp_clear(b); mp_clear(c);
81 if (e && ((res = mp_init(e)) != MP_OKAY)) {
82 mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
86 if (f && ((res = mp_init(f)) != MP_OKAY)) {
87 mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
95 /* init a new mp_int */
96 int mp_init (mp_int * a)
100 /* allocate memory required and clear it */
101 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
102 DYNAMIC_TYPE_BIGINT);
107 /* set the digits to zero */
108 for (i = 0; i < MP_PREC; i++) {
112 /* set the used to zero, allocated digits to the default precision
113 * and sign to positive */
122 /* clear one (frees) */
124 mp_clear (mp_int * a)
131 /* only do anything if a hasn't been freed previously */
133 /* first zero the digits */
134 for (i = 0; i < a->used; i++) {
139 XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
141 /* reset members to make debugging easier */
143 a->alloc = a->used = 0;
149 /* get the size for an unsigned equivalent */
150 int mp_unsigned_bin_size (mp_int * a)
152 int size = mp_count_bits (a);
153 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
157 /* returns the number of bits in an int */
159 mp_count_bits (mp_int * a)
169 /* get number of digits and add that */
170 r = (a->used - 1) * DIGIT_BIT;
172 /* take the last digit and count the bits in it */
173 q = a->dp[a->used - 1];
174 while (q > ((mp_digit) 0)) {
176 q >>= ((mp_digit) 1);
182 int mp_leading_bit (mp_int * a)
187 if (mp_init_copy(&t, a) != MP_OKAY)
190 while (mp_iszero(&t) == 0) {
192 bit = (t.dp[0] & 0x80) != 0;
194 bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
196 if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
204 /* store in unsigned [big endian] format */
205 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
210 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
215 while (mp_iszero (&t) == 0) {
217 b[x++] = (unsigned char) (t.dp[0] & 255);
219 b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
221 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
232 /* creates "a" then copies b into it */
233 int mp_init_copy (mp_int * a, mp_int * b)
237 if ((res = mp_init (a)) != MP_OKAY) {
240 return mp_copy (b, a);
246 mp_copy (mp_int * a, mp_int * b)
250 /* if dst == src do nothing */
256 if (b->alloc < a->used) {
257 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
262 /* zero b and copy the parameters over */
264 register mp_digit *tmpa, *tmpb;
266 /* pointer aliases */
274 /* copy all the digits */
275 for (n = 0; n < a->used; n++) {
279 /* clear high digits */
280 for (; n < b->used; n++) {
285 /* copy used count and sign */
292 /* grow as required */
293 int mp_grow (mp_int * a, int size)
298 /* if the alloc size is smaller alloc more ram */
299 if (a->alloc < size) {
300 /* ensure there are always at least MP_PREC digits extra on top */
301 size += (MP_PREC * 2) - (size % MP_PREC);
303 /* reallocate the array a->dp
305 * We store the return in a temporary variable
306 * in case the operation failed we don't want
307 * to overwrite the dp member of a.
309 tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
310 DYNAMIC_TYPE_BIGINT);
312 /* reallocation failed but "a" is still valid [can be freed] */
316 /* reallocation succeeded so set a->dp */
319 /* zero excess digits */
322 for (; i < a->alloc; i++) {
330 /* reverse an array, used for radix code */
332 bn_reverse (unsigned char *s, int len)
349 /* shift right by a certain bit count (store quotient in c, optional
351 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
357 /* if the shift count is <= 0 then we do no work */
359 res = mp_copy (a, c);
366 if ((res = mp_init (&t)) != MP_OKAY) {
370 /* get the remainder */
372 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
379 if ((res = mp_copy (a, c)) != MP_OKAY) {
384 /* shift by as many digits in the bit count */
385 if (b >= (int)DIGIT_BIT) {
386 mp_rshd (c, b / DIGIT_BIT);
389 /* shift any bit count < DIGIT_BIT */
404 void mp_zero (mp_int * a)
413 for (n = 0; n < a->alloc; n++) {
419 /* trim unused digits
421 * This is used to ensure that leading zero digits are
422 * trimed and the leading "used" digit will be non-zero
423 * Typically very fast. Also fixes the sign if there
424 * are no more leading digits
427 mp_clamp (mp_int * a)
429 /* decrease used while the most significant digit is
432 while (a->used > 0 && a->dp[a->used - 1] == 0) {
436 /* reset the sign flag if used == 0 */
443 /* swap the elements of two integers, for cases where you can't simply swap the
444 * mp_int pointers around
447 mp_exch (mp_int * a, mp_int * b)
457 /* shift right a certain number of bits */
458 void mp_rshb (mp_int *c, int x)
460 register mp_digit *tmpc, mask, shift;
465 mask = (((mp_digit)1) << D) - 1;
468 shift = DIGIT_BIT - D;
471 tmpc = c->dp + (c->used - 1);
475 for (x = c->used - 1; x >= 0; x--) {
476 /* get the lower bits of this word in a temp */
479 /* shift the current word and mix in the carry bits from previous word */
480 *tmpc = (*tmpc >> D) | (r << shift);
483 /* set the carry to the carry bits of the current word found above */
489 /* shift right a certain amount of digits */
490 void mp_rshd (mp_int * a, int b)
494 /* if b <= 0 then ignore it */
499 /* if b > used then simply zero it and return */
506 register mp_digit *bottom, *top;
508 /* shift the digits down */
513 /* top [offset into digits] */
516 /* this is implemented as a sliding window where
517 * the window is b-digits long and digits from
518 * the top of the window are copied to the bottom
522 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
524 \-------------------/ ---->
526 for (x = 0; x < (a->used - b); x++) {
530 /* zero the top digits */
531 for (; x < a->used; x++) {
536 /* remove excess digits */
541 /* calc a value mod 2**b */
543 mp_mod_2d (mp_int * a, int b, mp_int * c)
547 /* if b is <= 0 then zero the int */
553 /* if the modulus is larger than the value than return */
554 if (b >= (int) (a->used * DIGIT_BIT)) {
555 res = mp_copy (a, c);
560 if ((res = mp_copy (a, c)) != MP_OKAY) {
564 /* zero digits above the last digit of the modulus */
565 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
568 /* clear the digit that is not completely outside/inside the modulus */
569 c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
570 (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
576 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
577 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
581 /* make sure there are at least two digits */
583 if ((res = mp_grow(a, 2)) != MP_OKAY) {
591 /* read the bytes in */
593 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
601 a->dp[0] = (*b & MP_MASK);
602 a->dp[1] |= ((*b++ >> 7U) & 1);
611 /* shift left by a certain bit count */
612 int mp_mul_2d (mp_int * a, int b, mp_int * c)
619 if ((res = mp_copy (a, c)) != MP_OKAY) {
624 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
625 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
630 /* shift by as many digits in the bit count */
631 if (b >= (int)DIGIT_BIT) {
632 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
637 /* shift any bit count < DIGIT_BIT */
638 d = (mp_digit) (b % DIGIT_BIT);
640 register mp_digit *tmpc, shift, mask, r, rr;
643 /* bitmask for carries */
644 mask = (((mp_digit)1) << d) - 1;
647 shift = DIGIT_BIT - d;
654 for (x = 0; x < c->used; x++) {
655 /* get the higher bits of the current word */
656 rr = (*tmpc >> shift) & mask;
658 /* shift the current word and OR in the carry */
659 *tmpc = ((*tmpc << d) | r) & MP_MASK;
662 /* set the carry to the carry bits of the current word */
666 /* set final carry */
668 c->dp[(c->used)++] = r;
676 /* shift left a certain amount of digits */
677 int mp_lshd (mp_int * a, int b)
681 /* if its less than zero return */
686 /* grow to fit the new digits */
687 if (a->alloc < a->used + b) {
688 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
694 register mp_digit *top, *bottom;
696 /* increment the used by the shift amount then copy upwards */
700 top = a->dp + a->used - 1;
703 bottom = a->dp + a->used - 1 - b;
705 /* much like mp_rshd this is implemented using a sliding window
706 * except the window goes the otherway around. Copying from
707 * the bottom to the top. see bn_mp_rshd.c for more info.
709 for (x = a->used - 1; x >= b; x--) {
713 /* zero the lower digits */
715 for (x = 0; x < b; x++) {
723 /* this is a shell function that calls either the normal or Montgomery
724 * exptmod functions. Originally the call to the montgomery code was
725 * embedded in the normal function but that wasted alot of stack space
726 * for nothing (since 99% of the time the Montgomery code would be called)
728 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
732 /* modulus P must be positive */
733 if (P->sign == MP_NEG) {
737 /* if exponent X is negative we have to recurse */
738 if (X->sign == MP_NEG) {
739 #ifdef BN_MP_INVMOD_C
743 /* first compute 1/G mod P */
744 if ((err = mp_init(&tmpG)) != MP_OKAY) {
747 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
753 if ((err = mp_init(&tmpX)) != MP_OKAY) {
757 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
763 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
764 err = mp_exptmod(&tmpG, &tmpX, P, Y);
774 /* modified diminished radix reduction */
775 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
776 defined(BN_S_MP_EXPTMOD_C)
777 if (mp_reduce_is_2k_l(P) == MP_YES) {
778 return s_mp_exptmod(G, X, P, Y, 1);
782 #ifdef BN_MP_DR_IS_MODULUS_C
783 /* is it a DR modulus? */
784 dr = mp_dr_is_modulus(P);
790 #ifdef BN_MP_REDUCE_IS_2K_C
791 /* if not, is it a unrestricted DR modulus? */
793 dr = mp_reduce_is_2k(P) << 1;
797 /* if the modulus is odd or dr != 0 use the montgomery method */
798 #ifdef BN_MP_EXPTMOD_FAST_C
799 if (mp_isodd (P) == 1 || dr != 0) {
800 return mp_exptmod_fast (G, X, P, Y, dr);
803 #ifdef BN_S_MP_EXPTMOD_C
804 /* otherwise use the generic Barrett reduction technique */
805 return s_mp_exptmod (G, X, P, Y, 0);
807 /* no exptmod for evens */
810 #ifdef BN_MP_EXPTMOD_FAST_C
818 * Simple function copies the input and fixes the sign to positive
821 mp_abs (mp_int * a, mp_int * b)
827 if ((res = mp_copy (a, b)) != MP_OKAY) {
832 /* force the sign of b to positive */
839 /* hac 14.61, pp608 */
840 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
842 /* b cannot be negative */
843 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
847 #ifdef BN_FAST_MP_INVMOD_C
848 /* if the modulus is odd we can use a faster routine instead */
849 if (mp_isodd (b) == 1) {
850 return fast_mp_invmod (a, b, c);
854 #ifdef BN_MP_INVMOD_SLOW_C
855 return mp_invmod_slow(a, b, c);
860 /* computes the modular inverse via binary extended euclidean algorithm,
861 * that is c = 1/a mod b
863 * Based on slow invmod except this is optimized for the case where b is
864 * odd as per HAC Note 14.64 on pp. 610
866 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
868 mp_int x, y, u, v, B, D;
871 /* 2. [modified] b must be odd */
872 if (mp_iseven (b) == 1) {
876 /* init all our temps */
877 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
881 /* x == modulus, y == value to invert */
882 if ((res = mp_copy (b, &x)) != MP_OKAY) {
886 /* we need y = |a| */
887 if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
891 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
892 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
895 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
901 /* 4. while u is even do */
902 while (mp_iseven (&u) == 1) {
904 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
907 /* 4.2 if B is odd then */
908 if (mp_isodd (&B) == 1) {
909 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
914 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
919 /* 5. while v is even do */
920 while (mp_iseven (&v) == 1) {
922 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
925 /* 5.2 if D is odd then */
926 if (mp_isodd (&D) == 1) {
928 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
933 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
938 /* 6. if u >= v then */
939 if (mp_cmp (&u, &v) != MP_LT) {
940 /* u = u - v, B = B - D */
941 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
945 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
949 /* v - v - u, D = D - B */
950 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
954 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
959 /* if not zero goto step 4 */
960 if (mp_iszero (&u) == 0) {
964 /* now a = C, b = D, gcd == g*v */
966 /* if v != 1 then there is no inverse */
967 if (mp_cmp_d (&v, 1) != MP_EQ) {
972 /* b is now the inverse */
974 while (D.sign == MP_NEG) {
975 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
983 LBL_ERR:mp_clear(&x);
993 /* hac 14.61, pp608 */
994 int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
996 mp_int x, y, u, v, A, B, C, D;
999 /* b cannot be negative */
1000 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
1005 if ((res = mp_init_multi(&x, &y, &u, &v,
1006 &A, &B)) != MP_OKAY) {
1010 /* init rest of tmps temps */
1011 if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
1016 if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1019 if ((res = mp_copy (b, &y)) != MP_OKAY) {
1023 /* 2. [modified] if x,y are both even then return an error! */
1024 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
1029 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1030 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1033 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1040 /* 4. while u is even do */
1041 while (mp_iseven (&u) == 1) {
1043 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1046 /* 4.2 if A or B is odd then */
1047 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
1048 /* A = (A+y)/2, B = (B-x)/2 */
1049 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1052 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1056 /* A = A/2, B = B/2 */
1057 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1060 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1065 /* 5. while v is even do */
1066 while (mp_iseven (&v) == 1) {
1068 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1071 /* 5.2 if C or D is odd then */
1072 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
1073 /* C = (C+y)/2, D = (D-x)/2 */
1074 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1077 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1081 /* C = C/2, D = D/2 */
1082 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1085 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1090 /* 6. if u >= v then */
1091 if (mp_cmp (&u, &v) != MP_LT) {
1092 /* u = u - v, A = A - C, B = B - D */
1093 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1097 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1101 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1105 /* v - v - u, C = C - A, D = D - B */
1106 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1110 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1114 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1119 /* if not zero goto step 4 */
1120 if (mp_iszero (&u) == 0)
1123 /* now a = C, b = D, gcd == g*v */
1125 /* if v != 1 then there is no inverse */
1126 if (mp_cmp_d (&v, 1) != MP_EQ) {
1131 /* if its too low */
1132 while (mp_cmp_d(&C, 0) == MP_LT) {
1133 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1139 while (mp_cmp_mag(&C, b) != MP_LT) {
1140 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1145 /* C is now the inverse */
1148 LBL_ERR:mp_clear(&x);
1160 /* compare maginitude of two ints (unsigned) */
1161 int mp_cmp_mag (mp_int * a, mp_int * b)
1164 mp_digit *tmpa, *tmpb;
1166 /* compare based on # of non-zero digits */
1167 if (a->used > b->used) {
1171 if (a->used < b->used) {
1176 tmpa = a->dp + (a->used - 1);
1179 tmpb = b->dp + (a->used - 1);
1181 /* compare based on digits */
1182 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1183 if (*tmpa > *tmpb) {
1187 if (*tmpa < *tmpb) {
1195 /* compare two ints (signed)*/
1197 mp_cmp (mp_int * a, mp_int * b)
1199 /* compare based on sign */
1200 if (a->sign != b->sign) {
1201 if (a->sign == MP_NEG) {
1208 /* compare digits */
1209 if (a->sign == MP_NEG) {
1210 /* if negative compare opposite direction */
1211 return mp_cmp_mag(b, a);
1213 return mp_cmp_mag(a, b);
1218 /* compare a digit */
1219 int mp_cmp_d(mp_int * a, mp_digit b)
1221 /* compare based on sign */
1222 if (a->sign == MP_NEG) {
1226 /* compare based on magnitude */
1231 /* compare the only digit of a to b */
1234 } else if (a->dp[0] < b) {
1242 /* set to a digit */
1243 void mp_set (mp_int * a, mp_digit b)
1246 a->dp[0] = b & MP_MASK;
1247 a->used = (a->dp[0] != 0) ? 1 : 0;
1251 /* c = a mod b, 0 <= c < b */
1253 mp_mod (mp_int * a, mp_int * b, mp_int * c)
1258 if ((res = mp_init (&t)) != MP_OKAY) {
1262 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
1267 if (t.sign != b->sign) {
1268 res = mp_add (b, &t, c);
1279 /* slower bit-bang division... also smaller */
1280 int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1282 mp_int ta, tb, tq, q;
1285 /* is divisor zero ? */
1286 if (mp_iszero (b) == 1) {
1290 /* if a < b then q=0, r = a */
1291 if (mp_cmp_mag (a, b) == MP_LT) {
1293 res = mp_copy (a, d);
1303 /* init our temps */
1304 if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
1310 n = mp_count_bits(a) - mp_count_bits(b);
1311 if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
1312 ((res = mp_abs(b, &tb)) != MP_OKAY) ||
1313 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
1314 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1319 if (mp_cmp(&tb, &ta) != MP_GT) {
1320 if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
1321 ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1325 if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
1326 ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1331 /* now q == quotient and ta == remainder */
1333 n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1336 c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1340 d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1352 int mp_div_2(mp_int * a, mp_int * b)
1354 int x, res, oldused;
1357 if (b->alloc < a->used) {
1358 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1366 register mp_digit r, rr, *tmpa, *tmpb;
1369 tmpa = a->dp + b->used - 1;
1372 tmpb = b->dp + b->used - 1;
1376 for (x = b->used - 1; x >= 0; x--) {
1377 /* get the carry for the next iteration */
1380 /* shift the current digit, add in carry and store */
1381 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1383 /* forward carry to next iteration */
1387 /* zero excess digits */
1388 tmpb = b->dp + b->used;
1389 for (x = b->used; x < oldused; x++) {
1399 /* high level addition (handles signs) */
1400 int mp_add (mp_int * a, mp_int * b, mp_int * c)
1404 /* get sign of both inputs */
1408 /* handle two cases, not four */
1410 /* both positive or both negative */
1411 /* add their magnitudes, copy the sign */
1413 res = s_mp_add (a, b, c);
1415 /* one positive, the other negative */
1416 /* subtract the one with the greater magnitude from */
1417 /* the one of the lesser magnitude. The result gets */
1418 /* the sign of the one with the greater magnitude. */
1419 if (mp_cmp_mag (a, b) == MP_LT) {
1421 res = s_mp_sub (b, a, c);
1424 res = s_mp_sub (a, b, c);
1431 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
1433 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
1436 int olduse, res, min, max;
1438 /* find sizes, we let |a| <= |b| which means we have to sort
1439 * them. "x" will point to the input with the most digits
1441 if (a->used > b->used) {
1452 if (c->alloc < max + 1) {
1453 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
1458 /* get old used digit count and set new one */
1463 register mp_digit u, *tmpa, *tmpb, *tmpc;
1466 /* alias for digit pointers */
1477 /* zero the carry */
1479 for (i = 0; i < min; i++) {
1480 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
1481 *tmpc = *tmpa++ + *tmpb++ + u;
1483 /* U = carry bit of T[i] */
1484 u = *tmpc >> ((mp_digit)DIGIT_BIT);
1486 /* take away carry bit from T[i] */
1490 /* now copy higher words if any, that is in A+B
1491 * if A or B has more digits add those in
1494 for (; i < max; i++) {
1495 /* T[i] = X[i] + U */
1496 *tmpc = x->dp[i] + u;
1498 /* U = carry bit of T[i] */
1499 u = *tmpc >> ((mp_digit)DIGIT_BIT);
1501 /* take away carry bit from T[i] */
1509 /* clear digits above oldused */
1510 for (i = c->used; i < olduse; i++) {
1520 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
1522 s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1524 int olduse, res, min, max;
1531 if (c->alloc < max) {
1532 if ((res = mp_grow (c, max)) != MP_OKAY) {
1540 register mp_digit u, *tmpa, *tmpb, *tmpc;
1543 /* alias for digit pointers */
1548 /* set carry to zero */
1550 for (i = 0; i < min; i++) {
1551 /* T[i] = A[i] - B[i] - U */
1552 *tmpc = *tmpa++ - *tmpb++ - u;
1554 /* U = carry bit of T[i]
1555 * Note this saves performing an AND operation since
1556 * if a carry does occur it will propagate all the way to the
1557 * MSB. As a result a single shift is enough to get the carry
1559 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
1561 /* Clear carry from T[i] */
1565 /* now copy higher words if any, e.g. if A has more digits than B */
1566 for (; i < max; i++) {
1567 /* T[i] = A[i] - U */
1568 *tmpc = *tmpa++ - u;
1570 /* U = carry bit of T[i] */
1571 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
1573 /* Clear carry from T[i] */
1577 /* clear digits above used (since we may not have grown result above) */
1578 for (i = c->used; i < olduse; i++) {
1588 /* high level subtraction (handles signs) */
1590 mp_sub (mp_int * a, mp_int * b, mp_int * c)
1598 /* subtract a negative from a positive, OR */
1599 /* subtract a positive from a negative. */
1600 /* In either case, ADD their magnitudes, */
1601 /* and use the sign of the first number. */
1603 res = s_mp_add (a, b, c);
1605 /* subtract a positive from a positive, OR */
1606 /* subtract a negative from a negative. */
1607 /* First, take the difference between their */
1608 /* magnitudes, then... */
1609 if (mp_cmp_mag (a, b) != MP_LT) {
1610 /* Copy the sign from the first */
1612 /* The first has a larger or equal magnitude */
1613 res = s_mp_sub (a, b, c);
1615 /* The result has the *opposite* sign from */
1616 /* the first number. */
1617 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1618 /* The second has a larger magnitude */
1619 res = s_mp_sub (b, a, c);
1626 /* determines if reduce_2k_l can be used */
1627 int mp_reduce_is_2k_l(mp_int *a)
1633 } else if (a->used == 1) {
1635 } else if (a->used > 1) {
1636 /* if more than half of the digits are -1 we're sold */
1637 for (iy = ix = 0; ix < a->used; ix++) {
1638 if (a->dp[ix] == MP_MASK) {
1642 return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1649 /* determines if mp_reduce_2k can be used */
1650 int mp_reduce_is_2k(mp_int *a)
1657 } else if (a->used == 1) {
1659 } else if (a->used > 1) {
1660 iy = mp_count_bits(a);
1664 /* Test every bit from the second digit up, must be 1 */
1665 for (ix = DIGIT_BIT; ix < iy; ix++) {
1666 if ((a->dp[iw] & iz) == 0) {
1670 if (iz > (mp_digit)MP_MASK) {
1680 /* determines if a number is a valid DR modulus */
1681 int mp_dr_is_modulus(mp_int *a)
1685 /* must be at least two digits */
1690 /* must be of the form b**k - a [a <= b] so all
1691 * but the first digit must be equal to -1 (mod b).
1693 for (ix = 1; ix < a->used; ix++) {
1694 if (a->dp[ix] != MP_MASK) {
1702 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1704 * Uses a left-to-right k-ary sliding window to compute the modular
1706 * The value of k changes based on the size of the exponent.
1708 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1714 #define TAB_SIZE 256
1717 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
1720 mp_int M[TAB_SIZE], res;
1722 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1724 /* use a pointer to the reduction algorithm. This allows us to use
1725 * one of many reduction algorithms without modding the guts of
1726 * the code with if statements everywhere.
1728 int (*redux)(mp_int*,mp_int*,mp_digit);
1730 /* find window size */
1731 x = mp_count_bits (X);
1734 } else if (x <= 36) {
1736 } else if (x <= 140) {
1738 } else if (x <= 450) {
1740 } else if (x <= 1303) {
1742 } else if (x <= 3529) {
1755 /* init first cell */
1756 if ((err = mp_init(&M[1])) != MP_OKAY) {
1760 /* now init the second half of the array */
1761 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1762 if ((err = mp_init(&M[x])) != MP_OKAY) {
1763 for (y = 1<<(winsize-1); y < x; y++) {
1771 /* determine and setup reduction code */
1773 #ifdef BN_MP_MONTGOMERY_SETUP_C
1774 /* now setup montgomery */
1775 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1783 /* automatically pick the comba one if available (saves quite a few
1785 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
1786 if (((P->used * 2 + 1) < MP_WARRAY) &&
1787 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1788 redux = fast_mp_montgomery_reduce;
1792 #ifdef BN_MP_MONTGOMERY_REDUCE_C
1793 /* use slower baseline Montgomery method */
1794 redux = mp_montgomery_reduce;
1800 } else if (redmode == 1) {
1801 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
1802 /* setup DR reduction for moduli of the form B**k - b */
1803 mp_dr_setup(P, &mp);
1804 redux = mp_dr_reduce;
1810 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
1811 /* setup DR reduction for moduli of the form 2**k - b */
1812 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1815 redux = mp_reduce_2k;
1823 if ((err = mp_init (&res)) != MP_OKAY) {
1831 * The first half of the table is not computed though accept for M[0] and M[1]
1835 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
1836 /* now we need R mod m */
1837 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1845 /* now set M[1] to G * R mod m */
1846 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1851 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
1856 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
1857 if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
1861 for (x = 0; x < (winsize - 1); x++) {
1862 if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
1865 if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
1870 /* create upper table */
1871 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1872 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1875 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
1880 /* set initial mode and bit cnt */
1884 digidx = X->used - 1;
1889 /* grab next digit as required */
1890 if (--bitcnt == 0) {
1891 /* if digidx == -1 we are out of digits so break */
1895 /* read next digit and reset bitcnt */
1896 buf = X->dp[digidx--];
1897 bitcnt = (int)DIGIT_BIT;
1900 /* grab the next msb from the exponent */
1901 y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
1902 buf <<= (mp_digit)1;
1904 /* if the bit is zero and mode == 0 then we ignore it
1905 * These represent the leading zero bits before the first 1 bit
1906 * in the exponent. Technically this opt is not required but it
1907 * does lower the # of trivial squaring/reductions used
1909 if (mode == 0 && y == 0) {
1913 /* if the bit is zero and mode == 1 then we square */
1914 if (mode == 1 && y == 0) {
1915 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1918 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1924 /* else we add it to the window */
1925 bitbuf |= (y << (winsize - ++bitcpy));
1928 if (bitcpy == winsize) {
1929 /* ok window is filled so square as required and multiply */
1931 for (x = 0; x < winsize; x++) {
1932 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1935 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1941 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
1944 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1948 /* empty window and reset */
1955 /* if bits remain then square/multiply */
1956 if (mode == 2 && bitcpy > 0) {
1957 /* square then multiply if the bit is set */
1958 for (x = 0; x < bitcpy; x++) {
1959 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1962 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1966 /* get next bit of the window */
1968 if ((bitbuf & (1 << winsize)) != 0) {
1970 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
1973 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1981 /* fixup result if Montgomery reduction is used
1982 * recall that any value in a Montgomery system is
1983 * actually multiplied by R mod n. So we have
1984 * to reduce one more time to cancel out the factor
1987 if ((err = redux(&res, P, mp)) != MP_OKAY) {
1992 /* swap res with Y */
1995 LBL_RES:mp_clear (&res);
1998 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2005 /* setups the montgomery reduction stuff */
2007 mp_montgomery_setup (mp_int * n, mp_digit * rho)
2011 /* fast inversion mod 2**k
2013 * Based on the fact that
2015 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
2016 * => 2*X*A - X*X*A*A = 1
2017 * => 2*(1) - (1) = 1
2025 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2026 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
2027 #if !defined(MP_8BIT)
2028 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
2030 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
2031 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
2034 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
2037 /* rho = -1/m mod b */
2038 /* TAO, switched mp_word casts to mp_digit to shut up compiler */
2039 *rho = (((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
2045 /* computes xR**-1 == x (mod N) via Montgomery Reduction
2047 * This is an optimized implementation of montgomery_reduce
2048 * which uses the comba method to quickly calculate the columns of the
2051 * Based on Algorithm 14.32 on pp.601 of HAC.
2053 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2055 int ix, res, olduse;
2056 #ifdef CYASSL_SMALL_STACK
2057 mp_word* W; /* uses dynamic memory and slower */
2059 mp_word W[MP_WARRAY];
2062 /* get old used count */
2065 /* grow a as required */
2066 if (x->alloc < n->used + 1) {
2067 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2072 #ifdef CYASSL_SMALL_STACK
2073 W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2078 /* first we have to get the digits of the input into
2079 * an array of double precision words W[...]
2082 register mp_word *_W;
2083 register mp_digit *tmpx;
2085 /* alias for the W[] array */
2088 /* alias for the digits of x*/
2091 /* copy the digits of a into W[0..a->used-1] */
2092 for (ix = 0; ix < x->used; ix++) {
2096 /* zero the high words of W[a->used..m->used*2] */
2097 for (; ix < n->used * 2 + 1; ix++) {
2102 /* now we proceed to zero successive digits
2103 * from the least significant upwards
2105 for (ix = 0; ix < n->used; ix++) {
2106 /* mu = ai * m' mod b
2108 * We avoid a double precision multiplication (which isn't required)
2109 * by casting the value down to a mp_digit. Note this requires
2110 * that W[ix-1] have the carry cleared (see after the inner loop)
2112 register mp_digit mu;
2113 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2115 /* a = a + mu * m * b**i
2117 * This is computed in place and on the fly. The multiplication
2118 * by b**i is handled by offseting which columns the results
2121 * Note the comba method normally doesn't handle carries in the
2122 * inner loop In this case we fix the carry from the previous
2123 * column since the Montgomery reduction requires digits of the
2124 * result (so far) [see above] to work. This is
2125 * handled by fixing up one carry after the inner loop. The
2126 * carry fixups are done in order so after these loops the
2127 * first m->used words of W[] have the carries fixed
2131 register mp_digit *tmpn;
2132 register mp_word *_W;
2134 /* alias for the digits of the modulus */
2137 /* Alias for the columns set by an offset of ix */
2141 for (iy = 0; iy < n->used; iy++) {
2142 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
2146 /* now fix carry for next digit, W[ix+1] */
2147 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2150 /* now we have to propagate the carries and
2151 * shift the words downward [all those least
2152 * significant digits we zeroed].
2155 register mp_digit *tmpx;
2156 register mp_word *_W, *_W1;
2158 /* nox fix rest of carries */
2160 /* alias for current word */
2163 /* alias for next word, where the carry goes */
2166 for (; ix <= n->used * 2 + 1; ix++) {
2167 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
2170 /* copy out, A = A/b**n
2172 * The result is A/b**n but instead of converting from an
2173 * array of mp_word to mp_digit than calling mp_rshd
2174 * we just copy them in the right order
2177 /* alias for destination word */
2180 /* alias for shifted double precision result */
2183 for (ix = 0; ix < n->used + 1; ix++) {
2184 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
2187 /* zero oldused digits, if the input a was larger than
2188 * m->used+1 we'll have to clear the digits
2190 for (; ix < olduse; ix++) {
2195 /* set the max used and clamp */
2196 x->used = n->used + 1;
2199 #ifdef CYASSL_SMALL_STACK
2200 XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
2203 /* if A >= m then A = A - m */
2204 if (mp_cmp_mag (x, n) != MP_LT) {
2205 return s_mp_sub (x, n, x);
2211 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2213 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2218 /* can the fast reduction [comba] method be used?
2220 * Note that unlike in mul you're safely allowed *less*
2221 * than the available columns [255 per default] since carries
2222 * are fixed up in the inner loop.
2224 digs = n->used * 2 + 1;
2225 if ((digs < MP_WARRAY) &&
2227 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2228 return fast_mp_montgomery_reduce (x, n, rho);
2231 /* grow the input as required */
2232 if (x->alloc < digs) {
2233 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2239 for (ix = 0; ix < n->used; ix++) {
2240 /* mu = ai * rho mod b
2242 * The value of rho must be precalculated via
2243 * montgomery_setup() such that
2244 * it equals -1/n0 mod b this allows the
2245 * following inner loop to reduce the
2246 * input one digit at a time
2248 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2250 /* a = a + mu * m * b**i */
2253 register mp_digit *tmpn, *tmpx, u;
2256 /* alias for digits of the modulus */
2259 /* alias for the digits of x [the input] */
2262 /* set the carry to zero */
2265 /* Multiply and add in place */
2266 for (iy = 0; iy < n->used; iy++) {
2267 /* compute product and sum */
2268 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2269 ((mp_word) u) + ((mp_word) * tmpx);
2272 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2275 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2277 /* At this point the ix'th digit of x should be zero */
2280 /* propagate carries upwards as required*/
2283 u = *tmpx >> DIGIT_BIT;
2289 /* at this point the n.used'th least
2290 * significant digits of x are all zero
2291 * which means we can shift x to the
2292 * right by n.used digits and the
2293 * residue is unchanged.
2296 /* x = x/b**n.used */
2298 mp_rshd (x, n->used);
2300 /* if x >= n then x = x - n */
2301 if (mp_cmp_mag (x, n) != MP_LT) {
2302 return s_mp_sub (x, n, x);
2309 /* determines the setup value */
2310 void mp_dr_setup(mp_int *a, mp_digit *d)
2312 /* the casts are required if DIGIT_BIT is one less than
2313 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2315 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
2316 ((mp_word)a->dp[0]));
2320 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2322 * Based on algorithm from the paper
2324 * "Generating Efficient Primes for Discrete Log Cryptosystems"
2325 * Chae Hoon Lim, Pil Joong Lee,
2326 * POSTECH Information Research Laboratories
2328 * The modulus must be of a special format [see manual]
2330 * Has been modified to use algorithm 7.10 from the LTM book instead
2332 * Input x must be in the range 0 <= x <= (n-1)**2
2335 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2339 mp_digit mu, *tmpx1, *tmpx2;
2341 /* m = digits in modulus */
2344 /* ensure that "x" has at least 2m digits */
2345 if (x->alloc < m + m) {
2346 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2351 /* top of loop, this is where the code resumes if
2352 * another reduction pass is required.
2355 /* aliases for digits */
2356 /* alias for lower half of x */
2359 /* alias for upper half of x, or x/B**m */
2362 /* set carry to zero */
2365 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
2366 for (i = 0; i < m; i++) {
2367 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
2368 *tmpx1++ = (mp_digit)(r & MP_MASK);
2369 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2372 /* set final carry */
2375 /* zero words above m */
2376 for (i = m + 1; i < x->used; i++) {
2380 /* clamp, sub and return */
2383 /* if x >= n then subtract and reduce again
2384 * Each successive "recursion" makes the input smaller and smaller.
2386 if (mp_cmp_mag (x, n) != MP_LT) {
2394 /* reduces a modulo n where n is of the form 2**p - d */
2395 int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
2400 if ((res = mp_init(&q)) != MP_OKAY) {
2404 p = mp_count_bits(n);
2406 /* q = a/2**p, a = a mod 2**p */
2407 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2413 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
2419 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2423 if (mp_cmp_mag(a, n) != MP_LT) {
2434 /* determines the setup value */
2435 int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
2440 if ((res = mp_init(&tmp)) != MP_OKAY) {
2444 p = mp_count_bits(a);
2445 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
2450 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
2461 /* computes a = 2**b
2463 * Simple algorithm which zeroes the int, grows it then just sets one bit
2467 mp_2expt (mp_int * a, int b)
2471 /* zero a as per default */
2474 /* grow a to accomodate the single bit */
2475 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
2479 /* set the used count of where the bit will go */
2480 a->used = b / DIGIT_BIT + 1;
2482 /* put the single bit in its place */
2483 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
2489 /* multiply by a digit */
2491 mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
2493 mp_digit u, *tmpa, *tmpc;
2495 int ix, res, olduse;
2497 /* make sure c is big enough to hold a*b */
2498 if (c->alloc < a->used + 1) {
2499 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2504 /* get the original destinations used count */
2510 /* alias for a->dp [source] */
2513 /* alias for c->dp [dest] */
2519 /* compute columns */
2520 for (ix = 0; ix < a->used; ix++) {
2521 /* compute product and carry sum for this term */
2522 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
2524 /* mask off higher bits to get a single digit */
2525 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2527 /* send carry into next iteration */
2528 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2531 /* store final carry [if any] and increment ix offset */
2535 /* now zero digits above the top */
2536 while (ix++ < olduse) {
2540 /* set used count */
2541 c->used = a->used + 1;
2548 /* d = a * b (mod c) */
2549 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2554 if ((res = mp_init (&t)) != MP_OKAY) {
2558 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
2562 res = mp_mod (&t, c, d);
2568 /* computes b = a*a */
2570 mp_sqr (mp_int * a, mp_int * b)
2575 #ifdef BN_FAST_S_MP_SQR_C
2576 /* can we use the fast comba multiplier? */
2577 if ((a->used * 2 + 1) < MP_WARRAY &&
2579 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
2580 res = fast_s_mp_sqr (a, b);
2583 #ifdef BN_S_MP_SQR_C
2584 res = s_mp_sqr (a, b);
2594 /* high level multiplication (handles sign) */
2595 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
2598 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2601 /* can we use the fast multiplier?
2603 * The fast multiplier can be used if the output will
2604 * have less than MP_WARRAY digits and the number of
2605 * digits won't affect carry propagation
2607 int digs = a->used + b->used + 1;
2609 #ifdef BN_FAST_S_MP_MUL_DIGS_C
2610 if ((digs < MP_WARRAY) &&
2611 MIN(a->used, b->used) <=
2612 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2613 res = fast_s_mp_mul_digs (a, b, c, digs);
2616 #ifdef BN_S_MP_MUL_DIGS_C
2617 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2623 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2629 int mp_mul_2(mp_int * a, mp_int * b)
2631 int x, res, oldused;
2633 /* grow to accomodate result */
2634 if (b->alloc < a->used + 1) {
2635 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2644 register mp_digit r, rr, *tmpa, *tmpb;
2646 /* alias for source */
2649 /* alias for dest */
2654 for (x = 0; x < a->used; x++) {
2656 /* get what will be the *next* carry bit from the
2657 * MSB of the current digit
2659 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2661 /* now shift up this digit, add in the carry [from the previous] */
2662 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2664 /* copy the carry that would be from the source
2665 * digit into the next iteration
2670 /* new leading digit? */
2672 /* add a MSB which is always 1 at this point */
2677 /* now zero any excess digits on the destination
2678 * that we didn't write to
2680 tmpb = b->dp + b->used;
2681 for (x = b->used; x < oldused; x++) {
2690 /* divide by three (based on routine from MPI and the GMP manual) */
2692 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
2699 /* b = 2**DIGIT_BIT / 3 */
2700 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
2702 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2709 for (ix = a->used - 1; ix >= 0; ix--) {
2710 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
2713 /* multiply w by [1/3] */
2714 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
2716 /* now subtract 3 * [w/3] from w, to get the remainder */
2719 /* fixup the remainder as required since
2720 * the optimization is not exact.
2729 q.dp[ix] = (mp_digit)t;
2732 /* [optional] store the remainder */
2737 /* [optional] store the quotient */
2748 /* init an mp_init for a given size */
2749 int mp_init_size (mp_int * a, int size)
2753 /* pad size so there are always extra digits */
2754 size += (MP_PREC * 2) - (size % MP_PREC);
2757 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
2758 DYNAMIC_TYPE_BIGINT);
2759 if (a->dp == NULL) {
2763 /* set the members */
2768 /* zero the digits */
2769 for (x = 0; x < size; x++) {
2777 /* the jist of squaring...
2778 * you do like mult except the offset of the tmpx [one that
2779 * starts closer to zero] can't equal the offset of tmpy.
2780 * So basically you set up iy like before then you min it with
2781 * (ty-tx) so that it never happens. You double all those
2782 * you add in the inner loop
2784 After that loop you do the squares and add them in.
2787 int fast_s_mp_sqr (mp_int * a, mp_int * b)
2789 int olduse, res, pa, ix, iz;
2790 #ifdef CYASSL_SMALL_STACK
2791 mp_digit* W; /* uses dynamic memory and slower */
2793 mp_digit W[MP_WARRAY];
2798 /* grow the destination as required */
2799 pa = a->used + a->used;
2800 if (b->alloc < pa) {
2801 if ((res = mp_grow (b, pa)) != MP_OKAY) {
2807 return MP_RANGE; /* TAO range check */
2809 #ifdef CYASSL_SMALL_STACK
2810 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2815 /* number of output digits to produce */
2817 for (ix = 0; ix < pa; ix++) {
2825 /* get offsets into the two bignums */
2826 ty = MIN(a->used-1, ix);
2829 /* setup temp aliases */
2833 /* this is the number of times the loop will iterrate, essentially
2834 while (tx++ < a->used && ty-- >= 0) { ... }
2836 iy = MIN(a->used-tx, ty+1);
2838 /* now for squaring tx can never equal ty
2839 * we halve the distance since they approach at a rate of 2x
2840 * and we have to round because odd cases need to be executed
2842 iy = MIN(iy, (ty-tx+1)>>1);
2845 for (iz = 0; iz < iy; iz++) {
2846 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2849 /* double the inner product and add carry */
2852 /* even columns have the square term in them */
2854 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
2858 W[ix] = (mp_digit)(_W & MP_MASK);
2860 /* make next carry */
2861 W1 = _W >> ((mp_word)DIGIT_BIT);
2866 b->used = a->used+a->used;
2871 for (ix = 0; ix < pa; ix++) {
2872 *tmpb++ = W[ix] & MP_MASK;
2875 /* clear unused digits [that existed in the old copy of c] */
2876 for (; ix < olduse; ix++) {
2882 #ifdef CYASSL_SMALL_STACK
2883 XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
2890 /* Fast (comba) multiplier
2892 * This is the fast column-array [comba] multiplier. It is
2893 * designed to compute the columns of the product first
2894 * then handle the carries afterwards. This has the effect
2895 * of making the nested loops that compute the columns very
2896 * simple and schedulable on super-scalar processors.
2898 * This has been modified to produce a variable number of
2899 * digits of output so if say only a half-product is required
2900 * you don't have to compute the upper half (a feature
2901 * required for fast Barrett reduction).
2903 * Based on Algorithm 14.12 on pp.595 of HAC.
2906 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2908 int olduse, res, pa, ix, iz;
2909 #ifdef CYASSL_SMALL_STACK
2910 mp_digit* W; /* uses dynamic memory and slower */
2912 mp_digit W[MP_WARRAY];
2914 register mp_word _W;
2916 /* grow the destination as required */
2917 if (c->alloc < digs) {
2918 if ((res = mp_grow (c, digs)) != MP_OKAY) {
2923 /* number of output digits to produce */
2924 pa = MIN(digs, a->used + b->used);
2926 return MP_RANGE; /* TAO range check */
2928 #ifdef CYASSL_SMALL_STACK
2929 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2934 /* clear the carry */
2936 for (ix = 0; ix < pa; ix++) {
2939 mp_digit *tmpx, *tmpy;
2941 /* get offsets into the two bignums */
2942 ty = MIN(b->used-1, ix);
2945 /* setup temp aliases */
2949 /* this is the number of times the loop will iterrate, essentially
2950 while (tx++ < a->used && ty-- >= 0) { ... }
2952 iy = MIN(a->used-tx, ty+1);
2955 for (iz = 0; iz < iy; ++iz) {
2956 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2961 W[ix] = ((mp_digit)_W) & MP_MASK;
2963 /* make next carry */
2964 _W = _W >> ((mp_word)DIGIT_BIT);
2972 register mp_digit *tmpc;
2974 for (ix = 0; ix < pa+1; ix++) {
2975 /* now extract the previous digit [below the carry] */
2979 /* clear unused digits [that existed in the old copy of c] */
2980 for (; ix < olduse; ix++) {
2986 #ifdef CYASSL_SMALL_STACK
2987 XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
2994 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
2995 int s_mp_sqr (mp_int * a, mp_int * b)
2998 int res, ix, iy, pa;
3000 mp_digit u, tmpx, *tmpt;
3003 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3007 /* default used is maximum possible size */
3010 for (ix = 0; ix < pa; ix++) {
3011 /* first calculate the digit at 2*ix */
3012 /* calculate double precision result */
3013 r = ((mp_word) t.dp[2*ix]) +
3014 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3016 /* store lower part in result */
3017 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3020 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3022 /* left hand side of A[ix] * A[iy] */
3025 /* alias for where to store the results */
3026 tmpt = t.dp + (2*ix + 1);
3028 for (iy = ix + 1; iy < pa; iy++) {
3029 /* first calculate the product */
3030 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3032 /* now calculate the double precision result, note we use
3033 * addition instead of *2 since it's easier to optimize
3035 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3037 /* store lower part */
3038 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3041 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3043 /* propagate upwards */
3044 while (u != ((mp_digit) 0)) {
3045 r = ((mp_word) *tmpt) + ((mp_word) u);
3046 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3047 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3058 /* multiplies |a| * |b| and only computes upto digs digits of result
3059 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
3060 * many digits of output are created.
3062 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3065 int res, pa, pb, ix, iy;
3068 mp_digit tmpx, *tmpt, *tmpy;
3070 /* can we use the fast multiplier? */
3071 if (((digs) < MP_WARRAY) &&
3072 MIN (a->used, b->used) <
3073 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3074 return fast_s_mp_mul_digs (a, b, c, digs);
3077 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
3082 /* compute the digits of the product directly */
3084 for (ix = 0; ix < pa; ix++) {
3085 /* set the carry to zero */
3088 /* limit ourselves to making digs digits of output */
3089 pb = MIN (b->used, digs - ix);
3091 /* setup some aliases */
3092 /* copy of the digit from a used within the nested loop */
3095 /* an alias for the destination shifted ix places */
3098 /* an alias for the digits of b */
3101 /* compute the columns of the output and propagate the carry */
3102 for (iy = 0; iy < pb; iy++) {
3103 /* compute the column as a mp_word */
3104 r = ((mp_word)*tmpt) +
3105 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
3108 /* the new column is the lower part of the result */
3109 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3111 /* get the carry word from the result */
3112 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3114 /* set carry if it is placed below digs */
3115 if (ix + iy < digs) {
3129 * shifts with subtractions when the result is greater than b.
3131 * The method is slightly modified to shift B unconditionally upto just under
3132 * the leading bit of b. This saves alot of multiple precision shifting.
3134 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3138 /* how many bits of last digit does b use */
3139 bits = mp_count_bits (b) % DIGIT_BIT;
3142 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
3151 /* now compute C = A * B mod b */
3152 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3153 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
3156 if (mp_cmp_mag (a, b) != MP_LT) {
3157 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
3170 #define TAB_SIZE 256
3173 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3175 mp_int M[TAB_SIZE], res, mu;
3177 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3178 int (*redux)(mp_int*,mp_int*,mp_int*);
3180 /* find window size */
3181 x = mp_count_bits (X);
3184 } else if (x <= 36) {
3186 } else if (x <= 140) {
3188 } else if (x <= 450) {
3190 } else if (x <= 1303) {
3192 } else if (x <= 3529) {
3205 /* init first cell */
3206 if ((err = mp_init(&M[1])) != MP_OKAY) {
3210 /* now init the second half of the array */
3211 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3212 if ((err = mp_init(&M[x])) != MP_OKAY) {
3213 for (y = 1<<(winsize-1); y < x; y++) {
3221 /* create mu, used for Barrett reduction */
3222 if ((err = mp_init (&mu)) != MP_OKAY) {
3227 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3232 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3235 redux = mp_reduce_2k_l;
3240 * The M table contains powers of the base,
3241 * e.g. M[x] = G**x mod P
3243 * The first half of the table is not
3244 * computed though accept for M[0] and M[1]
3246 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3250 /* compute the value at M[1<<(winsize-1)] by squaring
3251 * M[1] (winsize-1) times
3253 if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3257 for (x = 0; x < (winsize - 1); x++) {
3259 if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
3260 &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3264 /* reduce modulo P */
3265 if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
3270 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3271 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
3273 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3274 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3277 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3283 if ((err = mp_init (&res)) != MP_OKAY) {
3288 /* set initial mode and bit cnt */
3292 digidx = X->used - 1;
3297 /* grab next digit as required */
3298 if (--bitcnt == 0) {
3299 /* if digidx == -1 we are out of digits */
3303 /* read next digit and reset the bitcnt */
3304 buf = X->dp[digidx--];
3305 bitcnt = (int) DIGIT_BIT;
3308 /* grab the next msb from the exponent */
3309 y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3310 buf <<= (mp_digit)1;
3312 /* if the bit is zero and mode == 0 then we ignore it
3313 * These represent the leading zero bits before the first 1 bit
3314 * in the exponent. Technically this opt is not required but it
3315 * does lower the # of trivial squaring/reductions used
3317 if (mode == 0 && y == 0) {
3321 /* if the bit is zero and mode == 1 then we square */
3322 if (mode == 1 && y == 0) {
3323 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3326 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3332 /* else we add it to the window */
3333 bitbuf |= (y << (winsize - ++bitcpy));
3336 if (bitcpy == winsize) {
3337 /* ok window is filled so square as required and multiply */
3339 for (x = 0; x < winsize; x++) {
3340 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3343 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3349 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3352 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3356 /* empty window and reset */
3363 /* if bits remain then square/multiply */
3364 if (mode == 2 && bitcpy > 0) {
3365 /* square then multiply if the bit is set */
3366 for (x = 0; x < bitcpy; x++) {
3367 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3370 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3375 if ((bitbuf & (1 << winsize)) != 0) {
3377 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3380 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3389 LBL_RES:mp_clear (&res);
3390 LBL_MU:mp_clear (&mu);
3393 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3400 /* pre-calculate the value required for Barrett reduction
3401 * For a given modulus "b" it calulates the value required in "a"
3403 int mp_reduce_setup (mp_int * a, mp_int * b)
3407 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3410 return mp_div (a, b, a, NULL);
3414 /* reduces x mod m, assumes 0 < x < m**2, mu is
3415 * precomputed via mp_reduce_setup.
3416 * From HAC pp.604 Algorithm 14.42
3418 int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
3421 int res, um = m->used;
3424 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3428 /* q1 = x / b**(k-1) */
3429 mp_rshd (&q, um - 1);
3431 /* according to HAC this optimization is ok */
3432 if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3433 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3437 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
3438 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3441 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
3442 if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3453 /* q3 = q2 / b**(k+1) */
3454 mp_rshd (&q, um + 1);
3456 /* x = x mod b**(k+1), quick (no division) */
3457 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3461 /* q = q * m mod b**(k+1), quick (no division) */
3462 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3467 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3471 /* If x < 0, add b**(k+1) to it */
3472 if (mp_cmp_d (x, 0) == MP_LT) {
3474 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3476 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3480 /* Back off if it's too big */
3481 while (mp_cmp (x, m) != MP_LT) {
3482 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3494 /* reduces a modulo n where n is of the form 2**p - d
3495 This differs from reduce_2k since "d" can be larger
3496 than a single digit.
3498 int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
3503 if ((res = mp_init(&q)) != MP_OKAY) {
3507 p = mp_count_bits(n);
3509 /* q = a/2**p, a = a mod 2**p */
3510 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3515 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
3520 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3524 if (mp_cmp_mag(a, n) != MP_LT) {
3535 /* determines the setup value */
3536 int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
3541 if ((res = mp_init(&tmp)) != MP_OKAY) {
3545 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
3549 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
3559 /* multiplies |a| * |b| and does not compute the lower digs digits
3560 * [meant to get the higher part of the product]
3563 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3566 int res, pa, pb, ix, iy;
3569 mp_digit tmpx, *tmpt, *tmpy;
3571 /* can we use the fast multiplier? */
3572 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
3573 if (((a->used + b->used + 1) < MP_WARRAY)
3574 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3575 return fast_s_mp_mul_high_digs (a, b, c, digs);
3579 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
3582 t.used = a->used + b->used + 1;
3586 for (ix = 0; ix < pa; ix++) {
3587 /* clear the carry */
3590 /* left hand side of A[ix] * B[iy] */
3593 /* alias to the address of where the digits will be stored */
3594 tmpt = &(t.dp[digs]);
3596 /* alias for where to read the right hand side from */
3597 tmpy = b->dp + (digs - ix);
3599 for (iy = digs - ix; iy < pb; iy++) {
3600 /* calculate the double precision result */
3601 r = ((mp_word)*tmpt) +
3602 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
3605 /* get the lower part */
3606 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3608 /* carry the carry */
3609 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3620 /* this is a modified version of fast_s_mul_digs that only produces
3621 * output digits *above* digs. See the comments for fast_s_mul_digs
3622 * to see how it works.
3624 * This is used in the Barrett reduction since for one of the multiplications
3625 * only the higher digits were needed. This essentially halves the work.
3627 * Based on Algorithm 14.12 on pp.595 of HAC.
3629 int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3631 int olduse, res, pa, ix, iz;
3632 #ifdef CYASSL_SMALL_STACK
3633 mp_digit* W; /* uses dynamic memory and slower */
3635 mp_digit W[MP_WARRAY];
3639 /* grow the destination as required */
3640 pa = a->used + b->used;
3641 if (c->alloc < pa) {
3642 if ((res = mp_grow (c, pa)) != MP_OKAY) {
3648 return MP_RANGE; /* TAO range check */
3650 #ifdef CYASSL_SMALL_STACK
3651 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
3656 /* number of output digits to produce */
3657 pa = a->used + b->used;
3659 for (ix = digs; ix < pa; ix++) {
3661 mp_digit *tmpx, *tmpy;
3663 /* get offsets into the two bignums */
3664 ty = MIN(b->used-1, ix);
3667 /* setup temp aliases */
3671 /* this is the number of times the loop will iterrate, essentially its
3672 while (tx++ < a->used && ty-- >= 0) { ... }
3674 iy = MIN(a->used-tx, ty+1);
3677 for (iz = 0; iz < iy; iz++) {
3678 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3682 W[ix] = ((mp_digit)_W) & MP_MASK;
3684 /* make next carry */
3685 _W = _W >> ((mp_word)DIGIT_BIT);
3693 register mp_digit *tmpc;
3695 tmpc = c->dp + digs;
3696 for (ix = digs; ix <= pa; ix++) {
3697 /* now extract the previous digit [below the carry] */
3701 /* clear unused digits [that existed in the old copy of c] */
3702 for (; ix < olduse; ix++) {
3708 #ifdef CYASSL_SMALL_STACK
3709 XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
3716 /* set a 32-bit const */
3717 int mp_set_int (mp_int * a, unsigned long b)
3723 /* set four bits at a time */
3724 for (x = 0; x < 8; x++) {
3725 /* shift the number up four bits */
3726 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3730 /* OR in the top four bits of the source */
3731 a->dp[0] |= (b >> 28) & 15;
3733 /* shift the source up to the next four bits */
3736 /* ensure that digits are not clamped off */
3744 #if defined(CYASSL_KEY_GEN) || defined(HAVE_ECC)
3746 /* c = a * a (mod b) */
3747 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
3752 if ((res = mp_init (&t)) != MP_OKAY) {
3756 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3760 res = mp_mod (&t, b, c);
3768 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(CYASSL_SNIFFER) || defined(CYASSL_HAVE_WOLFSCEP) || defined(CYASSL_KEY_GEN)
3770 /* single digit addition */
3771 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
3773 int res, ix, oldused;
3774 mp_digit *tmpa, *tmpc, mu;
3776 /* grow c as required */
3777 if (c->alloc < a->used + 1) {
3778 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3783 /* if a is negative and |a| >= b, call c = |a| - b */
3784 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
3785 /* temporarily fix sign of a */
3789 res = mp_sub_d(a, b, c);
3792 a->sign = c->sign = MP_NEG;
3800 /* old number of used digits in c */
3803 /* sign always positive */
3809 /* destination alias */
3812 /* if a is positive */
3813 if (a->sign == MP_ZPOS) {
3814 /* add digit, after this we're propagating
3817 *tmpc = *tmpa++ + b;
3818 mu = *tmpc >> DIGIT_BIT;
3821 /* now handle rest of the digits */
3822 for (ix = 1; ix < a->used; ix++) {
3823 *tmpc = *tmpa++ + mu;
3824 mu = *tmpc >> DIGIT_BIT;
3827 /* set final carry */
3828 if (mu != 0 && ix < c->alloc) {
3834 c->used = a->used + 1;
3836 /* a was negative and |a| < b */
3839 /* the result is a single digit */
3841 *tmpc++ = b - a->dp[0];
3846 /* setup count so the clearing of oldused
3847 * can fall through correctly
3852 /* now zero to oldused */
3853 while (ix++ < oldused) {
3862 /* single digit subtraction */
3863 int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3865 mp_digit *tmpa, *tmpc, mu;
3866 int res, ix, oldused;
3868 /* grow c as required */
3869 if (c->alloc < a->used + 1) {
3870 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3875 /* if a is negative just do an unsigned
3876 * addition [with fudged signs]
3878 if (a->sign == MP_NEG) {
3880 res = mp_add_d(a, b, c);
3881 a->sign = c->sign = MP_NEG;
3894 /* if a <= b simply fix the single digit */
3895 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3897 *tmpc++ = b - *tmpa;
3903 /* negative/1digit */
3911 /* subtract first digit */
3912 *tmpc = *tmpa++ - b;
3913 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3916 /* handle rest of the digits */
3917 for (ix = 1; ix < a->used; ix++) {
3918 *tmpc = *tmpa++ - mu;
3919 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3924 /* zero excess digits */
3925 while (ix++ < oldused) {
3932 #endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
3935 #ifdef CYASSL_KEY_GEN
3937 int mp_cnt_lsb(mp_int *a);
3939 static int s_is_power_of_two(mp_digit b, int *p)
3943 /* fast return if no power of two */
3944 if ((b==0) || (b & (b-1))) {
3948 for (x = 0; x < DIGIT_BIT; x++) {
3949 if (b == (((mp_digit)1)<<x)) {
3957 /* single digit division (based on routine from MPI) */
3958 static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
3965 /* cannot divide by zero */
3971 if (b == 1 || mp_iszero(a) == 1) {
3976 return mp_copy(a, c);
3981 /* power of two ? */
3982 if (s_is_power_of_two(b, &ix) == 1) {
3984 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
3987 return mp_div_2d(a, ix, c, NULL);
3992 #ifdef BN_MP_DIV_3_C
3995 return mp_div_3(a, c, d);
3999 /* no easy answer [c'est la vie]. Just division */
4000 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4007 for (ix = a->used - 1; ix >= 0; ix--) {
4008 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
4011 t = (mp_digit)(w / b);
4012 w -= ((mp_word)t) * ((mp_word)b);
4016 q.dp[ix] = (mp_digit)t;
4033 static int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
4035 return mp_div_d(a, b, NULL, c);
4039 const mp_digit ltm_prime_tab[] = {
4040 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4041 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4042 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4043 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4046 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4047 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4048 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4049 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4051 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4052 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4053 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4054 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4055 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4056 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4057 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4058 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4060 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4061 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4062 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4063 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4064 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4065 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4066 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4067 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4069 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4070 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4071 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4072 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4073 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4074 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4075 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4076 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4081 /* Miller-Rabin test of "a" to the base of "b" as described in
4082 * HAC pp. 139 Algorithm 4.24
4084 * Sets result to 0 if definitely composite or 1 if probably prime.
4085 * Randomly the chance of error is no more than 1/4 and often
4088 static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
4097 if (mp_cmp_d(b, 1) != MP_GT) {
4101 /* get n1 = a - 1 */
4102 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4105 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4109 /* set 2**s * r = n1 */
4110 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4114 /* count the number of least significant bits
4119 /* now divide n - 1 by 2**s */
4120 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4124 /* compute y = b**r mod a */
4125 if ((err = mp_init (&y)) != MP_OKAY) {
4128 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4132 /* if y != 1 and y != n1 do */
4133 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4135 /* while j <= s-1 and y != n1 */
4136 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4137 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4141 /* if y == 1 then composite */
4142 if (mp_cmp_d (&y, 1) == MP_EQ) {
4149 /* if y != n1 then composite */
4150 if (mp_cmp (&y, &n1) != MP_EQ) {
4155 /* probably prime now */
4157 LBL_Y:mp_clear (&y);
4158 LBL_R:mp_clear (&r);
4159 LBL_N1:mp_clear (&n1);
4164 /* determines if an integers is divisible by one
4165 * of the first PRIME_SIZE primes or not
4167 * sets result to 0 if not, 1 if yes
4169 static int mp_prime_is_divisible (mp_int * a, int *result)
4174 /* default to not */
4177 for (ix = 0; ix < PRIME_SIZE; ix++) {
4178 /* what is a mod LBL_prime_tab[ix] */
4179 if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4183 /* is the residue zero? */
4195 * Sets result to 1 if probably prime, 0 otherwise
4197 int mp_prime_is_prime (mp_int * a, int t, int *result)
4205 /* valid value of t? */
4206 if (t <= 0 || t > PRIME_SIZE) {
4210 /* is the input equal to one of the primes in the table? */
4211 for (ix = 0; ix < PRIME_SIZE; ix++) {
4212 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4218 /* first perform trial division */
4219 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4223 /* return if it was trivially divisible */
4224 if (res == MP_YES) {
4228 /* now perform the miller-rabin rounds */
4229 if ((err = mp_init (&b)) != MP_OKAY) {
4233 for (ix = 0; ix < t; ix++) {
4235 mp_set (&b, ltm_prime_tab[ix]);
4237 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4246 /* passed the test */
4248 LBL_B:mp_clear (&b);
4253 /* computes least common multiple as |a*b|/(a, b) */
4254 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
4260 if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
4264 /* t1 = get the GCD of the two inputs */
4265 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
4269 /* divide the smallest by the GCD */
4270 if (mp_cmp_mag(a, b) == MP_LT) {
4271 /* store quotient in t2 such that t2 * b is the LCM */
4272 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
4275 res = mp_mul(b, &t2, c);
4277 /* store quotient in t2 such that t2 * a is the LCM */
4278 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
4281 res = mp_mul(a, &t2, c);
4284 /* fix the sign to positive */
4294 static const int lnz[16] = {
4295 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4298 /* Counts the number of lsbs which are zero before the first zero bit */
4299 int mp_cnt_lsb(mp_int *a)
4305 if (mp_iszero(a) == 1) {
4309 /* scan lower digits until non-zero */
4310 for (x = 0; x < a->used && a->dp[x] == 0; x++);
4314 /* now scan this digit until a 1 is found */
4326 /* Greatest Common Divisor using the binary method */
4327 int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
4330 int k, u_lsb, v_lsb, res;
4332 /* either zero than gcd is the largest */
4333 if (mp_iszero (a) == MP_YES) {
4334 return mp_abs (b, c);
4336 if (mp_iszero (b) == MP_YES) {
4337 return mp_abs (a, c);
4340 /* get copies of a and b we can modify */
4341 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
4345 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
4349 /* must be positive for the remainder of the algorithm */
4350 u.sign = v.sign = MP_ZPOS;
4352 /* B1. Find the common power of two for u and v */
4353 u_lsb = mp_cnt_lsb(&u);
4354 v_lsb = mp_cnt_lsb(&v);
4355 k = MIN(u_lsb, v_lsb);
4358 /* divide the power of two out */
4359 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
4363 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
4368 /* divide any remaining factors of two out */
4370 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
4376 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
4381 while (mp_iszero(&v) == 0) {
4382 /* make sure v is the largest */
4383 if (mp_cmp_mag(&u, &v) == MP_GT) {
4384 /* swap u and v to make sure v is >= u */
4388 /* subtract smallest from largest */
4389 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
4393 /* Divide out all factors of two */
4394 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
4399 /* multiply by 2**k which we divided out at the beginning */
4400 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
4405 LBL_V:mp_clear (&u);
4406 LBL_U:mp_clear (&v);
4412 #endif /* CYASSL_KEY_GEN */
4417 /* chars used in radix conversions */
4418 const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
4420 /* read a string [ASCII] in a given radix */
4421 int mp_read_radix (mp_int * a, const char *str, int radix)
4426 /* zero the digit bignum */
4429 /* make sure the radix is ok */
4430 if (radix < 2 || radix > 64) {
4434 /* if the leading digit is a
4435 * minus set the sign to negative.
4444 /* set the integer to the default of zero */
4447 /* process each digit of the string */
4449 /* if the radix < 36 the conversion is case insensitive
4450 * this allows numbers like 1AB and 1ab to represent the same value
4453 ch = (char) ((radix < 36) ? XTOUPPER(*str) : *str);
4454 for (y = 0; y < 64; y++) {
4455 if (ch == mp_s_rmap[y]) {
4460 /* if the char was found in the map
4461 * and is less than the given radix add it
4462 * to the number, otherwise exit the loop.
4465 if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
4468 if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
4477 /* set the sign only if a != 0 */
4478 if (mp_iszero(a) != 1) {
4484 #endif /* HAVE_ECC */
4486 #endif /* USE_FAST_MATH */
4488 #endif /* NO_BIG_INT */