3 * Copyright (C) 2006-2012 Sawtooth Consulting Ltd.
5 * This file is part of CyaSSL.
7 * CyaSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * CyaSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24 * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
25 * http://math.libtomcrypt.com
33 /* in case user set USE_FAST_MATH there */
34 #include <cyassl/ctaocrypt/settings.h>
38 #include <cyassl/ctaocrypt/integer.h>
41 /* math settings check */
42 word32 CheckRunTimeSettings(void)
48 /* handle up to 6 inits */
49 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
54 if (a && ((res = mp_init(a)) != MP_OKAY))
57 if (b && ((res = mp_init(b)) != MP_OKAY)) {
62 if (c && ((res = mp_init(c)) != MP_OKAY)) {
63 mp_clear(a); mp_clear(b);
67 if (d && ((res = mp_init(d)) != MP_OKAY)) {
68 mp_clear(a); mp_clear(b); mp_clear(c);
72 if (e && ((res = mp_init(e)) != MP_OKAY)) {
73 mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
77 if (f && ((res = mp_init(f)) != MP_OKAY)) {
78 mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
86 /* init a new mp_int */
87 int mp_init (mp_int * a)
91 /* allocate memory required and clear it */
92 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
98 /* set the digits to zero */
99 for (i = 0; i < MP_PREC; i++) {
103 /* set the used to zero, allocated digits to the default precision
104 * and sign to positive */
113 /* clear one (frees) */
115 mp_clear (mp_int * a)
119 /* only do anything if a hasn't been freed previously */
121 /* first zero the digits */
122 for (i = 0; i < a->used; i++) {
127 XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
129 /* reset members to make debugging easier */
131 a->alloc = a->used = 0;
137 /* get the size for an unsigned equivalent */
138 int mp_unsigned_bin_size (mp_int * a)
140 int size = mp_count_bits (a);
141 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
145 /* returns the number of bits in an int */
147 mp_count_bits (mp_int * a)
157 /* get number of digits and add that */
158 r = (a->used - 1) * DIGIT_BIT;
160 /* take the last digit and count the bits in it */
161 q = a->dp[a->used - 1];
162 while (q > ((mp_digit) 0)) {
164 q >>= ((mp_digit) 1);
170 /* store in unsigned [big endian] format */
171 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
176 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
181 while (mp_iszero (&t) == 0) {
183 b[x++] = (unsigned char) (t.dp[0] & 255);
185 b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
187 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
198 /* creates "a" then copies b into it */
199 int mp_init_copy (mp_int * a, mp_int * b)
203 if ((res = mp_init (a)) != MP_OKAY) {
206 return mp_copy (b, a);
212 mp_copy (mp_int * a, mp_int * b)
216 /* if dst == src do nothing */
222 if (b->alloc < a->used) {
223 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
228 /* zero b and copy the parameters over */
230 register mp_digit *tmpa, *tmpb;
232 /* pointer aliases */
240 /* copy all the digits */
241 for (n = 0; n < a->used; n++) {
245 /* clear high digits */
246 for (; n < b->used; n++) {
251 /* copy used count and sign */
258 /* grow as required */
259 int mp_grow (mp_int * a, int size)
264 /* if the alloc size is smaller alloc more ram */
265 if (a->alloc < size) {
266 /* ensure there are always at least MP_PREC digits extra on top */
267 size += (MP_PREC * 2) - (size % MP_PREC);
269 /* reallocate the array a->dp
271 * We store the return in a temporary variable
272 * in case the operation failed we don't want
273 * to overwrite the dp member of a.
275 tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
276 DYNAMIC_TYPE_BIGINT);
278 /* reallocation failed but "a" is still valid [can be freed] */
282 /* reallocation succeeded so set a->dp */
285 /* zero excess digits */
288 for (; i < a->alloc; i++) {
296 /* reverse an array, used for radix code */
298 bn_reverse (unsigned char *s, int len)
315 /* shift right by a certain bit count (store quotient in c, optional
317 int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
324 /* if the shift count is <= 0 then we do no work */
326 res = mp_copy (a, c);
333 if ((res = mp_init (&t)) != MP_OKAY) {
337 /* get the remainder */
339 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
346 if ((res = mp_copy (a, c)) != MP_OKAY) {
351 /* shift by as many digits in the bit count */
352 if (b >= (int)DIGIT_BIT) {
353 mp_rshd (c, b / DIGIT_BIT);
356 /* shift any bit count < DIGIT_BIT */
357 D = (mp_digit) (b % DIGIT_BIT);
359 register mp_digit *tmpc, mask, shift;
362 mask = (((mp_digit)1) << D) - 1;
365 shift = DIGIT_BIT - D;
368 tmpc = c->dp + (c->used - 1);
372 for (x = c->used - 1; x >= 0; x--) {
373 /* get the lower bits of this word in a temp */
376 /* shift the current word and mix in the carry bits from the previous
378 *tmpc = (*tmpc >> D) | (r << shift);
381 /* set the carry to the carry bits of the current word found above */
395 void mp_zero (mp_int * a)
404 for (n = 0; n < a->alloc; n++) {
410 /* trim unused digits
412 * This is used to ensure that leading zero digits are
413 * trimed and the leading "used" digit will be non-zero
414 * Typically very fast. Also fixes the sign if there
415 * are no more leading digits
418 mp_clamp (mp_int * a)
420 /* decrease used while the most significant digit is
423 while (a->used > 0 && a->dp[a->used - 1] == 0) {
427 /* reset the sign flag if used == 0 */
434 /* swap the elements of two integers, for cases where you can't simply swap the
435 * mp_int pointers around
438 mp_exch (mp_int * a, mp_int * b)
448 /* shift right a certain amount of digits */
449 void mp_rshd (mp_int * a, int b)
453 /* if b <= 0 then ignore it */
458 /* if b > used then simply zero it and return */
465 register mp_digit *bottom, *top;
467 /* shift the digits down */
472 /* top [offset into digits] */
475 /* this is implemented as a sliding window where
476 * the window is b-digits long and digits from
477 * the top of the window are copied to the bottom
481 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
483 \-------------------/ ---->
485 for (x = 0; x < (a->used - b); x++) {
489 /* zero the top digits */
490 for (; x < a->used; x++) {
495 /* remove excess digits */
500 /* calc a value mod 2**b */
502 mp_mod_2d (mp_int * a, int b, mp_int * c)
506 /* if b is <= 0 then zero the int */
512 /* if the modulus is larger than the value than return */
513 if (b >= (int) (a->used * DIGIT_BIT)) {
514 res = mp_copy (a, c);
519 if ((res = mp_copy (a, c)) != MP_OKAY) {
523 /* zero digits above the last digit of the modulus */
524 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
527 /* clear the digit that is not completely outside/inside the modulus */
528 c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
529 (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
535 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
536 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
540 /* make sure there are at least two digits */
542 if ((res = mp_grow(a, 2)) != MP_OKAY) {
550 /* read the bytes in */
552 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
560 a->dp[0] = (*b & MP_MASK);
561 a->dp[1] |= ((*b++ >> 7U) & 1);
570 /* shift left by a certain bit count */
571 int mp_mul_2d (mp_int * a, int b, mp_int * c)
578 if ((res = mp_copy (a, c)) != MP_OKAY) {
583 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
584 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
589 /* shift by as many digits in the bit count */
590 if (b >= (int)DIGIT_BIT) {
591 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
596 /* shift any bit count < DIGIT_BIT */
597 d = (mp_digit) (b % DIGIT_BIT);
599 register mp_digit *tmpc, shift, mask, r, rr;
602 /* bitmask for carries */
603 mask = (((mp_digit)1) << d) - 1;
606 shift = DIGIT_BIT - d;
613 for (x = 0; x < c->used; x++) {
614 /* get the higher bits of the current word */
615 rr = (*tmpc >> shift) & mask;
617 /* shift the current word and OR in the carry */
618 *tmpc = ((*tmpc << d) | r) & MP_MASK;
621 /* set the carry to the carry bits of the current word */
625 /* set final carry */
627 c->dp[(c->used)++] = r;
635 /* shift left a certain amount of digits */
636 int mp_lshd (mp_int * a, int b)
640 /* if its less than zero return */
645 /* grow to fit the new digits */
646 if (a->alloc < a->used + b) {
647 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
653 register mp_digit *top, *bottom;
655 /* increment the used by the shift amount then copy upwards */
659 top = a->dp + a->used - 1;
662 bottom = a->dp + a->used - 1 - b;
664 /* much like mp_rshd this is implemented using a sliding window
665 * except the window goes the otherway around. Copying from
666 * the bottom to the top. see bn_mp_rshd.c for more info.
668 for (x = a->used - 1; x >= b; x--) {
672 /* zero the lower digits */
674 for (x = 0; x < b; x++) {
682 /* this is a shell function that calls either the normal or Montgomery
683 * exptmod functions. Originally the call to the montgomery code was
684 * embedded in the normal function but that wasted alot of stack space
685 * for nothing (since 99% of the time the Montgomery code would be called)
687 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
691 /* modulus P must be positive */
692 if (P->sign == MP_NEG) {
696 /* if exponent X is negative we have to recurse */
697 if (X->sign == MP_NEG) {
698 #ifdef BN_MP_INVMOD_C
702 /* first compute 1/G mod P */
703 if ((err = mp_init(&tmpG)) != MP_OKAY) {
706 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
712 if ((err = mp_init(&tmpX)) != MP_OKAY) {
716 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
722 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
723 err = mp_exptmod(&tmpG, &tmpX, P, Y);
733 /* modified diminished radix reduction */
734 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
735 defined(BN_S_MP_EXPTMOD_C)
736 if (mp_reduce_is_2k_l(P) == MP_YES) {
737 return s_mp_exptmod(G, X, P, Y, 1);
741 #ifdef BN_MP_DR_IS_MODULUS_C
742 /* is it a DR modulus? */
743 dr = mp_dr_is_modulus(P);
749 #ifdef BN_MP_REDUCE_IS_2K_C
750 /* if not, is it a unrestricted DR modulus? */
752 dr = mp_reduce_is_2k(P) << 1;
756 /* if the modulus is odd or dr != 0 use the montgomery method */
757 #ifdef BN_MP_EXPTMOD_FAST_C
758 if (mp_isodd (P) == 1 || dr != 0) {
759 return mp_exptmod_fast (G, X, P, Y, dr);
762 #ifdef BN_S_MP_EXPTMOD_C
763 /* otherwise use the generic Barrett reduction technique */
764 return s_mp_exptmod (G, X, P, Y, 0);
766 /* no exptmod for evens */
769 #ifdef BN_MP_EXPTMOD_FAST_C
777 * Simple function copies the input and fixes the sign to positive
780 mp_abs (mp_int * a, mp_int * b)
786 if ((res = mp_copy (a, b)) != MP_OKAY) {
791 /* force the sign of b to positive */
798 /* hac 14.61, pp608 */
799 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
801 /* b cannot be negative */
802 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
806 #ifdef BN_FAST_MP_INVMOD_C
807 /* if the modulus is odd we can use a faster routine instead */
808 if (mp_isodd (b) == 1) {
809 return fast_mp_invmod (a, b, c);
813 #ifdef BN_MP_INVMOD_SLOW_C
814 return mp_invmod_slow(a, b, c);
819 /* computes the modular inverse via binary extended euclidean algorithm,
820 * that is c = 1/a mod b
822 * Based on slow invmod except this is optimized for the case where b is
823 * odd as per HAC Note 14.64 on pp. 610
825 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
827 mp_int x, y, u, v, B, D;
830 /* 2. [modified] b must be odd */
831 if (mp_iseven (b) == 1) {
835 /* init all our temps */
836 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
840 /* x == modulus, y == value to invert */
841 if ((res = mp_copy (b, &x)) != MP_OKAY) {
845 /* we need y = |a| */
846 if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
850 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
851 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
854 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
860 /* 4. while u is even do */
861 while (mp_iseven (&u) == 1) {
863 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
866 /* 4.2 if B is odd then */
867 if (mp_isodd (&B) == 1) {
868 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
873 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
878 /* 5. while v is even do */
879 while (mp_iseven (&v) == 1) {
881 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
884 /* 5.2 if D is odd then */
885 if (mp_isodd (&D) == 1) {
887 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
892 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
897 /* 6. if u >= v then */
898 if (mp_cmp (&u, &v) != MP_LT) {
899 /* u = u - v, B = B - D */
900 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
904 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
908 /* v - v - u, D = D - B */
909 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
913 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
918 /* if not zero goto step 4 */
919 if (mp_iszero (&u) == 0) {
923 /* now a = C, b = D, gcd == g*v */
925 /* if v != 1 then there is no inverse */
926 if (mp_cmp_d (&v, 1) != MP_EQ) {
931 /* b is now the inverse */
933 while (D.sign == MP_NEG) {
934 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
942 LBL_ERR:mp_clear(&x);
952 /* hac 14.61, pp608 */
953 int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
955 mp_int x, y, u, v, A, B, C, D;
958 /* b cannot be negative */
959 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
964 if ((res = mp_init_multi(&x, &y, &u, &v,
965 &A, &B)) != MP_OKAY) {
969 /* init rest of tmps temps */
970 if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
975 if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
978 if ((res = mp_copy (b, &y)) != MP_OKAY) {
982 /* 2. [modified] if x,y are both even then return an error! */
983 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
988 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
989 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
992 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
999 /* 4. while u is even do */
1000 while (mp_iseven (&u) == 1) {
1002 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1005 /* 4.2 if A or B is odd then */
1006 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
1007 /* A = (A+y)/2, B = (B-x)/2 */
1008 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1011 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1015 /* A = A/2, B = B/2 */
1016 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1019 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1024 /* 5. while v is even do */
1025 while (mp_iseven (&v) == 1) {
1027 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1030 /* 5.2 if C or D is odd then */
1031 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
1032 /* C = (C+y)/2, D = (D-x)/2 */
1033 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1036 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1040 /* C = C/2, D = D/2 */
1041 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1044 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1049 /* 6. if u >= v then */
1050 if (mp_cmp (&u, &v) != MP_LT) {
1051 /* u = u - v, A = A - C, B = B - D */
1052 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1056 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1060 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1064 /* v - v - u, C = C - A, D = D - B */
1065 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1069 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1073 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1078 /* if not zero goto step 4 */
1079 if (mp_iszero (&u) == 0)
1082 /* now a = C, b = D, gcd == g*v */
1084 /* if v != 1 then there is no inverse */
1085 if (mp_cmp_d (&v, 1) != MP_EQ) {
1090 /* if its too low */
1091 while (mp_cmp_d(&C, 0) == MP_LT) {
1092 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1098 while (mp_cmp_mag(&C, b) != MP_LT) {
1099 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1104 /* C is now the inverse */
1107 LBL_ERR:mp_clear(&x);
1119 /* compare maginitude of two ints (unsigned) */
1120 int mp_cmp_mag (mp_int * a, mp_int * b)
1123 mp_digit *tmpa, *tmpb;
1125 /* compare based on # of non-zero digits */
1126 if (a->used > b->used) {
1130 if (a->used < b->used) {
1135 tmpa = a->dp + (a->used - 1);
1138 tmpb = b->dp + (a->used - 1);
1140 /* compare based on digits */
1141 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1142 if (*tmpa > *tmpb) {
1146 if (*tmpa < *tmpb) {
1154 /* compare two ints (signed)*/
1156 mp_cmp (mp_int * a, mp_int * b)
1158 /* compare based on sign */
1159 if (a->sign != b->sign) {
1160 if (a->sign == MP_NEG) {
1167 /* compare digits */
1168 if (a->sign == MP_NEG) {
1169 /* if negative compare opposite direction */
1170 return mp_cmp_mag(b, a);
1172 return mp_cmp_mag(a, b);
1177 /* compare a digit */
1178 int mp_cmp_d(mp_int * a, mp_digit b)
1180 /* compare based on sign */
1181 if (a->sign == MP_NEG) {
1185 /* compare based on magnitude */
1190 /* compare the only digit of a to b */
1193 } else if (a->dp[0] < b) {
1201 /* set to a digit */
1202 void mp_set (mp_int * a, mp_digit b)
1205 a->dp[0] = b & MP_MASK;
1206 a->used = (a->dp[0] != 0) ? 1 : 0;
1210 /* c = a mod b, 0 <= c < b */
1212 mp_mod (mp_int * a, mp_int * b, mp_int * c)
1217 if ((res = mp_init (&t)) != MP_OKAY) {
1221 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
1226 if (t.sign != b->sign) {
1227 res = mp_add (b, &t, c);
1238 /* slower bit-bang division... also smaller */
1239 int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1241 mp_int ta, tb, tq, q;
1244 /* is divisor zero ? */
1245 if (mp_iszero (b) == 1) {
1249 /* if a < b then q=0, r = a */
1250 if (mp_cmp_mag (a, b) == MP_LT) {
1252 res = mp_copy (a, d);
1262 /* init our temps */
1263 if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
1269 n = mp_count_bits(a) - mp_count_bits(b);
1270 if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
1271 ((res = mp_abs(b, &tb)) != MP_OKAY) ||
1272 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
1273 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1278 if (mp_cmp(&tb, &ta) != MP_GT) {
1279 if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
1280 ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1284 if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
1285 ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1290 /* now q == quotient and ta == remainder */
1292 n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1295 c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1299 d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1311 int mp_div_2(mp_int * a, mp_int * b)
1313 int x, res, oldused;
1316 if (b->alloc < a->used) {
1317 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1325 register mp_digit r, rr, *tmpa, *tmpb;
1328 tmpa = a->dp + b->used - 1;
1331 tmpb = b->dp + b->used - 1;
1335 for (x = b->used - 1; x >= 0; x--) {
1336 /* get the carry for the next iteration */
1339 /* shift the current digit, add in carry and store */
1340 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1342 /* forward carry to next iteration */
1346 /* zero excess digits */
1347 tmpb = b->dp + b->used;
1348 for (x = b->used; x < oldused; x++) {
1358 /* high level addition (handles signs) */
1359 int mp_add (mp_int * a, mp_int * b, mp_int * c)
1363 /* get sign of both inputs */
1367 /* handle two cases, not four */
1369 /* both positive or both negative */
1370 /* add their magnitudes, copy the sign */
1372 res = s_mp_add (a, b, c);
1374 /* one positive, the other negative */
1375 /* subtract the one with the greater magnitude from */
1376 /* the one of the lesser magnitude. The result gets */
1377 /* the sign of the one with the greater magnitude. */
1378 if (mp_cmp_mag (a, b) == MP_LT) {
1380 res = s_mp_sub (b, a, c);
1383 res = s_mp_sub (a, b, c);
1390 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
1392 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
1395 int olduse, res, min, max;
1397 /* find sizes, we let |a| <= |b| which means we have to sort
1398 * them. "x" will point to the input with the most digits
1400 if (a->used > b->used) {
1411 if (c->alloc < max + 1) {
1412 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
1417 /* get old used digit count and set new one */
1422 register mp_digit u, *tmpa, *tmpb, *tmpc;
1425 /* alias for digit pointers */
1436 /* zero the carry */
1438 for (i = 0; i < min; i++) {
1439 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
1440 *tmpc = *tmpa++ + *tmpb++ + u;
1442 /* U = carry bit of T[i] */
1443 u = *tmpc >> ((mp_digit)DIGIT_BIT);
1445 /* take away carry bit from T[i] */
1449 /* now copy higher words if any, that is in A+B
1450 * if A or B has more digits add those in
1453 for (; i < max; i++) {
1454 /* T[i] = X[i] + U */
1455 *tmpc = x->dp[i] + u;
1457 /* U = carry bit of T[i] */
1458 u = *tmpc >> ((mp_digit)DIGIT_BIT);
1460 /* take away carry bit from T[i] */
1468 /* clear digits above oldused */
1469 for (i = c->used; i < olduse; i++) {
1479 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
1481 s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1483 int olduse, res, min, max;
1490 if (c->alloc < max) {
1491 if ((res = mp_grow (c, max)) != MP_OKAY) {
1499 register mp_digit u, *tmpa, *tmpb, *tmpc;
1502 /* alias for digit pointers */
1507 /* set carry to zero */
1509 for (i = 0; i < min; i++) {
1510 /* T[i] = A[i] - B[i] - U */
1511 *tmpc = *tmpa++ - *tmpb++ - u;
1513 /* U = carry bit of T[i]
1514 * Note this saves performing an AND operation since
1515 * if a carry does occur it will propagate all the way to the
1516 * MSB. As a result a single shift is enough to get the carry
1518 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
1520 /* Clear carry from T[i] */
1524 /* now copy higher words if any, e.g. if A has more digits than B */
1525 for (; i < max; i++) {
1526 /* T[i] = A[i] - U */
1527 *tmpc = *tmpa++ - u;
1529 /* U = carry bit of T[i] */
1530 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
1532 /* Clear carry from T[i] */
1536 /* clear digits above used (since we may not have grown result above) */
1537 for (i = c->used; i < olduse; i++) {
1547 /* high level subtraction (handles signs) */
1549 mp_sub (mp_int * a, mp_int * b, mp_int * c)
1557 /* subtract a negative from a positive, OR */
1558 /* subtract a positive from a negative. */
1559 /* In either case, ADD their magnitudes, */
1560 /* and use the sign of the first number. */
1562 res = s_mp_add (a, b, c);
1564 /* subtract a positive from a positive, OR */
1565 /* subtract a negative from a negative. */
1566 /* First, take the difference between their */
1567 /* magnitudes, then... */
1568 if (mp_cmp_mag (a, b) != MP_LT) {
1569 /* Copy the sign from the first */
1571 /* The first has a larger or equal magnitude */
1572 res = s_mp_sub (a, b, c);
1574 /* The result has the *opposite* sign from */
1575 /* the first number. */
1576 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1577 /* The second has a larger magnitude */
1578 res = s_mp_sub (b, a, c);
1585 /* determines if reduce_2k_l can be used */
1586 int mp_reduce_is_2k_l(mp_int *a)
1592 } else if (a->used == 1) {
1594 } else if (a->used > 1) {
1595 /* if more than half of the digits are -1 we're sold */
1596 for (iy = ix = 0; ix < a->used; ix++) {
1597 if (a->dp[ix] == MP_MASK) {
1601 return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1608 /* determines if mp_reduce_2k can be used */
1609 int mp_reduce_is_2k(mp_int *a)
1616 } else if (a->used == 1) {
1618 } else if (a->used > 1) {
1619 iy = mp_count_bits(a);
1623 /* Test every bit from the second digit up, must be 1 */
1624 for (ix = DIGIT_BIT; ix < iy; ix++) {
1625 if ((a->dp[iw] & iz) == 0) {
1629 if (iz > (mp_digit)MP_MASK) {
1639 /* determines if a number is a valid DR modulus */
1640 int mp_dr_is_modulus(mp_int *a)
1644 /* must be at least two digits */
1649 /* must be of the form b**k - a [a <= b] so all
1650 * but the first digit must be equal to -1 (mod b).
1652 for (ix = 1; ix < a->used; ix++) {
1653 if (a->dp[ix] != MP_MASK) {
1661 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1663 * Uses a left-to-right k-ary sliding window to compute the modular
1665 * The value of k changes based on the size of the exponent.
1667 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1673 #define TAB_SIZE 256
1676 int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
1679 mp_int M[TAB_SIZE], res;
1681 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1683 /* use a pointer to the reduction algorithm. This allows us to use
1684 * one of many reduction algorithms without modding the guts of
1685 * the code with if statements everywhere.
1687 int (*redux)(mp_int*,mp_int*,mp_digit);
1689 /* find window size */
1690 x = mp_count_bits (X);
1693 } else if (x <= 36) {
1695 } else if (x <= 140) {
1697 } else if (x <= 450) {
1699 } else if (x <= 1303) {
1701 } else if (x <= 3529) {
1714 /* init first cell */
1715 if ((err = mp_init(&M[1])) != MP_OKAY) {
1719 /* now init the second half of the array */
1720 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1721 if ((err = mp_init(&M[x])) != MP_OKAY) {
1722 for (y = 1<<(winsize-1); y < x; y++) {
1730 /* determine and setup reduction code */
1732 #ifdef BN_MP_MONTGOMERY_SETUP_C
1733 /* now setup montgomery */
1734 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1742 /* automatically pick the comba one if available (saves quite a few
1744 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
1745 if (((P->used * 2 + 1) < MP_WARRAY) &&
1746 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1747 redux = fast_mp_montgomery_reduce;
1751 #ifdef BN_MP_MONTGOMERY_REDUCE_C
1752 /* use slower baseline Montgomery method */
1753 redux = mp_montgomery_reduce;
1759 } else if (redmode == 1) {
1760 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
1761 /* setup DR reduction for moduli of the form B**k - b */
1762 mp_dr_setup(P, &mp);
1763 redux = mp_dr_reduce;
1769 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
1770 /* setup DR reduction for moduli of the form 2**k - b */
1771 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1774 redux = mp_reduce_2k;
1782 if ((err = mp_init (&res)) != MP_OKAY) {
1790 * The first half of the table is not computed though accept for M[0] and M[1]
1794 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
1795 /* now we need R mod m */
1796 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1804 /* now set M[1] to G * R mod m */
1805 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1810 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
1815 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
1816 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
1820 for (x = 0; x < (winsize - 1); x++) {
1821 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
1824 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
1829 /* create upper table */
1830 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1831 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1834 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
1839 /* set initial mode and bit cnt */
1843 digidx = X->used - 1;
1848 /* grab next digit as required */
1849 if (--bitcnt == 0) {
1850 /* if digidx == -1 we are out of digits so break */
1854 /* read next digit and reset bitcnt */
1855 buf = X->dp[digidx--];
1856 bitcnt = (int)DIGIT_BIT;
1859 /* grab the next msb from the exponent */
1860 y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
1861 buf <<= (mp_digit)1;
1863 /* if the bit is zero and mode == 0 then we ignore it
1864 * These represent the leading zero bits before the first 1 bit
1865 * in the exponent. Technically this opt is not required but it
1866 * does lower the # of trivial squaring/reductions used
1868 if (mode == 0 && y == 0) {
1872 /* if the bit is zero and mode == 1 then we square */
1873 if (mode == 1 && y == 0) {
1874 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1877 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1883 /* else we add it to the window */
1884 bitbuf |= (y << (winsize - ++bitcpy));
1887 if (bitcpy == winsize) {
1888 /* ok window is filled so square as required and multiply */
1890 for (x = 0; x < winsize; x++) {
1891 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1894 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1900 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
1903 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1907 /* empty window and reset */
1914 /* if bits remain then square/multiply */
1915 if (mode == 2 && bitcpy > 0) {
1916 /* square then multiply if the bit is set */
1917 for (x = 0; x < bitcpy; x++) {
1918 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1921 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1925 /* get next bit of the window */
1927 if ((bitbuf & (1 << winsize)) != 0) {
1929 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
1932 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1940 /* fixup result if Montgomery reduction is used
1941 * recall that any value in a Montgomery system is
1942 * actually multiplied by R mod n. So we have
1943 * to reduce one more time to cancel out the factor
1946 if ((err = redux(&res, P, mp)) != MP_OKAY) {
1951 /* swap res with Y */
1954 LBL_RES:mp_clear (&res);
1957 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1964 /* setups the montgomery reduction stuff */
1966 mp_montgomery_setup (mp_int * n, mp_digit * rho)
1970 /* fast inversion mod 2**k
1972 * Based on the fact that
1974 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
1975 * => 2*X*A - X*X*A*A = 1
1976 * => 2*(1) - (1) = 1
1984 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
1985 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
1986 #if !defined(MP_8BIT)
1987 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
1989 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
1990 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
1993 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
1996 /* rho = -1/m mod b */
1997 /* TAO, switched mp_word casts to mp_digit to shut up compiler */
1998 *rho = (((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
2004 /* computes xR**-1 == x (mod N) via Montgomery Reduction
2006 * This is an optimized implementation of montgomery_reduce
2007 * which uses the comba method to quickly calculate the columns of the
2010 * Based on Algorithm 14.32 on pp.601 of HAC.
2012 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2014 int ix, res, olduse;
2015 #ifdef CYASSL_SMALL_STACK
2016 mp_word* W; /* uses dynamic memory and slower */
2018 mp_word W[MP_WARRAY];
2021 /* get old used count */
2024 /* grow a as required */
2025 if (x->alloc < n->used + 1) {
2026 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2031 #ifdef CYASSL_SMALL_STACK
2032 W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2037 /* first we have to get the digits of the input into
2038 * an array of double precision words W[...]
2041 register mp_word *_W;
2042 register mp_digit *tmpx;
2044 /* alias for the W[] array */
2047 /* alias for the digits of x*/
2050 /* copy the digits of a into W[0..a->used-1] */
2051 for (ix = 0; ix < x->used; ix++) {
2055 /* zero the high words of W[a->used..m->used*2] */
2056 for (; ix < n->used * 2 + 1; ix++) {
2061 /* now we proceed to zero successive digits
2062 * from the least significant upwards
2064 for (ix = 0; ix < n->used; ix++) {
2065 /* mu = ai * m' mod b
2067 * We avoid a double precision multiplication (which isn't required)
2068 * by casting the value down to a mp_digit. Note this requires
2069 * that W[ix-1] have the carry cleared (see after the inner loop)
2071 register mp_digit mu;
2072 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2074 /* a = a + mu * m * b**i
2076 * This is computed in place and on the fly. The multiplication
2077 * by b**i is handled by offseting which columns the results
2080 * Note the comba method normally doesn't handle carries in the
2081 * inner loop In this case we fix the carry from the previous
2082 * column since the Montgomery reduction requires digits of the
2083 * result (so far) [see above] to work. This is
2084 * handled by fixing up one carry after the inner loop. The
2085 * carry fixups are done in order so after these loops the
2086 * first m->used words of W[] have the carries fixed
2090 register mp_digit *tmpn;
2091 register mp_word *_W;
2093 /* alias for the digits of the modulus */
2096 /* Alias for the columns set by an offset of ix */
2100 for (iy = 0; iy < n->used; iy++) {
2101 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
2105 /* now fix carry for next digit, W[ix+1] */
2106 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2109 /* now we have to propagate the carries and
2110 * shift the words downward [all those least
2111 * significant digits we zeroed].
2114 register mp_digit *tmpx;
2115 register mp_word *_W, *_W1;
2117 /* nox fix rest of carries */
2119 /* alias for current word */
2122 /* alias for next word, where the carry goes */
2125 for (; ix <= n->used * 2 + 1; ix++) {
2126 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
2129 /* copy out, A = A/b**n
2131 * The result is A/b**n but instead of converting from an
2132 * array of mp_word to mp_digit than calling mp_rshd
2133 * we just copy them in the right order
2136 /* alias for destination word */
2139 /* alias for shifted double precision result */
2142 for (ix = 0; ix < n->used + 1; ix++) {
2143 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
2146 /* zero oldused digits, if the input a was larger than
2147 * m->used+1 we'll have to clear the digits
2149 for (; ix < olduse; ix++) {
2154 /* set the max used and clamp */
2155 x->used = n->used + 1;
2158 #ifdef CYASSL_SMALL_STACK
2159 XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
2162 /* if A >= m then A = A - m */
2163 if (mp_cmp_mag (x, n) != MP_LT) {
2164 return s_mp_sub (x, n, x);
2170 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2172 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2177 /* can the fast reduction [comba] method be used?
2179 * Note that unlike in mul you're safely allowed *less*
2180 * than the available columns [255 per default] since carries
2181 * are fixed up in the inner loop.
2183 digs = n->used * 2 + 1;
2184 if ((digs < MP_WARRAY) &&
2186 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2187 return fast_mp_montgomery_reduce (x, n, rho);
2190 /* grow the input as required */
2191 if (x->alloc < digs) {
2192 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2198 for (ix = 0; ix < n->used; ix++) {
2199 /* mu = ai * rho mod b
2201 * The value of rho must be precalculated via
2202 * montgomery_setup() such that
2203 * it equals -1/n0 mod b this allows the
2204 * following inner loop to reduce the
2205 * input one digit at a time
2207 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2209 /* a = a + mu * m * b**i */
2212 register mp_digit *tmpn, *tmpx, u;
2215 /* alias for digits of the modulus */
2218 /* alias for the digits of x [the input] */
2221 /* set the carry to zero */
2224 /* Multiply and add in place */
2225 for (iy = 0; iy < n->used; iy++) {
2226 /* compute product and sum */
2227 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2228 ((mp_word) u) + ((mp_word) * tmpx);
2231 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2234 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2236 /* At this point the ix'th digit of x should be zero */
2239 /* propagate carries upwards as required*/
2242 u = *tmpx >> DIGIT_BIT;
2248 /* at this point the n.used'th least
2249 * significant digits of x are all zero
2250 * which means we can shift x to the
2251 * right by n.used digits and the
2252 * residue is unchanged.
2255 /* x = x/b**n.used */
2257 mp_rshd (x, n->used);
2259 /* if x >= n then x = x - n */
2260 if (mp_cmp_mag (x, n) != MP_LT) {
2261 return s_mp_sub (x, n, x);
2268 /* determines the setup value */
2269 void mp_dr_setup(mp_int *a, mp_digit *d)
2271 /* the casts are required if DIGIT_BIT is one less than
2272 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2274 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
2275 ((mp_word)a->dp[0]));
2279 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2281 * Based on algorithm from the paper
2283 * "Generating Efficient Primes for Discrete Log Cryptosystems"
2284 * Chae Hoon Lim, Pil Joong Lee,
2285 * POSTECH Information Research Laboratories
2287 * The modulus must be of a special format [see manual]
2289 * Has been modified to use algorithm 7.10 from the LTM book instead
2291 * Input x must be in the range 0 <= x <= (n-1)**2
2294 mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2298 mp_digit mu, *tmpx1, *tmpx2;
2300 /* m = digits in modulus */
2303 /* ensure that "x" has at least 2m digits */
2304 if (x->alloc < m + m) {
2305 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2310 /* top of loop, this is where the code resumes if
2311 * another reduction pass is required.
2314 /* aliases for digits */
2315 /* alias for lower half of x */
2318 /* alias for upper half of x, or x/B**m */
2321 /* set carry to zero */
2324 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
2325 for (i = 0; i < m; i++) {
2326 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
2327 *tmpx1++ = (mp_digit)(r & MP_MASK);
2328 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2331 /* set final carry */
2334 /* zero words above m */
2335 for (i = m + 1; i < x->used; i++) {
2339 /* clamp, sub and return */
2342 /* if x >= n then subtract and reduce again
2343 * Each successive "recursion" makes the input smaller and smaller.
2345 if (mp_cmp_mag (x, n) != MP_LT) {
2353 /* reduces a modulo n where n is of the form 2**p - d */
2354 int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
2359 if ((res = mp_init(&q)) != MP_OKAY) {
2363 p = mp_count_bits(n);
2365 /* q = a/2**p, a = a mod 2**p */
2366 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2372 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
2378 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2382 if (mp_cmp_mag(a, n) != MP_LT) {
2393 /* determines the setup value */
2394 int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
2399 if ((res = mp_init(&tmp)) != MP_OKAY) {
2403 p = mp_count_bits(a);
2404 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
2409 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
2420 /* computes a = 2**b
2422 * Simple algorithm which zeroes the int, grows it then just sets one bit
2426 mp_2expt (mp_int * a, int b)
2430 /* zero a as per default */
2433 /* grow a to accomodate the single bit */
2434 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
2438 /* set the used count of where the bit will go */
2439 a->used = b / DIGIT_BIT + 1;
2441 /* put the single bit in its place */
2442 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
2448 /* multiply by a digit */
2450 mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
2452 mp_digit u, *tmpa, *tmpc;
2454 int ix, res, olduse;
2456 /* make sure c is big enough to hold a*b */
2457 if (c->alloc < a->used + 1) {
2458 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2463 /* get the original destinations used count */
2469 /* alias for a->dp [source] */
2472 /* alias for c->dp [dest] */
2478 /* compute columns */
2479 for (ix = 0; ix < a->used; ix++) {
2480 /* compute product and carry sum for this term */
2481 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
2483 /* mask off higher bits to get a single digit */
2484 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2486 /* send carry into next iteration */
2487 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2490 /* store final carry [if any] and increment ix offset */
2494 /* now zero digits above the top */
2495 while (ix++ < olduse) {
2499 /* set used count */
2500 c->used = a->used + 1;
2507 /* d = a * b (mod c) */
2508 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2513 if ((res = mp_init (&t)) != MP_OKAY) {
2517 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
2521 res = mp_mod (&t, c, d);
2527 /* computes b = a*a */
2529 mp_sqr (mp_int * a, mp_int * b)
2534 #ifdef BN_FAST_S_MP_SQR_C
2535 /* can we use the fast comba multiplier? */
2536 if ((a->used * 2 + 1) < MP_WARRAY &&
2538 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
2539 res = fast_s_mp_sqr (a, b);
2542 #ifdef BN_S_MP_SQR_C
2543 res = s_mp_sqr (a, b);
2553 /* high level multiplication (handles sign) */
2554 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
2557 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2560 /* can we use the fast multiplier?
2562 * The fast multiplier can be used if the output will
2563 * have less than MP_WARRAY digits and the number of
2564 * digits won't affect carry propagation
2566 int digs = a->used + b->used + 1;
2568 #ifdef BN_FAST_S_MP_MUL_DIGS_C
2569 if ((digs < MP_WARRAY) &&
2570 MIN(a->used, b->used) <=
2571 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2572 res = fast_s_mp_mul_digs (a, b, c, digs);
2575 #ifdef BN_S_MP_MUL_DIGS_C
2576 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2582 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2588 int mp_mul_2(mp_int * a, mp_int * b)
2590 int x, res, oldused;
2592 /* grow to accomodate result */
2593 if (b->alloc < a->used + 1) {
2594 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2603 register mp_digit r, rr, *tmpa, *tmpb;
2605 /* alias for source */
2608 /* alias for dest */
2613 for (x = 0; x < a->used; x++) {
2615 /* get what will be the *next* carry bit from the
2616 * MSB of the current digit
2618 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2620 /* now shift up this digit, add in the carry [from the previous] */
2621 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2623 /* copy the carry that would be from the source
2624 * digit into the next iteration
2629 /* new leading digit? */
2631 /* add a MSB which is always 1 at this point */
2636 /* now zero any excess digits on the destination
2637 * that we didn't write to
2639 tmpb = b->dp + b->used;
2640 for (x = b->used; x < oldused; x++) {
2649 /* divide by three (based on routine from MPI and the GMP manual) */
2651 mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
2658 /* b = 2**DIGIT_BIT / 3 */
2659 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
2661 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2668 for (ix = a->used - 1; ix >= 0; ix--) {
2669 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
2672 /* multiply w by [1/3] */
2673 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
2675 /* now subtract 3 * [w/3] from w, to get the remainder */
2678 /* fixup the remainder as required since
2679 * the optimization is not exact.
2688 q.dp[ix] = (mp_digit)t;
2691 /* [optional] store the remainder */
2696 /* [optional] store the quotient */
2707 /* init an mp_init for a given size */
2708 int mp_init_size (mp_int * a, int size)
2712 /* pad size so there are always extra digits */
2713 size += (MP_PREC * 2) - (size % MP_PREC);
2716 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
2717 DYNAMIC_TYPE_BIGINT);
2718 if (a->dp == NULL) {
2722 /* set the members */
2727 /* zero the digits */
2728 for (x = 0; x < size; x++) {
2736 /* the jist of squaring...
2737 * you do like mult except the offset of the tmpx [one that
2738 * starts closer to zero] can't equal the offset of tmpy.
2739 * So basically you set up iy like before then you min it with
2740 * (ty-tx) so that it never happens. You double all those
2741 * you add in the inner loop
2743 After that loop you do the squares and add them in.
2746 int fast_s_mp_sqr (mp_int * a, mp_int * b)
2748 int olduse, res, pa, ix, iz;
2749 #ifdef CYASSL_SMALL_STACK
2750 mp_digit* W; /* uses dynamic memory and slower */
2752 mp_digit W[MP_WARRAY];
2757 /* grow the destination as required */
2758 pa = a->used + a->used;
2759 if (b->alloc < pa) {
2760 if ((res = mp_grow (b, pa)) != MP_OKAY) {
2766 return MP_RANGE; /* TAO range check */
2768 #ifdef CYASSL_SMALL_STACK
2769 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2774 /* number of output digits to produce */
2776 for (ix = 0; ix < pa; ix++) {
2784 /* get offsets into the two bignums */
2785 ty = MIN(a->used-1, ix);
2788 /* setup temp aliases */
2792 /* this is the number of times the loop will iterrate, essentially
2793 while (tx++ < a->used && ty-- >= 0) { ... }
2795 iy = MIN(a->used-tx, ty+1);
2797 /* now for squaring tx can never equal ty
2798 * we halve the distance since they approach at a rate of 2x
2799 * and we have to round because odd cases need to be executed
2801 iy = MIN(iy, (ty-tx+1)>>1);
2804 for (iz = 0; iz < iy; iz++) {
2805 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2808 /* double the inner product and add carry */
2811 /* even columns have the square term in them */
2813 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
2817 W[ix] = (mp_digit)(_W & MP_MASK);
2819 /* make next carry */
2820 W1 = _W >> ((mp_word)DIGIT_BIT);
2825 b->used = a->used+a->used;
2830 for (ix = 0; ix < pa; ix++) {
2831 *tmpb++ = W[ix] & MP_MASK;
2834 /* clear unused digits [that existed in the old copy of c] */
2835 for (; ix < olduse; ix++) {
2841 #ifdef CYASSL_SMALL_STACK
2842 XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
2849 /* Fast (comba) multiplier
2851 * This is the fast column-array [comba] multiplier. It is
2852 * designed to compute the columns of the product first
2853 * then handle the carries afterwards. This has the effect
2854 * of making the nested loops that compute the columns very
2855 * simple and schedulable on super-scalar processors.
2857 * This has been modified to produce a variable number of
2858 * digits of output so if say only a half-product is required
2859 * you don't have to compute the upper half (a feature
2860 * required for fast Barrett reduction).
2862 * Based on Algorithm 14.12 on pp.595 of HAC.
2865 int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2867 int olduse, res, pa, ix, iz;
2868 #ifdef CYASSL_SMALL_STACK
2869 mp_digit* W; /* uses dynamic memory and slower */
2871 mp_digit W[MP_WARRAY];
2873 register mp_word _W;
2875 /* grow the destination as required */
2876 if (c->alloc < digs) {
2877 if ((res = mp_grow (c, digs)) != MP_OKAY) {
2882 /* number of output digits to produce */
2883 pa = MIN(digs, a->used + b->used);
2885 return MP_RANGE; /* TAO range check */
2887 #ifdef CYASSL_SMALL_STACK
2888 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2893 /* clear the carry */
2895 for (ix = 0; ix < pa; ix++) {
2898 mp_digit *tmpx, *tmpy;
2900 /* get offsets into the two bignums */
2901 ty = MIN(b->used-1, ix);
2904 /* setup temp aliases */
2908 /* this is the number of times the loop will iterrate, essentially
2909 while (tx++ < a->used && ty-- >= 0) { ... }
2911 iy = MIN(a->used-tx, ty+1);
2914 for (iz = 0; iz < iy; ++iz) {
2915 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2920 W[ix] = ((mp_digit)_W) & MP_MASK;
2922 /* make next carry */
2923 _W = _W >> ((mp_word)DIGIT_BIT);
2931 register mp_digit *tmpc;
2933 for (ix = 0; ix < pa+1; ix++) {
2934 /* now extract the previous digit [below the carry] */
2938 /* clear unused digits [that existed in the old copy of c] */
2939 for (; ix < olduse; ix++) {
2945 #ifdef CYASSL_SMALL_STACK
2946 XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
2953 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
2954 int s_mp_sqr (mp_int * a, mp_int * b)
2957 int res, ix, iy, pa;
2959 mp_digit u, tmpx, *tmpt;
2962 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
2966 /* default used is maximum possible size */
2969 for (ix = 0; ix < pa; ix++) {
2970 /* first calculate the digit at 2*ix */
2971 /* calculate double precision result */
2972 r = ((mp_word) t.dp[2*ix]) +
2973 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
2975 /* store lower part in result */
2976 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
2979 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2981 /* left hand side of A[ix] * A[iy] */
2984 /* alias for where to store the results */
2985 tmpt = t.dp + (2*ix + 1);
2987 for (iy = ix + 1; iy < pa; iy++) {
2988 /* first calculate the product */
2989 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
2991 /* now calculate the double precision result, note we use
2992 * addition instead of *2 since it's easier to optimize
2994 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
2996 /* store lower part */
2997 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3000 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3002 /* propagate upwards */
3003 while (u != ((mp_digit) 0)) {
3004 r = ((mp_word) *tmpt) + ((mp_word) u);
3005 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3006 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3017 /* multiplies |a| * |b| and only computes upto digs digits of result
3018 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
3019 * many digits of output are created.
3021 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3024 int res, pa, pb, ix, iy;
3027 mp_digit tmpx, *tmpt, *tmpy;
3029 /* can we use the fast multiplier? */
3030 if (((digs) < MP_WARRAY) &&
3031 MIN (a->used, b->used) <
3032 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3033 return fast_s_mp_mul_digs (a, b, c, digs);
3036 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
3041 /* compute the digits of the product directly */
3043 for (ix = 0; ix < pa; ix++) {
3044 /* set the carry to zero */
3047 /* limit ourselves to making digs digits of output */
3048 pb = MIN (b->used, digs - ix);
3050 /* setup some aliases */
3051 /* copy of the digit from a used within the nested loop */
3054 /* an alias for the destination shifted ix places */
3057 /* an alias for the digits of b */
3060 /* compute the columns of the output and propagate the carry */
3061 for (iy = 0; iy < pb; iy++) {
3062 /* compute the column as a mp_word */
3063 r = ((mp_word)*tmpt) +
3064 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
3067 /* the new column is the lower part of the result */
3068 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3070 /* get the carry word from the result */
3071 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3073 /* set carry if it is placed below digs */
3074 if (ix + iy < digs) {
3088 * shifts with subtractions when the result is greater than b.
3090 * The method is slightly modified to shift B unconditionally upto just under
3091 * the leading bit of b. This saves alot of multiple precision shifting.
3093 int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3097 /* how many bits of last digit does b use */
3098 bits = mp_count_bits (b) % DIGIT_BIT;
3101 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
3110 /* now compute C = A * B mod b */
3111 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3112 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
3115 if (mp_cmp_mag (a, b) != MP_LT) {
3116 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
3129 #define TAB_SIZE 256
3132 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3134 mp_int M[TAB_SIZE], res, mu;
3136 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3137 int (*redux)(mp_int*,mp_int*,mp_int*);
3139 /* find window size */
3140 x = mp_count_bits (X);
3143 } else if (x <= 36) {
3145 } else if (x <= 140) {
3147 } else if (x <= 450) {
3149 } else if (x <= 1303) {
3151 } else if (x <= 3529) {
3164 /* init first cell */
3165 if ((err = mp_init(&M[1])) != MP_OKAY) {
3169 /* now init the second half of the array */
3170 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3171 if ((err = mp_init(&M[x])) != MP_OKAY) {
3172 for (y = 1<<(winsize-1); y < x; y++) {
3180 /* create mu, used for Barrett reduction */
3181 if ((err = mp_init (&mu)) != MP_OKAY) {
3186 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3191 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3194 redux = mp_reduce_2k_l;
3199 * The M table contains powers of the base,
3200 * e.g. M[x] = G**x mod P
3202 * The first half of the table is not
3203 * computed though accept for M[0] and M[1]
3205 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3209 /* compute the value at M[1<<(winsize-1)] by squaring
3210 * M[1] (winsize-1) times
3212 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
3216 for (x = 0; x < (winsize - 1); x++) {
3218 if ((err = mp_sqr (&M[1 << (winsize - 1)],
3219 &M[1 << (winsize - 1)])) != MP_OKAY) {
3223 /* reduce modulo P */
3224 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
3229 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3230 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
3232 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3233 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3236 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3242 if ((err = mp_init (&res)) != MP_OKAY) {
3247 /* set initial mode and bit cnt */
3251 digidx = X->used - 1;
3256 /* grab next digit as required */
3257 if (--bitcnt == 0) {
3258 /* if digidx == -1 we are out of digits */
3262 /* read next digit and reset the bitcnt */
3263 buf = X->dp[digidx--];
3264 bitcnt = (int) DIGIT_BIT;
3267 /* grab the next msb from the exponent */
3268 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3269 buf <<= (mp_digit)1;
3271 /* if the bit is zero and mode == 0 then we ignore it
3272 * These represent the leading zero bits before the first 1 bit
3273 * in the exponent. Technically this opt is not required but it
3274 * does lower the # of trivial squaring/reductions used
3276 if (mode == 0 && y == 0) {
3280 /* if the bit is zero and mode == 1 then we square */
3281 if (mode == 1 && y == 0) {
3282 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3285 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3291 /* else we add it to the window */
3292 bitbuf |= (y << (winsize - ++bitcpy));
3295 if (bitcpy == winsize) {
3296 /* ok window is filled so square as required and multiply */
3298 for (x = 0; x < winsize; x++) {
3299 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3302 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3308 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3311 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3315 /* empty window and reset */
3322 /* if bits remain then square/multiply */
3323 if (mode == 2 && bitcpy > 0) {
3324 /* square then multiply if the bit is set */
3325 for (x = 0; x < bitcpy; x++) {
3326 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3329 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3334 if ((bitbuf & (1 << winsize)) != 0) {
3336 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3339 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3348 LBL_RES:mp_clear (&res);
3349 LBL_MU:mp_clear (&mu);
3352 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3359 /* pre-calculate the value required for Barrett reduction
3360 * For a given modulus "b" it calulates the value required in "a"
3362 int mp_reduce_setup (mp_int * a, mp_int * b)
3366 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3369 return mp_div (a, b, a, NULL);
3373 /* reduces x mod m, assumes 0 < x < m**2, mu is
3374 * precomputed via mp_reduce_setup.
3375 * From HAC pp.604 Algorithm 14.42
3377 int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
3380 int res, um = m->used;
3383 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3387 /* q1 = x / b**(k-1) */
3388 mp_rshd (&q, um - 1);
3390 /* according to HAC this optimization is ok */
3391 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3392 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3396 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
3397 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3400 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
3401 if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3412 /* q3 = q2 / b**(k+1) */
3413 mp_rshd (&q, um + 1);
3415 /* x = x mod b**(k+1), quick (no division) */
3416 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3420 /* q = q * m mod b**(k+1), quick (no division) */
3421 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3426 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3430 /* If x < 0, add b**(k+1) to it */
3431 if (mp_cmp_d (x, 0) == MP_LT) {
3433 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3435 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3439 /* Back off if it's too big */
3440 while (mp_cmp (x, m) != MP_LT) {
3441 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3453 /* reduces a modulo n where n is of the form 2**p - d
3454 This differs from reduce_2k since "d" can be larger
3455 than a single digit.
3457 int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
3462 if ((res = mp_init(&q)) != MP_OKAY) {
3466 p = mp_count_bits(n);
3468 /* q = a/2**p, a = a mod 2**p */
3469 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3474 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
3479 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3483 if (mp_cmp_mag(a, n) != MP_LT) {
3494 /* determines the setup value */
3495 int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
3500 if ((res = mp_init(&tmp)) != MP_OKAY) {
3504 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
3508 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
3518 /* multiplies |a| * |b| and does not compute the lower digs digits
3519 * [meant to get the higher part of the product]
3522 s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3525 int res, pa, pb, ix, iy;
3528 mp_digit tmpx, *tmpt, *tmpy;
3530 /* can we use the fast multiplier? */
3531 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
3532 if (((a->used + b->used + 1) < MP_WARRAY)
3533 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3534 return fast_s_mp_mul_high_digs (a, b, c, digs);
3538 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
3541 t.used = a->used + b->used + 1;
3545 for (ix = 0; ix < pa; ix++) {
3546 /* clear the carry */
3549 /* left hand side of A[ix] * B[iy] */
3552 /* alias to the address of where the digits will be stored */
3553 tmpt = &(t.dp[digs]);
3555 /* alias for where to read the right hand side from */
3556 tmpy = b->dp + (digs - ix);
3558 for (iy = digs - ix; iy < pb; iy++) {
3559 /* calculate the double precision result */
3560 r = ((mp_word)*tmpt) +
3561 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
3564 /* get the lower part */
3565 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3567 /* carry the carry */
3568 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3579 /* this is a modified version of fast_s_mul_digs that only produces
3580 * output digits *above* digs. See the comments for fast_s_mul_digs
3581 * to see how it works.
3583 * This is used in the Barrett reduction since for one of the multiplications
3584 * only the higher digits were needed. This essentially halves the work.
3586 * Based on Algorithm 14.12 on pp.595 of HAC.
3588 int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3590 int olduse, res, pa, ix, iz;
3591 #ifdef CYASSL_SMALL_STACK
3592 mp_digit* W; /* uses dynamic memory and slower */
3594 mp_digit W[MP_WARRAY];
3598 /* grow the destination as required */
3599 pa = a->used + b->used;
3600 if (c->alloc < pa) {
3601 if ((res = mp_grow (c, pa)) != MP_OKAY) {
3607 return MP_RANGE; /* TAO range check */
3609 #ifdef CYASSL_SMALL_STACK
3610 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
3615 /* number of output digits to produce */
3616 pa = a->used + b->used;
3618 for (ix = digs; ix < pa; ix++) {
3620 mp_digit *tmpx, *tmpy;
3622 /* get offsets into the two bignums */
3623 ty = MIN(b->used-1, ix);
3626 /* setup temp aliases */
3630 /* this is the number of times the loop will iterrate, essentially its
3631 while (tx++ < a->used && ty-- >= 0) { ... }
3633 iy = MIN(a->used-tx, ty+1);
3636 for (iz = 0; iz < iy; iz++) {
3637 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3641 W[ix] = ((mp_digit)_W) & MP_MASK;
3643 /* make next carry */
3644 _W = _W >> ((mp_word)DIGIT_BIT);
3652 register mp_digit *tmpc;
3654 tmpc = c->dp + digs;
3655 for (ix = digs; ix <= pa; ix++) {
3656 /* now extract the previous digit [below the carry] */
3660 /* clear unused digits [that existed in the old copy of c] */
3661 for (; ix < olduse; ix++) {
3667 #ifdef CYASSL_SMALL_STACK
3668 XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
3675 /* set a 32-bit const */
3676 int mp_set_int (mp_int * a, unsigned long b)
3682 /* set four bits at a time */
3683 for (x = 0; x < 8; x++) {
3684 /* shift the number up four bits */
3685 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3689 /* OR in the top four bits of the source */
3690 a->dp[0] |= (b >> 28) & 15;
3692 /* shift the source up to the next four bits */
3695 /* ensure that digits are not clamped off */
3703 #if defined(CYASSL_KEY_GEN) || defined(HAVE_ECC)
3705 /* c = a * a (mod b) */
3706 int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
3711 if ((res = mp_init (&t)) != MP_OKAY) {
3715 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3719 res = mp_mod (&t, b, c);
3727 #if defined(CYASSL_KEY_GEN) || defined(HAVE_ECC) || !defined(NO_PWDBASED)
3729 /* single digit addition */
3730 int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
3732 int res, ix, oldused;
3733 mp_digit *tmpa, *tmpc, mu;
3735 /* grow c as required */
3736 if (c->alloc < a->used + 1) {
3737 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3742 /* if a is negative and |a| >= b, call c = |a| - b */
3743 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
3744 /* temporarily fix sign of a */
3748 res = mp_sub_d(a, b, c);
3751 a->sign = c->sign = MP_NEG;
3759 /* old number of used digits in c */
3762 /* sign always positive */
3768 /* destination alias */
3771 /* if a is positive */
3772 if (a->sign == MP_ZPOS) {
3773 /* add digit, after this we're propagating
3776 *tmpc = *tmpa++ + b;
3777 mu = *tmpc >> DIGIT_BIT;
3780 /* now handle rest of the digits */
3781 for (ix = 1; ix < a->used; ix++) {
3782 *tmpc = *tmpa++ + mu;
3783 mu = *tmpc >> DIGIT_BIT;
3786 /* set final carry */
3791 c->used = a->used + 1;
3793 /* a was negative and |a| < b */
3796 /* the result is a single digit */
3798 *tmpc++ = b - a->dp[0];
3803 /* setup count so the clearing of oldused
3804 * can fall through correctly
3809 /* now zero to oldused */
3810 while (ix++ < oldused) {
3819 /* single digit subtraction */
3820 int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3822 mp_digit *tmpa, *tmpc, mu;
3823 int res, ix, oldused;
3825 /* grow c as required */
3826 if (c->alloc < a->used + 1) {
3827 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3832 /* if a is negative just do an unsigned
3833 * addition [with fudged signs]
3835 if (a->sign == MP_NEG) {
3837 res = mp_add_d(a, b, c);
3838 a->sign = c->sign = MP_NEG;
3851 /* if a <= b simply fix the single digit */
3852 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3854 *tmpc++ = b - *tmpa;
3860 /* negative/1digit */
3868 /* subtract first digit */
3869 *tmpc = *tmpa++ - b;
3870 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3873 /* handle rest of the digits */
3874 for (ix = 1; ix < a->used; ix++) {
3875 *tmpc = *tmpa++ - mu;
3876 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3881 /* zero excess digits */
3882 while (ix++ < oldused) {
3889 #endif /* CYASSL_KEY_GEN || HAVE_ECC */
3892 #ifdef CYASSL_KEY_GEN
3894 int mp_cnt_lsb(mp_int *a);
3896 static int s_is_power_of_two(mp_digit b, int *p)
3900 /* fast return if no power of two */
3901 if ((b==0) || (b & (b-1))) {
3905 for (x = 0; x < DIGIT_BIT; x++) {
3906 if (b == (((mp_digit)1)<<x)) {
3914 /* single digit division (based on routine from MPI) */
3915 static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
3922 /* cannot divide by zero */
3928 if (b == 1 || mp_iszero(a) == 1) {
3933 return mp_copy(a, c);
3938 /* power of two ? */
3939 if (s_is_power_of_two(b, &ix) == 1) {
3941 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
3944 return mp_div_2d(a, ix, c, NULL);
3949 #ifdef BN_MP_DIV_3_C
3952 return mp_div_3(a, c, d);
3956 /* no easy answer [c'est la vie]. Just division */
3957 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
3964 for (ix = a->used - 1; ix >= 0; ix--) {
3965 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
3968 t = (mp_digit)(w / b);
3969 w -= ((mp_word)t) * ((mp_word)b);
3973 q.dp[ix] = (mp_digit)t;
3990 static int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
3992 return mp_div_d(a, b, NULL, c);
3996 const mp_digit ltm_prime_tab[] = {
3997 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3998 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3999 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4000 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4003 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4004 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4005 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4006 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4008 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4009 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4010 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4011 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4012 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4013 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4014 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4015 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4017 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4018 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4019 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4020 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4021 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4022 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4023 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4024 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4026 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4027 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4028 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4029 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4030 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4031 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4032 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4033 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4038 /* Miller-Rabin test of "a" to the base of "b" as described in
4039 * HAC pp. 139 Algorithm 4.24
4041 * Sets result to 0 if definitely composite or 1 if probably prime.
4042 * Randomly the chance of error is no more than 1/4 and often
4045 static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
4054 if (mp_cmp_d(b, 1) != MP_GT) {
4058 /* get n1 = a - 1 */
4059 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4062 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4066 /* set 2**s * r = n1 */
4067 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4071 /* count the number of least significant bits
4076 /* now divide n - 1 by 2**s */
4077 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4081 /* compute y = b**r mod a */
4082 if ((err = mp_init (&y)) != MP_OKAY) {
4085 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4089 /* if y != 1 and y != n1 do */
4090 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4092 /* while j <= s-1 and y != n1 */
4093 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4094 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4098 /* if y == 1 then composite */
4099 if (mp_cmp_d (&y, 1) == MP_EQ) {
4106 /* if y != n1 then composite */
4107 if (mp_cmp (&y, &n1) != MP_EQ) {
4112 /* probably prime now */
4114 LBL_Y:mp_clear (&y);
4115 LBL_R:mp_clear (&r);
4116 LBL_N1:mp_clear (&n1);
4121 /* determines if an integers is divisible by one
4122 * of the first PRIME_SIZE primes or not
4124 * sets result to 0 if not, 1 if yes
4126 static int mp_prime_is_divisible (mp_int * a, int *result)
4131 /* default to not */
4134 for (ix = 0; ix < PRIME_SIZE; ix++) {
4135 /* what is a mod LBL_prime_tab[ix] */
4136 if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4140 /* is the residue zero? */
4152 * Sets result to 1 if probably prime, 0 otherwise
4154 int mp_prime_is_prime (mp_int * a, int t, int *result)
4162 /* valid value of t? */
4163 if (t <= 0 || t > PRIME_SIZE) {
4167 /* is the input equal to one of the primes in the table? */
4168 for (ix = 0; ix < PRIME_SIZE; ix++) {
4169 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4175 /* first perform trial division */
4176 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4180 /* return if it was trivially divisible */
4181 if (res == MP_YES) {
4185 /* now perform the miller-rabin rounds */
4186 if ((err = mp_init (&b)) != MP_OKAY) {
4190 for (ix = 0; ix < t; ix++) {
4192 mp_set (&b, ltm_prime_tab[ix]);
4194 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4203 /* passed the test */
4205 LBL_B:mp_clear (&b);
4210 /* computes least common multiple as |a*b|/(a, b) */
4211 int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
4217 if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
4221 /* t1 = get the GCD of the two inputs */
4222 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
4226 /* divide the smallest by the GCD */
4227 if (mp_cmp_mag(a, b) == MP_LT) {
4228 /* store quotient in t2 such that t2 * b is the LCM */
4229 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
4232 res = mp_mul(b, &t2, c);
4234 /* store quotient in t2 such that t2 * a is the LCM */
4235 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
4238 res = mp_mul(a, &t2, c);
4241 /* fix the sign to positive */
4251 static const int lnz[16] = {
4252 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4255 /* Counts the number of lsbs which are zero before the first zero bit */
4256 int mp_cnt_lsb(mp_int *a)
4262 if (mp_iszero(a) == 1) {
4266 /* scan lower digits until non-zero */
4267 for (x = 0; x < a->used && a->dp[x] == 0; x++);
4271 /* now scan this digit until a 1 is found */
4283 /* Greatest Common Divisor using the binary method */
4284 int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
4287 int k, u_lsb, v_lsb, res;
4289 /* either zero than gcd is the largest */
4290 if (mp_iszero (a) == MP_YES) {
4291 return mp_abs (b, c);
4293 if (mp_iszero (b) == MP_YES) {
4294 return mp_abs (a, c);
4297 /* get copies of a and b we can modify */
4298 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
4302 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
4306 /* must be positive for the remainder of the algorithm */
4307 u.sign = v.sign = MP_ZPOS;
4309 /* B1. Find the common power of two for u and v */
4310 u_lsb = mp_cnt_lsb(&u);
4311 v_lsb = mp_cnt_lsb(&v);
4312 k = MIN(u_lsb, v_lsb);
4315 /* divide the power of two out */
4316 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
4320 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
4325 /* divide any remaining factors of two out */
4327 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
4333 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
4338 while (mp_iszero(&v) == 0) {
4339 /* make sure v is the largest */
4340 if (mp_cmp_mag(&u, &v) == MP_GT) {
4341 /* swap u and v to make sure v is >= u */
4345 /* subtract smallest from largest */
4346 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
4350 /* Divide out all factors of two */
4351 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
4356 /* multiply by 2**k which we divided out at the beginning */
4357 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
4362 LBL_V:mp_clear (&u);
4363 LBL_U:mp_clear (&v);
4369 #endif /* CYASSL_KEY_GEN */
4374 /* chars used in radix conversions */
4375 const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
4377 /* read a string [ASCII] in a given radix */
4378 int mp_read_radix (mp_int * a, const char *str, int radix)
4383 /* zero the digit bignum */
4386 /* make sure the radix is ok */
4387 if (radix < 2 || radix > 64) {
4391 /* if the leading digit is a
4392 * minus set the sign to negative.
4401 /* set the integer to the default of zero */
4404 /* process each digit of the string */
4406 /* if the radix < 36 the conversion is case insensitive
4407 * this allows numbers like 1AB and 1ab to represent the same value
4410 ch = (char) ((radix < 36) ? XTOUPPER(*str) : *str);
4411 for (y = 0; y < 64; y++) {
4412 if (ch == mp_s_rmap[y]) {
4417 /* if the char was found in the map
4418 * and is less than the given radix add it
4419 * to the number, otherwise exit the loop.
4422 if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
4425 if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
4434 /* set the sign only if a != 0 */
4435 if (mp_iszero(a) != 1) {
4441 #endif /* HAVE_ECC */
4443 #endif /* USE_FAST_MATH */