Line data Source code
1 : #include <tommath.h>
2 : #ifdef BN_MP_KARATSUBA_MUL_C
3 : /* LibTomMath, multiple-precision integer library -- Tom St Denis
4 : *
5 : * LibTomMath is a library that provides multiple-precision
6 : * integer arithmetic as well as number theoretic functionality.
7 : *
8 : * The library was designed directly after the MPI library by
9 : * Michael Fromberger but has been written from scratch with
10 : * additional optimizations in place.
11 : *
12 : * The library is free for all purposes without any express
13 : * guarantee it works.
14 : *
15 : * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
16 : */
17 :
18 : /* c = |a| * |b| using Karatsuba Multiplication using
19 : * three half size multiplications
20 : *
21 : * Let B represent the radix [e.g. 2**DIGIT_BIT] and
22 : * let n represent half of the number of digits in
23 : * the min(a,b)
24 : *
25 : * a = a1 * B**n + a0
26 : * b = b1 * B**n + b0
27 : *
28 : * Then, a * b =>
29 : a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
30 : *
31 : * Note that a1b1 and a0b0 are used twice and only need to be
32 : * computed once. So in total three half size (half # of
33 : * digit) multiplications are performed, a0b0, a1b1 and
34 : * (a1+b1)(a0+b0)
35 : *
36 : * Note that a multiplication of half the digits requires
37 : * 1/4th the number of single precision multiplications so in
38 : * total after one call 25% of the single precision multiplications
39 : * are saved. Note also that the call to mp_mul can end up back
40 : * in this function if the a0, a1, b0, or b1 are above the threshold.
41 : * This is known as divide-and-conquer and leads to the famous
42 : * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
43 : * the standard O(N**2) that the baseline/comba methods use.
44 : * Generally though the overhead of this method doesn't pay off
45 : * until a certain size (N ~ 80) is reached.
46 : */
47 720 : int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
48 : {
49 : mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
50 : int B, err;
51 :
52 : /* default the return code to an error */
53 720 : err = MP_MEM;
54 :
55 : /* min # of digits */
56 720 : B = MIN (a->used, b->used);
57 :
58 : /* now divide in two */
59 720 : B = B >> 1;
60 :
61 : /* init copy all the temps */
62 720 : if (mp_init_size (&x0, B) != MP_OKAY)
63 0 : goto ERR;
64 720 : if (mp_init_size (&x1, a->used - B) != MP_OKAY)
65 0 : goto X0;
66 720 : if (mp_init_size (&y0, B) != MP_OKAY)
67 0 : goto X1;
68 720 : if (mp_init_size (&y1, b->used - B) != MP_OKAY)
69 0 : goto Y0;
70 :
71 : /* init temps */
72 720 : if (mp_init_size (&t1, B * 2) != MP_OKAY)
73 0 : goto Y1;
74 720 : if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
75 0 : goto T1;
76 720 : if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
77 0 : goto X0Y0;
78 :
79 : /* now shift the digits */
80 720 : x0.used = y0.used = B;
81 720 : x1.used = a->used - B;
82 720 : y1.used = b->used - B;
83 :
84 : {
85 : register int x;
86 : register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
87 :
88 : /* we copy the digits directly instead of using higher level functions
89 : * since we also need to shift the digits
90 : */
91 720 : tmpa = a->dp;
92 720 : tmpb = b->dp;
93 :
94 720 : tmpx = x0.dp;
95 720 : tmpy = y0.dp;
96 49680 : for (x = 0; x < B; x++) {
97 48960 : *tmpx++ = *tmpa++;
98 48960 : *tmpy++ = *tmpb++;
99 : }
100 :
101 720 : tmpx = x1.dp;
102 50400 : for (x = B; x < a->used; x++) {
103 49680 : *tmpx++ = *tmpa++;
104 : }
105 :
106 720 : tmpy = y1.dp;
107 50400 : for (x = B; x < b->used; x++) {
108 49680 : *tmpy++ = *tmpb++;
109 : }
110 : }
111 :
112 : /* only need to clamp the lower words since by definition the
113 : * upper words x1/y1 must have a known number of digits
114 : */
115 720 : mp_clamp (&x0);
116 720 : mp_clamp (&y0);
117 :
118 : /* now calc the products x0y0 and x1y1 */
119 : /* after this x0 is no longer required, free temp [x0==t2]! */
120 720 : if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
121 0 : goto X1Y1; /* x0y0 = x0*y0 */
122 720 : if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
123 0 : goto X1Y1; /* x1y1 = x1*y1 */
124 :
125 : /* now calc x1+x0 and y1+y0 */
126 720 : if (s_mp_add (&x1, &x0, &t1) != MP_OKAY)
127 0 : goto X1Y1; /* t1 = x1 - x0 */
128 720 : if (s_mp_add (&y1, &y0, &x0) != MP_OKAY)
129 0 : goto X1Y1; /* t2 = y1 - y0 */
130 720 : if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
131 0 : goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */
132 :
133 : /* add x0y0 */
134 720 : if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
135 0 : goto X1Y1; /* t2 = x0y0 + x1y1 */
136 720 : if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY)
137 0 : goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
138 :
139 : /* shift by B */
140 720 : if (mp_lshd (&t1, B) != MP_OKAY)
141 0 : goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
142 720 : if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
143 0 : goto X1Y1; /* x1y1 = x1y1 << 2*B */
144 :
145 720 : if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
146 0 : goto X1Y1; /* t1 = x0y0 + t1 */
147 720 : if (mp_add (&t1, &x1y1, c) != MP_OKAY)
148 0 : goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
149 :
150 : /* Algorithm succeeded set the return code to MP_OKAY */
151 720 : err = MP_OKAY;
152 :
153 720 : X1Y1:mp_clear (&x1y1);
154 720 : X0Y0:mp_clear (&x0y0);
155 720 : T1:mp_clear (&t1);
156 720 : Y1:mp_clear (&y1);
157 720 : Y0:mp_clear (&y0);
158 720 : X1:mp_clear (&x1);
159 720 : X0:mp_clear (&x0);
160 720 : ERR:
161 720 : return err;
162 : }
163 : #endif
164 :
165 : /* $Source: /cvs/libtom/libtommath/bn_mp_karatsuba_mul.c,v $ */
166 : /* $Revision: 1.6 $ */
167 : /* $Date: 2006/12/28 01:25:13 $ */
|