Line data Source code
1 : #include <tommath.h>
2 : #ifdef BN_FAST_S_MP_SQR_C
3 : /* LibTomMath, multiple-precision integer library -- Tom St Denis
4 : *
5 : * LibTomMath is a library that provides multiple-precision
6 : * integer arithmetic as well as number theoretic functionality.
7 : *
8 : * The library was designed directly after the MPI library by
9 : * Michael Fromberger but has been written from scratch with
10 : * additional optimizations in place.
11 : *
12 : * The library is free for all purposes without any express
13 : * guarantee it works.
14 : *
15 : * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
16 : */
17 :
18 : /* the jist of squaring...
19 : * you do like mult except the offset of the tmpx [one that
20 : * starts closer to zero] can't equal the offset of tmpy.
21 : * So basically you set up iy like before then you min it with
22 : * (ty-tx) so that it never happens. You double all those
23 : * you add in the inner loop
24 :
25 : After that loop you do the squares and add them in.
26 : */
27 :
28 626934 : int fast_s_mp_sqr (mp_int * a, mp_int * b)
29 : {
30 : int olduse, res, pa, ix, iz;
31 : mp_digit W[MP_WARRAY], *tmpx;
32 : mp_word W1;
33 :
34 : /* grow the destination as required */
35 626934 : pa = a->used + a->used;
36 626934 : if (b->alloc < pa) {
37 876 : if ((res = mp_grow (b, pa)) != MP_OKAY) {
38 0 : return res;
39 : }
40 : }
41 :
42 : /* number of output digits to produce */
43 593802 : W1 = 0;
44 35389756 : for (ix = 0; ix < pa; ix++) {
45 : int tx, ty, iy;
46 : mp_word _W;
47 : mp_digit *tmpy;
48 :
49 : /* clear counter */
50 34795954 : _W = 0;
51 :
52 : /* get offsets into the two bignums */
53 34795954 : ty = MIN(a->used-1, ix);
54 34795954 : tx = ix - ty;
55 :
56 : /* setup temp aliases */
57 34795954 : tmpx = a->dp + tx;
58 34795954 : tmpy = a->dp + ty;
59 :
60 : /* this is the number of times the loop will iterrate, essentially
61 : while (tx++ < a->used && ty-- >= 0) { ... }
62 : */
63 34795954 : iy = MIN(a->used-tx, ty+1);
64 :
65 : /* now for squaring tx can never equal ty
66 : * we halve the distance since they approach at a rate of 2x
67 : * and we have to round because odd cases need to be executed
68 : */
69 34795954 : iy = MIN(iy, (ty-tx+1)>>1);
70 :
71 : /* execute loop */
72 298295206 : for (iz = 0; iz < iy; iz++) {
73 263499252 : _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
74 : }
75 :
76 : /* double the inner product and add carry */
77 34795954 : _W = _W + _W + W1;
78 :
79 : /* even columns have the square term in them */
80 34795954 : if ((ix&1) == 0) {
81 17397977 : _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
82 : }
83 :
84 : /* store it */
85 34795954 : W[ix] = (mp_digit)(_W & MP_MASK);
86 :
87 : /* make next carry */
88 34795954 : W1 = _W >> ((mp_word)DIGIT_BIT);
89 : }
90 :
91 : /* setup dest */
92 626934 : olduse = b->used;
93 626934 : b->used = a->used+a->used;
94 :
95 : {
96 : mp_digit *tmpb;
97 626934 : tmpb = b->dp;
98 35422888 : for (ix = 0; ix < pa; ix++) {
99 34795954 : *tmpb++ = W[ix] & MP_MASK;
100 : }
101 :
102 : /* clear unused digits [that existed in the old copy of c] */
103 593802 : for (; ix < olduse; ix++) {
104 0 : *tmpb++ = 0;
105 : }
106 : }
107 626934 : mp_clamp (b);
108 626934 : return MP_OKAY;
109 : }
110 : #endif
111 :
112 : /* $Source: /cvs/libtom/libtommath/bn_fast_s_mp_sqr.c,v $ */
113 : /* $Revision: 1.4 $ */
114 : /* $Date: 2006/12/28 01:25:13 $ */
|