1 #include "tommath_private.h"
2 #ifdef BN_S_MP_TOOM_MUL_C
3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
4 /* SPDX-License-Identifier: Unlicense */
6 /* multiplication using the Toom-Cook 3-way algorithm
8 * Much more complicated than Karatsuba but has a lower
9 * asymptotic running time of O(N**1.464). This algorithm is
10 * only particularly useful on VERY large inputs
11 * (we're talking 1000s of digits here...).
15 This file contains code from J. Arndt's book "Matters Computational"
16 and the accompanying FXT-library with permission of the author.
22 Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae."
23 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007.
25 The interpolation from above needed one temporary variable more
26 than the interpolation here:
28 Bodrato, Marco, and Alberto Zanoni. "What about Toom-Cook matrices optimality."
29 Centro Vito Volterra Universita di Roma Tor Vergata (2006)
32 mp_err s_mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c)
34 mp_int S1, S2, T1, a0, a1, a2, b0, b1, b2;
39 if ((err = mp_init_multi(&S1, &S2, &T1, NULL)) != MP_OKAY) {
44 B = MP_MIN(a->used, b->used) / 3;
46 /** a = a2 * x^2 + a1 * x + a0; */
47 if ((err = mp_init_size(&a0, B)) != MP_OKAY) goto LBL_ERRa0;
49 for (count = 0; count < B; count++) {
50 a0.dp[count] = a->dp[count];
54 if ((err = mp_init_size(&a1, B)) != MP_OKAY) goto LBL_ERRa1;
55 for (; count < (2 * B); count++) {
56 a1.dp[count - B] = a->dp[count];
60 if ((err = mp_init_size(&a2, B + (a->used - (3 * B)))) != MP_OKAY) goto LBL_ERRa2;
61 for (; count < a->used; count++) {
62 a2.dp[count - (2 * B)] = a->dp[count];
67 /** b = b2 * x^2 + b1 * x + b0; */
68 if ((err = mp_init_size(&b0, B)) != MP_OKAY) goto LBL_ERRb0;
69 for (count = 0; count < B; count++) {
70 b0.dp[count] = b->dp[count];
74 if ((err = mp_init_size(&b1, B)) != MP_OKAY) goto LBL_ERRb1;
75 for (; count < (2 * B); count++) {
76 b1.dp[count - B] = b->dp[count];
80 if ((err = mp_init_size(&b2, B + (b->used - (3 * B)))) != MP_OKAY) goto LBL_ERRb2;
81 for (; count < b->used; count++) {
82 b2.dp[count - (2 * B)] = b->dp[count];
87 /** \\ S1 = (a2+a1+a0) * (b2+b1+b0); */
89 if ((err = mp_add(&a2, &a1, &T1)) != MP_OKAY) goto LBL_ERR;
92 if ((err = mp_add(&T1, &a0, &S2)) != MP_OKAY) goto LBL_ERR;
95 if ((err = mp_add(&b2, &b1, c)) != MP_OKAY) goto LBL_ERR;
98 if ((err = mp_add(c, &b0, &S1)) != MP_OKAY) goto LBL_ERR;
101 if ((err = mp_mul(&S1, &S2, &S1)) != MP_OKAY) goto LBL_ERR;
103 /** \\S2 = (4*a2+2*a1+a0) * (4*b2+2*b1+b0); */
105 if ((err = mp_add(&T1, &a2, &T1)) != MP_OKAY) goto LBL_ERR;
108 if ((err = mp_mul_2(&T1, &T1)) != MP_OKAY) goto LBL_ERR;
111 if ((err = mp_add(&T1, &a0, &T1)) != MP_OKAY) goto LBL_ERR;
114 if ((err = mp_add(c, &b2, c)) != MP_OKAY) goto LBL_ERR;
117 if ((err = mp_mul_2(c, c)) != MP_OKAY) goto LBL_ERR;
120 if ((err = mp_add(c, &b0, c)) != MP_OKAY) goto LBL_ERR;
123 if ((err = mp_mul(&T1, c, &S2)) != MP_OKAY) goto LBL_ERR;
125 /** \\S3 = (a2-a1+a0) * (b2-b1+b0); */
127 if ((err = mp_sub(&a2, &a1, &a1)) != MP_OKAY) goto LBL_ERR;
130 if ((err = mp_add(&a1, &a0, &a1)) != MP_OKAY) goto LBL_ERR;
133 if ((err = mp_sub(&b2, &b1, &b1)) != MP_OKAY) goto LBL_ERR;
136 if ((err = mp_add(&b1, &b0, &b1)) != MP_OKAY) goto LBL_ERR;
139 if ((err = mp_mul(&a1, &b1, &a1)) != MP_OKAY) goto LBL_ERR;
142 if ((err = mp_mul(&a2, &b2, &b1)) != MP_OKAY) goto LBL_ERR;
144 /** \\S2 = (S2 - S3)/3; */
146 if ((err = mp_sub(&S2, &a1, &S2)) != MP_OKAY) goto LBL_ERR;
148 /** S2 = S2 / 3; \\ this is an exact division */
149 if ((err = mp_div_3(&S2, &S2, NULL)) != MP_OKAY) goto LBL_ERR;
152 if ((err = mp_sub(&S1, &a1, &a1)) != MP_OKAY) goto LBL_ERR;
155 if ((err = mp_div_2(&a1, &a1)) != MP_OKAY) goto LBL_ERR;
158 if ((err = mp_mul(&a0, &b0, &a0)) != MP_OKAY) goto LBL_ERR;
161 if ((err = mp_sub(&S1, &a0, &S1)) != MP_OKAY) goto LBL_ERR;
164 if ((err = mp_sub(&S2, &S1, &S2)) != MP_OKAY) goto LBL_ERR;
167 if ((err = mp_div_2(&S2, &S2)) != MP_OKAY) goto LBL_ERR;
170 if ((err = mp_sub(&S1, &a1, &S1)) != MP_OKAY) goto LBL_ERR;
173 if ((err = mp_sub(&S1, &b1, &S1)) != MP_OKAY) goto LBL_ERR;
176 if ((err = mp_mul_2(&b1, &T1)) != MP_OKAY) goto LBL_ERR;
179 if ((err = mp_sub(&S2, &T1, &S2)) != MP_OKAY) goto LBL_ERR;
182 if ((err = mp_sub(&a1, &S2, &a1)) != MP_OKAY) goto LBL_ERR;
185 /** P = b1*x^4+ S2*x^3+ S1*x^2+ a1*x + a0; */
186 if ((err = mp_lshd(&b1, 4 * B)) != MP_OKAY) goto LBL_ERR;
187 if ((err = mp_lshd(&S2, 3 * B)) != MP_OKAY) goto LBL_ERR;
188 if ((err = mp_add(&b1, &S2, &b1)) != MP_OKAY) goto LBL_ERR;
189 if ((err = mp_lshd(&S1, 2 * B)) != MP_OKAY) goto LBL_ERR;
190 if ((err = mp_add(&b1, &S1, &b1)) != MP_OKAY) goto LBL_ERR;
191 if ((err = mp_lshd(&a1, 1 * B)) != MP_OKAY) goto LBL_ERR;
192 if ((err = mp_add(&b1, &a1, &b1)) != MP_OKAY) goto LBL_ERR;
193 if ((err = mp_add(&b1, &a0, c)) != MP_OKAY) goto LBL_ERR;
211 mp_clear_multi(&S1, &S2, &T1, NULL);