util/src/TclTk/tcl8.6.12/libtommath/bn_s_mp_toom_mul.c

   1 #include "tommath_private.h"
   2 #ifdef BN_S_MP_TOOM_MUL_C
   3 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
   4 /* SPDX-License-Identifier: Unlicense */
   5
   6 /* multiplication using the Toom-Cook 3-way algorithm
   7  *
   8  * Much more complicated than Karatsuba but has a lower
   9  * asymptotic running time of O(N**1.464).  This algorithm is
  10  * only particularly useful on VERY large inputs
  11  * (we're talking 1000s of digits here...).
  12 */
  13
  14 /*
  15    This file contains code from J. Arndt's book  "Matters Computational"
  16    and the accompanying FXT-library with permission of the author.
  17 */
  18
  19 /*
  20    Setup from
  21
  22      Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae."
  23      18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007.
  24
  25    The interpolation from above needed one temporary variable more
  26    than the interpolation here:
  27
  28      Bodrato, Marco, and Alberto Zanoni. "What about Toom-Cook matrices optimality."
  29      Centro Vito Volterra Universita di Roma Tor Vergata (2006)
  30 */
  31
  32 mp_err s_mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c)
  33 {
  34    mp_int S1, S2, T1, a0, a1, a2, b0, b1, b2;
  35    int B, count;
  36    mp_err err;
  37
  38    /* init temps */
  39    if ((err = mp_init_multi(&S1, &S2, &T1, NULL)) != MP_OKAY) {
  40       return err;
  41    }
  42
  43    /* B */
  44    B = MP_MIN(a->used, b->used) / 3;
  45
  46    /** a = a2 * x^2 + a1 * x + a0; */
  47    if ((err = mp_init_size(&a0, B)) != MP_OKAY)                   goto LBL_ERRa0;
  48
  49    for (count = 0; count < B; count++) {
  50       a0.dp[count] = a->dp[count];
  51       a0.used++;
  52    }
  53    mp_clamp(&a0);
  54    if ((err = mp_init_size(&a1, B)) != MP_OKAY)                   goto LBL_ERRa1;
  55    for (; count < (2 * B); count++) {
  56       a1.dp[count - B] = a->dp[count];
  57       a1.used++;
  58    }
  59    mp_clamp(&a1);
  60    if ((err = mp_init_size(&a2, B + (a->used - (3 * B)))) != MP_OKAY) goto LBL_ERRa2;
  61    for (; count < a->used; count++) {
  62       a2.dp[count - (2 * B)] = a->dp[count];
  63       a2.used++;
  64    }
  65    mp_clamp(&a2);
  66
  67    /** b = b2 * x^2 + b1 * x + b0; */
  68    if ((err = mp_init_size(&b0, B)) != MP_OKAY)                   goto LBL_ERRb0;
  69    for (count = 0; count < B; count++) {
  70       b0.dp[count] = b->dp[count];
  71       b0.used++;
  72    }
  73    mp_clamp(&b0);
  74    if ((err = mp_init_size(&b1, B)) != MP_OKAY)                   goto LBL_ERRb1;
  75    for (; count < (2 * B); count++) {
  76       b1.dp[count - B] = b->dp[count];
  77       b1.used++;
  78    }
  79    mp_clamp(&b1);
  80    if ((err = mp_init_size(&b2, B + (b->used - (3 * B)))) != MP_OKAY) goto LBL_ERRb2;
  81    for (; count < b->used; count++) {
  82       b2.dp[count - (2 * B)] = b->dp[count];
  83       b2.used++;
  84    }
  85    mp_clamp(&b2);
  86
  87    /** \\ S1 = (a2+a1+a0) * (b2+b1+b0); */
  88    /** T1 = a2 + a1; */
  89    if ((err = mp_add(&a2, &a1, &T1)) != MP_OKAY)                  goto LBL_ERR;
  90
  91    /** S2 = T1 + a0; */
  92    if ((err = mp_add(&T1, &a0, &S2)) != MP_OKAY)                  goto LBL_ERR;
  93
  94    /** c = b2 + b1; */
  95    if ((err = mp_add(&b2, &b1, c)) != MP_OKAY)                    goto LBL_ERR;
  96
  97    /** S1 = c + b0; */
  98    if ((err = mp_add(c, &b0, &S1)) != MP_OKAY)                    goto LBL_ERR;
  99
 100    /** S1 = S1 * S2; */
 101    if ((err = mp_mul(&S1, &S2, &S1)) != MP_OKAY)                  goto LBL_ERR;
 102
 103    /** \\S2 = (4*a2+2*a1+a0) * (4*b2+2*b1+b0); */
 104    /** T1 = T1 + a2; */
 105    if ((err = mp_add(&T1, &a2, &T1)) != MP_OKAY)                  goto LBL_ERR;
 106
 107    /** T1 = T1 << 1; */
 108    if ((err = mp_mul_2(&T1, &T1)) != MP_OKAY)                     goto LBL_ERR;
 109
 110    /** T1 = T1 + a0; */
 111    if ((err = mp_add(&T1, &a0, &T1)) != MP_OKAY)                  goto LBL_ERR;
 112
 113    /** c = c + b2; */
 114    if ((err = mp_add(c, &b2, c)) != MP_OKAY)                      goto LBL_ERR;
 115
 116    /** c = c << 1; */
 117    if ((err = mp_mul_2(c, c)) != MP_OKAY)                         goto LBL_ERR;
 118
 119    /** c = c + b0; */
 120    if ((err = mp_add(c, &b0, c)) != MP_OKAY)                      goto LBL_ERR;
 121
 122    /** S2 = T1 * c; */
 123    if ((err = mp_mul(&T1, c, &S2)) != MP_OKAY)                    goto LBL_ERR;
 124
 125    /** \\S3 = (a2-a1+a0) * (b2-b1+b0); */
 126    /** a1 = a2 - a1; */
 127    if ((err = mp_sub(&a2, &a1, &a1)) != MP_OKAY)                  goto LBL_ERR;
 128
 129    /** a1 = a1 + a0; */
 130    if ((err = mp_add(&a1, &a0, &a1)) != MP_OKAY)                  goto LBL_ERR;
 131
 132    /** b1 = b2 - b1; */
 133    if ((err = mp_sub(&b2, &b1, &b1)) != MP_OKAY)                  goto LBL_ERR;
 134
 135    /** b1 = b1 + b0; */
 136    if ((err = mp_add(&b1, &b0, &b1)) != MP_OKAY)                  goto LBL_ERR;
 137
 138    /** a1 = a1 * b1; */
 139    if ((err = mp_mul(&a1, &b1, &a1)) != MP_OKAY)                  goto LBL_ERR;
 140
 141    /** b1 = a2 * b2; */
 142    if ((err = mp_mul(&a2, &b2, &b1)) != MP_OKAY)                  goto LBL_ERR;
 143
 144    /** \\S2 = (S2 - S3)/3; */
 145    /** S2 = S2 - a1; */
 146    if ((err = mp_sub(&S2, &a1, &S2)) != MP_OKAY)                  goto LBL_ERR;
 147
 148    /** S2 = S2 / 3; \\ this is an exact division  */
 149    if ((err = mp_div_3(&S2, &S2, NULL)) != MP_OKAY)               goto LBL_ERR;
 150
 151    /** a1 = S1 - a1; */
 152    if ((err = mp_sub(&S1, &a1, &a1)) != MP_OKAY)                  goto LBL_ERR;
 153
 154    /** a1 = a1 >> 1; */
 155    if ((err = mp_div_2(&a1, &a1)) != MP_OKAY)                     goto LBL_ERR;
 156
 157    /** a0 = a0 * b0; */
 158    if ((err = mp_mul(&a0, &b0, &a0)) != MP_OKAY)                  goto LBL_ERR;
 159
 160    /** S1 = S1 - a0; */
 161    if ((err = mp_sub(&S1, &a0, &S1)) != MP_OKAY)                  goto LBL_ERR;
 162
 163    /** S2 = S2 - S1; */
 164    if ((err = mp_sub(&S2, &S1, &S2)) != MP_OKAY)                  goto LBL_ERR;
 165
 166    /** S2 = S2 >> 1; */
 167    if ((err = mp_div_2(&S2, &S2)) != MP_OKAY)                     goto LBL_ERR;
 168
 169    /** S1 = S1 - a1; */
 170    if ((err = mp_sub(&S1, &a1, &S1)) != MP_OKAY)                  goto LBL_ERR;
 171
 172    /** S1 = S1 - b1; */
 173    if ((err = mp_sub(&S1, &b1, &S1)) != MP_OKAY)                  goto LBL_ERR;
 174
 175    /** T1 = b1 << 1; */
 176    if ((err = mp_mul_2(&b1, &T1)) != MP_OKAY)                     goto LBL_ERR;
 177
 178    /** S2 = S2 - T1; */
 179    if ((err = mp_sub(&S2, &T1, &S2)) != MP_OKAY)                  goto LBL_ERR;
 180
 181    /** a1 = a1 - S2; */
 182    if ((err = mp_sub(&a1, &S2, &a1)) != MP_OKAY)                  goto LBL_ERR;
 183
 184
 185    /** P = b1*x^4+ S2*x^3+ S1*x^2+ a1*x + a0; */
 186    if ((err = mp_lshd(&b1, 4 * B)) != MP_OKAY)                    goto LBL_ERR;
 187    if ((err = mp_lshd(&S2, 3 * B)) != MP_OKAY)                    goto LBL_ERR;
 188    if ((err = mp_add(&b1, &S2, &b1)) != MP_OKAY)                  goto LBL_ERR;
 189    if ((err = mp_lshd(&S1, 2 * B)) != MP_OKAY)                    goto LBL_ERR;
 190    if ((err = mp_add(&b1, &S1, &b1)) != MP_OKAY)                  goto LBL_ERR;
 191    if ((err = mp_lshd(&a1, 1 * B)) != MP_OKAY)                    goto LBL_ERR;
 192    if ((err = mp_add(&b1, &a1, &b1)) != MP_OKAY)                  goto LBL_ERR;
 193    if ((err = mp_add(&b1, &a0, c)) != MP_OKAY)                    goto LBL_ERR;
 194
 195    /** a * b - P */
 196
 197
 198 LBL_ERR:
 199    mp_clear(&b2);
 200 LBL_ERRb2:
 201    mp_clear(&b1);
 202 LBL_ERRb1:
 203    mp_clear(&b0);
 204 LBL_ERRb0:
 205    mp_clear(&a2);
 206 LBL_ERRa2:
 207    mp_clear(&a1);
 208 LBL_ERRa1:
 209    mp_clear(&a0);
 210 LBL_ERRa0:
 211    mp_clear_multi(&S1, &S2, &T1, NULL);
 212    return err;
 213 }
 214
 215 #endif